implemented fetch-frequency for feeds
This commit is contained in:
parent
bc1b0655e0
commit
3333d1fcaf
38
atomstrom.py
38
atomstrom.py
@ -3,7 +3,7 @@
|
|||||||
from sqlalchemy import create_engine, Table, Column, Integer, Text, Boolean, DateTime, MetaData, ForeignKey, desc
|
from sqlalchemy import create_engine, Table, Column, Integer, Text, Boolean, DateTime, MetaData, ForeignKey, desc
|
||||||
from sqlalchemy.orm import sessionmaker, relation, backref
|
from sqlalchemy.orm import sessionmaker, relation, backref
|
||||||
from sqlalchemy.ext.declarative import declarative_base
|
from sqlalchemy.ext.declarative import declarative_base
|
||||||
from datetime import datetime
|
from datetime import datetime, timedelta
|
||||||
from time import mktime
|
from time import mktime
|
||||||
import feedparser
|
import feedparser
|
||||||
import re
|
import re
|
||||||
@ -12,6 +12,7 @@ import urllib
|
|||||||
import hn
|
import hn
|
||||||
import html2text
|
import html2text
|
||||||
import ConfigParser
|
import ConfigParser
|
||||||
|
import pprint
|
||||||
import smtplib
|
import smtplib
|
||||||
from email.mime.text import MIMEText
|
from email.mime.text import MIMEText
|
||||||
from optparse import OptionParser
|
from optparse import OptionParser
|
||||||
@ -23,6 +24,7 @@ class Feed(Base):
|
|||||||
|
|
||||||
id = Column(Integer, primary_key=True)
|
id = Column(Integer, primary_key=True)
|
||||||
url = Column(Text)
|
url = Column(Text)
|
||||||
|
frequency = Column(Integer)
|
||||||
daily = Column(Boolean)
|
daily = Column(Boolean)
|
||||||
readability = Column(Boolean)
|
readability = Column(Boolean)
|
||||||
fullpage = Column(Boolean)
|
fullpage = Column(Boolean)
|
||||||
@ -225,25 +227,35 @@ def process_feed_entry(session, feed, entry):
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
def fetch_single_feed(session, feed):
|
def fetch_single_feed(session, feed):
|
||||||
print 'fetching %s' % feed.url
|
print 'processing %s' % feed.url
|
||||||
parser = feedparser.parse(feed.url)
|
|
||||||
print 'processing feed info...'
|
|
||||||
query = session.query(Feedinfo).filter(Feedinfo.feed_id==feed.id)
|
query = session.query(Feedinfo).filter(Feedinfo.feed_id==feed.id)
|
||||||
|
fetched = False
|
||||||
try:
|
try:
|
||||||
feed.feedinfo = query.one()
|
feed.feedinfo = query.one()
|
||||||
feed.feedinfo.update(parser)
|
nextfetch = (feed.feedinfo.lastfetched + timedelta(minutes=feed.frequency))
|
||||||
|
if datetime.now() > nextfetch:
|
||||||
|
print 'fetching...'
|
||||||
|
parser = feedparser.parse(feed.url)
|
||||||
|
fetched = True
|
||||||
|
feed.feedinfo.update(parser)
|
||||||
|
else:
|
||||||
|
print 'not fetching before: %s' % nextfetch
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
print 'this feed seems to be new'
|
print 'this feed seems to be new'
|
||||||
|
print 'fetching...'
|
||||||
|
parser = feedparser.parse(feed.url)
|
||||||
|
fetched = True
|
||||||
feed.feedinfo = Feedinfo(parser)
|
feed.feedinfo = Feedinfo(parser)
|
||||||
|
|
||||||
print 'processing feed entries:'
|
if fetched:
|
||||||
entries_new = 0
|
print 'processing feed entries:'
|
||||||
entries_total = 0
|
entries_new = 0
|
||||||
for entry in parser.entries:
|
entries_total = 0
|
||||||
entries_total = entries_total + 1
|
for entry in parser.entries:
|
||||||
entries_new = entries_new + process_feed_entry(session, feed, entry)
|
entries_total = entries_total + 1
|
||||||
session.commit()
|
entries_new = entries_new + process_feed_entry(session, feed, entry)
|
||||||
print 'updated %d of %d entries' % (entries_new, entries_total)
|
session.commit()
|
||||||
|
print 'updated %d of %d entries' % (entries_new, entries_total)
|
||||||
|
|
||||||
def fetch_all_feeds(session):
|
def fetch_all_feeds(session):
|
||||||
print 'fetching all feeds...'
|
print 'fetching all feeds...'
|
||||||
|
Loading…
Reference in New Issue
Block a user