implemented fetch-frequency for feeds

Ronald Schaten 2013-03-19 21:18:38 +01:00
parent bc1b0655e0
commit 3333d1fcaf


@@ -3,7 +3,7 @@
 from sqlalchemy import create_engine, Table, Column, Integer, Text, Boolean, DateTime, MetaData, ForeignKey, desc
 from sqlalchemy.orm import sessionmaker, relation, backref
 from sqlalchemy.ext.declarative import declarative_base
-from datetime import datetime
+from datetime import datetime, timedelta
 from time import mktime
 import feedparser
 import re
@@ -12,6 +12,7 @@ import urllib
 import hn
 import html2text
 import ConfigParser
+import pprint
 import smtplib
 from email.mime.text import MIMEText
 from optparse import OptionParser
@@ -23,6 +24,7 @@ class Feed(Base):
     id = Column(Integer, primary_key=True)
     url = Column(Text)
+    frequency = Column(Integer)
     daily = Column(Boolean)
     readability = Column(Boolean)
     fullpage = Column(Boolean)
@@ -225,25 +227,35 @@ def process_feed_entry(session, feed, entry):
     return 1

 def fetch_single_feed(session, feed):
-    print 'fetching %s' % feed.url
-    parser = feedparser.parse(feed.url)
-    print 'processing feed info...'
+    print 'processing %s' % feed.url
     query = session.query(Feedinfo).filter(Feedinfo.feed_id==feed.id)
+    fetched = False
     try:
         feed.feedinfo = query.one()
-        feed.feedinfo.update(parser)
+        nextfetch = (feed.feedinfo.lastfetched + timedelta(minutes=feed.frequency))
+        if datetime.now() > nextfetch:
+            print 'fetching...'
+            parser = feedparser.parse(feed.url)
+            fetched = True
+            feed.feedinfo.update(parser)
+        else:
+            print 'not fetching before: %s' % nextfetch
     except Exception, e:
         print 'this feed seems to be new'
+        print 'fetching...'
+        parser = feedparser.parse(feed.url)
+        fetched = True
         feed.feedinfo = Feedinfo(parser)
-    print 'processing feed entries:'
-    entries_new = 0
-    entries_total = 0
-    for entry in parser.entries:
-        entries_total = entries_total + 1
-        entries_new = entries_new + process_feed_entry(session, feed, entry)
-    session.commit()
-    print 'updated %d of %d entries' % (entries_new, entries_total)
+    if fetched:
+        print 'processing feed entries:'
+        entries_new = 0
+        entries_total = 0
+        for entry in parser.entries:
+            entries_total = entries_total + 1
+            entries_new = entries_new + process_feed_entry(session, feed, entry)
+        session.commit()
+        print 'updated %d of %d entries' % (entries_new, entries_total)

 def fetch_all_feeds(session):
     print 'fetching all feeds...'
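
The net effect of the change: each feed now carries a frequency value in minutes, and fetch_single_feed only re-downloads a feed once that many minutes have elapsed since feedinfo.lastfetched; feeds without stored info are always fetched. A minimal standalone sketch of that decision logic, using a hypothetical helper is_due that is not part of the script:

from datetime import datetime, timedelta

def is_due(lastfetched, frequency_minutes, now=None):
    """Return True if a feed last fetched at `lastfetched` should be
    fetched again, given a minimum interval of `frequency_minutes`."""
    if lastfetched is None:
        return True  # never fetched before, so fetch now
    now = now or datetime.now()
    nextfetch = lastfetched + timedelta(minutes=frequency_minutes)
    return now > nextfetch

# example: a feed fetched 90 minutes ago with a 60-minute frequency is due again
print(is_due(datetime.now() - timedelta(minutes=90), 60))  # True

Because the check relies only on the stored lastfetched timestamp, no extra scheduler state is needed: each run of the script simply skips feeds that are not yet due.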