major refactoring: feed knows how to fetch itself

This commit is contained in:
Ronald Schaten 2013-04-09 23:18:50 +02:00
parent 7e6c660b0c
commit a1c6ab0c09
2 changed files with 51 additions and 54 deletions

View File

@ -7,7 +7,6 @@ from sqlalchemy import create_engine, desc, func
from sqlalchemy.orm import sessionmaker
from datetime import datetime
from ddate import ddate
import feedparser
import sys
import codecs
import ConfigParser
@ -107,57 +106,6 @@ def mail_single_entries(session, sender, receiver, prefix):
else:
print 'no unmailed single entries found... not sending mail.'
def process_feed_entry(session, feed, entry):
    """Store one parsed feed entry, or refresh it if it is already stored.

    Looks for an ``Entry`` row whose title and link both equal those of
    ``entry``. If one exists, its ``lastfetched`` is set to the current
    time; otherwise a new ``Entry(entry, feed)`` is appended to
    ``feed.entries``. The session is committed in both cases.

    Returns 1 when a new entry was added and 0 when it was already known,
    so callers can sum the results to count new entries.
    """
    known = session.query(Entry)
    known = known.filter(Entry.title == entry.title)
    known = known.filter(Entry.link == entry.link)
    known = known.first()

    # Guard clause: the "new entry" path leaves early.
    if not known:
        print(' new entry <%s>' % entry.title)
        feed.entries.append(Entry(entry, feed))
        session.commit()
        return 1

    print(' entry already known <%s>' % entry.title)
    known.lastfetched = datetime.now()
    session.commit()
    return 0
def fetch_single_feed(session, feed):
    """Fetch ``feed`` if it is due and hand its entries to process_feed_entry.

    A feed without ``feedinfo`` is treated as new and always fetched. A feed
    with ``feedinfo`` is fetched only when ``feedinfo.nextfetch`` is unset or
    lies in the past; otherwise the function prints the next fetch time and
    returns without touching the network.

    After a successful ``feedparser.parse`` the feed's ``feedinfo`` is
    updated (existing feed) or created via ``Feedinfo(parser)`` (new feed),
    then every parsed entry is passed to ``process_feed_entry`` and a
    summary line is printed.

    Errors raised while fetching or updating ``feedinfo`` are printed and
    not re-raised, so one broken feed does not abort a run over all feeds.
    Returns None.
    """
    print('processing %d: %s' % (feed.id, feed.url))

    is_known = bool(feed.feedinfo)
    if is_known:
        nextfetch = feed.feedinfo.nextfetch
        if nextfetch and not (nextfetch < datetime.now()):
            print('not fetching before: %s' % nextfetch)
            return
        print('feed known, fetching...')
    else:
        print('feed seems to be new, fetching...')

    # parser stays None only if feedparser.parse() itself failed; a failure
    # while updating feedinfo still leaves the parsed entries usable.
    parser = None
    try:
        parser = feedparser.parse(feed.url)
        if is_known:
            feed.feedinfo.update(parser)
        else:
            feed.feedinfo = Feedinfo(parser)
    except Exception:
        # Deliberately not a bare ``except:``: KeyboardInterrupt and
        # SystemExit must still be able to stop the program.
        print('ERROR parsing feed')
        print(sys.exc_info())

    if parser is None:
        return

    print('processing feed entries:')
    entries_new = 0
    entries_total = 0
    for entry in parser.entries:
        entries_total += 1
        entries_new += process_feed_entry(session, feed, entry)
    # Commit once more so feedinfo changes are saved even with no entries.
    session.commit()
    print('updated %d of %d entries' % (entries_new, entries_total))
def list_all_feeds(session):
allfeeds = session.query(Feed).\
order_by(Feed.id)
@ -175,7 +123,7 @@ def fetch_all_feeds(session):
filter_by(enabled=1).\
order_by(Feed.id)
for feed in allfeeds:
fetch_single_feed(session, feed)
feed.fetch()
print
def ask_ok(prompt, retries=4, complaint='Yes or no, please!'):

View File

@ -3,8 +3,13 @@
from sqlalchemy import Column, Integer, String, Boolean, Enum
from sqlalchemy.orm import relationship, backref
from datetime import datetime
import feedparser
import sys
from models import Base
from models.feedinfo import Feedinfo
from models.entry import Entry
class Feed(Base):
@ -22,7 +27,7 @@ class Feed(Base):
html2textignorelinks = Column(Boolean)
html2textignoreimages = Column(Boolean)
enabled = Column(Boolean)
entries = relationship("Entry", backref=backref('feed'), cascade='all, delete, delete-orphan')
entries = relationship("Entry", backref=backref('feed'), lazy='dynamic', cascade='all, delete, delete-orphan')
feedinfo = relationship("Feedinfo", backref=backref('feed'), cascade='all, delete, delete-orphan', uselist=False)
def __init__(self, url, daily, readability, fullpage, enabled, html2textcontent):
@ -55,6 +60,50 @@ class Feed(Base):
def __repr__(self):
    """Return a short debug representation: ``<Feed('<id>','<url>')>``.

    The id is formatted with ``%s`` rather than ``%d`` so that an object
    whose ``id`` is still None can be printed without raising TypeError;
    integer ids render exactly as before.
    """
    return "<Feed('%s','%s')>" % (self.id, self.url)
def fetch(self):
    """Fetch this feed if it is due and merge the parsed entries into it.

    A feed without ``feedinfo`` is treated as new and always fetched. A feed
    with ``feedinfo`` is fetched only when ``feedinfo.nextfetch`` is unset or
    lies in the past; otherwise the next fetch time is printed and the
    method returns.

    After a successful ``feedparser.parse`` the ``feedinfo`` is updated
    (existing feed) or created via ``Feedinfo(parser)`` (new feed). Each
    parsed entry is then looked up in ``self.entries`` by title and link:
    a match gets its ``lastfetched`` set to now, anything else is appended
    as a new ``Entry(entry, self)``.

    Errors raised while fetching or updating ``feedinfo`` are printed and
    not re-raised. No commit is issued here; persisting the changes is left
    to the caller. Returns None.
    """
    print('processing %d: %s' % (self.id, self.url))

    is_known = bool(self.feedinfo)
    if is_known:
        nextfetch = self.feedinfo.nextfetch
        if nextfetch and not (nextfetch < datetime.now()):
            print('not fetching before: %s' % nextfetch)
            return
        print('feed known, fetching...')
    else:
        print('feed seems to be new, fetching...')

    # parser stays None only if feedparser.parse() itself failed; a failure
    # while updating feedinfo still leaves the parsed entries usable.
    parser = None
    try:
        parser = feedparser.parse(self.url)
        if is_known:
            self.feedinfo.update(parser)
        else:
            self.feedinfo = Feedinfo(parser)
    except Exception:
        # Deliberately not a bare ``except:``: KeyboardInterrupt and
        # SystemExit must still be able to stop the program.
        print('ERROR parsing feed')
        print(sys.exc_info())

    if parser is None:
        return

    print('processing feed entries:')
    entries_new = 0
    entries_total = 0
    for entry in parser.entries:
        entries_total += 1
        # self.entries is used as a query here (filter/first), so the
        # lookup is restricted to entries belonging to this feed.
        thisentry = self.entries.\
            filter(Entry.title == entry.title).\
            filter(Entry.link == entry.link).\
            first()
        if thisentry:
            print(' entry already known <%s>' % entry.title)
            thisentry.lastfetched = datetime.now()
        else:
            print(' new entry <%s>' % entry.title)
            self.entries.append(Entry(entry, self))
            entries_new += 1
    print('updated %d of %d entries' % (entries_new, entries_total))
def reset(self):
self.entries[:] = []
self.feedinfo = None