diff --git a/atomstrom.py b/atomstrom.py index d825e9d..ca9d7c7 100755 --- a/atomstrom.py +++ b/atomstrom.py @@ -7,7 +7,6 @@ from sqlalchemy import create_engine, desc, func from sqlalchemy.orm import sessionmaker from datetime import datetime from ddate import ddate -import feedparser import sys import codecs import ConfigParser @@ -107,57 +106,6 @@ def mail_single_entries(session, sender, receiver, prefix): else: print 'no unmailed single entries found... not sending mail.' -def process_feed_entry(session, feed, entry): - thisentry = session.query(Entry).\ - filter(Entry.title == entry.title).\ - filter(Entry.link == entry.link).\ - first() - if thisentry: - print ' entry already known <%s>' % entry.title - thisentry.lastfetched = datetime.now() - session.commit() - return 0 - else: - print ' new entry <%s>' % entry.title - feed.entries.append(Entry(entry, feed)) - session.commit() - return 1 - -def fetch_single_feed(session, feed): - print 'processing %d: %s' % (feed.id, feed.url) - fetched = False - if feed.feedinfo: - if (not feed.feedinfo.nextfetch) or (feed.feedinfo.nextfetch < datetime.now()): - print 'feed known, fetching...' - try: - parser = feedparser.parse(feed.url) - fetched = True - feed.feedinfo.update(parser) - except: - print 'ERROR parsing feed' - print sys.exc_info() - else: - print 'not fetching before: %s' % feed.feedinfo.nextfetch - else: - print 'feed seems to be new, fetching...' - try: - parser = feedparser.parse(feed.url) - fetched = True - feed.feedinfo = Feedinfo(parser) - except: - print 'ERROR parsing feed' - print sys.exc_info() - - if fetched: - print 'processing feed entries:' - entries_new = 0 - entries_total = 0 - for entry in parser.entries: - entries_total = entries_total + 1 - entries_new = entries_new + process_feed_entry(session, feed, entry) - session.commit() - print 'updated %d of %d entries' % (entries_new, entries_total) - def list_all_feeds(session): allfeeds = session.query(Feed).\ order_by(Feed.id) @@ -175,7 +123,7 @@ def fetch_all_feeds(session): filter_by(enabled=1).\ order_by(Feed.id) for feed in allfeeds: - fetch_single_feed(session, feed) + feed.fetch() print def ask_ok(prompt, retries=4, complaint='Yes or no, please!'): diff --git a/models/feed.py b/models/feed.py index 40c5d7a..397567d 100644 --- a/models/feed.py +++ b/models/feed.py @@ -3,8 +3,13 @@ from sqlalchemy import Column, Integer, String, Boolean, Enum from sqlalchemy.orm import relationship, backref +from datetime import datetime +import feedparser +import sys from models import Base +from models.feedinfo import Feedinfo +from models.entry import Entry class Feed(Base): @@ -22,7 +27,7 @@ class Feed(Base): html2textignorelinks = Column(Boolean) html2textignoreimages = Column(Boolean) enabled = Column(Boolean) - entries = relationship("Entry", backref=backref('feed'), cascade='all, delete, delete-orphan') + entries = relationship("Entry", backref=backref('feed'), lazy='dynamic', cascade='all, delete, delete-orphan') feedinfo = relationship("Feedinfo", backref=backref('feed'), cascade='all, delete, delete-orphan', uselist=False) def __init__(self, url, daily, readability, fullpage, enabled, html2textcontent): @@ -55,6 +60,50 @@ class Feed(Base): def __repr__(self): return "" % (self.id, self.url) + def fetch(self): + print 'processing %d: %s' % (self.id, self.url) + fetched = False + if self.feedinfo: + if (not self.feedinfo.nextfetch) or (self.feedinfo.nextfetch < datetime.now()): + print 'feed known, fetching...' + try: + parser = feedparser.parse(self.url) + fetched = True + self.feedinfo.update(parser) + except: + print 'ERROR parsing feed' + print sys.exc_info() + else: + print 'not fetching before: %s' % self.feedinfo.nextfetch + else: + print 'feed seems to be new, fetching...' + try: + parser = feedparser.parse(self.url) + fetched = True + self.feedinfo = Feedinfo(parser) + except: + print 'ERROR parsing feed' + print sys.exc_info() + + if fetched: + print 'processing feed entries:' + entries_new = 0 + entries_total = 0 + for entry in parser.entries: + entries_total += 1 + thisentry = self.entries.\ + filter(Entry.title == entry.title).\ + filter(Entry.link == entry.link).\ + first() + if thisentry: + print ' entry already known <%s>' % entry.title + thisentry.lastfetched = datetime.now() + else: + print ' new entry <%s>' % entry.title + self.entries.append(Entry(entry, self)) + entries_new += 1 + print 'updated %d of %d entries' % (entries_new, entries_total) + def reset(self): self.entries[:] = [] self.feedinfo = None