major refactoring: feed knows how to fetch itself
This commit is contained in:
parent
7e6c660b0c
commit
a1c6ab0c09
54
atomstrom.py
54
atomstrom.py
@ -7,7 +7,6 @@ from sqlalchemy import create_engine, desc, func
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from datetime import datetime
|
||||
from ddate import ddate
|
||||
import feedparser
|
||||
import sys
|
||||
import codecs
|
||||
import ConfigParser
|
||||
@ -107,57 +106,6 @@ def mail_single_entries(session, sender, receiver, prefix):
|
||||
else:
|
||||
print 'no unmailed single entries found... not sending mail.'
|
||||
|
||||
def process_feed_entry(session, feed, entry):
|
||||
thisentry = session.query(Entry).\
|
||||
filter(Entry.title == entry.title).\
|
||||
filter(Entry.link == entry.link).\
|
||||
first()
|
||||
if thisentry:
|
||||
print ' entry already known <%s>' % entry.title
|
||||
thisentry.lastfetched = datetime.now()
|
||||
session.commit()
|
||||
return 0
|
||||
else:
|
||||
print ' new entry <%s>' % entry.title
|
||||
feed.entries.append(Entry(entry, feed))
|
||||
session.commit()
|
||||
return 1
|
||||
|
||||
def fetch_single_feed(session, feed):
|
||||
print 'processing %d: %s' % (feed.id, feed.url)
|
||||
fetched = False
|
||||
if feed.feedinfo:
|
||||
if (not feed.feedinfo.nextfetch) or (feed.feedinfo.nextfetch < datetime.now()):
|
||||
print 'feed known, fetching...'
|
||||
try:
|
||||
parser = feedparser.parse(feed.url)
|
||||
fetched = True
|
||||
feed.feedinfo.update(parser)
|
||||
except:
|
||||
print 'ERROR parsing feed'
|
||||
print sys.exc_info()
|
||||
else:
|
||||
print 'not fetching before: %s' % feed.feedinfo.nextfetch
|
||||
else:
|
||||
print 'feed seems to be new, fetching...'
|
||||
try:
|
||||
parser = feedparser.parse(feed.url)
|
||||
fetched = True
|
||||
feed.feedinfo = Feedinfo(parser)
|
||||
except:
|
||||
print 'ERROR parsing feed'
|
||||
print sys.exc_info()
|
||||
|
||||
if fetched:
|
||||
print 'processing feed entries:'
|
||||
entries_new = 0
|
||||
entries_total = 0
|
||||
for entry in parser.entries:
|
||||
entries_total = entries_total + 1
|
||||
entries_new = entries_new + process_feed_entry(session, feed, entry)
|
||||
session.commit()
|
||||
print 'updated %d of %d entries' % (entries_new, entries_total)
|
||||
|
||||
def list_all_feeds(session):
|
||||
allfeeds = session.query(Feed).\
|
||||
order_by(Feed.id)
|
||||
@ -175,7 +123,7 @@ def fetch_all_feeds(session):
|
||||
filter_by(enabled=1).\
|
||||
order_by(Feed.id)
|
||||
for feed in allfeeds:
|
||||
fetch_single_feed(session, feed)
|
||||
feed.fetch()
|
||||
print
|
||||
|
||||
def ask_ok(prompt, retries=4, complaint='Yes or no, please!'):
|
||||
|
@ -3,8 +3,13 @@
|
||||
|
||||
from sqlalchemy import Column, Integer, String, Boolean, Enum
|
||||
from sqlalchemy.orm import relationship, backref
|
||||
from datetime import datetime
|
||||
import feedparser
|
||||
import sys
|
||||
|
||||
from models import Base
|
||||
from models.feedinfo import Feedinfo
|
||||
from models.entry import Entry
|
||||
|
||||
|
||||
class Feed(Base):
|
||||
@ -22,7 +27,7 @@ class Feed(Base):
|
||||
html2textignorelinks = Column(Boolean)
|
||||
html2textignoreimages = Column(Boolean)
|
||||
enabled = Column(Boolean)
|
||||
entries = relationship("Entry", backref=backref('feed'), cascade='all, delete, delete-orphan')
|
||||
entries = relationship("Entry", backref=backref('feed'), lazy='dynamic', cascade='all, delete, delete-orphan')
|
||||
feedinfo = relationship("Feedinfo", backref=backref('feed'), cascade='all, delete, delete-orphan', uselist=False)
|
||||
|
||||
def __init__(self, url, daily, readability, fullpage, enabled, html2textcontent):
|
||||
@ -55,6 +60,50 @@ class Feed(Base):
|
||||
def __repr__(self):
    """Short debug representation: database id and source URL."""
    return "<Feed('{0:d}','{1}')>".format(self.id, self.url)
|
||||
|
||||
def fetch(self):
|
||||
print 'processing %d: %s' % (self.id, self.url)
|
||||
fetched = False
|
||||
if self.feedinfo:
|
||||
if (not self.feedinfo.nextfetch) or (self.feedinfo.nextfetch < datetime.now()):
|
||||
print 'feed known, fetching...'
|
||||
try:
|
||||
parser = feedparser.parse(self.url)
|
||||
fetched = True
|
||||
self.feedinfo.update(parser)
|
||||
except:
|
||||
print 'ERROR parsing feed'
|
||||
print sys.exc_info()
|
||||
else:
|
||||
print 'not fetching before: %s' % self.feedinfo.nextfetch
|
||||
else:
|
||||
print 'feed seems to be new, fetching...'
|
||||
try:
|
||||
parser = feedparser.parse(self.url)
|
||||
fetched = True
|
||||
self.feedinfo = Feedinfo(parser)
|
||||
except:
|
||||
print 'ERROR parsing feed'
|
||||
print sys.exc_info()
|
||||
|
||||
if fetched:
|
||||
print 'processing feed entries:'
|
||||
entries_new = 0
|
||||
entries_total = 0
|
||||
for entry in parser.entries:
|
||||
entries_total += 1
|
||||
thisentry = self.entries.\
|
||||
filter(Entry.title == entry.title).\
|
||||
filter(Entry.link == entry.link).\
|
||||
first()
|
||||
if thisentry:
|
||||
print ' entry already known <%s>' % entry.title
|
||||
thisentry.lastfetched = datetime.now()
|
||||
else:
|
||||
print ' new entry <%s>' % entry.title
|
||||
self.entries.append(Entry(entry, self))
|
||||
entries_new += 1
|
||||
print 'updated %d of %d entries' % (entries_new, entries_total)
|
||||
|
||||
def reset(self):
    """Drop all fetched state for this feed.

    Clearing the collections in place (rather than rebinding the
    attribute) lets the 'delete, delete-orphan' cascade declared on both
    relationships remove the dependent rows.
    """
    # NOTE(review): 'entries' is declared lazy='dynamic' in the hunk above;
    # slice assignment on a dynamic (query-like) collection may not be
    # supported -- TODO confirm against the SQLAlchemy version in use.
    self.entries[:] = []
    self.feedinfo = None
|
||||
|
Loading…
Reference in New Issue
Block a user