major refactoring: feed knows how to fetch itself
This commit is contained in:
parent
7e6c660b0c
commit
a1c6ab0c09
54
atomstrom.py
54
atomstrom.py
@ -7,7 +7,6 @@ from sqlalchemy import create_engine, desc, func
|
|||||||
from sqlalchemy.orm import sessionmaker
|
from sqlalchemy.orm import sessionmaker
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from ddate import ddate
|
from ddate import ddate
|
||||||
import feedparser
|
|
||||||
import sys
|
import sys
|
||||||
import codecs
|
import codecs
|
||||||
import ConfigParser
|
import ConfigParser
|
||||||
@ -107,57 +106,6 @@ def mail_single_entries(session, sender, receiver, prefix):
|
|||||||
else:
|
else:
|
||||||
print 'no unmailed single entries found... not sending mail.'
|
print 'no unmailed single entries found... not sending mail.'
|
||||||
|
|
||||||
def process_feed_entry(session, feed, entry):
    """Record one parsed feed entry, creating it if it is not stored yet.

    An entry counts as already known when a stored ``Entry`` has the same
    title and the same link as ``entry``.

    :param session: database session used for the lookup and the commit
    :param feed: feed object whose ``entries`` collection receives new items
    :param entry: parsed entry; its ``title`` and ``link`` are read here
    :returns: 1 when a new ``Entry`` was appended, 0 when it already existed
    """
    by_title = session.query(Entry).filter(Entry.title == entry.title)
    known = by_title.filter(Entry.link == entry.link).first()

    if not known:
        print(' new entry <%s>' % entry.title)
        feed.entries.append(Entry(entry, feed))
        session.commit()
        return 1

    # Already stored: only refresh the "last seen" timestamp.
    print(' entry already known <%s>' % entry.title)
    known.lastfetched = datetime.now()
    session.commit()
    return 0
|
|
||||||
|
|
||||||
def fetch_single_feed(session, feed):
    """Fetch one feed if it is due and hand its entries to process_feed_entry.

    A feed without ``feedinfo`` is treated as new and always fetched. A feed
    with ``feedinfo`` is fetched only when ``feedinfo.nextfetch`` is unset or
    lies in the past; otherwise the function reports the next fetch time and
    returns without touching the network.

    :param session: database session, passed on to process_feed_entry and
        committed once after all entries have been handled
    :param feed: feed object; ``id``, ``url`` and ``feedinfo`` are read, and
        ``feedinfo`` is created or updated from the parse result
    :returns: None
    """
    print('processing %d: %s' % (feed.id, feed.url))

    known = bool(feed.feedinfo)
    if known:
        nextfetch = feed.feedinfo.nextfetch
        if nextfetch and not (nextfetch < datetime.now()):
            print('not fetching before: %s' % nextfetch)
            return
        print('feed known, fetching...')
    else:
        print('feed seems to be new, fetching...')

    # ``parser`` stays None only when feedparser.parse() itself failed. If
    # the feedinfo bookkeeping fails afterwards, the entries are still
    # processed, exactly as before.
    parser = None
    try:
        parser = feedparser.parse(feed.url)
        if known:
            feed.feedinfo.update(parser)
        else:
            feed.feedinfo = Feedinfo(parser)
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit and made a running fetch impossible to abort cleanly.
        print('ERROR parsing feed')
        print(sys.exc_info())

    if parser is None:
        return

    print('processing feed entries:')
    entries_new = 0
    entries_total = 0
    for entry in parser.entries:
        entries_total += 1
        entries_new += process_feed_entry(session, feed, entry)
    session.commit()
    print('updated %d of %d entries' % (entries_new, entries_total))
|
|
||||||
|
|
||||||
def list_all_feeds(session):
|
def list_all_feeds(session):
|
||||||
allfeeds = session.query(Feed).\
|
allfeeds = session.query(Feed).\
|
||||||
order_by(Feed.id)
|
order_by(Feed.id)
|
||||||
@ -175,7 +123,7 @@ def fetch_all_feeds(session):
|
|||||||
filter_by(enabled=1).\
|
filter_by(enabled=1).\
|
||||||
order_by(Feed.id)
|
order_by(Feed.id)
|
||||||
for feed in allfeeds:
|
for feed in allfeeds:
|
||||||
fetch_single_feed(session, feed)
|
feed.fetch()
|
||||||
print
|
print
|
||||||
|
|
||||||
def ask_ok(prompt, retries=4, complaint='Yes or no, please!'):
|
def ask_ok(prompt, retries=4, complaint='Yes or no, please!'):
|
||||||
|
@ -3,8 +3,13 @@
|
|||||||
|
|
||||||
from sqlalchemy import Column, Integer, String, Boolean, Enum
|
from sqlalchemy import Column, Integer, String, Boolean, Enum
|
||||||
from sqlalchemy.orm import relationship, backref
|
from sqlalchemy.orm import relationship, backref
|
||||||
|
from datetime import datetime
|
||||||
|
import feedparser
|
||||||
|
import sys
|
||||||
|
|
||||||
from models import Base
|
from models import Base
|
||||||
|
from models.feedinfo import Feedinfo
|
||||||
|
from models.entry import Entry
|
||||||
|
|
||||||
|
|
||||||
class Feed(Base):
|
class Feed(Base):
|
||||||
@ -22,7 +27,7 @@ class Feed(Base):
|
|||||||
html2textignorelinks = Column(Boolean)
|
html2textignorelinks = Column(Boolean)
|
||||||
html2textignoreimages = Column(Boolean)
|
html2textignoreimages = Column(Boolean)
|
||||||
enabled = Column(Boolean)
|
enabled = Column(Boolean)
|
||||||
entries = relationship("Entry", backref=backref('feed'), cascade='all, delete, delete-orphan')
|
entries = relationship("Entry", backref=backref('feed'), lazy='dynamic', cascade='all, delete, delete-orphan')
|
||||||
feedinfo = relationship("Feedinfo", backref=backref('feed'), cascade='all, delete, delete-orphan', uselist=False)
|
feedinfo = relationship("Feedinfo", backref=backref('feed'), cascade='all, delete, delete-orphan', uselist=False)
|
||||||
|
|
||||||
def __init__(self, url, daily, readability, fullpage, enabled, html2textcontent):
|
def __init__(self, url, daily, readability, fullpage, enabled, html2textcontent):
|
||||||
@ -55,6 +60,50 @@ class Feed(Base):
|
|||||||
def __repr__(self):
    """Return a short debug representation containing the feed id and URL.

    The id is formatted with ``%s`` instead of ``%d`` so that an instance
    whose ``id`` is still ``None`` can be printed rather than raising
    TypeError. NOTE(review): presumably ``id`` is ``None`` until the row has
    been flushed; confirm against the column definition. For integer ids
    the output is unchanged.
    """
    return "<Feed('%s','%s')>" % (self.id, self.url)
|
||||||
|
|
||||||
|
def fetch(self):
    """Fetch this feed if it is due and store any entries not yet known.

    A feed without ``feedinfo`` is treated as new and always fetched. A feed
    with ``feedinfo`` is fetched only when ``feedinfo.nextfetch`` is unset or
    lies in the past; otherwise the next fetch time is reported and nothing
    else happens.

    For every parsed entry, ``self.entries`` is searched for an item with
    the same title and link. A match only gets its ``lastfetched`` timestamp
    refreshed; anything else is appended as a new ``Entry``. This method does
    not commit; persisting the changes is left to the caller.

    :returns: None
    """
    print('processing %d: %s' % (self.id, self.url))

    known = bool(self.feedinfo)
    if known:
        nextfetch = self.feedinfo.nextfetch
        if nextfetch and not (nextfetch < datetime.now()):
            print('not fetching before: %s' % nextfetch)
            return
        print('feed known, fetching...')
    else:
        print('feed seems to be new, fetching...')

    # ``parser`` stays None only when feedparser.parse() itself failed. If
    # the feedinfo bookkeeping fails afterwards, the entries are still
    # processed, exactly as before.
    parser = None
    try:
        parser = feedparser.parse(self.url)
        if known:
            self.feedinfo.update(parser)
        else:
            self.feedinfo = Feedinfo(parser)
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit and made a running fetch impossible to abort cleanly.
        print('ERROR parsing feed')
        print(sys.exc_info())

    if parser is None:
        return

    print('processing feed entries:')
    entries_new = 0
    entries_total = 0
    for entry in parser.entries:
        entries_total += 1
        # ``self.entries`` is used as a query here (.filter/.first), so the
        # duplicate check is limited to entries of this feed.
        thisentry = self.entries.\
            filter(Entry.title == entry.title).\
            filter(Entry.link == entry.link).\
            first()
        if thisentry:
            print(' entry already known <%s>' % entry.title)
            thisentry.lastfetched = datetime.now()
        else:
            print(' new entry <%s>' % entry.title)
            self.entries.append(Entry(entry, self))
            entries_new += 1
    print('updated %d of %d entries' % (entries_new, entries_total))
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self.entries[:] = []
|
self.entries[:] = []
|
||||||
self.feedinfo = None
|
self.feedinfo = None
|
||||||
|
Loading…
Reference in New Issue
Block a user