html2text functionality for entry summary

This commit is contained in:
Ronald Schaten 2010-10-31 14:20:12 +01:00
parent 8871785cb1
commit 64c3506a9e

View File

@@ -21,13 +21,15 @@ class Feed(Base):
daily = Column(Boolean) daily = Column(Boolean)
readability = Column(Boolean) readability = Column(Boolean)
fullpage = Column(Boolean) fullpage = Column(Boolean)
html2textsummary = Column(Boolean)
enabled = Column(Boolean) enabled = Column(Boolean)
def __init__(self, url, daily, readability, fullpage, enabled): def __init__(self, url, daily, readability, fullpage, enabled, html2textsummary):
self.url = url self.url = url
self.daily = daily self.daily = daily
self.readability = readability self.readability = readability
self.fullpage = fullpage self.fullpage = fullpage
self.html2textsummary = html2textsummary
self.enabled = enabled self.enabled = enabled
def __repr__(self): def __repr__(self):
@@ -190,7 +192,10 @@ def fetch_full_page(link):
return text.encode('latin-1', 'replace') return text.encode('latin-1', 'replace')
def process_feed_entry(feed, entry): def process_feed_entry(feed, entry):
query = session.query(Entry).filter_by(feed_id=feed.id, title=entry.title.encode('latin-1', 'replace')) #query = session.query(Entry).filter_by(feed_id=feed.id, title=entry.title.encode('latin-1', 'replace'))
title = entry.title.encode('latin-1', 'replace')
link = entry.link.encode('latin-1', 'replace')
query = session.query(Entry).filter(Entry.feed_id==feed.id).filter(Entry.title==title).filter(Entry.link==link)
try: try:
thisentry = query.one() thisentry = query.one()
thisentry.update(entry) thisentry.update(entry)
@@ -205,6 +210,11 @@ def process_feed_entry(feed, entry):
if feed.readability: if feed.readability:
print ' fetching readability <%s>' % entry.link print ' fetching readability <%s>' % entry.link
thisentry.readability = fetch_readability(entry.link) thisentry.readability = fetch_readability(entry.link)
if feed.html2textsummary:
print ' converting summary'
summary = thisentry.summary.decode('latin-1')
summary = html2text.html2text(summary)
thisentry.summary = summary.encode('latin-1', 'replace')
feed.entry.append(thisentry) feed.entry.append(thisentry)
return 1 return 1
@@ -227,7 +237,7 @@ def fetch_single_feed(feed):
entries_total = entries_total + 1 entries_total = entries_total + 1
entries_new = entries_new + process_feed_entry(feed, entry) entries_new = entries_new + process_feed_entry(feed, entry)
session.commit() session.commit()
print 'fetched %d from %d entries' % (entries_total, entries_new) print 'updated %d of %d entries' % (entries_new, entries_total)
def fetch_all_feeds(): def fetch_all_feeds():
print 'fetching all feeds...' print 'fetching all feeds...'