Add optional html2text conversion for entry summaries
This commit is contained in:
parent
8871785cb1
commit
64c3506a9e
16
atomstrom.py
16
atomstrom.py
@ -21,13 +21,15 @@ class Feed(Base):
|
||||
daily = Column(Boolean)
|
||||
readability = Column(Boolean)
|
||||
fullpage = Column(Boolean)
|
||||
html2textsummary = Column(Boolean)
|
||||
enabled = Column(Boolean)
|
||||
|
||||
def __init__(self, url, daily, readability, fullpage, enabled):
|
||||
def __init__(self, url, daily, readability, fullpage, enabled, html2textsummary):
|
||||
self.url = url
|
||||
self.daily = daily
|
||||
self.readability = readability
|
||||
self.fullpage = fullpage
|
||||
self.html2textsummary = html2textsummary
|
||||
self.enabled = enabled
|
||||
|
||||
def __repr__(self):
|
||||
@ -190,7 +192,10 @@ def fetch_full_page(link):
|
||||
return text.encode('latin-1', 'replace')
|
||||
|
||||
def process_feed_entry(feed, entry):
|
||||
query = session.query(Entry).filter_by(feed_id=feed.id, title=entry.title.encode('latin-1', 'replace'))
|
||||
#query = session.query(Entry).filter_by(feed_id=feed.id, title=entry.title.encode('latin-1', 'replace'))
|
||||
title = entry.title.encode('latin-1', 'replace')
|
||||
link = entry.link.encode('latin-1', 'replace')
|
||||
query = session.query(Entry).filter(Entry.feed_id==feed.id).filter(Entry.title==title).filter(Entry.link==link)
|
||||
try:
|
||||
thisentry = query.one()
|
||||
thisentry.update(entry)
|
||||
@ -205,6 +210,11 @@ def process_feed_entry(feed, entry):
|
||||
if feed.readability:
|
||||
print ' fetching readability <%s>' % entry.link
|
||||
thisentry.readability = fetch_readability(entry.link)
|
||||
if feed.html2textsummary:
|
||||
print ' converting summary'
|
||||
summary = thisentry.summary.decode('latin-1')
|
||||
summary = html2text.html2text(summary)
|
||||
thisentry.summary = summary.encode('latin-1', 'replace')
|
||||
feed.entry.append(thisentry)
|
||||
return 1
|
||||
|
||||
@ -227,7 +237,7 @@ def fetch_single_feed(feed):
|
||||
entries_total = entries_total + 1
|
||||
entries_new = entries_new + process_feed_entry(feed, entry)
|
||||
session.commit()
|
||||
print 'fetched %d from %d entries' % (entries_total, entries_new)
|
||||
print 'updated %d of %d entries' % (entries_new, entries_total)
|
||||
|
||||
def fetch_all_feeds():
|
||||
print 'fetching all feeds...'
|
||||
|
Loading…
Reference in New Issue
Block a user