html2text functionality for entry summary
This commit is contained in:
parent
8871785cb1
commit
64c3506a9e
16
atomstrom.py
16
atomstrom.py
@ -21,13 +21,15 @@ class Feed(Base):
|
|||||||
daily = Column(Boolean)
|
daily = Column(Boolean)
|
||||||
readability = Column(Boolean)
|
readability = Column(Boolean)
|
||||||
fullpage = Column(Boolean)
|
fullpage = Column(Boolean)
|
||||||
|
html2textsummary = Column(Boolean)
|
||||||
enabled = Column(Boolean)
|
enabled = Column(Boolean)
|
||||||
|
|
||||||
def __init__(self, url, daily, readability, fullpage, enabled):
|
def __init__(self, url, daily, readability, fullpage, enabled, html2textsummary):
|
||||||
self.url = url
|
self.url = url
|
||||||
self.daily = daily
|
self.daily = daily
|
||||||
self.readability = readability
|
self.readability = readability
|
||||||
self.fullpage = fullpage
|
self.fullpage = fullpage
|
||||||
|
self.html2textsummary = html2textsummary
|
||||||
self.enabled = enabled
|
self.enabled = enabled
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
@ -190,7 +192,10 @@ def fetch_full_page(link):
|
|||||||
return text.encode('latin-1', 'replace')
|
return text.encode('latin-1', 'replace')
|
||||||
|
|
||||||
def process_feed_entry(feed, entry):
|
def process_feed_entry(feed, entry):
|
||||||
query = session.query(Entry).filter_by(feed_id=feed.id, title=entry.title.encode('latin-1', 'replace'))
|
#query = session.query(Entry).filter_by(feed_id=feed.id, title=entry.title.encode('latin-1', 'replace'))
|
||||||
|
title = entry.title.encode('latin-1', 'replace')
|
||||||
|
link = entry.link.encode('latin-1', 'replace')
|
||||||
|
query = session.query(Entry).filter(Entry.feed_id==feed.id).filter(Entry.title==title).filter(Entry.link==link)
|
||||||
try:
|
try:
|
||||||
thisentry = query.one()
|
thisentry = query.one()
|
||||||
thisentry.update(entry)
|
thisentry.update(entry)
|
||||||
@ -205,6 +210,11 @@ def process_feed_entry(feed, entry):
|
|||||||
if feed.readability:
|
if feed.readability:
|
||||||
print ' fetching readability <%s>' % entry.link
|
print ' fetching readability <%s>' % entry.link
|
||||||
thisentry.readability = fetch_readability(entry.link)
|
thisentry.readability = fetch_readability(entry.link)
|
||||||
|
if feed.html2textsummary:
|
||||||
|
print ' converting summary'
|
||||||
|
summary = thisentry.summary.decode('latin-1')
|
||||||
|
summary = html2text.html2text(summary)
|
||||||
|
thisentry.summary = summary.encode('latin-1', 'replace')
|
||||||
feed.entry.append(thisentry)
|
feed.entry.append(thisentry)
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
@ -227,7 +237,7 @@ def fetch_single_feed(feed):
|
|||||||
entries_total = entries_total + 1
|
entries_total = entries_total + 1
|
||||||
entries_new = entries_new + process_feed_entry(feed, entry)
|
entries_new = entries_new + process_feed_entry(feed, entry)
|
||||||
session.commit()
|
session.commit()
|
||||||
print 'fetched %d from %d entries' % (entries_total, entries_new)
|
print 'updated %d of %d entries' % (entries_new, entries_total)
|
||||||
|
|
||||||
def fetch_all_feeds():
|
def fetch_all_feeds():
|
||||||
print 'fetching all feeds...'
|
print 'fetching all feeds...'
|
||||||
|
Loading…
Reference in New Issue
Block a user