new feature: ignore images in html2text

This commit is contained in:
Ronald Schaten 2013-03-20 23:28:48 +01:00
parent 7cadfb8bd1
commit f53338c3b1

View File

@@ -31,6 +31,7 @@ class Feed(Base):
readability = Column(Boolean)
fullpage = Column(Boolean)
html2textsummary = Column(Boolean)
html2textignoreimages = Column(Boolean)
enabled = Column(Boolean)
def __init__(self, url, daily, readability, fullpage, enabled, html2textsummary):
@@ -270,6 +271,8 @@ def process_feed_entry(session, feed, entry):
print ' converting summary'
h2t = html2text.HTML2Text()
h2t.body_width = 0
if feed.html2textignoreimages:
h2t.ignore_images = True
summary = thisentry.summary.decode('latin-1')
summary = h2t.handle(summary)
thisentry.summary = summary.encode('latin-1', 'replace')