Implemented resolving of URL redirects

This commit is contained in:
Ronald Schaten 2013-03-20 20:30:54 +01:00
parent b78ed4ba02
commit 3eeef717ba

View File

@ -9,6 +9,7 @@ import feedparser
import re
import sys
import urllib
import urllib2
import hn
import html2text
import ConfigParser
@ -26,6 +27,7 @@ class Feed(Base):
url = Column(Text)
frequency = Column(Integer)
daily = Column(Boolean)
resolveredirects = Column(Boolean)
readability = Column(Boolean)
fullpage = Column(Boolean)
html2textsummary = Column(Boolean)
@ -101,6 +103,7 @@ class Entry(Base):
author = Column(Text)
enclosures = Column(Text)
resolvedlink = Column(Text)
fullpage = Column(Text)
readability = Column(Text)
updated = Column(DateTime)
@ -173,10 +176,13 @@ def mail_daily_digest(session, sender, receiver, prefix):
count = 0
for feed, feedinfo, entry in entries:
count = count + 1
link = entry.link
if entry.resolvedlink:
link = entry.resolvedlink
body = body + '=> %s - %s\n' % (entry.firstfetched.strftime('%y%m%d-%H%M'), feedinfo.title)
body = body + ' %s\n' % entry.title
body = body + '%s\n' % get_entry_text(entry)[0:100]
body = body + '%s\n\n' % entry.link
body = body + '%s\n\n' % link
if count > 0:
today = datetime.now()
subject = '%s (%s) - %d entries' % (today.strftime('%y%m%d'), today.strftime('%A'), count)
@ -192,9 +198,12 @@ def mail_single_entry(feed, feedinfo, entry, sender, receiver, prefix):
subject = '%s' % (entry.title)
if prefix != '':
subject = '%s %s' % (prefix, subject)
link = entry.link
if entry.resolvedlink:
link = entry.resolvedlink
body = '%s\n\n' % get_entry_text(entry)
body = body + '%s\n' % feedinfo.link
body = body + '%s\n' % entry.link
body = body + '%s\n' % link
send_mail(sender, receiver, subject, body)
entry.sent = datetime.now()
@ -242,6 +251,13 @@ def process_feed_entry(session, feed, entry):
except Exception, e:
print ' new entry <%s>' % entry.title
thisentry = Entry(entry)
if feed.resolveredirects:
print ' fetching final link <%s>' % entry.link
request = urllib2.Request(entry.link)
opener = urllib2.build_opener()
result = opener.open(request)
thisentry.resolvedlink = result.url
print ' final link: <%s>' % result.url
if feed.fullpage:
print ' fetching full page <%s>' % entry.link
thisentry.fullpage = fetch_full_page(entry.link)