Implemented resolution of URL redirects

This commit is contained in:
Ronald Schaten 2013-03-20 20:30:54 +01:00
parent b78ed4ba02
commit 3eeef717ba

View File

@ -9,6 +9,7 @@ import feedparser
import re import re
import sys import sys
import urllib import urllib
import urllib2
import hn import hn
import html2text import html2text
import ConfigParser import ConfigParser
@ -26,6 +27,7 @@ class Feed(Base):
url = Column(Text) url = Column(Text)
frequency = Column(Integer) frequency = Column(Integer)
daily = Column(Boolean) daily = Column(Boolean)
resolveredirects = Column(Boolean)
readability = Column(Boolean) readability = Column(Boolean)
fullpage = Column(Boolean) fullpage = Column(Boolean)
html2textsummary = Column(Boolean) html2textsummary = Column(Boolean)
@ -101,6 +103,7 @@ class Entry(Base):
author = Column(Text) author = Column(Text)
enclosures = Column(Text) enclosures = Column(Text)
resolvedlink = Column(Text)
fullpage = Column(Text) fullpage = Column(Text)
readability = Column(Text) readability = Column(Text)
updated = Column(DateTime) updated = Column(DateTime)
@ -173,10 +176,13 @@ def mail_daily_digest(session, sender, receiver, prefix):
count = 0 count = 0
for feed, feedinfo, entry in entries: for feed, feedinfo, entry in entries:
count = count + 1 count = count + 1
link = entry.link
if entry.resolvedlink:
link = entry.resolvedlink
body = body + '=> %s - %s\n' % (entry.firstfetched.strftime('%y%m%d-%H%M'), feedinfo.title) body = body + '=> %s - %s\n' % (entry.firstfetched.strftime('%y%m%d-%H%M'), feedinfo.title)
body = body + ' %s\n' % entry.title body = body + ' %s\n' % entry.title
body = body + '%s\n' % get_entry_text(entry)[0:100] body = body + '%s\n' % get_entry_text(entry)[0:100]
body = body + '%s\n\n' % entry.link body = body + '%s\n\n' % link
if count > 0: if count > 0:
today = datetime.now() today = datetime.now()
subject = '%s (%s) - %d entries' % (today.strftime('%y%m%d'), today.strftime('%A'), count) subject = '%s (%s) - %d entries' % (today.strftime('%y%m%d'), today.strftime('%A'), count)
@ -192,9 +198,12 @@ def mail_single_entry(feed, feedinfo, entry, sender, receiver, prefix):
subject = '%s' % (entry.title) subject = '%s' % (entry.title)
if prefix != '': if prefix != '':
subject = '%s %s' % (prefix, subject) subject = '%s %s' % (prefix, subject)
link = entry.link
if entry.resolvedlink:
link = entry.resolvedlink
body = '%s\n\n' % get_entry_text(entry) body = '%s\n\n' % get_entry_text(entry)
body = body + '%s\n' % feedinfo.link body = body + '%s\n' % feedinfo.link
body = body + '%s\n' % entry.link body = body + '%s\n' % link
send_mail(sender, receiver, subject, body) send_mail(sender, receiver, subject, body)
entry.sent = datetime.now() entry.sent = datetime.now()
@ -242,6 +251,13 @@ def process_feed_entry(session, feed, entry):
except Exception, e: except Exception, e:
print ' new entry <%s>' % entry.title print ' new entry <%s>' % entry.title
thisentry = Entry(entry) thisentry = Entry(entry)
if feed.resolveredirects:
print ' fetching final link <%s>' % entry.link
request = urllib2.Request(entry.link)
opener = urllib2.build_opener()
result = opener.open(request)
thisentry.resolvedlink = result.url
print ' final link: <%s>' % result.url
if feed.fullpage: if feed.fullpage:
print ' fetching full page <%s>' % entry.link print ' fetching full page <%s>' % entry.link
thisentry.fullpage = fetch_full_page(entry.link) thisentry.fullpage = fetch_full_page(entry.link)