Implemented resolving of URL redirects
This commit is contained in:
parent
b78ed4ba02
commit
3eeef717ba
20
atomstrom.py
20
atomstrom.py
@@ -9,6 +9,7 @@ import feedparser
|
||||
import re
|
||||
import sys
|
||||
import urllib
|
||||
import urllib2
|
||||
import hn
|
||||
import html2text
|
||||
import ConfigParser
|
||||
@@ -26,6 +27,7 @@ class Feed(Base):
|
||||
url = Column(Text)
|
||||
frequency = Column(Integer)
|
||||
daily = Column(Boolean)
|
||||
resolveredirects = Column(Boolean)
|
||||
readability = Column(Boolean)
|
||||
fullpage = Column(Boolean)
|
||||
html2textsummary = Column(Boolean)
|
||||
@@ -101,6 +103,7 @@ class Entry(Base):
|
||||
author = Column(Text)
|
||||
enclosures = Column(Text)
|
||||
|
||||
resolvedlink = Column(Text)
|
||||
fullpage = Column(Text)
|
||||
readability = Column(Text)
|
||||
updated = Column(DateTime)
|
||||
@@ -173,10 +176,13 @@ def mail_daily_digest(session, sender, receiver, prefix):
|
||||
count = 0
|
||||
for feed, feedinfo, entry in entries:
|
||||
count = count + 1
|
||||
link = entry.link
|
||||
if entry.resolvedlink:
|
||||
link = entry.resolvedlink
|
||||
body = body + '=> %s - %s\n' % (entry.firstfetched.strftime('%y%m%d-%H%M'), feedinfo.title)
|
||||
body = body + ' %s\n' % entry.title
|
||||
body = body + '%s\n' % get_entry_text(entry)[0:100]
|
||||
body = body + '%s\n\n' % entry.link
|
||||
body = body + '%s\n\n' % link
|
||||
if count > 0:
|
||||
today = datetime.now()
|
||||
subject = '%s (%s) - %d entries' % (today.strftime('%y%m%d'), today.strftime('%A'), count)
|
||||
@@ -192,9 +198,12 @@ def mail_single_entry(feed, feedinfo, entry, sender, receiver, prefix):
|
||||
subject = '%s' % (entry.title)
|
||||
if prefix != '':
|
||||
subject = '%s %s' % (prefix, subject)
|
||||
link = entry.link
|
||||
if entry.resolvedlink:
|
||||
link = entry.resolvedlink
|
||||
body = '%s\n\n' % get_entry_text(entry)
|
||||
body = body + '%s\n' % feedinfo.link
|
||||
body = body + '%s\n' % entry.link
|
||||
body = body + '%s\n' % link
|
||||
send_mail(sender, receiver, subject, body)
|
||||
entry.sent = datetime.now()
|
||||
|
||||
@@ -242,6 +251,13 @@ def process_feed_entry(session, feed, entry):
|
||||
except Exception, e:
|
||||
print ' new entry <%s>' % entry.title
|
||||
thisentry = Entry(entry)
|
||||
if feed.resolveredirects:
|
||||
print ' fetching final link <%s>' % entry.link
|
||||
request = urllib2.Request(entry.link)
|
||||
opener = urllib2.build_opener()
|
||||
result = opener.open(request)
|
||||
thisentry.resolvedlink = result.url
|
||||
print ' final link: <%s>' % result.url
|
||||
if feed.fullpage:
|
||||
print ' fetching full page <%s>' % entry.link
|
||||
thisentry.fullpage = fetch_full_page(entry.link)
|
||||
|
Loading…
Reference in New Issue
Block a user