Implemented resolving of URL redirects
This commit is contained in:
parent
b78ed4ba02
commit
3eeef717ba
20
atomstrom.py
20
atomstrom.py
@ -9,6 +9,7 @@ import feedparser
|
|||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import urllib
|
import urllib
|
||||||
|
import urllib2
|
||||||
import hn
|
import hn
|
||||||
import html2text
|
import html2text
|
||||||
import ConfigParser
|
import ConfigParser
|
||||||
@ -26,6 +27,7 @@ class Feed(Base):
|
|||||||
url = Column(Text)
|
url = Column(Text)
|
||||||
frequency = Column(Integer)
|
frequency = Column(Integer)
|
||||||
daily = Column(Boolean)
|
daily = Column(Boolean)
|
||||||
|
resolveredirects = Column(Boolean)
|
||||||
readability = Column(Boolean)
|
readability = Column(Boolean)
|
||||||
fullpage = Column(Boolean)
|
fullpage = Column(Boolean)
|
||||||
html2textsummary = Column(Boolean)
|
html2textsummary = Column(Boolean)
|
||||||
@ -101,6 +103,7 @@ class Entry(Base):
|
|||||||
author = Column(Text)
|
author = Column(Text)
|
||||||
enclosures = Column(Text)
|
enclosures = Column(Text)
|
||||||
|
|
||||||
|
resolvedlink = Column(Text)
|
||||||
fullpage = Column(Text)
|
fullpage = Column(Text)
|
||||||
readability = Column(Text)
|
readability = Column(Text)
|
||||||
updated = Column(DateTime)
|
updated = Column(DateTime)
|
||||||
@ -173,10 +176,13 @@ def mail_daily_digest(session, sender, receiver, prefix):
|
|||||||
count = 0
|
count = 0
|
||||||
for feed, feedinfo, entry in entries:
|
for feed, feedinfo, entry in entries:
|
||||||
count = count + 1
|
count = count + 1
|
||||||
|
link = entry.link
|
||||||
|
if entry.resolvedlink:
|
||||||
|
link = entry.resolvedlink
|
||||||
body = body + '=> %s - %s\n' % (entry.firstfetched.strftime('%y%m%d-%H%M'), feedinfo.title)
|
body = body + '=> %s - %s\n' % (entry.firstfetched.strftime('%y%m%d-%H%M'), feedinfo.title)
|
||||||
body = body + ' %s\n' % entry.title
|
body = body + ' %s\n' % entry.title
|
||||||
body = body + '%s\n' % get_entry_text(entry)[0:100]
|
body = body + '%s\n' % get_entry_text(entry)[0:100]
|
||||||
body = body + '%s\n\n' % entry.link
|
body = body + '%s\n\n' % link
|
||||||
if count > 0:
|
if count > 0:
|
||||||
today = datetime.now()
|
today = datetime.now()
|
||||||
subject = '%s (%s) - %d entries' % (today.strftime('%y%m%d'), today.strftime('%A'), count)
|
subject = '%s (%s) - %d entries' % (today.strftime('%y%m%d'), today.strftime('%A'), count)
|
||||||
@ -192,9 +198,12 @@ def mail_single_entry(feed, feedinfo, entry, sender, receiver, prefix):
|
|||||||
subject = '%s' % (entry.title)
|
subject = '%s' % (entry.title)
|
||||||
if prefix != '':
|
if prefix != '':
|
||||||
subject = '%s %s' % (prefix, subject)
|
subject = '%s %s' % (prefix, subject)
|
||||||
|
link = entry.link
|
||||||
|
if entry.resolvedlink:
|
||||||
|
link = entry.resolvedlink
|
||||||
body = '%s\n\n' % get_entry_text(entry)
|
body = '%s\n\n' % get_entry_text(entry)
|
||||||
body = body + '%s\n' % feedinfo.link
|
body = body + '%s\n' % feedinfo.link
|
||||||
body = body + '%s\n' % entry.link
|
body = body + '%s\n' % link
|
||||||
send_mail(sender, receiver, subject, body)
|
send_mail(sender, receiver, subject, body)
|
||||||
entry.sent = datetime.now()
|
entry.sent = datetime.now()
|
||||||
|
|
||||||
@ -242,6 +251,13 @@ def process_feed_entry(session, feed, entry):
|
|||||||
except Exception, e:
|
except Exception, e:
|
||||||
print ' new entry <%s>' % entry.title
|
print ' new entry <%s>' % entry.title
|
||||||
thisentry = Entry(entry)
|
thisentry = Entry(entry)
|
||||||
|
if feed.resolveredirects:
|
||||||
|
print ' fetching final link <%s>' % entry.link
|
||||||
|
request = urllib2.Request(entry.link)
|
||||||
|
opener = urllib2.build_opener()
|
||||||
|
result = opener.open(request)
|
||||||
|
thisentry.resolvedlink = result.url
|
||||||
|
print ' final link: <%s>' % result.url
|
||||||
if feed.fullpage:
|
if feed.fullpage:
|
||||||
print ' fetching full page <%s>' % entry.link
|
print ' fetching full page <%s>' % entry.link
|
||||||
thisentry.fullpage = fetch_full_page(entry.link)
|
thisentry.fullpage = fetch_full_page(entry.link)
|
||||||
|
Loading…
Reference in New Issue
Block a user