Fetch full page HTML

This commit is contained in:
Ronald Schaten 2010-10-29 09:09:25 +02:00
parent daa2d204c1
commit 3141f82df3

View File

@ -7,6 +7,7 @@ import datetime
import feedparser import feedparser
import re import re
import sys import sys
import urllib
Base = declarative_base() Base = declarative_base()
@ -124,6 +125,11 @@ session = Session()
#session.add(Feed('http://www.heise.de/newsticker/heise-atom.xml', 1, 0, 0, 1)) #session.add(Feed('http://www.heise.de/newsticker/heise-atom.xml', 1, 0, 0, 1))
#session.add(Feed('http://blog.schatenseite.de/feed/', 1, 0, 0, 1)) #session.add(Feed('http://blog.schatenseite.de/feed/', 1, 0, 0, 1))
def fetch_full_page(link):
    """Download the document at *link* and return its raw body.

    Args:
        link: URL of the page to retrieve.

    Returns:
        The complete response body exactly as returned by ``read()``;
        no decoding or parsing is performed here.

    Raises:
        IOError: presumably raised by the opener when the URL cannot be
            opened -- TODO confirm against the urllib documentation.
    """
    # An empty proxies mapping is passed explicitly; per the urllib docs
    # this is expected to disable proxy usage -- confirm if proxies matter.
    opener = urllib.FancyURLopener({})
    response = opener.open(link)
    try:
        return response.read()
    finally:
        # Always release the underlying connection, even if read() fails,
        # so repeated fetches do not leak open sockets/file handles.
        response.close()
def process_feed_entry(feed, entry): def process_feed_entry(feed, entry):
query = session.query(Entry).filter_by(feed_id=feed.id, title=entry.title.encode('latin-1', 'replace')) query = session.query(Entry).filter_by(feed_id=feed.id, title=entry.title.encode('latin-1', 'replace'))
try: try:
@ -131,7 +137,10 @@ def process_feed_entry(feed, entry):
thisentry.update(entry) thisentry.update(entry)
return "-" return "-"
except Exception, e: except Exception, e:
feed.entry.append(Entry(entry)) thisentry = Entry(entry)
if feed.fullpage == 1:
thisentry.fullpage = fetch_full_page(entry.link)
feed.entry.append(thisentry)
return "+" return "+"
def fetch_single_feed(feed): def fetch_single_feed(feed):