From 3141f82df39b3f910498018d9a8ddd64f7f25dc5 Mon Sep 17 00:00:00 2001
From: Ronald Schaten
Date: Fri, 29 Oct 2010 09:09:25 +0200
Subject: [PATCH] fetch full page html

---
Review notes (free-form area; ignored by `git am`, not part of the commit):
 * fetch_full_page(link) opens `link` with urllib.FancyURLopener and returns
   the raw bytes from response.read(); the response is never closed in the
   code shown.
 * process_feed_entry() now fetches the full page only for entries that take
   the "new entry" path and only when feed.fullpage == 1; the result is
   stored on thisentry.fullpage before the entry is appended to feed.entry.
 * The "new entry" path is the `except Exception, e:` handler, so any
   exception raised in the try body leads to a new Entry (and possibly a
   network fetch), not just a failed lookup.
 * `except Exception, e` and urllib.FancyURLopener are Python 2 constructs.
 * NOTE(review): indentation inside the hunks below was reconstructed at
   4 spaces per level; confirm against atomstrom.py before applying.

 atomstrom.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/atomstrom.py b/atomstrom.py
index 9f716cb..e971329 100755
--- a/atomstrom.py
+++ b/atomstrom.py
@@ -7,6 +7,7 @@ import datetime
 import feedparser
 import re
 import sys
+import urllib
 
 Base = declarative_base()
 
@@ -124,6 +125,11 @@ session = Session()
 #session.add(Feed('http://www.heise.de/newsticker/heise-atom.xml', 1, 0, 0, 1))
 #session.add(Feed('http://blog.schatenseite.de/feed/', 1, 0, 0, 1))
 
+def fetch_full_page(link):
+    opener = urllib.FancyURLopener({})
+    response = opener.open(link)
+    return response.read()
+
 def process_feed_entry(feed, entry):
     query = session.query(Entry).filter_by(feed_id=feed.id, title=entry.title.encode('latin-1', 'replace'))
     try:
@@ -131,7 +137,10 @@ def process_feed_entry(feed, entry):
         thisentry.update(entry)
         return "-"
     except Exception, e:
-        feed.entry.append(Entry(entry))
+        thisentry = Entry(entry)
+        if feed.fullpage == 1:
+            thisentry.fullpage = fetch_full_page(entry.link)
+        feed.entry.append(thisentry)
         return "+"
 
 def fetch_single_feed(feed):