fetch full page html
This commit is contained in:
parent
daa2d204c1
commit
3141f82df3
11
atomstrom.py
11
atomstrom.py
@ -7,6 +7,7 @@ import datetime
|
|||||||
import feedparser
|
import feedparser
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
import urllib
|
||||||
|
|
||||||
Base = declarative_base()
|
Base = declarative_base()
|
||||||
|
|
||||||
@ -124,6 +125,11 @@ session = Session()
|
|||||||
#session.add(Feed('http://www.heise.de/newsticker/heise-atom.xml', 1, 0, 0, 1))
|
#session.add(Feed('http://www.heise.de/newsticker/heise-atom.xml', 1, 0, 0, 1))
|
||||||
#session.add(Feed('http://blog.schatenseite.de/feed/', 1, 0, 0, 1))
|
#session.add(Feed('http://blog.schatenseite.de/feed/', 1, 0, 0, 1))
|
||||||
|
|
||||||
|
def fetch_full_page(link):
    """Download the document at ``link`` and return its raw body.

    ``link`` is the URL to fetch. The return value is whatever
    ``response.read()`` yields, i.e. the undecoded content of the response.
    Errors raised by ``urllib`` while opening or reading propagate to the
    caller unchanged.
    """
    # An empty proxies mapping turns proxy handling off completely
    # (per the urllib URLopener documentation).
    opener = urllib.FancyURLopener({})
    response = opener.open(link)
    try:
        return response.read()
    finally:
        # Always release the connection, even if read() fails; the
        # Python 2 urllib response object is not a context manager,
        # hence try/finally rather than `with`.
        response.close()
|
||||||
|
|
||||||
def process_feed_entry(feed, entry):
|
def process_feed_entry(feed, entry):
|
||||||
query = session.query(Entry).filter_by(feed_id=feed.id, title=entry.title.encode('latin-1', 'replace'))
|
query = session.query(Entry).filter_by(feed_id=feed.id, title=entry.title.encode('latin-1', 'replace'))
|
||||||
try:
|
try:
|
||||||
@ -131,7 +137,10 @@ def process_feed_entry(feed, entry):
|
|||||||
thisentry.update(entry)
|
thisentry.update(entry)
|
||||||
return "-"
|
return "-"
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
feed.entry.append(Entry(entry))
|
thisentry = Entry(entry)
|
||||||
|
if feed.fullpage == 1:
|
||||||
|
thisentry.fullpage = fetch_full_page(entry.link)
|
||||||
|
feed.entry.append(thisentry)
|
||||||
return "+"
|
return "+"
|
||||||
|
|
||||||
def fetch_single_feed(feed):
|
def fetch_single_feed(feed):
|
||||||
|
Loading…
Reference in New Issue
Block a user