enabled fetching of full pages

This commit is contained in:
Ronald Schaten 2013-04-11 17:10:39 +02:00
parent f52e8dbf1d
commit f78b99e61e

View File

@@ -2,7 +2,6 @@ from sqlalchemy import Column, Integer, ForeignKey, String, Text, DateTime
from datetime import datetime from datetime import datetime
from time import mktime from time import mktime
import urllib2 import urllib2
#import urllib
#import hn #import hn
import html2text import html2text
import HTMLParser import HTMLParser
@@ -15,12 +14,10 @@ def fetch_readability(link):
return text return text
def fetch_full_page(link): def fetch_full_page(link):
opener = urllib.FancyURLopener({}) response = urllib2.urlopen(link)
response = opener.open(link)
html = response.read() html = response.read()
html = html.decode('utf8') html = html.decode('utf8')
text = html2text.html2text(html) return html
return text.encode('latin-1', 'replace')
def size_human_readable(bytesize): def size_human_readable(bytesize):
for x in ['bytes','KB','MB','GB']: for x in ['bytes','KB','MB','GB']: