Enabled fetching of full pages
This commit is contained in:
parent
f52e8dbf1d
commit
f78b99e61e
@ -2,7 +2,6 @@ from sqlalchemy import Column, Integer, ForeignKey, String, Text, DateTime
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from time import mktime
|
from time import mktime
|
||||||
import urllib2
|
import urllib2
|
||||||
#import urllib
|
|
||||||
#import hn
|
#import hn
|
||||||
import html2text
|
import html2text
|
||||||
import HTMLParser
|
import HTMLParser
|
||||||
@ -15,12 +14,10 @@ def fetch_readability(link):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
def fetch_full_page(link):
|
def fetch_full_page(link):
|
||||||
opener = urllib.FancyURLopener({})
|
response = urllib2.urlopen(link)
|
||||||
response = opener.open(link)
|
|
||||||
html = response.read()
|
html = response.read()
|
||||||
html = html.decode('utf8')
|
html = html.decode('utf8')
|
||||||
text = html2text.html2text(html)
|
return html
|
||||||
return text.encode('latin-1', 'replace')
|
|
||||||
|
|
||||||
def size_human_readable(bytesize):
|
def size_human_readable(bytesize):
|
||||||
for x in ['bytes','KB','MB','GB']:
|
for x in ['bytes','KB','MB','GB']:
|
||||||
|
Loading…
Reference in New Issue
Block a user