implemented newer version of readability-module
This commit is contained in:
@ -2,15 +2,22 @@ from sqlalchemy import Column, Integer, ForeignKey, String, Text, DateTime
|
||||
from datetime import datetime
|
||||
from time import mktime
|
||||
import urllib2
|
||||
#import hn
|
||||
from readability.readability import Document
|
||||
import html2text
|
||||
import HTMLParser
|
||||
|
||||
from models import Base
|
||||
|
||||
def fetch_readability(link):
    """Fetch *link* and return its readable content as plain text.

    The page is downloaded with ``urllib2``, reduced to its main content
    with readability's ``Document.summary()``, and then converted from
    HTML to text with ``html2text``.

    :param link: URL of the page to fetch.
    :returns: the text produced by ``html2text`` for the summarized page.
    :raises urllib2.URLError: propagated from ``urllib2.urlopen`` when the
        page cannot be retrieved.
    """
    # Configure the HTML-to-text converter. Per html2text's options,
    # body_width = 0 disables line wrapping; links and images are omitted
    # from the generated text.
    converter = html2text.HTML2Text()
    converter.body_width = 0
    converter.inline_links = False
    converter.ignore_links = True
    converter.ignore_images = True

    # NOTE: the former hn.upgradeLink() call was removed -- the `hn` import
    # is commented out, so it raised NameError, and its result was
    # overwritten below without ever being used.
    response = urllib2.urlopen(link)
    try:
        raw_html = response.read()
    finally:
        # urllib2 responses are not context managers on Python 2, so close
        # the connection explicitly even if read() fails.
        response.close()

    article_html = Document(raw_html).summary()
    return converter.handle(article_html)
|
||||
|
||||
def fetch_full_page(link):
|
||||
|
Reference in New Issue
Block a user