Decode HTML entities
This commit is contained in:
parent
640442b95d
commit
0c6cb0c7f9
10
atomstrom.py
10
atomstrom.py
@ -14,6 +14,7 @@ import codecs
|
|||||||
import urllib2
|
import urllib2
|
||||||
#import hn
|
#import hn
|
||||||
import html2text
|
import html2text
|
||||||
|
import HTMLParser
|
||||||
import ConfigParser
|
import ConfigParser
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
from email.header import Header
|
from email.header import Header
|
||||||
@ -165,6 +166,15 @@ def process_feed_entry(session, feed, entry):
|
|||||||
thisentry.fullpage = h2t.handle(thisentry.fullpage)
|
thisentry.fullpage = h2t.handle(thisentry.fullpage)
|
||||||
elif feed.contentcolumn == 'readability':
|
elif feed.contentcolumn == 'readability':
|
||||||
thisentry.readability = h2t.handle(thisentry.readability)
|
thisentry.readability = h2t.handle(thisentry.readability)
|
||||||
|
hp = HTMLParser.HTMLParser()
|
||||||
|
if thisentry.summary:
|
||||||
|
thisentry.summary = hp.unescape(thisentry.summary)
|
||||||
|
if thisentry.content:
|
||||||
|
thisentry.content = hp.unescape(thisentry.content)
|
||||||
|
if thisentry.fullpage:
|
||||||
|
thisentry.fullpage = hp.unescape(thisentry.fullpage)
|
||||||
|
if thisentry.readability:
|
||||||
|
thisentry.readability = hp.unescape(thisentry.readability)
|
||||||
feed.entry.append(thisentry)
|
feed.entry.append(thisentry)
|
||||||
session.commit()
|
session.commit()
|
||||||
return 1
|
return 1
|
||||||
|
Loading…
Reference in New Issue
Block a user