globally fixed encoding issues, changed database table types

This commit is contained in:
Ronald Schaten 2013-03-21 23:43:10 +01:00
parent 53c4665c85
commit db5e4e3515

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
from sqlalchemy import create_engine, Table, Column, Integer, Text, Boolean, DateTime, MetaData, ForeignKey, desc from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
from sqlalchemy.orm import sessionmaker, relation, backref from sqlalchemy.orm import sessionmaker, relation, backref
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
from datetime import datetime, timedelta from datetime import datetime, timedelta
@@ -52,14 +52,14 @@ class Feedinfo(Base):
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
feed_id = Column(Integer, ForeignKey('feed.id')) feed_id = Column(Integer, ForeignKey('feed.id'))
feed = relation("Feed", backref=backref('feedinfo', uselist=False)) feed = relation("Feed", backref=backref('feedinfo', uselist=False))
title = Column(Text) title = Column(String(255))
link = Column(Text) link = Column(String(255))
subtitle = Column(Text) subtitle = Column(String(255))
author = Column(Text) author = Column(String(255))
publisher = Column(Text) publisher = Column(String(255))
status = Column(Integer) status = Column(Integer)
version = Column(Text) version = Column(String(16))
encoding = Column(Text) encoding = Column(String(16))
bozo = Column(Integer) bozo = Column(Integer)
lastfetched = Column(DateTime) lastfetched = Column(DateTime)
@@ -73,15 +73,15 @@ class Feedinfo(Base):
def update(self, parser): def update(self, parser):
if parser.feed.has_key('title'): if parser.feed.has_key('title'):
self.title = parser.feed.get('title').encode('latin-1', 'replace') self.title = parser.feed.get('title')
if parser.feed.has_key('link'): if parser.feed.has_key('link'):
self.link = parser.feed.get('link') self.link = parser.feed.get('link')
if parser.feed.has_key('subtitle'): if parser.feed.has_key('subtitle'):
self.subtitle = parser.feed.get('subtitle').encode('latin-1', 'replace') self.subtitle = parser.feed.get('subtitle')
if parser.feed.has_key('author'): if parser.feed.has_key('author'):
self.author = parser.feed.get('author').encode('latin-1', 'replace') self.author = parser.feed.get('author')
if parser.feed.has_key('publisher'): if parser.feed.has_key('publisher'):
self.author = parser.feed.get('publisher').encode('latin-1', 'replace') self.author = parser.feed.get('publisher')
self.status = parser.get('status') self.status = parser.get('status')
self.version = parser.get('version') self.version = parser.get('version')
self.encoding = parser.get('encoding') self.encoding = parser.get('encoding')
@@ -97,14 +97,14 @@ class Entry(Base):
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
feed_id = Column(Integer, ForeignKey('feed.id')) feed_id = Column(Integer, ForeignKey('feed.id'))
feed = relation("Feed", backref=backref('entry')) feed = relation("Feed", backref=backref('entry'))
title = Column(Text) title = Column(String(255))
link = Column(Text) link = Column(String(255))
summary = Column(Text) summary = Column(Text)
content = Column(Text) content = Column(Text)
author = Column(Text) author = Column(String(255))
enclosures = Column(Text) enclosures = Column(Text)
resolvedlink = Column(Text) resolvedlink = Column(String(255))
fullpage = Column(Text) fullpage = Column(Text)
readability = Column(Text) readability = Column(Text)
updated = Column(DateTime) updated = Column(DateTime)
@@ -121,15 +121,15 @@ class Entry(Base):
def update(self, entry): def update(self, entry):
if entry.has_key('title'): if entry.has_key('title'):
self.title = entry.get('title').encode('latin-1', 'replace') self.title = entry.get('title')
if entry.has_key('link'): if entry.has_key('link'):
self.link = entry.get('link').encode('latin-1', 'replace') self.link = entry.get('link')
if entry.has_key('summary'): if entry.has_key('summary'):
self.summary = entry.get('summary').encode('latin-1', 'replace') self.summary = entry.get('summary')
if entry.has_key('content'): if entry.has_key('content'):
self.content = entry.get('content')[0].value.encode('latin-1', 'replace') self.content = entry.get('content')[0].value
if entry.has_key('author'): if entry.has_key('author'):
self.author = entry.get('author').encode('latin-1', 'replace') self.author = entry.get('author')
if entry.has_key('updated_parsed'): if entry.has_key('updated_parsed'):
updated_parsed = entry.get('updated_parsed') updated_parsed = entry.get('updated_parsed')
self.updated = datetime.fromtimestamp(mktime(updated_parsed)) self.updated = datetime.fromtimestamp(mktime(updated_parsed))
@@ -137,15 +137,14 @@ class Entry(Base):
print 'enclosures'; print 'enclosures';
pp=pprint.PrettyPrinter(depth=4) pp=pprint.PrettyPrinter(depth=4)
pp.pprint(entry.get('enclosures')) pp.pprint(entry.get('enclosures'))
#self.enclosures = entry.get('enclosures').encode('latin-1', 'replace') #self.enclosures = entry.get('enclosures')
self.lastfetched = datetime.now() self.lastfetched = datetime.now()
def send_mail(sender, receiver, subject, body): def send_mail(sender, receiver, subject, body):
subject = subject.decode('latin-1') print 'sending to %s: %s' % (receiver, subject)
print 'sending to %s: %s' % (receiver.decode('latin-1'), subject)
mail = MIMEText(body, _charset='utf-8') mail = MIMEText(body, _charset='utf-8')
mail['From'] = sender mail['From'] = sender.encode('utf-8')
mail['To'] = receiver mail['To'] = receiver
mail['Subject'] = subject mail['Subject'] = subject
mailserver = smtplib.SMTP('localhost') mailserver = smtplib.SMTP('localhost')
@@ -162,7 +161,6 @@ def get_entry_text(entry):
text = entry.summary text = entry.summary
else: else:
text = 'no text, sorry' text = 'no text, sorry'
text = text.decode('latin-1')
return text return text
def mail_daily_digest(session, sender, receiver, prefix): def mail_daily_digest(session, sender, receiver, prefix):
@@ -184,7 +182,7 @@ def mail_daily_digest(session, sender, receiver, prefix):
link = entry.resolvedlink link = entry.resolvedlink
try: try:
body = body + '=> %s - %s\n' % (entry.firstfetched.strftime('%y%m%d-%H%M'), feedinfo.title) body = body + '=> %s - %s\n' % (entry.firstfetched.strftime('%y%m%d-%H%M'), feedinfo.title)
body = body + ' %s\n' % entry.title.decode('latin-1') body = body + ' %s\n' % entry.title
body = body + '%s\n' % get_entry_text(entry)[0:100] body = body + '%s\n' % get_entry_text(entry)[0:100]
body = body + '%s\n\n' % link body = body + '%s\n\n' % link
except: except:
@@ -282,9 +280,7 @@ def process_feed_entry(session, feed, entry):
h2t.body_width = 0 h2t.body_width = 0
if feed.html2textignoreimages: if feed.html2textignoreimages:
h2t.ignore_images = True h2t.ignore_images = True
summary = thisentry.summary.decode('latin-1') thisentry.summary = h2t.handle(thisentry.summary)
summary = h2t.handle(summary)
thisentry.summary = summary.encode('latin-1')
feed.entry.append(thisentry) feed.entry.append(thisentry)
session.commit() session.commit()
return 1 return 1
@@ -330,7 +326,7 @@ if __name__ == '__main__':
config = ConfigParser.ConfigParser() config = ConfigParser.ConfigParser()
config.read('atomstrom.conf') config.read('atomstrom.conf')
dbconnectstring = '%s://%s:%s@%s/%s' % ( dbconnectstring = '%s://%s:%s@%s/%s?charset=utf8' % (
config.get('database', 'engine'), config.get('database', 'engine'),
config.get('database', 'user'), config.get('database', 'user'),
config.get('database', 'password'), config.get('database', 'password'),