diff --git a/atomstrom.py b/atomstrom.py index 0bcd13f..1d0b4a1 100755 --- a/atomstrom.py +++ b/atomstrom.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -from sqlalchemy import create_engine, Table, Column, Integer, Text, Boolean, DateTime, MetaData, ForeignKey, desc +from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc from sqlalchemy.orm import sessionmaker, relation, backref from sqlalchemy.ext.declarative import declarative_base from datetime import datetime, timedelta @@ -52,14 +52,14 @@ class Feedinfo(Base): id = Column(Integer, primary_key=True) feed_id = Column(Integer, ForeignKey('feed.id')) feed = relation("Feed", backref=backref('feedinfo', uselist=False)) - title = Column(Text) - link = Column(Text) - subtitle = Column(Text) - author = Column(Text) - publisher = Column(Text) + title = Column(String(255)) + link = Column(String(255)) + subtitle = Column(String(255)) + author = Column(String(255)) + publisher = Column(String(255)) status = Column(Integer) - version = Column(Text) - encoding = Column(Text) + version = Column(String(16)) + encoding = Column(String(16)) bozo = Column(Integer) lastfetched = Column(DateTime) @@ -73,15 +73,15 @@ class Feedinfo(Base): def update(self, parser): if parser.feed.has_key('title'): - self.title = parser.feed.get('title').encode('latin-1', 'replace') + self.title = parser.feed.get('title') if parser.feed.has_key('link'): self.link = parser.feed.get('link') if parser.feed.has_key('subtitle'): - self.subtitle = parser.feed.get('subtitle').encode('latin-1', 'replace') + self.subtitle = parser.feed.get('subtitle') if parser.feed.has_key('author'): - self.author = parser.feed.get('author').encode('latin-1', 'replace') + self.author = parser.feed.get('author') if parser.feed.has_key('publisher'): - self.author = parser.feed.get('publisher').encode('latin-1', 'replace') + self.author = parser.feed.get('publisher') self.status = parser.get('status') self.version = parser.get('version') self.encoding = parser.get('encoding') @@ -97,14 +97,14 @@ class Entry(Base): id = Column(Integer, primary_key=True) feed_id = Column(Integer, ForeignKey('feed.id')) feed = relation("Feed", backref=backref('entry')) - title = Column(Text) - link = Column(Text) + title = Column(String(255)) + link = Column(String(255)) summary = Column(Text) content = Column(Text) - author = Column(Text) + author = Column(String(255)) enclosures = Column(Text) - resolvedlink = Column(Text) + resolvedlink = Column(String(255)) fullpage = Column(Text) readability = Column(Text) updated = Column(DateTime) @@ -121,15 +121,15 @@ class Entry(Base): def update(self, entry): if entry.has_key('title'): - self.title = entry.get('title').encode('latin-1', 'replace') + self.title = entry.get('title') if entry.has_key('link'): - self.link = entry.get('link').encode('latin-1', 'replace') + self.link = entry.get('link') if entry.has_key('summary'): - self.summary = entry.get('summary').encode('latin-1', 'replace') + self.summary = entry.get('summary') if entry.has_key('content'): - self.content = entry.get('content')[0].value.encode('latin-1', 'replace') + self.content = entry.get('content')[0].value if entry.has_key('author'): - self.author = entry.get('author').encode('latin-1', 'replace') + self.author = entry.get('author') if entry.has_key('updated_parsed'): updated_parsed = entry.get('updated_parsed') self.updated = datetime.fromtimestamp(mktime(updated_parsed)) @@ -137,15 +137,14 @@ class Entry(Base): print 'enclosures'; pp=pprint.PrettyPrinter(depth=4) pp.pprint(entry.get('enclosures')) - #self.enclosures = entry.get('enclosures').encode('latin-1', 'replace') + #self.enclosures = entry.get('enclosures') self.lastfetched = datetime.now() def send_mail(sender, receiver, subject, body): - subject = subject.decode('latin-1') - print 'sending to %s: %s' % (receiver.decode('latin-1'), subject) + print 'sending to %s: %s' % (receiver, subject) mail = MIMEText(body, _charset='utf-8') - mail['From'] = sender + mail['From'] = sender.encode('utf-8') mail['To'] = receiver mail['Subject'] = subject mailserver = smtplib.SMTP('localhost') @@ -162,7 +161,6 @@ def get_entry_text(entry): text = entry.summary else: text = 'no text, sorry' - text = text.decode('latin-1') return text def mail_daily_digest(session, sender, receiver, prefix): @@ -184,7 +182,7 @@ def mail_daily_digest(session, sender, receiver, prefix): link = entry.resolvedlink try: body = body + '=> %s - %s\n' % (entry.firstfetched.strftime('%y%m%d-%H%M'), feedinfo.title) - body = body + ' %s\n' % entry.title.decode('latin-1') + body = body + ' %s\n' % entry.title body = body + '%s\n' % get_entry_text(entry)[0:100] body = body + '%s\n\n' % link except: @@ -282,9 +280,7 @@ def process_feed_entry(session, feed, entry): h2t.body_width = 0 if feed.html2textignoreimages: h2t.ignore_images = True - summary = thisentry.summary.decode('latin-1') - summary = h2t.handle(summary) - thisentry.summary = summary.encode('latin-1') + thisentry.summary = h2t.handle(thisentry.summary) feed.entry.append(thisentry) session.commit() return 1 @@ -330,7 +326,7 @@ if __name__ == '__main__': config = ConfigParser.ConfigParser() config.read('atomstrom.conf') - dbconnectstring = '%s://%s:%s@%s/%s' % ( + dbconnectstring = '%s://%s:%s@%s/%s?charset=utf8' % ( config.get('database', 'engine'), config.get('database', 'user'), config.get('database', 'password'),