Globally fixed encoding issues; changed database column types (Text to length-bounded String where applicable)
This commit is contained in:
parent
53c4665c85
commit
db5e4e3515
58
atomstrom.py
58
atomstrom.py
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
from sqlalchemy import create_engine, Table, Column, Integer, Text, Boolean, DateTime, MetaData, ForeignKey, desc
|
from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
|
||||||
from sqlalchemy.orm import sessionmaker, relation, backref
|
from sqlalchemy.orm import sessionmaker, relation, backref
|
||||||
from sqlalchemy.ext.declarative import declarative_base
|
from sqlalchemy.ext.declarative import declarative_base
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
@ -52,14 +52,14 @@ class Feedinfo(Base):
|
|||||||
id = Column(Integer, primary_key=True)
|
id = Column(Integer, primary_key=True)
|
||||||
feed_id = Column(Integer, ForeignKey('feed.id'))
|
feed_id = Column(Integer, ForeignKey('feed.id'))
|
||||||
feed = relation("Feed", backref=backref('feedinfo', uselist=False))
|
feed = relation("Feed", backref=backref('feedinfo', uselist=False))
|
||||||
title = Column(Text)
|
title = Column(String(255))
|
||||||
link = Column(Text)
|
link = Column(String(255))
|
||||||
subtitle = Column(Text)
|
subtitle = Column(String(255))
|
||||||
author = Column(Text)
|
author = Column(String(255))
|
||||||
publisher = Column(Text)
|
publisher = Column(String(255))
|
||||||
status = Column(Integer)
|
status = Column(Integer)
|
||||||
version = Column(Text)
|
version = Column(String(16))
|
||||||
encoding = Column(Text)
|
encoding = Column(String(16))
|
||||||
bozo = Column(Integer)
|
bozo = Column(Integer)
|
||||||
|
|
||||||
lastfetched = Column(DateTime)
|
lastfetched = Column(DateTime)
|
||||||
@ -73,15 +73,15 @@ class Feedinfo(Base):
|
|||||||
|
|
||||||
def update(self, parser):
|
def update(self, parser):
|
||||||
if parser.feed.has_key('title'):
|
if parser.feed.has_key('title'):
|
||||||
self.title = parser.feed.get('title').encode('latin-1', 'replace')
|
self.title = parser.feed.get('title')
|
||||||
if parser.feed.has_key('link'):
|
if parser.feed.has_key('link'):
|
||||||
self.link = parser.feed.get('link')
|
self.link = parser.feed.get('link')
|
||||||
if parser.feed.has_key('subtitle'):
|
if parser.feed.has_key('subtitle'):
|
||||||
self.subtitle = parser.feed.get('subtitle').encode('latin-1', 'replace')
|
self.subtitle = parser.feed.get('subtitle')
|
||||||
if parser.feed.has_key('author'):
|
if parser.feed.has_key('author'):
|
||||||
self.author = parser.feed.get('author').encode('latin-1', 'replace')
|
self.author = parser.feed.get('author')
|
||||||
if parser.feed.has_key('publisher'):
|
if parser.feed.has_key('publisher'):
|
||||||
self.author = parser.feed.get('publisher').encode('latin-1', 'replace')
|
self.author = parser.feed.get('publisher')
|
||||||
self.status = parser.get('status')
|
self.status = parser.get('status')
|
||||||
self.version = parser.get('version')
|
self.version = parser.get('version')
|
||||||
self.encoding = parser.get('encoding')
|
self.encoding = parser.get('encoding')
|
||||||
@ -97,14 +97,14 @@ class Entry(Base):
|
|||||||
id = Column(Integer, primary_key=True)
|
id = Column(Integer, primary_key=True)
|
||||||
feed_id = Column(Integer, ForeignKey('feed.id'))
|
feed_id = Column(Integer, ForeignKey('feed.id'))
|
||||||
feed = relation("Feed", backref=backref('entry'))
|
feed = relation("Feed", backref=backref('entry'))
|
||||||
title = Column(Text)
|
title = Column(String(255))
|
||||||
link = Column(Text)
|
link = Column(String(255))
|
||||||
summary = Column(Text)
|
summary = Column(Text)
|
||||||
content = Column(Text)
|
content = Column(Text)
|
||||||
author = Column(Text)
|
author = Column(String(255))
|
||||||
enclosures = Column(Text)
|
enclosures = Column(Text)
|
||||||
|
|
||||||
resolvedlink = Column(Text)
|
resolvedlink = Column(String(255))
|
||||||
fullpage = Column(Text)
|
fullpage = Column(Text)
|
||||||
readability = Column(Text)
|
readability = Column(Text)
|
||||||
updated = Column(DateTime)
|
updated = Column(DateTime)
|
||||||
@ -121,15 +121,15 @@ class Entry(Base):
|
|||||||
|
|
||||||
def update(self, entry):
|
def update(self, entry):
|
||||||
if entry.has_key('title'):
|
if entry.has_key('title'):
|
||||||
self.title = entry.get('title').encode('latin-1', 'replace')
|
self.title = entry.get('title')
|
||||||
if entry.has_key('link'):
|
if entry.has_key('link'):
|
||||||
self.link = entry.get('link').encode('latin-1', 'replace')
|
self.link = entry.get('link')
|
||||||
if entry.has_key('summary'):
|
if entry.has_key('summary'):
|
||||||
self.summary = entry.get('summary').encode('latin-1', 'replace')
|
self.summary = entry.get('summary')
|
||||||
if entry.has_key('content'):
|
if entry.has_key('content'):
|
||||||
self.content = entry.get('content')[0].value.encode('latin-1', 'replace')
|
self.content = entry.get('content')[0].value
|
||||||
if entry.has_key('author'):
|
if entry.has_key('author'):
|
||||||
self.author = entry.get('author').encode('latin-1', 'replace')
|
self.author = entry.get('author')
|
||||||
if entry.has_key('updated_parsed'):
|
if entry.has_key('updated_parsed'):
|
||||||
updated_parsed = entry.get('updated_parsed')
|
updated_parsed = entry.get('updated_parsed')
|
||||||
self.updated = datetime.fromtimestamp(mktime(updated_parsed))
|
self.updated = datetime.fromtimestamp(mktime(updated_parsed))
|
||||||
@ -137,15 +137,14 @@ class Entry(Base):
|
|||||||
print 'enclosures';
|
print 'enclosures';
|
||||||
pp=pprint.PrettyPrinter(depth=4)
|
pp=pprint.PrettyPrinter(depth=4)
|
||||||
pp.pprint(entry.get('enclosures'))
|
pp.pprint(entry.get('enclosures'))
|
||||||
#self.enclosures = entry.get('enclosures').encode('latin-1', 'replace')
|
#self.enclosures = entry.get('enclosures')
|
||||||
self.lastfetched = datetime.now()
|
self.lastfetched = datetime.now()
|
||||||
|
|
||||||
|
|
||||||
def send_mail(sender, receiver, subject, body):
|
def send_mail(sender, receiver, subject, body):
|
||||||
subject = subject.decode('latin-1')
|
print 'sending to %s: %s' % (receiver, subject)
|
||||||
print 'sending to %s: %s' % (receiver.decode('latin-1'), subject)
|
|
||||||
mail = MIMEText(body, _charset='utf-8')
|
mail = MIMEText(body, _charset='utf-8')
|
||||||
mail['From'] = sender
|
mail['From'] = sender.encode('utf-8')
|
||||||
mail['To'] = receiver
|
mail['To'] = receiver
|
||||||
mail['Subject'] = subject
|
mail['Subject'] = subject
|
||||||
mailserver = smtplib.SMTP('localhost')
|
mailserver = smtplib.SMTP('localhost')
|
||||||
@ -162,7 +161,6 @@ def get_entry_text(entry):
|
|||||||
text = entry.summary
|
text = entry.summary
|
||||||
else:
|
else:
|
||||||
text = 'no text, sorry'
|
text = 'no text, sorry'
|
||||||
text = text.decode('latin-1')
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def mail_daily_digest(session, sender, receiver, prefix):
|
def mail_daily_digest(session, sender, receiver, prefix):
|
||||||
@ -184,7 +182,7 @@ def mail_daily_digest(session, sender, receiver, prefix):
|
|||||||
link = entry.resolvedlink
|
link = entry.resolvedlink
|
||||||
try:
|
try:
|
||||||
body = body + '=> %s - %s\n' % (entry.firstfetched.strftime('%y%m%d-%H%M'), feedinfo.title)
|
body = body + '=> %s - %s\n' % (entry.firstfetched.strftime('%y%m%d-%H%M'), feedinfo.title)
|
||||||
body = body + ' %s\n' % entry.title.decode('latin-1')
|
body = body + ' %s\n' % entry.title
|
||||||
body = body + '%s\n' % get_entry_text(entry)[0:100]
|
body = body + '%s\n' % get_entry_text(entry)[0:100]
|
||||||
body = body + '%s\n\n' % link
|
body = body + '%s\n\n' % link
|
||||||
except:
|
except:
|
||||||
@ -282,9 +280,7 @@ def process_feed_entry(session, feed, entry):
|
|||||||
h2t.body_width = 0
|
h2t.body_width = 0
|
||||||
if feed.html2textignoreimages:
|
if feed.html2textignoreimages:
|
||||||
h2t.ignore_images = True
|
h2t.ignore_images = True
|
||||||
summary = thisentry.summary.decode('latin-1')
|
thisentry.summary = h2t.handle(thisentry.summary)
|
||||||
summary = h2t.handle(summary)
|
|
||||||
thisentry.summary = summary.encode('latin-1')
|
|
||||||
feed.entry.append(thisentry)
|
feed.entry.append(thisentry)
|
||||||
session.commit()
|
session.commit()
|
||||||
return 1
|
return 1
|
||||||
@ -330,7 +326,7 @@ if __name__ == '__main__':
|
|||||||
config = ConfigParser.ConfigParser()
|
config = ConfigParser.ConfigParser()
|
||||||
config.read('atomstrom.conf')
|
config.read('atomstrom.conf')
|
||||||
|
|
||||||
dbconnectstring = '%s://%s:%s@%s/%s' % (
|
dbconnectstring = '%s://%s:%s@%s/%s?charset=utf8' % (
|
||||||
config.get('database', 'engine'),
|
config.get('database', 'engine'),
|
||||||
config.get('database', 'user'),
|
config.get('database', 'user'),
|
||||||
config.get('database', 'password'),
|
config.get('database', 'password'),
|
||||||
|
Loading…
Reference in New Issue
Block a user