Atomstrom/atomstrom.py
2010-10-26 23:02:37 +02:00

142 lines
4.7 KiB
Python
Executable File

#!/usr/bin/env python
from sqlalchemy import create_engine, Table, Column, Integer, Text, Boolean, DateTime, MetaData, ForeignKey
from sqlalchemy.orm import sessionmaker, relation, backref
from sqlalchemy.ext.declarative import declarative_base
import datetime
import feedparser
import re
# Declarative base class that the ORM models in this file (Feed, Feedinfo,
# Entry) inherit from; its metadata is what create_all() is called on below.
Base = declarative_base()
class Feed(Base):
    """A feed URL together with its per-feed flags.

    Mapped to the ``feed`` table.
    """

    __tablename__ = 'feed'

    id = Column(Integer, primary_key=True)
    url = Column(Text)
    # Boolean switches stored per feed.  Only ``enabled`` is consulted by the
    # fetch loop in this file; NOTE(review): the meaning of ``daily`` and
    # ``readability`` is not visible here -- confirm against their consumers.
    daily = Column(Boolean)
    readability = Column(Boolean)
    enabled = Column(Boolean)

    def __init__(self, url, daily, readability, enabled):
        """Create a feed record from its URL and the three flag values."""
        self.url, self.daily, self.readability, self.enabled = (
            url, daily, readability, enabled)

    def __repr__(self):
        """Return a short debug representation (url, daily, readability)."""
        shown = (self.url, self.daily, self.readability)
        return "<Feed('%s','%s','%s')>" % shown
class Feedinfo(Base):
    """Feed-level metadata and the outcome of the most recent fetch.

    Mapped to the ``feedinfo`` table.  Each row references one ``Feed`` and is
    reachable from it as the scalar attribute ``feed.feedinfo``.
    """

    __tablename__ = 'feedinfo'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    feed = relation("Feed", backref=backref('feedinfo', uselist=False))
    title = Column(Text)
    link = Column(Text)
    subtitle = Column(Text)
    author = Column(Text)
    publisher = Column(Text)
    status = Column(Integer)
    version = Column(Text)
    encoding = Column(Text)
    bozo = Column(Integer)
    lastfetched = Column(DateTime)
    lastsuccessful = Column(DateTime)

    def __init__(self, parser):
        """Create the record from a parse result (see ``update``)."""
        self.update(parser)

    def __repr__(self):
        """Return a short debug representation (title, subtitle, author)."""
        return "<Feedinfo('%s','%s','%s')>" % (self.title, self.subtitle, self.author)

    def update(self, parser):
        """Copy feed metadata and fetch status from a parse result.

        ``parser`` is expected to offer ``.feed`` (a dict-like object with the
        feed-level fields) and ``.get()`` for ``status``, ``version``,
        ``encoding`` and ``bozo``.  Feed-level fields that are absent from
        ``parser.feed`` leave the stored value untouched.
        """
        feed = parser.feed

        # Text fields are stored latin-1 encoded; characters that cannot be
        # represented are substituted by the 'replace' error handler.
        # Each field is written to the attribute of the same name, so that
        # 'publisher' no longer overwrites 'author'.
        for name in ('title', 'subtitle', 'author', 'publisher'):
            if name in feed:
                setattr(self, name, feed.get(name).encode('latin-1', 'replace'))
        if 'link' in feed:
            self.link = feed.get('link')

        self.status = parser.get('status')
        self.version = parser.get('version')
        self.encoding = parser.get('encoding')
        self.bozo = parser.get('bozo')

        # One timestamp for both columns so they agree on a successful fetch.
        now = datetime.datetime.now()
        self.lastfetched = now
        if self.status == 200:
            self.lastsuccessful = now
class Entry(Base):
    """A single feed entry.

    Mapped to the ``entry`` table.  Each row references one ``Feed``; the
    entries of a feed are available as the list ``feed.entry``.
    """

    __tablename__ = 'entry'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    feed = relation("Feed", backref=backref('entry'))
    title = Column(Text)
    link = Column(Text)
    summary = Column(Text)
    content = Column(Text)
    author = Column(Text)
    enclosures = Column(Text)
    lastfetched = Column(DateTime)

    def __init__(self, entry):
        """Create the record from a parsed entry (see ``update``)."""
        self.update(entry)

    def __repr__(self):
        """Return a short debug representation (only the title is filled)."""
        return "<Entry('%s','%s','%s')>" % (self.title, "", "")

    @staticmethod
    def _encode(text):
        """Encode text as latin-1, substituting unrepresentable characters."""
        return text.encode('latin-1', 'replace')

    def update(self, entry):
        """Copy the fields of a parsed entry into this record.

        ``entry`` is a dict-like object as found in feedparser's
        ``entries`` list.  Fields that are absent leave the stored value
        untouched; ``lastfetched`` is always set to the current time.
        """
        # Plain string fields.
        for name in ('title', 'link', 'summary', 'author'):
            if name in entry:
                setattr(self, name, self._encode(entry.get(name)))

        # feedparser delivers 'content' as a list of dicts, each carrying
        # the text under 'value'; calling .encode() on the list itself would
        # raise AttributeError.  The items are alternative renderings of the
        # same content, so the first one is stored.
        if 'content' in entry:
            contents = entry.get('content')
            if contents:
                self.content = self._encode(contents[0].get('value', u''))

        # 'enclosures' is likewise a list of dicts; the URL of each attached
        # file is under 'href'.  Store the URLs one per line.
        if 'enclosures' in entry:
            hrefs = [enc.get('href') for enc in entry.get('enclosures')]
            self.enclosures = self._encode(u'\n'.join(h for h in hrefs if h))

        self.lastfetched = datetime.datetime.now()
# NOTE(review): the database credentials are hard-coded in this connection
# URL; consider moving them out of the source file.
engine = create_engine('mysql://atomstrom:mdRTR4b8PLDqRSA4@localhost/atomstrom')
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()

# Commented-out examples of registering feeds:
#session.add(Feed('http://www.heise.de/newsticker/heise-atom.xml', 1, 0, 0))
#session.add(Feed('http://blog.schatenseite.de/feed/', 1, 0, 1))

# Fetch every enabled feed, refresh its Feedinfo row and insert or update
# its entries.  All changes are committed once at the end.
for feed in session.query(Feed).filter_by(enabled=1).order_by(Feed.id):
    print("fetching %s" % feed.url)
    parser = feedparser.parse(feed.url)

    # Look the row up explicitly instead of catching every exception from
    # one(): a broad except also hid real errors raised inside update() and
    # answered them by creating yet another row.
    feedinfo = session.query(Feedinfo).filter(Feedinfo.feed_id == feed.id).first()
    if feedinfo is None:
        feed.feedinfo = Feedinfo(parser)
    else:
        feedinfo.update(parser)
        feed.feedinfo = feedinfo

    for entry in parser.entries:
        # Entries are matched by (feed, title).  An entry may lack a title;
        # in that case match on a NULL title instead of raising.
        title = entry.get('title')
        if title is not None:
            title = title.encode('latin-1', 'replace')

        known = session.query(Entry).filter_by(feed_id=feed.id, title=title).first()
        if known is None:
            feed.entry.append(Entry(entry))
        else:
            known.update(entry)

    # Blank line after each feed's output.
    print("")

session.commit()