#!/usr/bin/env python
"""Fetch all enabled feeds and mirror their metadata and entries into a database.

The module defines three declarative models (``Feed``, ``Feedinfo``, ``Entry``)
and, when executed, walks over every enabled ``Feed`` row, parses its URL with
``feedparser`` and inserts or refreshes the matching ``Feedinfo`` and ``Entry``
rows before committing once at the end.
"""

import datetime
import os
import re

import feedparser
from sqlalchemy import create_engine, Table, Column, Integer, Text, Boolean, DateTime, MetaData, ForeignKey
from sqlalchemy.orm import sessionmaker, relation, backref
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


def _latin1(value):
    """Return *value* encoded as latin-1 (unencodable characters replaced).

    ``None`` is passed through unchanged so optional fields stay NULL.
    """
    if value is None:
        return None
    return value.encode('latin-1', 'replace')


def _content_text(content):
    """Flatten a feedparser ``content`` value into a single text string.

    feedparser delivers ``entry.content`` as a list of dicts that carry the
    text under the ``value`` key; a plain string is returned as-is.
    """
    if isinstance(content, (list, tuple)):
        return u''.join(item.get('value') or u'' for item in content)
    return content


def _enclosure_text(enclosures):
    """Flatten a feedparser ``enclosures`` value into newline-separated hrefs.

    feedparser delivers ``entry.enclosures`` as a list of dicts with an
    ``href`` key; a plain string is returned as-is.
    """
    if isinstance(enclosures, (list, tuple)):
        return u'\n'.join(item.get('href') for item in enclosures if item.get('href'))
    return enclosures


class Feed(Base):
    """A subscribed feed URL together with its processing flags."""

    __tablename__ = 'feed'

    id = Column(Integer, primary_key=True)
    url = Column(Text)
    daily = Column(Boolean)
    readability = Column(Boolean)
    enabled = Column(Boolean)

    def __init__(self, url, daily, readability, enabled):
        self.url = url
        self.daily = daily
        self.readability = readability
        self.enabled = enabled

    def __repr__(self):
        # The original format string was empty, which made repr() raise
        # TypeError ("not all arguments converted").
        return "<Feed('%s','%s','%s')>" % (self.url, self.daily, self.readability)


class Feedinfo(Base):
    """Feed-level metadata and fetch status of the last parse of a ``Feed``."""

    __tablename__ = 'feedinfo'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    feed = relation("Feed", backref=backref('feedinfo', uselist=False))
    title = Column(Text)
    link = Column(Text)
    subtitle = Column(Text)
    author = Column(Text)
    publisher = Column(Text)
    status = Column(Integer)
    version = Column(Text)
    encoding = Column(Text)
    bozo = Column(Integer)
    lastfetched = Column(DateTime)
    lastsuccessful = Column(DateTime)

    def __init__(self, parser):
        self.update(parser)

    def __repr__(self):
        return "<Feedinfo('%s','%s','%s')>" % (self.title, self.subtitle, self.author)

    def update(self, parser):
        """Copy metadata from a ``feedparser.parse()`` result onto this row.

        Text fields missing from the parsed feed keep their previous value.
        ``lastfetched`` is always refreshed; ``lastsuccessful`` only when the
        reported HTTP status is 200.
        """
        # 'publisher' used to be written into self.author, clobbering the
        # author and leaving the publisher column unset.
        for key in ('title', 'subtitle', 'author', 'publisher'):
            value = parser.feed.get(key)
            if value is not None:
                setattr(self, key, _latin1(value))

        # The link is stored as delivered (it was never re-encoded).
        link = parser.feed.get('link')
        if link is not None:
            self.link = link

        self.status = parser.get('status')
        self.version = parser.get('version')
        self.encoding = parser.get('encoding')
        self.bozo = parser.get('bozo')

        now = datetime.datetime.now()
        self.lastfetched = now
        if self.status == 200:
            self.lastsuccessful = now


class Entry(Base):
    """A single item of a ``Feed`` as of the last fetch."""

    __tablename__ = 'entry'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    feed = relation("Feed", backref=backref('entry'))
    title = Column(Text)
    link = Column(Text)
    summary = Column(Text)
    content = Column(Text)
    author = Column(Text)
    enclosures = Column(Text)
    lastfetched = Column(DateTime)

    def __init__(self, entry):
        self.update(entry)

    def __repr__(self):
        return "<Entry('%s','%s','%s')>" % (self.title, "", "")

    def update(self, entry):
        """Copy the fields of a parsed feedparser entry onto this row.

        Fields missing from *entry* keep their previous value; ``lastfetched``
        is always refreshed.
        """
        for key in ('title', 'link', 'summary', 'author'):
            value = entry.get(key)
            if value is not None:
                setattr(self, key, _latin1(value))

        # content and enclosures arrive as lists of dicts, so they have to be
        # flattened to text before they can be encoded and stored.
        content = entry.get('content')
        if content is not None:
            self.content = _latin1(_content_text(content))

        enclosures = entry.get('enclosures')
        if enclosures is not None:
            self.enclosures = _latin1(_enclosure_text(enclosures))

        self.lastfetched = datetime.datetime.now()


# SECURITY NOTE(review): the default URL embeds the database password in
# source control. Set ATOMSTROM_DB_URL to override it, and consider rotating
# the credential and dropping the default.
engine = create_engine(os.environ.get(
    'ATOMSTROM_DB_URL',
    'mysql://atomstrom:mdRTR4b8PLDqRSA4@localhost/atomstrom'))
Base.metadata.create_all(engine)

Session = sessionmaker(bind=engine)
session = Session()

# Uncomment to seed the feed table:
#session.add(Feed('http://www.heise.de/newsticker/heise-atom.xml', 1, 0, 0))
#session.add(Feed('http://blog.schatenseite.de/feed/', 1, 0, 1))

for feed in session.query(Feed).filter_by(enabled=1).order_by(Feed.id):
    print("fetching %s" % feed.url)
    parser = feedparser.parse(feed.url)

    # first() returns None when no row exists, so a missing Feedinfo no longer
    # has to be detected by catching every possible exception.
    feedinfo = session.query(Feedinfo).filter(Feedinfo.feed_id == feed.id).first()
    if feedinfo is None:
        feed.feedinfo = Feedinfo(parser)
    else:
        feed.feedinfo = feedinfo
        feedinfo.update(parser)

    for entry in parser.entries:
        # Entries are matched by (feed, title); .get() tolerates entries
        # without a title instead of raising AttributeError.
        title = _latin1(entry.get('title'))
        thisentry = session.query(Entry).filter_by(feed_id=feed.id, title=title).first()
        if thisentry is None:
            feed.entry.append(Entry(entry))
        else:
            thisentry.update(entry)

    # Blank line after each feed's output.
    print("")

session.commit()