initial commit
commit aaf0d493ed

atomstrom.py (executable file, 141 lines added)

@@ -0,0 +1,141 @@
#!/usr/bin/env python

from sqlalchemy import create_engine, Table, Column, Integer, Text, Boolean, DateTime, MetaData, ForeignKey
from sqlalchemy.orm import sessionmaker, relation, backref
from sqlalchemy.ext.declarative import declarative_base
import datetime
import feedparser
import re

Base = declarative_base()

# A subscribed feed and its fetch settings.
class Feed(Base):
    __tablename__ = 'feed'

    id = Column(Integer, primary_key=True)
    url = Column(Text)
    daily = Column(Boolean)
    readability = Column(Boolean)
    enabled = Column(Boolean)

    def __init__(self, url, daily, readability, enabled):
        self.url = url
        self.daily = daily
        self.readability = readability
        self.enabled = enabled

    def __repr__(self):
        return "<Feed('%s','%s','%s')>" % (self.url, self.daily, self.readability)


# Per-feed metadata taken from the feedparser result, refreshed on every fetch.
class Feedinfo(Base):
    __tablename__ = 'feedinfo'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    feed = relation("Feed", backref=backref('feedinfo', uselist=False))
    title = Column(Text)
    link = Column(Text)
    subtitle = Column(Text)
    author = Column(Text)
    publisher = Column(Text)
    status = Column(Integer)
    version = Column(Text)
    encoding = Column(Text)
    bozo = Column(Integer)
    lastfetched = Column(DateTime)
    lastsuccessful = Column(DateTime)

    def __init__(self, parser):
        self.update(parser)

    def __repr__(self):
        return "<Feedinfo('%s','%s','%s')>" % (self.title, self.subtitle, self.author)

    def update(self, parser):
        if parser.feed.has_key('title'):
            self.title = parser.feed.get('title').encode('latin-1', 'replace')
        if parser.feed.has_key('link'):
            self.link = parser.feed.get('link')
        if parser.feed.has_key('subtitle'):
            self.subtitle = parser.feed.get('subtitle').encode('latin-1', 'replace')
        if parser.feed.has_key('author'):
            self.author = parser.feed.get('author').encode('latin-1', 'replace')
        if parser.feed.has_key('publisher'):
            # the publisher goes into its own column
            self.publisher = parser.feed.get('publisher').encode('latin-1', 'replace')
        self.status = parser.get('status')
        self.version = parser.get('version')
        self.encoding = parser.get('encoding')
        self.bozo = parser.get('bozo')
        self.lastfetched = datetime.datetime.now()
        if parser.get('status') == 200:
            self.lastsuccessful = datetime.datetime.now()


# A single entry (article) belonging to a feed.
class Entry(Base):
    __tablename__ = 'entry'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    feed = relation("Feed", backref=backref('entry'))
    title = Column(Text)
    link = Column(Text)
    summary = Column(Text)
    content = Column(Text)
    author = Column(Text)
    enclosures = Column(Text)
    lastfetched = Column(DateTime)

    def __init__(self, entry):
        self.update(entry)

    def __repr__(self):
        return "<Entry('%s','%s','%s')>" % (self.title, "", "")

    def update(self, entry):
        if entry.has_key('title'):
            self.title = entry.get('title').encode('latin-1', 'replace')
        if entry.has_key('link'):
            self.link = entry.get('link').encode('latin-1', 'replace')
        if entry.has_key('summary'):
            self.summary = entry.get('summary').encode('latin-1', 'replace')
        if entry.has_key('content'):
            # feedparser exposes 'content' as a list of content objects
            self.content = entry.get('content')[0].value.encode('latin-1', 'replace')
        if entry.has_key('author'):
            self.author = entry.get('author').encode('latin-1', 'replace')
        if entry.has_key('enclosures'):
            # 'enclosures' is a list; keep the enclosure URLs in one text field
            self.enclosures = ' '.join(enc.get('href', '') for enc in entry.get('enclosures')).encode('latin-1', 'replace')
        self.lastfetched = datetime.datetime.now()


engine = create_engine('mysql://atomstrom:mdRTR4b8PLDqRSA4@localhost/atomstrom')
Base.metadata.create_all(engine)

Session = sessionmaker(bind=engine)
session = Session()

#session.add(Feed('http://www.heise.de/newsticker/heise-atom.xml', 1, 0, 0))
#session.add(Feed('http://blog.schatenseite.de/feed/', 1, 0, 1))

for feed in session.query(Feed).filter_by(enabled=1).order_by(Feed.id):
    print "fetching %s" % feed.url
    parser = feedparser.parse(feed.url)

    # update the per-feed metadata, creating it on the first fetch
    query = session.query(Feedinfo).filter(Feedinfo.feed_id==feed.id)
    try:
        feed.feedinfo = query.one()
        feed.feedinfo.update(parser)
    except Exception, e:
        feed.feedinfo = Feedinfo(parser)

    # store new entries, update entries already seen (matched by title)
    for entry in parser.entries:
        query = session.query(Entry).filter_by(feed_id=feed.id, title=entry.title.encode('latin-1', 'replace'))
        try:
            thisentry = query.one()
            thisentry.update(entry)
        except Exception, e:
            feed.entry.append(Entry(entry))

    print

session.commit()
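To seed the database with a first feed, the commented-out session.add() calls above show the intended usage. A minimal sketch along the same lines, assuming the MySQL database from the connection string exists and the script's Feed model and session are in scope:

    session.add(Feed('http://blog.schatenseite.de/feed/', 1, 0, 1))  # url, daily, readability, enabled
    session.commit()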
feedparser.py (normal file, 2858 lines added)

File diff suppressed because it is too large.