initial commit
This commit is contained in:
		
							
								
								
									
										141
									
								
								atomstrom.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										141
									
								
								atomstrom.py
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,141 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from sqlalchemy import create_engine, Table, Column, Integer, Text, Boolean, DateTime, MetaData, ForeignKey
 | 
				
			||||||
 | 
					from sqlalchemy.orm import sessionmaker, relation, backref
 | 
				
			||||||
 | 
					from sqlalchemy.ext.declarative import declarative_base
 | 
				
			||||||
 | 
					import datetime
 | 
				
			||||||
 | 
					import feedparser
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Declarative base class; every mapped model below (Feed, Feedinfo, Entry)
					# subclasses it, and Base.metadata is later used to create the tables.
					Base = declarative_base()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Feed(Base):
					    """A feed to be fetched, mapped to the 'feed' table.

					    Only rows with a true `enabled` value are picked up by the fetch loop
					    at the bottom of this file. `daily` and `readability` are stored as
					    given; nothing in this file reads them.
					    """

					    __tablename__ = 'feed'

					    id = Column(Integer, primary_key=True)
					    url = Column(Text)
					    daily = Column(Boolean)
					    readability = Column(Boolean)
					    enabled = Column(Boolean)

					    def __init__(self, url, daily, readability, enabled):
					        """Store the feed URL and the three flag values unchanged."""
					        self.url = url
					        self.daily = daily
					        self.readability = readability
					        self.enabled = enabled

					    def __repr__(self):
					        """Return a debug string showing url, daily and readability."""
					        shown = (self.url, self.daily, self.readability)
					        return "<Feed('%s','%s','%s')>" % shown
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Feedinfo(Base):
					    """Feed-level metadata and fetch status, mapped to the 'feedinfo' table.

					    Each row belongs to one Feed (via `feed_id`) and is filled from a
					    feedparser result by `update()`.
					    """

					    __tablename__ = 'feedinfo'

					    id = Column(Integer, primary_key=True)
					    feed_id = Column(Integer, ForeignKey('feed.id'))
					    # uselist=False makes Feed.feedinfo a single object instead of a list.
					    feed = relation("Feed", backref=backref('feedinfo', uselist=False))
					    title = Column(Text)
					    link = Column(Text)
					    subtitle = Column(Text)
					    author = Column(Text)
					    publisher = Column(Text)
					    status = Column(Integer)
					    version = Column(Text)
					    encoding = Column(Text)
					    bozo = Column(Integer)
					    lastfetched = Column(DateTime)
					    lastsuccessful = Column(DateTime)

					    def __init__(self, parser):
					        """Create the row from a feedparser result (see `update`)."""
					        self.update(parser)

					    def __repr__(self):
					        """Return a debug string showing title, subtitle and author."""
					        return "<Feedinfo('%s','%s','%s')>" % (self.title, self.subtitle, self.author)

					    def update(self, parser):
					        """Copy feed metadata and fetch status from a feedparser result.

					        `parser` is the object returned by `feedparser.parse()`. Text
					        fields that are present in `parser.feed` are stored encoded as
					        latin-1 (unencodable characters replaced); fields that are absent
					        keep their previous value. `lastfetched` is always set to the
					        current time, `lastsuccessful` only when the HTTP status is 200.
					        """
					        feed_data = parser.feed

					        # Each text field goes to the column of the same name. The
					        # original stored 'publisher' into `author`, leaving the
					        # `publisher` column permanently empty.
					        for name in ('title', 'subtitle', 'author', 'publisher'):
					            value = feed_data.get(name)
					            if value is not None:
					                setattr(self, name, value.encode('latin-1', 'replace'))

					        # The link is stored as delivered, without re-encoding.
					        link = feed_data.get('link')
					        if link is not None:
					            self.link = link

					        self.status = parser.get('status')
					        self.version = parser.get('version')
					        self.encoding = parser.get('encoding')
					        self.bozo = parser.get('bozo')

					        # One timestamp for both columns so a successful fetch records
					        # identical lastfetched / lastsuccessful values.
					        now = datetime.datetime.now()
					        self.lastfetched = now
					        if self.status == 200:
					            self.lastsuccessful = now
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Entry(Base):
					    """A single feed entry, mapped to the 'entry' table.

					    Each row belongs to one Feed (via `feed_id`; reachable as
					    `Feed.entry`) and is filled from a feedparser entry by `update()`.
					    """

					    __tablename__ = 'entry'

					    id = Column(Integer, primary_key=True)
					    feed_id = Column(Integer, ForeignKey('feed.id'))
					    feed = relation("Feed", backref=backref('entry'))
					    title = Column(Text)
					    link = Column(Text)
					    summary = Column(Text)
					    content = Column(Text)
					    author = Column(Text)
					    enclosures = Column(Text)
					    lastfetched = Column(DateTime)

					    def __init__(self, entry):
					        """Create the row from a feedparser entry (see `update`)."""
					        self.update(entry)

					    def __repr__(self):
					        """Return a debug string showing the title."""
					        return "<Entry('%s','%s','%s')>" % (self.title, "", "")

					    @staticmethod
					    def _as_text(value, key):
					        """Return `value` as one string.

					        A string is returned unchanged. Any other value is treated as a
					        sequence of dict-like items; the non-empty `item.get(key)` values
					        are joined with newlines.
					        """
					        if hasattr(value, 'encode'):
					            return value
					        parts = [item.get(key) for item in value]
					        return '\n'.join([part for part in parts if part])

					    def update(self, entry):
					        """Copy the fields of a feedparser entry into this row.

					        Text fields present in `entry` are stored encoded as latin-1
					        (unencodable characters replaced); absent fields keep their
					        previous value. `lastfetched` is always set to the current time.
					        """
					        for name in ('title', 'link', 'summary', 'author'):
					            value = entry.get(name)
					            if value is not None:
					                setattr(self, name, value.encode('latin-1', 'replace'))

					        # feedparser delivers 'content' and 'enclosures' as lists of
					        # dicts, so calling .encode() on them directly raises
					        # AttributeError. Flatten them to text first.
					        # NOTE(review): storing the content 'value's and the enclosure
					        # 'href's newline-joined is a choice made here -- confirm it
					        # suits the consumers of these columns.
					        content = entry.get('content')
					        if content is not None:
					            self.content = self._as_text(content, 'value').encode('latin-1', 'replace')

					        enclosures = entry.get('enclosures')
					        if enclosures is not None:
					            self.enclosures = self._as_text(enclosures, 'href').encode('latin-1', 'replace')

					        self.lastfetched = datetime.datetime.now()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os

					# Database connection. ATOMSTROM_DB_URL overrides the built-in URL.
					# NOTE(review): the fallback URL embeds the database password in source
					# control; rotate that credential and drop the fallback once deployments
					# set the environment variable.
					engine = create_engine(os.environ.get(
					    'ATOMSTROM_DB_URL',
					    'mysql://atomstrom:mdRTR4b8PLDqRSA4@localhost/atomstrom'))
					# Create any tables that do not exist yet.
					Base.metadata.create_all(engine)

					Session = sessionmaker(bind=engine)
					session = Session()

					# Examples of registering feeds (url, daily, readability, enabled):
					#session.add(Feed('http://www.heise.de/newsticker/heise-atom.xml', 1, 0, 0))
					#session.add(Feed('http://blog.schatenseite.de/feed/', 1, 0, 1))

					# Fetch every enabled feed, refresh its Feedinfo row and insert or update
					# its entries. Everything is committed in one transaction at the end.
					for feed in session.query(Feed).filter_by(enabled=1).order_by(Feed.id):
					    print("fetching %s" % feed.url)
					    parser = feedparser.parse(feed.url)

					    # Look the row up explicitly instead of treating any exception from
					    # .one() as "not found", which also hid genuine database errors.
					    feedinfo = session.query(Feedinfo).filter(Feedinfo.feed_id == feed.id).first()
					    if feedinfo is None:
					        feed.feedinfo = Feedinfo(parser)
					    else:
					        feedinfo.update(parser)

					    for entry in parser.entries:
					        # Entries are matched by (feed, title); the title is encoded the
					        # same way Entry.update() stores it. An entry without a title
					        # matches rows whose title is NULL.
					        title = entry.get('title')
					        if title is not None:
					            title = title.encode('latin-1', 'replace')

					        thisentry = session.query(Entry).filter_by(feed_id=feed.id, title=title).first()
					        if thisentry is None:
					            feed.entry.append(Entry(entry))
					        else:
					            thisentry.update(entry)

					    # Blank line between feeds in the console output.
					    print("")

					session.commit()
 | 
				
			||||||
							
								
								
									
										2858
									
								
								feedparser.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2858
									
								
								feedparser.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user