Move the SQLAlchemy models (Feed, Feedinfo, Entry) out of atomstrom.py into
a new "models" package and wrap stdout in a UTF-8 stream writer.

Review fixes applied to the added lines (same-line edits, hunk sizes unchanged;
the recorded index hashes for models/entry.py and models/feedinfo.py are left
as they were):
  * models/entry.py: re-enable "import pprint", which Entry.update() uses.
  * models/feedinfo.py: store the feed publisher in self.publisher, not
    self.author.

diff --git a/atomstrom.py b/atomstrom.py
index 51818d3..1fe6109 100755
--- a/atomstrom.py
+++ b/atomstrom.py
@@ -1,20 +1,19 @@
 #!/usr/bin/env python
 #coding: utf-8
 
-from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
-from sqlalchemy.orm import sessionmaker, relation, backref
-from sqlalchemy.ext.declarative import declarative_base
+from models import Base, Feed, Feedinfo, Entry
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
 from datetime import datetime, timedelta
-from time import mktime
 import feedparser
-import re
 import sys
-import urllib
+import codecs
+#import urllib
 import urllib2
-import hn
+#import hn
 import html2text
 import ConfigParser
-import pprint
 from optparse import OptionParser
 from cStringIO import StringIO
 from email.mime.multipart import MIMEMultipart
@@ -24,129 +23,6 @@ from email import Charset
 from email import Charset
 from email.generator import Generator
 import smtplib
-Base = declarative_base()
-
-class Feed(Base):
-    __tablename__ = 'feed'
-
-    id = Column(Integer, primary_key=True)
-    url = Column(Text)
-    frequency = Column(Integer)
-    daily = Column(Boolean)
-    resolveredirects = Column(Boolean)
-    readability = Column(Boolean)
-    fullpage = Column(Boolean)
-    html2textsummary = Column(Boolean)
-    html2textignoreimages = Column(Boolean)
-    enabled = Column(Boolean)
-
-    def __init__(self, url, daily, readability, fullpage, enabled, html2textsummary):
-        self.url = url
-        self.daily = daily
-        self.readability = readability
-        self.fullpage = fullpage
-        self.html2textsummary = html2textsummary
-        self.enabled = enabled
-
-    def __repr__(self):
-        return "" % (self.url, self.daily, self.readability)
-
-
-class Feedinfo(Base):
-    __tablename__ = 'feedinfo'
-
-    id = Column(Integer, primary_key=True)
-    feed_id = Column(Integer, ForeignKey('feed.id'))
-    feed = relation("Feed", backref=backref('feedinfo', uselist=False))
-    title = Column(String(255))
-    link = Column(String(255))
-    subtitle = Column(String(255))
-    author = Column(String(255))
-    publisher = Column(String(255))
-    status = Column(Integer)
-    version = Column(String(16))
-    encoding = Column(String(16))
-    bozo = Column(Integer)
-
-    lastfetched = Column(DateTime)
-    lastsuccessful = Column(DateTime)
-
-    def __init__(self, parser):
-        self.update(parser)
-
-    def __repr__(self):
-        return "" % (self.title, self.subtitle, self.author)
-
-    def update(self, parser):
-        if parser.feed.has_key('title'):
-            self.title = parser.feed.get('title')
-        if parser.feed.has_key('link'):
-            self.link = parser.feed.get('link')
-        if parser.feed.has_key('subtitle'):
-            self.subtitle = parser.feed.get('subtitle')
-        if parser.feed.has_key('author'):
-            self.author = parser.feed.get('author')
-        if parser.feed.has_key('publisher'):
-            self.author = parser.feed.get('publisher')
-        self.status = parser.get('status')
-        self.version = parser.get('version')
-        self.encoding = parser.get('encoding')
-        self.bozo = parser.get('bozo')
-        self.lastfetched = datetime.now()
-        if parser.get('status') == 200 or parser.get('status') == 302:
-            self.lastsuccessful = datetime.now()
-
-
-class Entry(Base):
-    __tablename__ = 'entry'
-
-    id = Column(Integer, primary_key=True)
-    feed_id = Column(Integer, ForeignKey('feed.id'))
-    feed = relation("Feed", backref=backref('entry'))
-    title = Column(String(255))
-    link = Column(String(255))
-    summary = Column(Text)
-    content = Column(Text)
-    author = Column(String(255))
-    enclosures = Column(Text)
-
-    resolvedlink = Column(String(255))
-    fullpage = Column(Text)
-    readability = Column(Text)
-    updated = Column(DateTime)
-    firstfetched = Column(DateTime)
-    lastfetched = Column(DateTime)
-    sent = Column(DateTime)
-
-    def __init__(self, entry):
-        self.update(entry)
-        self.firstfetched = datetime.now()
-
-    def __repr__(self):
-        return "" % (self.title, "", "")
-
-    def update(self, entry):
-        if entry.has_key('title'):
-            self.title = entry.get('title')
-        if entry.has_key('link'):
-            self.link = entry.get('link')
-        if entry.has_key('summary'):
-            self.summary = entry.get('summary')
-        if entry.has_key('content'):
-            self.content = entry.get('content')[0].value
-        if entry.has_key('author'):
-            self.author = entry.get('author')
-        if entry.has_key('updated_parsed'):
-            updated_parsed = entry.get('updated_parsed')
-            self.updated = datetime.fromtimestamp(mktime(updated_parsed))
-        if entry.has_key('enclosures') and len(entry.get('enclosures')) > 0:
-            print 'enclosures';
-            pp=pprint.PrettyPrinter(depth=4)
-            pp.pprint(entry.get('enclosures'))
-            #self.enclosures = entry.get('enclosures')
-        self.lastfetched = datetime.now()
-
-
 def send_mail(sender, receiver, subject, body):
     print 'sending to %s: %s' % (receiver[0], subject)
     Charset.add_charset('utf-8', Charset.QP, Charset.QP, 'utf-8')
@@ -297,6 +173,7 @@ def fetch_single_feed(session, feed):
     print 'processing %s' % feed.url
     query = session.query(Feedinfo).filter(Feedinfo.feed_id==feed.id)
     fetched = False
+    # TODO: remove exception, see above
     try:
         feed.feedinfo = query.one()
         nextfetch = (feed.feedinfo.lastfetched + timedelta(minutes=feed.frequency))
@@ -331,6 +208,9 @@ def fetch_all_feeds(session):
         print
 
 if __name__ == '__main__':
+    streamWriter = codecs.lookup('utf-8')[-1]
+    sys.stdout = streamWriter(sys.stdout)
+
     config = ConfigParser.ConfigParser()
     config.read('atomstrom.conf')
 
diff --git a/models/__init__.py b/models/__init__.py
new file mode 100644
index 0000000..c31de8f
--- /dev/null
+++ b/models/__init__.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+#coding: utf-8
+
+#from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
+#from sqlalchemy.orm import sessionmaker, relation, backref
+from sqlalchemy.ext.declarative import declarative_base
+#from datetime import datetime, timedelta
+#from time import mktime
+#import feedparser
+#import re
+#import sys
+#import urllib
+#import urllib2
+#import hn
+#import html2text
+#import ConfigParser
+#import pprint
+#from optparse import OptionParser
+#from cStringIO import StringIO
+#from email.mime.multipart import MIMEMultipart
+#from email.mime.text import MIMEText
+#from email.header import Header
+#from email import Charset
+#from email.generator import Generator
+#import smtplib
+
+Base = declarative_base()
+
+from models.feed import Feed
+from models.feedinfo import Feedinfo
+from models.entry import Entry
diff --git a/models/entry.py b/models/entry.py
new file mode 100644
index 0000000..f37b113
--- /dev/null
+++ b/models/entry.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+#coding: utf-8
+
+#from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
+from sqlalchemy import Column, Integer, ForeignKey, String, Text, DateTime
+#from sqlalchemy.orm import sessionmaker, relation, backref
+from sqlalchemy.orm import relation, backref
+#from sqlalchemy.ext.declarative import declarative_base
+#from datetime import datetime, timedelta
+from datetime import datetime
+from time import mktime
+#import feedparser
+#import re
+#import sys
+#import urllib
+#import urllib2
+#import hn
+#import html2text
+#import ConfigParser
+import pprint  # required: Entry.update() pretty-prints enclosures
+#from optparse import OptionParser
+#from cStringIO import StringIO
+#from email.mime.multipart import MIMEMultipart
+#from email.mime.text import MIMEText
+#from email.header import Header
+#from email import Charset
+#from email.generator import Generator
+#import smtplib
+
+from models import Base
+
+class Entry(Base):
+    __tablename__ = 'entry'
+
+    id = Column(Integer, primary_key=True)
+    feed_id = Column(Integer, ForeignKey('feed.id'))
+    feed = relation("Feed", backref=backref('entry'))
+    title = Column(String(255))
+    link = Column(String(255))
+    summary = Column(Text)
+    content = Column(Text)
+    author = Column(String(255))
+    enclosures = Column(Text)
+
+    resolvedlink = Column(String(255))
+    fullpage = Column(Text)
+    readability = Column(Text)
+    updated = Column(DateTime)
+    firstfetched = Column(DateTime)
+    lastfetched = Column(DateTime)
+    sent = Column(DateTime)
+
+    def __init__(self, entry):
+        self.update(entry)
+        self.firstfetched = datetime.now()
+
+    def __repr__(self):
+        return "" % (self.title, "", "")
+
+    def update(self, entry):
+        if entry.has_key('title'):
+            self.title = entry.get('title')
+        if entry.has_key('link'):
+            self.link = entry.get('link')
+        if entry.has_key('summary'):
+            self.summary = entry.get('summary')
+        if entry.has_key('content'):
+            self.content = entry.get('content')[0].value
+        if entry.has_key('author'):
+            self.author = entry.get('author')
+        if entry.has_key('updated_parsed'):
+            updated_parsed = entry.get('updated_parsed')
+            self.updated = datetime.fromtimestamp(mktime(updated_parsed))
+        if entry.has_key('enclosures') and len(entry.get('enclosures')) > 0:
+            print 'enclosures';
+            pp=pprint.PrettyPrinter(depth=4)
+            pp.pprint(entry.get('enclosures'))
+            #self.enclosures = entry.get('enclosures')
+        self.lastfetched = datetime.now()
diff --git a/models/feed.py b/models/feed.py
new file mode 100644
index 0000000..4f9c908
--- /dev/null
+++ b/models/feed.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+#coding: utf-8
+
+#from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
+from sqlalchemy import Column, Integer, String, Boolean
+#from sqlalchemy.orm import sessionmaker, relation, backref
+#from sqlalchemy.ext.declarative import declarative_base
+#from datetime import datetime, timedelta
+#from time import mktime
+#import feedparser
+#import re
+#import sys
+#import urllib
+#import urllib2
+#import hn
+#import html2text
+#import ConfigParser
+#import pprint
+#from optparse import OptionParser
+#from cStringIO import StringIO
+#from email.mime.multipart import MIMEMultipart
+#from email.mime.text import MIMEText
+#from email.header import Header
+#from email import Charset
+#from email.generator import Generator
+#import smtplib
+
+from models import Base
+
+class Feed(Base):
+    __tablename__ = 'feed'
+
+    id = Column(Integer, primary_key=True)
+    url = Column(String(255))
+    frequency = Column(Integer)
+    daily = Column(Boolean)
+    resolveredirects = Column(Boolean)
+    readability = Column(Boolean)
+    fullpage = Column(Boolean)
+    html2textsummary = Column(Boolean)
+    html2textignoreimages = Column(Boolean)
+    enabled = Column(Boolean)
+
+    def __init__(self, url, daily, readability, fullpage, enabled, html2textsummary):
+        self.url = url
+        self.daily = daily
+        self.readability = readability
+        self.fullpage = fullpage
+        self.html2textsummary = html2textsummary
+        self.enabled = enabled
+
+    def __repr__(self):
+        return "" % (self.url, self.daily, self.readability)
diff --git a/models/feedinfo.py b/models/feedinfo.py
new file mode 100644
index 0000000..cbf4def
--- /dev/null
+++ b/models/feedinfo.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+#coding: utf-8
+
+#from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
+from sqlalchemy import Column, Integer, ForeignKey, String, DateTime
+#from sqlalchemy.orm import sessionmaker, relation, backref
+from sqlalchemy.orm import relation, backref
+#from sqlalchemy.ext.declarative import declarative_base
+#from datetime import datetime, timedelta
+from datetime import datetime
+#from time import mktime
+#import feedparser
+#import re
+#import sys
+#import urllib
+#import urllib2
+#import hn
+#import html2text
+#import ConfigParser
+#import pprint
+#from optparse import OptionParser
+#from cStringIO import StringIO
+#from email.mime.multipart import MIMEMultipart
+#from email.mime.text import MIMEText
+#from email.header import Header
+#from email import Charset
+#from email.generator import Generator
+#import smtplib
+
+from models import Base
+
+class Feedinfo(Base):
+    __tablename__ = 'feedinfo'
+
+    id = Column(Integer, primary_key=True)
+    feed_id = Column(Integer, ForeignKey('feed.id'))
+    feed = relation("Feed", backref=backref('feedinfo', uselist=False))
+    title = Column(String(255))
+    link = Column(String(255))
+    subtitle = Column(String(255))
+    author = Column(String(255))
+    publisher = Column(String(255))
+    status = Column(Integer)
+    version = Column(String(16))
+    encoding = Column(String(16))
+    bozo = Column(Integer)
+
+    lastfetched = Column(DateTime)
+    lastsuccessful = Column(DateTime)
+
+    def __init__(self, parser):
+        self.update(parser)
+
+    def __repr__(self):
+        return "" % (self.title, self.subtitle, self.author)
+
+    def update(self, parser):
+        if parser.feed.has_key('title'):
+            self.title = parser.feed.get('title')
+        if parser.feed.has_key('link'):
+            self.link = parser.feed.get('link')
+        if parser.feed.has_key('subtitle'):
+            self.subtitle = parser.feed.get('subtitle')
+        if parser.feed.has_key('author'):
+            self.author = parser.feed.get('author')
+        if parser.feed.has_key('publisher'):
+            self.publisher = parser.feed.get('publisher')  # was written to self.author, clobbering it
+        self.status = parser.get('status')
+        self.version = parser.get('version')
+        self.encoding = parser.get('encoding')
+        self.bozo = parser.get('bozo')
+        self.lastfetched = datetime.now()
+        if parser.get('status') == 200 or parser.get('status') == 302:
+            self.lastsuccessful = datetime.now()