refactored so the data model lives in its own module

This commit is contained in:
Ronald Schaten 2013-03-26 20:20:51 +01:00
parent d300cf8f4c
commit 4f1bea7a6b
5 changed files with 248 additions and 131 deletions

View File

@ -1,20 +1,19 @@
#!/usr/bin/env python #!/usr/bin/env python
#coding: utf-8 #coding: utf-8
from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc from models import Base, Feed, Feedinfo, Entry
from sqlalchemy.orm import sessionmaker, relation, backref
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from datetime import datetime, timedelta from datetime import datetime, timedelta
from time import mktime
import feedparser import feedparser
import re
import sys import sys
import urllib import codecs
#import urllib
import urllib2 import urllib2
import hn #import hn
import html2text import html2text
import ConfigParser import ConfigParser
import pprint
from optparse import OptionParser from optparse import OptionParser
from cStringIO import StringIO from cStringIO import StringIO
from email.mime.multipart import MIMEMultipart from email.mime.multipart import MIMEMultipart
@ -24,129 +23,6 @@ from email import Charset
from email.generator import Generator from email.generator import Generator
import smtplib import smtplib
# Declarative base class that the Feed, Feedinfo and Entry models below inherit from.
Base = declarative_base()
class Feed(Base):
    """ORM mapping for the ``feed`` table: one feed URL plus its option flags."""

    __tablename__ = 'feed'

    id = Column(Integer, primary_key=True)
    url = Column(Text)
    # The fetcher passes this to timedelta(minutes=...), so it is a number of minutes.
    frequency = Column(Integer)
    # Per-feed boolean options; their exact effect is decided by the calling script.
    daily = Column(Boolean)
    resolveredirects = Column(Boolean)
    readability = Column(Boolean)
    fullpage = Column(Boolean)
    html2textsummary = Column(Boolean)
    html2textignoreimages = Column(Boolean)
    enabled = Column(Boolean)

    def __init__(self, url, daily, readability, fullpage, enabled, html2textsummary):
        """Store the given URL and flags; the remaining columns are left unset."""
        initial_values = (
            ('url', url),
            ('daily', daily),
            ('readability', readability),
            ('fullpage', fullpage),
            ('html2textsummary', html2textsummary),
            ('enabled', enabled),
        )
        for attribute, value in initial_values:
            setattr(self, attribute, value)

    def __repr__(self):
        """Return a short debug representation (url, daily, readability)."""
        shown = (self.url, self.daily, self.readability)
        return "<Feed('%s','%s','%s')>" % shown
class Feedinfo(Base):
    """ORM mapping for the ``feedinfo`` table: metadata and fetch status of one feed."""

    __tablename__ = 'feedinfo'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    # uselist=False makes the reverse side (Feed.feedinfo) a single object, not a list.
    feed = relation("Feed", backref=backref('feedinfo', uselist=False))
    title = Column(String(255))
    link = Column(String(255))
    subtitle = Column(String(255))
    author = Column(String(255))
    publisher = Column(String(255))
    status = Column(Integer)
    version = Column(String(16))
    encoding = Column(String(16))
    bozo = Column(Integer)
    lastfetched = Column(DateTime)
    lastsuccessful = Column(DateTime)

    def __init__(self, parser):
        """Create the row from a parse result (see ``update``)."""
        self.update(parser)

    def __repr__(self):
        """Return a short debug representation (title, subtitle, author)."""
        return "<Feedinfo('%s','%s','%s')>" % (self.title, self.subtitle, self.author)

    def update(self, parser):
        """Copy feed-level metadata and fetch status from ``parser`` into this row.

        ``parser`` must offer ``.feed`` (with ``has_key``/``get``) and ``.get``;
        presumably it is a feedparser result -- confirm against the caller.
        Metadata fields absent from ``parser.feed`` keep their previous value.
        """
        feed_meta = parser.feed
        # 'publisher' now goes to self.publisher; it used to overwrite self.author.
        for field in ('title', 'link', 'subtitle', 'author', 'publisher'):
            if feed_meta.has_key(field):
                setattr(self, field, feed_meta.get(field))

        status = parser.get('status')
        self.status = status
        self.version = parser.get('version')
        self.encoding = parser.get('encoding')
        self.bozo = parser.get('bozo')

        # One timestamp for both columns so a successful fetch records equal values.
        now = datetime.now()
        self.lastfetched = now
        if status in (200, 302):
            self.lastsuccessful = now
class Entry(Base):
    """ORM mapping for the ``entry`` table: one item fetched from a feed."""

    __tablename__ = 'entry'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    # The backref exposes a feed's entries as ``Feed.entry``.
    feed = relation("Feed", backref=backref('entry'))
    title = Column(String(255))
    link = Column(String(255))
    summary = Column(Text)
    content = Column(Text)
    author = Column(String(255))
    enclosures = Column(Text)
    resolvedlink = Column(String(255))
    fullpage = Column(Text)
    readability = Column(Text)
    updated = Column(DateTime)
    firstfetched = Column(DateTime)
    lastfetched = Column(DateTime)
    sent = Column(DateTime)

    def __init__(self, entry):
        """Create the row from a parsed entry and stamp ``firstfetched`` with now."""
        self.update(entry)
        self.firstfetched = datetime.now()

    def __repr__(self):
        """Return a short debug representation (only the title is filled in)."""
        return "<Entry('%s','%s','%s')>" % (self.title, "", "")

    def update(self, entry):
        """Copy the fields present in ``entry`` into this row and stamp ``lastfetched``.

        ``entry`` must offer ``has_key``/``get``; presumably it is a feedparser
        entry -- confirm against the caller. Absent fields keep their old value.
        """
        for field in ('title', 'link', 'summary'):
            if entry.has_key(field):
                setattr(self, field, entry.get(field))
        if entry.has_key('content'):
            # Only the first content element is stored.
            self.content = entry.get('content')[0].value
        if entry.has_key('author'):
            self.author = entry.get('author')
        if entry.has_key('updated_parsed'):
            updated_parsed = entry.get('updated_parsed')
            # The key may be present with a None value (e.g. an unparseable date);
            # mktime(None) would raise TypeError, so keep the previous value instead.
            if updated_parsed is not None:
                self.updated = datetime.fromtimestamp(mktime(updated_parsed))
        if entry.has_key('enclosures') and len(entry.get('enclosures')) > 0:
            # Debug output only: enclosures are printed, not persisted yet.
            print('enclosures')
            printer = pprint.PrettyPrinter(depth=4)
            printer.pprint(entry.get('enclosures'))
            #self.enclosures = entry.get('enclosures')
        self.lastfetched = datetime.now()
def send_mail(sender, receiver, subject, body): def send_mail(sender, receiver, subject, body):
print 'sending to %s: %s' % (receiver[0], subject) print 'sending to %s: %s' % (receiver[0], subject)
Charset.add_charset('utf-8', Charset.QP, Charset.QP, 'utf-8') Charset.add_charset('utf-8', Charset.QP, Charset.QP, 'utf-8')
@ -297,6 +173,7 @@ def fetch_single_feed(session, feed):
print 'processing %s' % feed.url print 'processing %s' % feed.url
query = session.query(Feedinfo).filter(Feedinfo.feed_id==feed.id) query = session.query(Feedinfo).filter(Feedinfo.feed_id==feed.id)
fetched = False fetched = False
# TODO: remove exception, see above
try: try:
feed.feedinfo = query.one() feed.feedinfo = query.one()
nextfetch = (feed.feedinfo.lastfetched + timedelta(minutes=feed.frequency)) nextfetch = (feed.feedinfo.lastfetched + timedelta(minutes=feed.frequency))
@ -331,6 +208,9 @@ def fetch_all_feeds(session):
print print
if __name__ == '__main__': if __name__ == '__main__':
streamWriter = codecs.lookup('utf-8')[-1]
sys.stdout = streamWriter(sys.stdout)
config = ConfigParser.ConfigParser() config = ConfigParser.ConfigParser()
config.read('atomstrom.conf') config.read('atomstrom.conf')

31
models/__init__.py Normal file
View File

@ -0,0 +1,31 @@
#!/usr/bin/env python
#coding: utf-8
#from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
#from sqlalchemy.orm import sessionmaker, relation, backref
from sqlalchemy.ext.declarative import declarative_base
#from datetime import datetime, timedelta
#from time import mktime
#import feedparser
#import re
#import sys
#import urllib
#import urllib2
#import hn
#import html2text
#import ConfigParser
#import pprint
#from optparse import OptionParser
#from cStringIO import StringIO
#from email.mime.multipart import MIMEMultipart
#from email.mime.text import MIMEText
#from email.header import Header
#from email import Charset
#from email.generator import Generator
#import smtplib
# Base must exist before the model modules are imported below: each of them
# runs `from models import Base` at import time, so reordering these lines
# would break the (circular) import.
Base = declarative_base()
# Re-export the model classes so callers can use `from models import Feed, Feedinfo, Entry`.
from models.feed import Feed
from models.feedinfo import Feedinfo
from models.entry import Entry

79
models/entry.py Normal file
View File

@ -0,0 +1,79 @@
#!/usr/bin/env python
#coding: utf-8
#from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
from sqlalchemy import Column, Integer, ForeignKey, String, Text, DateTime
#from sqlalchemy.orm import sessionmaker, relation, backref
from sqlalchemy.orm import relation, backref
#from sqlalchemy.ext.declarative import declarative_base
#from datetime import datetime, timedelta
from datetime import datetime
from time import mktime
#import feedparser
#import re
#import sys
#import urllib
#import urllib2
#import hn
#import html2text
#import ConfigParser
#import pprint
#from optparse import OptionParser
#from cStringIO import StringIO
#from email.mime.multipart import MIMEMultipart
#from email.mime.text import MIMEText
#from email.header import Header
#from email import Charset
#from email.generator import Generator
#import smtplib
from models import Base
class Entry(Base):
    """ORM mapping for the ``entry`` table: one item fetched from a feed."""

    __tablename__ = 'entry'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    # The backref exposes a feed's entries as ``Feed.entry``.
    feed = relation("Feed", backref=backref('entry'))
    title = Column(String(255))
    link = Column(String(255))
    summary = Column(Text)
    content = Column(Text)
    author = Column(String(255))
    enclosures = Column(Text)
    resolvedlink = Column(String(255))
    fullpage = Column(Text)
    readability = Column(Text)
    updated = Column(DateTime)
    firstfetched = Column(DateTime)
    lastfetched = Column(DateTime)
    sent = Column(DateTime)

    def __init__(self, entry):
        """Create the row from a parsed entry and stamp ``firstfetched`` with now."""
        self.update(entry)
        self.firstfetched = datetime.now()

    def __repr__(self):
        """Return a short debug representation (only the title is filled in)."""
        return "<Entry('%s','%s','%s')>" % (self.title, "", "")

    def update(self, entry):
        """Copy the fields present in ``entry`` into this row and stamp ``lastfetched``.

        ``entry`` must offer ``has_key``/``get``; presumably it is a feedparser
        entry -- confirm against the caller. Absent fields keep their old value.
        """
        # Imported here because this module has no module-level `import pprint`;
        # without it the enclosure branch below raised NameError.
        import pprint

        for field in ('title', 'link', 'summary'):
            if entry.has_key(field):
                setattr(self, field, entry.get(field))
        if entry.has_key('content'):
            # Only the first content element is stored.
            self.content = entry.get('content')[0].value
        if entry.has_key('author'):
            self.author = entry.get('author')
        if entry.has_key('updated_parsed'):
            updated_parsed = entry.get('updated_parsed')
            # The key may be present with a None value (e.g. an unparseable date);
            # mktime(None) would raise TypeError, so keep the previous value instead.
            if updated_parsed is not None:
                self.updated = datetime.fromtimestamp(mktime(updated_parsed))
        if entry.has_key('enclosures') and len(entry.get('enclosures')) > 0:
            # Debug output only: enclosures are printed, not persisted yet.
            print('enclosures')
            printer = pprint.PrettyPrinter(depth=4)
            printer.pprint(entry.get('enclosures'))
            #self.enclosures = entry.get('enclosures')
        self.lastfetched = datetime.now()

53
models/feed.py Normal file
View File

@ -0,0 +1,53 @@
#!/usr/bin/env python
#coding: utf-8
#from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
from sqlalchemy import Column, Integer, String, Boolean
#from sqlalchemy.orm import sessionmaker, relation, backref
#from sqlalchemy.ext.declarative import declarative_base
#from datetime import datetime, timedelta
#from time import mktime
#import feedparser
#import re
#import sys
#import urllib
#import urllib2
#import hn
#import html2text
#import ConfigParser
#import pprint
#from optparse import OptionParser
#from cStringIO import StringIO
#from email.mime.multipart import MIMEMultipart
#from email.mime.text import MIMEText
#from email.header import Header
#from email import Charset
#from email.generator import Generator
#import smtplib
from models import Base
class Feed(Base):
    """ORM mapping for the ``feed`` table: one feed URL plus its option flags."""

    __tablename__ = 'feed'

    id = Column(Integer, primary_key=True)
    url = Column(String(255))
    frequency = Column(Integer)
    # Per-feed boolean options; their exact effect is decided by the calling script.
    daily = Column(Boolean)
    resolveredirects = Column(Boolean)
    readability = Column(Boolean)
    fullpage = Column(Boolean)
    html2textsummary = Column(Boolean)
    html2textignoreimages = Column(Boolean)
    enabled = Column(Boolean)

    def __init__(self, url, daily, readability, fullpage, enabled, html2textsummary):
        """Store the given URL and flags; the remaining columns are left unset."""
        initial_values = (
            ('url', url),
            ('daily', daily),
            ('readability', readability),
            ('fullpage', fullpage),
            ('html2textsummary', html2textsummary),
            ('enabled', enabled),
        )
        for attribute, value in initial_values:
            setattr(self, attribute, value)

    def __repr__(self):
        """Return a short debug representation (url, daily, readability)."""
        shown = (self.url, self.daily, self.readability)
        return "<Feed('%s','%s','%s')>" % shown

74
models/feedinfo.py Normal file
View File

@ -0,0 +1,74 @@
#!/usr/bin/env python
#coding: utf-8
#from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
from sqlalchemy import Column, Integer, ForeignKey, String, DateTime
#from sqlalchemy.orm import sessionmaker, relation, backref
from sqlalchemy.orm import relation, backref
#from sqlalchemy.ext.declarative import declarative_base
#from datetime import datetime, timedelta
from datetime import datetime
#from time import mktime
#import feedparser
#import re
#import sys
#import urllib
#import urllib2
#import hn
#import html2text
#import ConfigParser
#import pprint
#from optparse import OptionParser
#from cStringIO import StringIO
#from email.mime.multipart import MIMEMultipart
#from email.mime.text import MIMEText
#from email.header import Header
#from email import Charset
#from email.generator import Generator
#import smtplib
from models import Base
class Feedinfo(Base):
    """ORM mapping for the ``feedinfo`` table: metadata and fetch status of one feed."""

    __tablename__ = 'feedinfo'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    # uselist=False makes the reverse side (Feed.feedinfo) a single object, not a list.
    feed = relation("Feed", backref=backref('feedinfo', uselist=False))
    title = Column(String(255))
    link = Column(String(255))
    subtitle = Column(String(255))
    author = Column(String(255))
    publisher = Column(String(255))
    status = Column(Integer)
    version = Column(String(16))
    encoding = Column(String(16))
    bozo = Column(Integer)
    lastfetched = Column(DateTime)
    lastsuccessful = Column(DateTime)

    def __init__(self, parser):
        """Create the row from a parse result (see ``update``)."""
        self.update(parser)

    def __repr__(self):
        """Return a short debug representation (title, subtitle, author)."""
        return "<Feedinfo('%s','%s','%s')>" % (self.title, self.subtitle, self.author)

    def update(self, parser):
        """Copy feed-level metadata and fetch status from ``parser`` into this row.

        ``parser`` must offer ``.feed`` (with ``has_key``/``get``) and ``.get``;
        presumably it is a feedparser result -- confirm against the caller.
        Metadata fields absent from ``parser.feed`` keep their previous value.
        """
        feed_meta = parser.feed
        # 'publisher' now goes to self.publisher; it used to overwrite self.author.
        for field in ('title', 'link', 'subtitle', 'author', 'publisher'):
            if feed_meta.has_key(field):
                setattr(self, field, feed_meta.get(field))

        status = parser.get('status')
        self.status = status
        self.version = parser.get('version')
        self.encoding = parser.get('encoding')
        self.bozo = parser.get('bozo')

        # One timestamp for both columns so a successful fetch records equal values.
        now = datetime.now()
        self.lastfetched = now
        if status in (200, 302):
            self.lastsuccessful = now