refactored so the data model lives in its own module

This commit is contained in:
Ronald Schaten 2013-03-26 20:20:51 +01:00
parent d300cf8f4c
commit 4f1bea7a6b
5 changed files with 248 additions and 131 deletions

View File

@ -1,20 +1,19 @@
#!/usr/bin/env python
#coding: utf-8
from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
from sqlalchemy.orm import sessionmaker, relation, backref
from sqlalchemy.ext.declarative import declarative_base
from models import Base, Feed, Feedinfo, Entry
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from datetime import datetime, timedelta
from time import mktime
import feedparser
import re
import sys
import urllib
import codecs
#import urllib
import urllib2
import hn
#import hn
import html2text
import ConfigParser
import pprint
from optparse import OptionParser
from cStringIO import StringIO
from email.mime.multipart import MIMEMultipart
@ -24,129 +23,6 @@ from email import Charset
from email.generator import Generator
import smtplib
# Declarative base class that the model classes defined below inherit from.
Base = declarative_base()
class Feed(Base):
    """ORM mapping for the ``feed`` table: one row per configured feed URL.

    The boolean columns are per-feed switches that this class only stores;
    their effect is implemented by whatever code reads them.
    """

    __tablename__ = 'feed'

    id = Column(Integer, primary_key=True)
    url = Column(Text)
    # Passed to timedelta(minutes=...) by the fetcher, i.e. an interval in minutes.
    frequency = Column(Integer)
    daily = Column(Boolean)
    resolveredirects = Column(Boolean)
    readability = Column(Boolean)
    fullpage = Column(Boolean)
    html2textsummary = Column(Boolean)
    html2textignoreimages = Column(Boolean)
    enabled = Column(Boolean)

    def __init__(self, url, daily, readability, fullpage, enabled, html2textsummary):
        """Store the given settings.

        ``frequency``, ``resolveredirects`` and ``html2textignoreimages`` are
        not set here and keep whatever the column provides.
        """
        self.url, self.enabled = url, enabled
        self.daily, self.readability = daily, readability
        self.fullpage, self.html2textsummary = fullpage, html2textsummary

    def __repr__(self):
        """Return a short debug representation (url, daily, readability)."""
        shown = (self.url, self.daily, self.readability)
        return "<Feed('%s','%s','%s')>" % shown
class Feedinfo(Base):
    """ORM mapping for the ``feedinfo`` table: metadata of a feed's last fetch.

    Attached to its Feed through the scalar ``feedinfo`` backref
    (``uselist=False``), so each Feed exposes at most one Feedinfo.
    """

    __tablename__ = 'feedinfo'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    feed = relation("Feed", backref=backref('feedinfo', uselist=False))
    title = Column(String(255))
    link = Column(String(255))
    subtitle = Column(String(255))
    author = Column(String(255))
    publisher = Column(String(255))
    status = Column(Integer)
    version = Column(String(16))
    encoding = Column(String(16))
    bozo = Column(Integer)
    lastfetched = Column(DateTime)
    lastsuccessful = Column(DateTime)

    def __init__(self, parser):
        """Create the record from a parse result; see update()."""
        self.update(parser)

    def __repr__(self):
        """Return a short debug representation (title, subtitle, author)."""
        return "<Feedinfo('%s','%s','%s')>" % (self.title, self.subtitle, self.author)

    def update(self, parser):
        """Copy feed-level metadata and fetch status from ``parser``.

        ``parser`` must offer a mapping-like ``.feed`` and a ``.get()`` method
        (presumably the object returned by feedparser.parse -- confirm
        against the caller). Descriptive fields missing from ``parser.feed``
        keep their previous value.
        """
        # Each field is copied onto the attribute of the same name. The
        # previous code wrote 'publisher' into self.author, which clobbered
        # the author and left the publisher column empty.
        for name in ('title', 'link', 'subtitle', 'author', 'publisher'):
            if name in parser.feed:
                setattr(self, name, parser.feed.get(name))

        status = parser.get('status')
        self.status = status
        self.version = parser.get('version')
        self.encoding = parser.get('encoding')
        self.bozo = parser.get('bozo')

        # One timestamp for both fields so a successful fetch records
        # identical values.
        now = datetime.now()
        self.lastfetched = now
        if status in (200, 302):
            self.lastsuccessful = now
class Entry(Base):
    """ORM mapping for the ``entry`` table: one row per fetched feed item.

    Entries are reachable from their Feed through the ``entry`` backref.
    """

    __tablename__ = 'entry'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    feed = relation("Feed", backref=backref('entry'))
    title = Column(String(255))
    link = Column(String(255))
    summary = Column(Text)
    content = Column(Text)
    author = Column(String(255))
    enclosures = Column(Text)
    resolvedlink = Column(String(255))
    fullpage = Column(Text)
    readability = Column(Text)
    updated = Column(DateTime)
    firstfetched = Column(DateTime)
    lastfetched = Column(DateTime)
    sent = Column(DateTime)

    def __init__(self, entry):
        """Create the row from a parsed entry and stamp ``firstfetched``."""
        self.update(entry)
        self.firstfetched = datetime.now()

    def __repr__(self):
        """Return a short debug representation (title only)."""
        return "<Entry('%s','%s','%s')>" % (self.title, "", "")

    def update(self, entry):
        """Copy the fields present in ``entry`` and refresh ``lastfetched``.

        ``entry`` must be mapping-like (``in`` and ``.get()``); presumably a
        feedparser entry -- confirm against the caller. Fields that are
        missing from ``entry`` keep their previous value.
        """
        for name in ('title', 'link', 'summary', 'author'):
            if name in entry:
                setattr(self, name, entry.get(name))

        # Only the first content element is stored; an empty list is skipped
        # instead of raising IndexError.
        contents = entry.get('content')
        if contents:
            self.content = contents[0].value

        # The key can be present with a None value (presumably when the date
        # string could not be parsed -- confirm); mktime(None) would raise.
        updated_parsed = entry.get('updated_parsed')
        if updated_parsed is not None:
            # NOTE(review): mktime() reads the tuple as local time. If the
            # parser delivers UTC tuples, calendar.timegm() may be what is
            # intended here -- confirm before changing stored values.
            self.updated = datetime.fromtimestamp(mktime(updated_parsed))

        enclosures = entry.get('enclosures')
        if enclosures:
            # Enclosures are only dumped for inspection; they are not stored.
            print('enclosures')
            pp = pprint.PrettyPrinter(depth=4)
            pp.pprint(enclosures)
            #self.enclosures = entry.get('enclosures')

        self.lastfetched = datetime.now()
def send_mail(sender, receiver, subject, body):
print 'sending to %s: %s' % (receiver[0], subject)
Charset.add_charset('utf-8', Charset.QP, Charset.QP, 'utf-8')
@ -297,6 +173,7 @@ def fetch_single_feed(session, feed):
print 'processing %s' % feed.url
query = session.query(Feedinfo).filter(Feedinfo.feed_id==feed.id)
fetched = False
# TODO: remove exception, see above
try:
feed.feedinfo = query.one()
nextfetch = (feed.feedinfo.lastfetched + timedelta(minutes=feed.frequency))
@ -331,6 +208,9 @@ def fetch_all_feeds(session):
print
if __name__ == '__main__':
streamWriter = codecs.lookup('utf-8')[-1]
sys.stdout = streamWriter(sys.stdout)
config = ConfigParser.ConfigParser()
config.read('atomstrom.conf')

31
models/__init__.py Normal file
View File

@ -0,0 +1,31 @@
#!/usr/bin/env python
#coding: utf-8
#from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
#from sqlalchemy.orm import sessionmaker, relation, backref
from sqlalchemy.ext.declarative import declarative_base
#from datetime import datetime, timedelta
#from time import mktime
#import feedparser
#import re
#import sys
#import urllib
#import urllib2
#import hn
#import html2text
#import ConfigParser
#import pprint
#from optparse import OptionParser
#from cStringIO import StringIO
#from email.mime.multipart import MIMEMultipart
#from email.mime.text import MIMEText
#from email.header import Header
#from email import Charset
#from email.generator import Generator
#import smtplib
# Declarative base shared by all model classes. It must be defined before the
# imports below run: each model module does `from models import Base`, so
# moving these imports above this line would break the circular import.
Base = declarative_base()
# Re-export the model classes so callers can import them from `models` directly.
from models.feed import Feed
from models.feedinfo import Feedinfo
from models.entry import Entry

79
models/entry.py Normal file
View File

@ -0,0 +1,79 @@
#!/usr/bin/env python
#coding: utf-8
#from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
from sqlalchemy import Column, Integer, ForeignKey, String, Text, DateTime
#from sqlalchemy.orm import sessionmaker, relation, backref
from sqlalchemy.orm import relation, backref
#from sqlalchemy.ext.declarative import declarative_base
#from datetime import datetime, timedelta
from datetime import datetime
from time import mktime
#import feedparser
#import re
#import sys
#import urllib
#import urllib2
#import hn
#import html2text
#import ConfigParser
#import pprint
#from optparse import OptionParser
#from cStringIO import StringIO
#from email.mime.multipart import MIMEMultipart
#from email.mime.text import MIMEText
#from email.header import Header
#from email import Charset
#from email.generator import Generator
#import smtplib
from models import Base
class Entry(Base):
    """ORM mapping for the ``entry`` table: one row per fetched feed item.

    Entries are reachable from their Feed through the ``entry`` backref.
    """

    __tablename__ = 'entry'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    feed = relation("Feed", backref=backref('entry'))
    title = Column(String(255))
    link = Column(String(255))
    summary = Column(Text)
    content = Column(Text)
    author = Column(String(255))
    enclosures = Column(Text)
    resolvedlink = Column(String(255))
    fullpage = Column(Text)
    readability = Column(Text)
    updated = Column(DateTime)
    firstfetched = Column(DateTime)
    lastfetched = Column(DateTime)
    sent = Column(DateTime)

    def __init__(self, entry):
        """Create the row from a parsed entry and stamp ``firstfetched``."""
        self.update(entry)
        self.firstfetched = datetime.now()

    def __repr__(self):
        """Return a short debug representation (title only)."""
        return "<Entry('%s','%s','%s')>" % (self.title, "", "")

    def update(self, entry):
        """Copy the fields present in ``entry`` and refresh ``lastfetched``.

        ``entry`` must be mapping-like (``in`` and ``.get()``); presumably a
        feedparser entry -- confirm against the caller. Fields that are
        missing from ``entry`` keep their previous value.
        """
        for name in ('title', 'link', 'summary', 'author'):
            if name in entry:
                setattr(self, name, entry.get(name))

        # Only the first content element is stored; an empty list is skipped
        # instead of raising IndexError.
        contents = entry.get('content')
        if contents:
            self.content = contents[0].value

        # The key can be present with a None value (presumably when the date
        # string could not be parsed -- confirm); mktime(None) would raise.
        updated_parsed = entry.get('updated_parsed')
        if updated_parsed is not None:
            # NOTE(review): mktime() reads the tuple as local time. If the
            # parser delivers UTC tuples, calendar.timegm() may be what is
            # intended here -- confirm before changing stored values.
            self.updated = datetime.fromtimestamp(mktime(updated_parsed))

        enclosures = entry.get('enclosures')
        if enclosures:
            # The module-level `import pprint` is commented out in this file,
            # so import it here; otherwise this branch raises NameError for
            # every entry that carries enclosures.
            import pprint
            # Enclosures are only dumped for inspection; they are not stored.
            print('enclosures')
            pp = pprint.PrettyPrinter(depth=4)
            pp.pprint(enclosures)
            #self.enclosures = entry.get('enclosures')

        self.lastfetched = datetime.now()

53
models/feed.py Normal file
View File

@ -0,0 +1,53 @@
#!/usr/bin/env python
#coding: utf-8
#from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
from sqlalchemy import Column, Integer, String, Boolean
#from sqlalchemy.orm import sessionmaker, relation, backref
#from sqlalchemy.ext.declarative import declarative_base
#from datetime import datetime, timedelta
#from time import mktime
#import feedparser
#import re
#import sys
#import urllib
#import urllib2
#import hn
#import html2text
#import ConfigParser
#import pprint
#from optparse import OptionParser
#from cStringIO import StringIO
#from email.mime.multipart import MIMEMultipart
#from email.mime.text import MIMEText
#from email.header import Header
#from email import Charset
#from email.generator import Generator
#import smtplib
from models import Base
class Feed(Base):
    """ORM mapping for the ``feed`` table: one row per configured feed URL.

    The boolean columns are per-feed switches that this class only stores;
    their effect is implemented by whatever code reads them.
    """

    __tablename__ = 'feed'

    id = Column(Integer, primary_key=True)
    url = Column(String(255))
    # Passed to timedelta(minutes=...) by the fetcher, i.e. an interval in minutes.
    frequency = Column(Integer)
    daily = Column(Boolean)
    resolveredirects = Column(Boolean)
    readability = Column(Boolean)
    fullpage = Column(Boolean)
    html2textsummary = Column(Boolean)
    html2textignoreimages = Column(Boolean)
    enabled = Column(Boolean)

    def __init__(self, url, daily, readability, fullpage, enabled, html2textsummary):
        """Store the given settings.

        ``frequency``, ``resolveredirects`` and ``html2textignoreimages`` are
        not set here and keep whatever the column provides.
        """
        self.url, self.enabled = url, enabled
        self.daily, self.readability = daily, readability
        self.fullpage, self.html2textsummary = fullpage, html2textsummary

    def __repr__(self):
        """Return a short debug representation (url, daily, readability)."""
        shown = (self.url, self.daily, self.readability)
        return "<Feed('%s','%s','%s')>" % shown

74
models/feedinfo.py Normal file
View File

@ -0,0 +1,74 @@
#!/usr/bin/env python
#coding: utf-8
#from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
from sqlalchemy import Column, Integer, ForeignKey, String, DateTime
#from sqlalchemy.orm import sessionmaker, relation, backref
from sqlalchemy.orm import relation, backref
#from sqlalchemy.ext.declarative import declarative_base
#from datetime import datetime, timedelta
from datetime import datetime
#from time import mktime
#import feedparser
#import re
#import sys
#import urllib
#import urllib2
#import hn
#import html2text
#import ConfigParser
#import pprint
#from optparse import OptionParser
#from cStringIO import StringIO
#from email.mime.multipart import MIMEMultipart
#from email.mime.text import MIMEText
#from email.header import Header
#from email import Charset
#from email.generator import Generator
#import smtplib
from models import Base
class Feedinfo(Base):
    """ORM mapping for the ``feedinfo`` table: metadata of a feed's last fetch.

    Attached to its Feed through the scalar ``feedinfo`` backref
    (``uselist=False``), so each Feed exposes at most one Feedinfo.
    """

    __tablename__ = 'feedinfo'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    feed = relation("Feed", backref=backref('feedinfo', uselist=False))
    title = Column(String(255))
    link = Column(String(255))
    subtitle = Column(String(255))
    author = Column(String(255))
    publisher = Column(String(255))
    status = Column(Integer)
    version = Column(String(16))
    encoding = Column(String(16))
    bozo = Column(Integer)
    lastfetched = Column(DateTime)
    lastsuccessful = Column(DateTime)

    def __init__(self, parser):
        """Create the record from a parse result; see update()."""
        self.update(parser)

    def __repr__(self):
        """Return a short debug representation (title, subtitle, author)."""
        return "<Feedinfo('%s','%s','%s')>" % (self.title, self.subtitle, self.author)

    def update(self, parser):
        """Copy feed-level metadata and fetch status from ``parser``.

        ``parser`` must offer a mapping-like ``.feed`` and a ``.get()`` method
        (presumably the object returned by feedparser.parse -- confirm
        against the caller). Descriptive fields missing from ``parser.feed``
        keep their previous value.
        """
        # Each field is copied onto the attribute of the same name. The
        # previous code wrote 'publisher' into self.author, which clobbered
        # the author and left the publisher column empty.
        for name in ('title', 'link', 'subtitle', 'author', 'publisher'):
            if name in parser.feed:
                setattr(self, name, parser.feed.get(name))

        status = parser.get('status')
        self.status = status
        self.version = parser.get('version')
        self.encoding = parser.get('encoding')
        self.bozo = parser.get('bozo')

        # One timestamp for both fields so a successful fetch records
        # identical values.
        now = datetime.now()
        self.lastfetched = now
        if status in (200, 302):
            self.lastsuccessful = now