refactored so the data model lives in its own module
This commit is contained in:
parent
d300cf8f4c
commit
4f1bea7a6b
142
atomstrom.py
142
atomstrom.py
@ -1,20 +1,19 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
#coding: utf-8
|
#coding: utf-8
|
||||||
|
|
||||||
from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
|
from models import Base, Feed, Feedinfo, Entry
|
||||||
from sqlalchemy.orm import sessionmaker, relation, backref
|
|
||||||
from sqlalchemy.ext.declarative import declarative_base
|
from sqlalchemy import create_engine
|
||||||
|
from sqlalchemy.orm import sessionmaker
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from time import mktime
|
|
||||||
import feedparser
|
import feedparser
|
||||||
import re
|
|
||||||
import sys
|
import sys
|
||||||
import urllib
|
import codecs
|
||||||
|
#import urllib
|
||||||
import urllib2
|
import urllib2
|
||||||
import hn
|
#import hn
|
||||||
import html2text
|
import html2text
|
||||||
import ConfigParser
|
import ConfigParser
|
||||||
import pprint
|
|
||||||
from optparse import OptionParser
|
from optparse import OptionParser
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
from email.mime.multipart import MIMEMultipart
|
from email.mime.multipart import MIMEMultipart
|
||||||
@ -24,129 +23,6 @@ from email import Charset
|
|||||||
from email.generator import Generator
|
from email.generator import Generator
|
||||||
import smtplib
|
import smtplib
|
||||||
|
|
||||||
Base = declarative_base()
|
|
||||||
|
|
||||||
class Feed(Base):
    """ORM mapping for the ``feed`` table: a feed URL plus its per-feed settings."""

    __tablename__ = 'feed'

    id = Column(Integer, primary_key=True)
    url = Column(Text)
    frequency = Column(Integer)
    daily = Column(Boolean)
    resolveredirects = Column(Boolean)
    readability = Column(Boolean)
    fullpage = Column(Boolean)
    html2textsummary = Column(Boolean)
    html2textignoreimages = Column(Boolean)
    enabled = Column(Boolean)

    def __init__(self, url, daily, readability, fullpage, enabled, html2textsummary):
        """Store the constructor arguments on the new row.

        ``frequency``, ``resolveredirects`` and ``html2textignoreimages`` are
        not assigned here.
        """
        self.url, self.daily = url, daily
        self.readability, self.fullpage = readability, fullpage
        self.html2textsummary, self.enabled = html2textsummary, enabled

    def __repr__(self):
        """Return a short debugging string showing url, daily and readability."""
        shown = (self.url, self.daily, self.readability)
        return "<Feed('%s','%s','%s')>" % shown
|
|
||||||
|
|
||||||
|
|
||||||
class Feedinfo(Base):
    """ORM mapping for the ``feedinfo`` table: metadata from a feed's last parse.

    Each row points at a ``Feed`` through ``feed_id``; the ``feedinfo`` backref
    on ``Feed`` is scalar (``uselist=False``).
    """

    __tablename__ = 'feedinfo'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    feed = relation("Feed", backref=backref('feedinfo', uselist=False))
    title = Column(String(255))
    link = Column(String(255))
    subtitle = Column(String(255))
    author = Column(String(255))
    publisher = Column(String(255))
    status = Column(Integer)
    version = Column(String(16))
    encoding = Column(String(16))
    bozo = Column(Integer)

    lastfetched = Column(DateTime)
    lastsuccessful = Column(DateTime)

    def __init__(self, parser):
        """Create the row from a parse result by delegating to ``update``."""
        self.update(parser)

    def __repr__(self):
        """Return a short debugging string showing title, subtitle and author."""
        return "<Feedinfo('%s','%s','%s')>" % (self.title, self.subtitle, self.author)

    def update(self, parser):
        """Copy feed-level metadata and fetch status from ``parser``.

        ``parser`` must expose ``.feed`` (supporting ``has_key``/``get``) and a
        ``get`` method for ``status``, ``version``, ``encoding`` and ``bozo``
        -- presumably a feedparser result; verify against the caller.

        Metadata keys missing from ``parser.feed`` leave the current attribute
        untouched. ``lastfetched`` is always refreshed; ``lastsuccessful`` only
        when the status is 200 or 302.
        """
        feed_data = parser.feed
        # Each key is stored on the attribute of the same name, so the feed's
        # publisher lands in ``publisher`` and never overwrites ``author``.
        for name in ('title', 'link', 'subtitle', 'author', 'publisher'):
            if feed_data.has_key(name):
                setattr(self, name, feed_data.get(name))

        status = parser.get('status')
        self.status = status
        self.version = parser.get('version')
        self.encoding = parser.get('encoding')
        self.bozo = parser.get('bozo')

        # One timestamp for both columns so they agree exactly on success.
        now = datetime.now()
        self.lastfetched = now
        if status in (200, 302):
            self.lastsuccessful = now
|
|
||||||
|
|
||||||
|
|
||||||
class Entry(Base):
    """ORM mapping for the ``entry`` table: one item fetched from a feed."""

    __tablename__ = 'entry'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    feed = relation("Feed", backref=backref('entry'))
    title = Column(String(255))
    link = Column(String(255))
    summary = Column(Text)
    content = Column(Text)
    author = Column(String(255))
    enclosures = Column(Text)

    resolvedlink = Column(String(255))
    fullpage = Column(Text)
    readability = Column(Text)
    updated = Column(DateTime)
    firstfetched = Column(DateTime)
    lastfetched = Column(DateTime)
    sent = Column(DateTime)

    def __init__(self, entry):
        """Populate the row from ``entry`` and stamp ``firstfetched`` with now."""
        self.update(entry)
        self.firstfetched = datetime.now()

    def __repr__(self):
        """Return a short debugging string; only the title slot is filled."""
        return "<Entry('%s','%s','%s')>" % (self.title, "", "")

    def update(self, entry):
        """Copy the fields present in ``entry`` and refresh ``lastfetched``.

        ``entry`` must support ``has_key``/``get`` -- presumably a feedparser
        entry; verify against the caller. Keys that are absent leave the
        current attribute untouched. Enclosures are only dumped to stdout,
        not stored.
        """
        for name in ('title', 'link', 'summary'):
            if entry.has_key(name):
                setattr(self, name, entry.get(name))
        if entry.has_key('content'):
            # Only the first content element is kept.
            first_content = entry.get('content')[0]
            self.content = first_content.value
        if entry.has_key('author'):
            self.author = entry.get('author')
        if entry.has_key('updated_parsed'):
            parsed_time = entry.get('updated_parsed')
            self.updated = datetime.fromtimestamp(mktime(parsed_time))
        if entry.has_key('enclosures') and len(entry.get('enclosures')) > 0:
            print('enclosures')
            printer = pprint.PrettyPrinter(depth=4)
            printer.pprint(entry.get('enclosures'))
            #self.enclosures = entry.get('enclosures')
        self.lastfetched = datetime.now()
|
|
||||||
|
|
||||||
|
|
||||||
def send_mail(sender, receiver, subject, body):
|
def send_mail(sender, receiver, subject, body):
|
||||||
print 'sending to %s: %s' % (receiver[0], subject)
|
print 'sending to %s: %s' % (receiver[0], subject)
|
||||||
Charset.add_charset('utf-8', Charset.QP, Charset.QP, 'utf-8')
|
Charset.add_charset('utf-8', Charset.QP, Charset.QP, 'utf-8')
|
||||||
@ -297,6 +173,7 @@ def fetch_single_feed(session, feed):
|
|||||||
print 'processing %s' % feed.url
|
print 'processing %s' % feed.url
|
||||||
query = session.query(Feedinfo).filter(Feedinfo.feed_id==feed.id)
|
query = session.query(Feedinfo).filter(Feedinfo.feed_id==feed.id)
|
||||||
fetched = False
|
fetched = False
|
||||||
|
# TODO: remove exception, see above
|
||||||
try:
|
try:
|
||||||
feed.feedinfo = query.one()
|
feed.feedinfo = query.one()
|
||||||
nextfetch = (feed.feedinfo.lastfetched + timedelta(minutes=feed.frequency))
|
nextfetch = (feed.feedinfo.lastfetched + timedelta(minutes=feed.frequency))
|
||||||
@ -331,6 +208,9 @@ def fetch_all_feeds(session):
|
|||||||
print
|
print
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
streamWriter = codecs.lookup('utf-8')[-1]
|
||||||
|
sys.stdout = streamWriter(sys.stdout)
|
||||||
|
|
||||||
config = ConfigParser.ConfigParser()
|
config = ConfigParser.ConfigParser()
|
||||||
config.read('atomstrom.conf')
|
config.read('atomstrom.conf')
|
||||||
|
|
||||||
|
31
models/__init__.py
Normal file
31
models/__init__.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#coding: utf-8
|
||||||
|
|
||||||
|
#from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
|
||||||
|
#from sqlalchemy.orm import sessionmaker, relation, backref
|
||||||
|
from sqlalchemy.ext.declarative import declarative_base
|
||||||
|
#from datetime import datetime, timedelta
|
||||||
|
#from time import mktime
|
||||||
|
#import feedparser
|
||||||
|
#import re
|
||||||
|
#import sys
|
||||||
|
#import urllib
|
||||||
|
#import urllib2
|
||||||
|
#import hn
|
||||||
|
#import html2text
|
||||||
|
#import ConfigParser
|
||||||
|
#import pprint
|
||||||
|
#from optparse import OptionParser
|
||||||
|
#from cStringIO import StringIO
|
||||||
|
#from email.mime.multipart import MIMEMultipart
|
||||||
|
#from email.mime.text import MIMEText
|
||||||
|
#from email.header import Header
|
||||||
|
#from email import Charset
|
||||||
|
#from email.generator import Generator
|
||||||
|
#import smtplib
|
||||||
|
|
||||||
|
Base = declarative_base()
|
||||||
|
|
||||||
|
from models.feed import Feed
|
||||||
|
from models.feedinfo import Feedinfo
|
||||||
|
from models.entry import Entry
|
79
models/entry.py
Normal file
79
models/entry.py
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#coding: utf-8
|
||||||
|
|
||||||
|
#from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
|
||||||
|
from sqlalchemy import Column, Integer, ForeignKey, String, Text, DateTime
|
||||||
|
#from sqlalchemy.orm import sessionmaker, relation, backref
|
||||||
|
from sqlalchemy.orm import relation, backref
|
||||||
|
#from sqlalchemy.ext.declarative import declarative_base
|
||||||
|
#from datetime import datetime, timedelta
|
||||||
|
from datetime import datetime
|
||||||
|
from time import mktime
|
||||||
|
#import feedparser
|
||||||
|
#import re
|
||||||
|
#import sys
|
||||||
|
#import urllib
|
||||||
|
#import urllib2
|
||||||
|
#import hn
|
||||||
|
#import html2text
|
||||||
|
#import ConfigParser
|
||||||
|
#import pprint
|
||||||
|
#from optparse import OptionParser
|
||||||
|
#from cStringIO import StringIO
|
||||||
|
#from email.mime.multipart import MIMEMultipart
|
||||||
|
#from email.mime.text import MIMEText
|
||||||
|
#from email.header import Header
|
||||||
|
#from email import Charset
|
||||||
|
#from email.generator import Generator
|
||||||
|
#import smtplib
|
||||||
|
|
||||||
|
from models import Base
|
||||||
|
|
||||||
|
class Entry(Base):
    """ORM mapping for the ``entry`` table: one item fetched from a feed."""

    __tablename__ = 'entry'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    feed = relation("Feed", backref=backref('entry'))
    title = Column(String(255))
    link = Column(String(255))
    summary = Column(Text)
    content = Column(Text)
    author = Column(String(255))
    enclosures = Column(Text)

    resolvedlink = Column(String(255))
    fullpage = Column(Text)
    readability = Column(Text)
    updated = Column(DateTime)
    firstfetched = Column(DateTime)
    lastfetched = Column(DateTime)
    sent = Column(DateTime)

    def __init__(self, entry):
        """Populate the row from ``entry`` and stamp ``firstfetched`` with now."""
        self.update(entry)
        self.firstfetched = datetime.now()

    def __repr__(self):
        """Return a short debugging string; only the title slot is filled."""
        return "<Entry('%s','%s','%s')>" % (self.title, "", "")

    def update(self, entry):
        """Copy the fields present in ``entry`` and refresh ``lastfetched``.

        ``entry`` must support ``has_key``/``get`` -- presumably a feedparser
        entry; verify against the caller. Keys that are absent leave the
        current attribute untouched. Enclosures are only dumped to stdout,
        not stored.
        """
        for name in ('title', 'link', 'summary'):
            if entry.has_key(name):
                setattr(self, name, entry.get(name))
        if entry.has_key('content'):
            # Only the first content element is kept.
            self.content = entry.get('content')[0].value
        if entry.has_key('author'):
            self.author = entry.get('author')
        if entry.has_key('updated_parsed'):
            updated_parsed = entry.get('updated_parsed')
            # NOTE(review): the key can apparently be present with a None
            # value when the date string was not parseable; mktime(None)
            # would raise TypeError, so skip it -- confirm against feedparser.
            if updated_parsed is not None:
                self.updated = datetime.fromtimestamp(mktime(updated_parsed))
        if entry.has_key('enclosures') and len(entry.get('enclosures')) > 0:
            # Imported here because this module has no file-level pprint
            # import; without it this branch raises NameError.
            import pprint
            print('enclosures')
            pp = pprint.PrettyPrinter(depth=4)
            pp.pprint(entry.get('enclosures'))
            #self.enclosures = entry.get('enclosures')
        self.lastfetched = datetime.now()
|
53
models/feed.py
Normal file
53
models/feed.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#coding: utf-8
|
||||||
|
|
||||||
|
#from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
|
||||||
|
from sqlalchemy import Column, Integer, String, Boolean
|
||||||
|
#from sqlalchemy.orm import sessionmaker, relation, backref
|
||||||
|
#from sqlalchemy.ext.declarative import declarative_base
|
||||||
|
#from datetime import datetime, timedelta
|
||||||
|
#from time import mktime
|
||||||
|
#import feedparser
|
||||||
|
#import re
|
||||||
|
#import sys
|
||||||
|
#import urllib
|
||||||
|
#import urllib2
|
||||||
|
#import hn
|
||||||
|
#import html2text
|
||||||
|
#import ConfigParser
|
||||||
|
#import pprint
|
||||||
|
#from optparse import OptionParser
|
||||||
|
#from cStringIO import StringIO
|
||||||
|
#from email.mime.multipart import MIMEMultipart
|
||||||
|
#from email.mime.text import MIMEText
|
||||||
|
#from email.header import Header
|
||||||
|
#from email import Charset
|
||||||
|
#from email.generator import Generator
|
||||||
|
#import smtplib
|
||||||
|
|
||||||
|
from models import Base
|
||||||
|
|
||||||
|
class Feed(Base):
    """ORM mapping for the ``feed`` table: a feed URL plus its per-feed settings."""

    __tablename__ = 'feed'

    id = Column(Integer, primary_key=True)
    url = Column(String(255))
    frequency = Column(Integer)
    daily = Column(Boolean)
    resolveredirects = Column(Boolean)
    readability = Column(Boolean)
    fullpage = Column(Boolean)
    html2textsummary = Column(Boolean)
    html2textignoreimages = Column(Boolean)
    enabled = Column(Boolean)

    def __init__(self, url, daily, readability, fullpage, enabled, html2textsummary):
        """Store the constructor arguments on the new row.

        ``frequency``, ``resolveredirects`` and ``html2textignoreimages`` are
        not assigned here.
        """
        self.url, self.daily = url, daily
        self.readability, self.fullpage = readability, fullpage
        self.html2textsummary, self.enabled = html2textsummary, enabled

    def __repr__(self):
        """Return a short debugging string showing url, daily and readability."""
        shown = (self.url, self.daily, self.readability)
        return "<Feed('%s','%s','%s')>" % shown
|
74
models/feedinfo.py
Normal file
74
models/feedinfo.py
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#coding: utf-8
|
||||||
|
|
||||||
|
#from sqlalchemy import create_engine, Table, Column, Integer, Text, String, Boolean, DateTime, MetaData, ForeignKey, desc
|
||||||
|
from sqlalchemy import Column, Integer, ForeignKey, String, DateTime
|
||||||
|
#from sqlalchemy.orm import sessionmaker, relation, backref
|
||||||
|
from sqlalchemy.orm import relation, backref
|
||||||
|
#from sqlalchemy.ext.declarative import declarative_base
|
||||||
|
#from datetime import datetime, timedelta
|
||||||
|
from datetime import datetime
|
||||||
|
#from time import mktime
|
||||||
|
#import feedparser
|
||||||
|
#import re
|
||||||
|
#import sys
|
||||||
|
#import urllib
|
||||||
|
#import urllib2
|
||||||
|
#import hn
|
||||||
|
#import html2text
|
||||||
|
#import ConfigParser
|
||||||
|
#import pprint
|
||||||
|
#from optparse import OptionParser
|
||||||
|
#from cStringIO import StringIO
|
||||||
|
#from email.mime.multipart import MIMEMultipart
|
||||||
|
#from email.mime.text import MIMEText
|
||||||
|
#from email.header import Header
|
||||||
|
#from email import Charset
|
||||||
|
#from email.generator import Generator
|
||||||
|
#import smtplib
|
||||||
|
|
||||||
|
from models import Base
|
||||||
|
|
||||||
|
class Feedinfo(Base):
    """ORM mapping for the ``feedinfo`` table: metadata from a feed's last parse.

    Each row points at a ``Feed`` through ``feed_id``; the ``feedinfo`` backref
    on ``Feed`` is scalar (``uselist=False``).
    """

    __tablename__ = 'feedinfo'

    id = Column(Integer, primary_key=True)
    feed_id = Column(Integer, ForeignKey('feed.id'))
    feed = relation("Feed", backref=backref('feedinfo', uselist=False))
    title = Column(String(255))
    link = Column(String(255))
    subtitle = Column(String(255))
    author = Column(String(255))
    publisher = Column(String(255))
    status = Column(Integer)
    version = Column(String(16))
    encoding = Column(String(16))
    bozo = Column(Integer)

    lastfetched = Column(DateTime)
    lastsuccessful = Column(DateTime)

    def __init__(self, parser):
        """Create the row from a parse result by delegating to ``update``."""
        self.update(parser)

    def __repr__(self):
        """Return a short debugging string showing title, subtitle and author."""
        return "<Feedinfo('%s','%s','%s')>" % (self.title, self.subtitle, self.author)

    def update(self, parser):
        """Copy feed-level metadata and fetch status from ``parser``.

        ``parser`` must expose ``.feed`` (supporting ``has_key``/``get``) and a
        ``get`` method for ``status``, ``version``, ``encoding`` and ``bozo``
        -- presumably a feedparser result; verify against the caller.

        Metadata keys missing from ``parser.feed`` leave the current attribute
        untouched. ``lastfetched`` is always refreshed; ``lastsuccessful`` only
        when the status is 200 or 302.
        """
        feed_data = parser.feed
        # Each key is stored on the attribute of the same name, so the feed's
        # publisher lands in ``publisher`` and never overwrites ``author``.
        for name in ('title', 'link', 'subtitle', 'author', 'publisher'):
            if feed_data.has_key(name):
                setattr(self, name, feed_data.get(name))

        status = parser.get('status')
        self.status = status
        self.version = parser.get('version')
        self.encoding = parser.get('encoding')
        self.bozo = parser.get('bozo')

        # One timestamp for both columns so they agree exactly on success.
        now = datetime.now()
        self.lastfetched = now
        if status in (200, 302):
            self.lastsuccessful = now
|
Loading…
Reference in New Issue
Block a user