# -*- coding: utf-8 -*-
# Atomstrom/models/feed.py
from sqlalchemy import Column, Integer, String, Boolean, Enum
from sqlalchemy.orm import relationship, backref
from datetime import datetime, timedelta
import feedparser
import sys
from models import Base
from models.feedinfo import Feedinfo
from models.entry import Entry
class Feed(Base):
    """A subscribed feed plus its per-feed fetch/rendering configuration.

    Owns its Feedinfo (parsed feed-level metadata) and its Entry rows;
    both are deleted together with the feed via the delete-orphan cascades.
    """
    __tablename__ = 'feed'

    id = Column(Integer, primary_key=True)
    url = Column(String(255))
    frequency = Column(Integer)
    # entries not seen in a fetch for this many days are purged by housekeeper()
    keepdaysafterlastfetch = Column(Integer, default=30)
    daily = Column(Boolean)
    resolveredirects = Column(Boolean)
    readability = Column(Boolean)
    fullpage = Column(Boolean)
    # which representation of an entry's body to use downstream
    contentcolumn = Column(Enum('summary', 'content', 'fullpage', 'readability'))
    html2textcontent = Column(Boolean)
    html2textignorelinks = Column(Boolean)
    html2textignoreimages = Column(Boolean)
    enabled = Column(Boolean)

    entries = relationship("Entry", backref=backref('feed'),
                           cascade='all, delete, delete-orphan')
    feedinfo = relationship("Feedinfo", backref=backref('feed'),
                            cascade='all, delete, delete-orphan', uselist=False)

    def __init__(self, url, daily, readability, fullpage, enabled, html2textcontent):
        self.url = url
        self.daily = daily
        self.readability = readability
        self.fullpage = fullpage
        self.html2textcontent = html2textcontent
        self.enabled = enabled

    def __unicode__(self):
        """One-line human-readable summary of the feed's state."""
        if self.feedinfo:
            title = self.feedinfo.title
            last = self.feedinfo.lastsuccessful
        else:
            title = '<unknown>'
            last = '<never>'
        enabled = 'enabled' if self.enabled else 'DISABLED'
        return u'%3d %s (%d entries, last fetched %s, %s)\n %s' % (
            self.id, title, len(self.entries), last, enabled, self.url)

    def __str__(self):
        return unicode(self).encode('utf-8')

    def __repr__(self):
        return "<Feed('%d','%s')>" % (self.id, self.url)

    def _parse(self):
        """Download and parse self.url; return the feedparser result or None.

        Errors are logged and swallowed on purpose: one broken feed must not
        abort a whole fetch run.  (Was a bare `except:`; narrowed to Exception
        so SystemExit/KeyboardInterrupt still propagate.)
        """
        try:
            return feedparser.parse(self.url)
        except Exception:
            print('ERROR parsing feed')
            print(sys.exc_info())
            return None

    def fetch(self, session):
        """Fetch the feed if it is due, merge its entries into the session,
        then run housekeeping and commit.

        Housekeeping and the commit run even when the fetch is skipped or
        fails, matching the original behaviour.
        """
        print('processing %d: %s' % (self.id, self.url))
        if self.feedinfo and self.feedinfo.nextfetch and \
                self.feedinfo.nextfetch >= datetime.now():
            # still inside the back-off window recorded by the last fetch
            print('not fetching before: %s' % self.feedinfo.nextfetch)
        else:
            if self.feedinfo:
                print('feed known, fetching...')
            else:
                print('feed seems to be new, fetching...')
            parser = self._parse()
            if parser is not None:
                try:
                    if self.feedinfo:
                        self.feedinfo.update(parser)
                    else:
                        self.feedinfo = Feedinfo(parser)
                except Exception:
                    # Best effort, as before: a failure while storing feed
                    # metadata does not stop entry processing below.
                    print('ERROR parsing feed')
                    print(sys.exc_info())
                self._process_entries(session, parser)
        self.housekeeper(session)
        session.commit()

    def _process_entries(self, session, parser):
        """Insert parsed entries that are not yet known; refresh lastfetched
        on the ones that are."""
        print('processing feed entries:')
        entries_new = 0
        entries_total = 0
        for entry in parser.entries:
            entries_total += 1
            # NOTE(review): the lookup matches on title+link across ALL feeds,
            # not just this one -- an identical entry in another feed would be
            # treated as already known here.  Kept as-is; confirm intent.
            known = session.query(Entry).\
                filter(Entry.title == entry.title).\
                filter(Entry.link == entry.link).\
                first()
            if known:
                print(' entry already known <%s>' % entry.title)
                known.lastfetched = datetime.now()
            else:
                print(' new entry <%s>' % entry.title)
                self.entries.append(Entry(entry, self))
                entries_new += 1
        print('updated %d of %d entries' % (entries_new, entries_total))

    def housekeeper(self, session):
        """Delete entries last seen more than keepdaysafterlastfetch days ago."""
        # hoist the cutoff out of the loop: one clock read, one timedelta
        cutoff = datetime.now() - timedelta(days=self.keepdaysafterlastfetch)
        count = 0
        for entry in self.entries:
            # assumes entry.lastfetched is always set -- TODO confirm; a None
            # here would have compared as "oldest" under Python 2 as well
            if entry.lastfetched < cutoff:
                session.delete(entry)
                count += 1
        if count > 0:
            print('housekeeper deleted %d entries older than %d days.' % (
                count, self.keepdaysafterlastfetch))

    def reset(self):
        """Drop all fetched state; the delete-orphan cascades remove the
        orphaned Entry and Feedinfo rows on flush."""
        self.entries[:] = []
        self.feedinfo = None