diff --git a/atomstrom.py b/atomstrom.py
index 4f98b36..737538e 100755
--- a/atomstrom.py
+++ b/atomstrom.py
@@ -3,7 +3,8 @@
 from sqlalchemy import create_engine, Table, Column, Integer, Text, Boolean, DateTime, MetaData, ForeignKey, desc
 from sqlalchemy.orm import sessionmaker, relation, backref
+from sqlalchemy.orm.exc import NoResultFound
 from sqlalchemy.ext.declarative import declarative_base
-from datetime import datetime
+from datetime import datetime, timedelta
 from time import mktime
 import feedparser
 import re
@@ -12,6 +13,7 @@ import urllib
 import hn
 import html2text
 import ConfigParser
+import pprint
 import smtplib
 from email.mime.text import MIMEText
 from optparse import OptionParser
@@ -23,6 +25,7 @@ class Feed(Base):
 
     id = Column(Integer, primary_key=True)
     url = Column(Text)
+    frequency = Column(Integer)
     daily = Column(Boolean)
     readability = Column(Boolean)
     fullpage = Column(Boolean)
@@ -225,25 +228,51 @@ def process_feed_entry(session, feed, entry):
     return 1
 
 def fetch_single_feed(session, feed):
-    print 'fetching %s' % feed.url
-    parser = feedparser.parse(feed.url)
-    print 'processing feed info...'
+    """Fetch ``feed`` if it is due and store its entries.
+
+    A feed with no Feedinfo row is treated as new and fetched right away.
+    A known feed is fetched again only once ``feed.frequency`` minutes have
+    passed since ``feed.feedinfo.lastfetched``; if either value is missing
+    there is no schedule to honour and the feed is fetched on every run.
+    Entries of a fetched feed are handed to process_feed_entry().
+    """
+    print 'processing %s' % feed.url
     query = session.query(Feedinfo).filter(Feedinfo.feed_id==feed.id)
+    # Stays None unless the feed is actually downloaded during this run.
+    parser = None
     try:
         feed.feedinfo = query.one()
-        feed.feedinfo.update(parser)
-    except Exception, e:
+    except NoResultFound:
         print 'this feed seems to be new'
+        print 'fetching...'
+        parser = feedparser.parse(feed.url)
         feed.feedinfo = Feedinfo(parser)
+    else:
+        # Only a missing row means "new feed"; the schedule check and the
+        # download live outside the try so that their errors are not
+        # mistaken for one and the existing Feedinfo is not replaced.
+        lastfetched = feed.feedinfo.lastfetched
+        nextfetch = None
+        if feed.frequency is not None and lastfetched is not None:
+            nextfetch = lastfetched + timedelta(minutes=feed.frequency)
+        if nextfetch is None or datetime.now() > nextfetch:
+            print 'fetching...'
+            parser = feedparser.parse(feed.url)
+            feed.feedinfo.update(parser)
+        else:
+            print 'not fetching before: %s' % nextfetch
 
+    if parser is None:
+        # Not due yet: nothing was downloaded, so there is nothing to store.
+        return
     print 'processing feed entries:'
     entries_new = 0
     entries_total = 0
     for entry in parser.entries:
         entries_total = entries_total + 1
         entries_new = entries_new + process_feed_entry(session, feed, entry)
         session.commit()
     print 'updated %d of %d entries' % (entries_new, entries_total)
 
 def fetch_all_feeds(session):
     print 'fetching all feeds...'