implemented fetch-frequency for feeds
This commit is contained in:
		
							
								
								
									
										38
									
								
								atomstrom.py
									
									
									
									
									
								
							
							
						
						
									
										38
									
								
								atomstrom.py
									
									
									
									
									
								
							@@ -3,7 +3,7 @@
 | 
			
		||||
from sqlalchemy import create_engine, Table, Column, Integer, Text, Boolean, DateTime, MetaData, ForeignKey, desc
 | 
			
		||||
from sqlalchemy.orm import sessionmaker, relation, backref
 | 
			
		||||
from sqlalchemy.ext.declarative import declarative_base
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
from datetime import datetime, timedelta
 | 
			
		||||
from time import mktime
 | 
			
		||||
import feedparser
 | 
			
		||||
import re
 | 
			
		||||
@@ -12,6 +12,7 @@ import urllib
 | 
			
		||||
import hn
 | 
			
		||||
import html2text
 | 
			
		||||
import ConfigParser
 | 
			
		||||
import pprint
 | 
			
		||||
import smtplib
 | 
			
		||||
from email.mime.text import MIMEText
 | 
			
		||||
from optparse import OptionParser
 | 
			
		||||
@@ -23,6 +24,7 @@ class Feed(Base):
 | 
			
		||||
 | 
			
		||||
    id = Column(Integer, primary_key=True)
 | 
			
		||||
    url = Column(Text)
 | 
			
		||||
    frequency = Column(Integer)
 | 
			
		||||
    daily = Column(Boolean)
 | 
			
		||||
    readability = Column(Boolean)
 | 
			
		||||
    fullpage = Column(Boolean)
 | 
			
		||||
@@ -225,25 +227,35 @@ def process_feed_entry(session, feed, entry):
 | 
			
		||||
        return 1
 | 
			
		||||
 | 
			
		||||
def fetch_single_feed(session, feed):
 | 
			
		||||
    print 'fetching %s' % feed.url
 | 
			
		||||
    parser = feedparser.parse(feed.url)
 | 
			
		||||
    print 'processing feed info...'
 | 
			
		||||
    print 'processing %s' % feed.url
 | 
			
		||||
    query = session.query(Feedinfo).filter(Feedinfo.feed_id==feed.id)
 | 
			
		||||
    fetched = False
 | 
			
		||||
    try:
 | 
			
		||||
        feed.feedinfo = query.one()
 | 
			
		||||
        feed.feedinfo.update(parser)
 | 
			
		||||
        nextfetch = (feed.feedinfo.lastfetched + timedelta(minutes=feed.frequency))
 | 
			
		||||
        if datetime.now() > nextfetch:
 | 
			
		||||
            print 'fetching...'
 | 
			
		||||
            parser = feedparser.parse(feed.url)
 | 
			
		||||
            fetched = True
 | 
			
		||||
            feed.feedinfo.update(parser)
 | 
			
		||||
        else:
 | 
			
		||||
            print 'not fetching before: %s' % nextfetch
 | 
			
		||||
    except Exception, e:
 | 
			
		||||
        print 'this feed seems to be new'
 | 
			
		||||
        print 'fetching...'
 | 
			
		||||
        parser = feedparser.parse(feed.url)
 | 
			
		||||
        fetched = True
 | 
			
		||||
        feed.feedinfo = Feedinfo(parser)
 | 
			
		||||
 | 
			
		||||
    print 'processing feed entries:'
 | 
			
		||||
    entries_new = 0
 | 
			
		||||
    entries_total = 0
 | 
			
		||||
    for entry in parser.entries:
 | 
			
		||||
        entries_total = entries_total + 1
 | 
			
		||||
        entries_new = entries_new + process_feed_entry(session, feed, entry)
 | 
			
		||||
        session.commit()
 | 
			
		||||
    print 'updated %d of %d entries' % (entries_new, entries_total)
 | 
			
		||||
    if fetched:
 | 
			
		||||
        print 'processing feed entries:'
 | 
			
		||||
        entries_new = 0
 | 
			
		||||
        entries_total = 0
 | 
			
		||||
        for entry in parser.entries:
 | 
			
		||||
            entries_total = entries_total + 1
 | 
			
		||||
            entries_new = entries_new + process_feed_entry(session, feed, entry)
 | 
			
		||||
            session.commit()
 | 
			
		||||
        print 'updated %d of %d entries' % (entries_new, entries_total)
 | 
			
		||||
 | 
			
		||||
def fetch_all_feeds(session):
 | 
			
		||||
    print 'fetching all feeds...'
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user