Implemented a per-feed fetch frequency: a feed is re-fetched only after `frequency` minutes have passed since its last fetch
This commit is contained in:
		
							
								
								
									
										38
									
								
								atomstrom.py
									
									
									
									
									
								
							
							
						
						
									
										38
									
								
								atomstrom.py
									
									
									
									
									
								
							@@ -3,7 +3,7 @@
 | 
				
			|||||||
from sqlalchemy import create_engine, Table, Column, Integer, Text, Boolean, DateTime, MetaData, ForeignKey, desc
 | 
					from sqlalchemy import create_engine, Table, Column, Integer, Text, Boolean, DateTime, MetaData, ForeignKey, desc
 | 
				
			||||||
from sqlalchemy.orm import sessionmaker, relation, backref
 | 
					from sqlalchemy.orm import sessionmaker, relation, backref
 | 
				
			||||||
from sqlalchemy.ext.declarative import declarative_base
 | 
					from sqlalchemy.ext.declarative import declarative_base
 | 
				
			||||||
from datetime import datetime
 | 
					from datetime import datetime, timedelta
 | 
				
			||||||
from time import mktime
 | 
					from time import mktime
 | 
				
			||||||
import feedparser
 | 
					import feedparser
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
@@ -12,6 +12,7 @@ import urllib
 | 
				
			|||||||
import hn
 | 
					import hn
 | 
				
			||||||
import html2text
 | 
					import html2text
 | 
				
			||||||
import ConfigParser
 | 
					import ConfigParser
 | 
				
			||||||
 | 
					import pprint
 | 
				
			||||||
import smtplib
 | 
					import smtplib
 | 
				
			||||||
from email.mime.text import MIMEText
 | 
					from email.mime.text import MIMEText
 | 
				
			||||||
from optparse import OptionParser
 | 
					from optparse import OptionParser
 | 
				
			||||||
@@ -23,6 +24,7 @@ class Feed(Base):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    id = Column(Integer, primary_key=True)
 | 
					    id = Column(Integer, primary_key=True)
 | 
				
			||||||
    url = Column(Text)
 | 
					    url = Column(Text)
 | 
				
			||||||
 | 
					    frequency = Column(Integer)
 | 
				
			||||||
    daily = Column(Boolean)
 | 
					    daily = Column(Boolean)
 | 
				
			||||||
    readability = Column(Boolean)
 | 
					    readability = Column(Boolean)
 | 
				
			||||||
    fullpage = Column(Boolean)
 | 
					    fullpage = Column(Boolean)
 | 
				
			||||||
@@ -225,25 +227,35 @@ def process_feed_entry(session, feed, entry):
 | 
				
			|||||||
        return 1
 | 
					        return 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def fetch_single_feed(session, feed):
 | 
					def fetch_single_feed(session, feed):
 | 
				
			||||||
    print 'fetching %s' % feed.url
 | 
					    print 'processing %s' % feed.url
 | 
				
			||||||
    parser = feedparser.parse(feed.url)
 | 
					 | 
				
			||||||
    print 'processing feed info...'
 | 
					 | 
				
			||||||
    query = session.query(Feedinfo).filter(Feedinfo.feed_id==feed.id)
 | 
					    query = session.query(Feedinfo).filter(Feedinfo.feed_id==feed.id)
 | 
				
			||||||
 | 
					    fetched = False
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        feed.feedinfo = query.one()
 | 
					        feed.feedinfo = query.one()
 | 
				
			||||||
        feed.feedinfo.update(parser)
 | 
					        nextfetch = (feed.feedinfo.lastfetched + timedelta(minutes=feed.frequency))
 | 
				
			||||||
 | 
					        if datetime.now() > nextfetch:
 | 
				
			||||||
 | 
					            print 'fetching...'
 | 
				
			||||||
 | 
					            parser = feedparser.parse(feed.url)
 | 
				
			||||||
 | 
					            fetched = True
 | 
				
			||||||
 | 
					            feed.feedinfo.update(parser)
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            print 'not fetching before: %s' % nextfetch
 | 
				
			||||||
    except Exception, e:
 | 
					    except Exception, e:
 | 
				
			||||||
        print 'this feed seems to be new'
 | 
					        print 'this feed seems to be new'
 | 
				
			||||||
 | 
					        print 'fetching...'
 | 
				
			||||||
 | 
					        parser = feedparser.parse(feed.url)
 | 
				
			||||||
 | 
					        fetched = True
 | 
				
			||||||
        feed.feedinfo = Feedinfo(parser)
 | 
					        feed.feedinfo = Feedinfo(parser)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    print 'processing feed entries:'
 | 
					    if fetched:
 | 
				
			||||||
    entries_new = 0
 | 
					        print 'processing feed entries:'
 | 
				
			||||||
    entries_total = 0
 | 
					        entries_new = 0
 | 
				
			||||||
    for entry in parser.entries:
 | 
					        entries_total = 0
 | 
				
			||||||
        entries_total = entries_total + 1
 | 
					        for entry in parser.entries:
 | 
				
			||||||
        entries_new = entries_new + process_feed_entry(session, feed, entry)
 | 
					            entries_total = entries_total + 1
 | 
				
			||||||
        session.commit()
 | 
					            entries_new = entries_new + process_feed_entry(session, feed, entry)
 | 
				
			||||||
    print 'updated %d of %d entries' % (entries_new, entries_total)
 | 
					            session.commit()
 | 
				
			||||||
 | 
					        print 'updated %d of %d entries' % (entries_new, entries_total)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def fetch_all_feeds(session):
 | 
					def fetch_all_feeds(session):
 | 
				
			||||||
    print 'fetching all feeds...'
 | 
					    print 'fetching all feeds...'
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user