Thursday, November 29, 2007

RSS feed reader

The idea was to have a pop-up notification for blog updates, but I was too bored to complete it.






import sgmllib
import feedparser
from threading import Thread
import time
import re
# Parse the list of subscribed blogs from the OPML export.
class myparse(sgmllib.SGMLParser):
def get_vals(self):
return self.datalist
def parse(self, data):
self.feed(data)
self.close()
def __init__(self, verbose=0):
sgmllib.SGMLParser.__init__(self, verbose=0)
self.datalist = []
def start_outline(self, attributes):
attrdict = {}
for name, value in attributes:
attrdict[name]=value
self.datalist.append(attrdict)


class fetchfeed(Thread):
    """Worker thread that fetches and parses a single feed.

    ``xmlurl`` is handed to ``feedparser.parse`` when the thread runs.
    ``feedresult`` stays ``None`` until the thread has finished, and is
    then set to the parse result only if the parsed feed has a title.
    """

    def __init__(self, xmlurl):
        """Remember *xmlurl*; nothing is fetched until ``start()``."""
        Thread.__init__(self)
        self.xmlurl = xmlurl
        self.feedresult = None

    def run(self):
        """Parse the feed and keep the result if it carries a title."""
        d = feedparser.parse(self.xmlurl)
        # Results without a feed title are discarded -- presumably a missing
        # title indicates a failed download or parse (TODO confirm).
        if 'title' in d.feed:
            self.feedresult = d
def strip_html(text):
    """Remove HTML tags from *text* and decode its character references.

    - Anything of the form ``<...>`` is deleted.
    - Numeric references (``&#65;``, ``&#x41;``) become the corresponding
      character.
    - Named references (``&amp;``, ``&eacute;``) found in the standard
      entity table become the corresponding character.
    - References that cannot be decoded (unknown name, malformed or
      out-of-range number) are left in the text unchanged.

    Returns the cleaned string.
    """
    # Python 2 and Python 3 expose the entity table and the
    # code-point-to-character function under different names; use whichever
    # this interpreter provides.
    try:
        from htmlentitydefs import name2codepoint
        to_char = unichr
    except ImportError:
        from html.entities import name2codepoint
        to_char = chr

    def fixup(m):
        token = m.group(0)
        if token[:1] == "<":
            return ""  # markup tag: drop it entirely
        if token[:2] == "&#":
            # Numeric reference; the slice ends strip the prefix and the
            # trailing ';'.
            try:
                if token[:3].lower() == "&#x":
                    return to_char(int(token[3:-1], 16))
                return to_char(int(token[2:-1]))
            except (ValueError, OverflowError):
                # Not a number, or no such code point: leave as is.
                return token
        # Named reference: look up the name between '&' and ';'.
        codepoint = name2codepoint.get(token[1:-1])
        if codepoint is None:
            return token  # unknown entity: leave as is
        return to_char(codepoint)

    # (?s) lets a tag span several lines.
    return re.sub(r"(?s)<[^>]*>|&#?\w+;", fixup, text)

# Parse the OPML document, fetch the first few subscribed feeds in parallel
# and print the newest entries of each.

MAX_FEEDS = 10          # how many subscriptions to fetch
ENTRIES_PER_FEED = 3    # how many entries to print for each feed

fileobj = open('mvblogs-export.xml')
try:
    filedata = fileobj.read()
finally:
    # Close the file even if reading fails.
    fileobj.close()

parseblogs = myparse()
parseblogs.parse(filedata)

blogdata = parseblogs.get_vals()
# Discard the first <outline> -- presumably a container/heading element
# rather than a feed (TODO confirm against the export file).  Slice deletion
# is a no-op when no outlines were found.
del blogdata[:1]

# Start every download first so the fetches overlap ...
tlist = []
for outline in blogdata[0:MAX_FEEDS]:
    xmlurl = outline.get('xmlurl')
    if not xmlurl:
        continue  # outline without a feed URL: nothing to fetch
    t = fetchfeed(xmlurl)
    t.start()
    tlist.append(t)

# ... then wait for each one in turn and print what it found.  A single
# parenthesised argument makes print() behave the same as the Python 2
# print statement.
for tl in tlist:
    tl.join()
    if not tl.feedresult:
        continue  # fetch failed or the feed had no title
    tlfeed = tl.feedresult.feed
    tlresult = tl.feedresult.entries
    # Not every feed declares an author.
    print("author:" + tlfeed.get('author', ''))
    print("title:" + tlfeed['title'])
    # Slicing copes with feeds that have fewer entries than requested.
    for entry in tlresult[:ENTRIES_PER_FEED]:
        print(entry.get('date', ''))
        print(entry.get('title', ''))
        print(entry.get('link', ''))
        print(strip_html(entry.get('description', '')))
    # NOTE(review): the paste lost its indentation, so it is unclear whether
    # this separator was meant per feed or per entry; per feed is assumed.
    print("#" * 100)



1 comment:

subcorpus said...

i use netnewswire ...
its very good ...
has a free version too ...
hehe ...

http://en.wikipedia.org/wiki/Netnewswire