#!/usr/bin/python
# -*- coding: utf-8 -*-
import os, sys, re
import wikipedia, query, datetime, BeautifulSoup
mysite = wikipedia.getSite()
itntext=u'{{Template:Itn}}'
itntitle=u'Template:Itn'
rsstitle=u'Template:Itn/rss'
rsspage=wikipedia.Page(mysite,rsstitle)
itnpage=wikipedia.Page(mysite,itntitle)
params = {
'action' :'parse',
'text' :itntext,
}
text = query.GetData(params, encodeTitle = False)[u'parse'][u'text'][u'*']
soup = BeautifulSoup.BeautifulSoup(text)
image=u''
for i in soup.ul:
try:
if i.name == 'li':
if i.small <> None:
i.small.extract()
image=unicode(soup.find('div', "floatright"))
else:
image=u''
r=ur'<li>(?P<li>.*?)</li>'
for m in re.finditer(r,unicode(i),re.I):
mm = m.groupdict()
href=u'{{subst:fullurl:%s}}' % i.b.a['title']
rtext= image+u'<p>'+mm['li'] + u'</p><p><a href=\"'+href+ u'\">阅读条目全文 >>></a></p>'
wikitext=u'<title>新闻动态:%s</title>\n<link>%s</link>\n<guid>%s</guid>\n<description>%s</description>\n<pubDate>{{subst:#time:r}}</pubDate>\n<dc:creator>中文维基百科编者</dc:creator></item>\n' %(i.b.a['title'], href, href, rtext)
rsstext=rsspage.get(force=True)
ritem=rsstext.split(u'<item>')
if i.b.a['title'] not in rsstext:
if len(ritem)<7:
ritem.insert(1, wikitext)
s = u'<item>'.join(ritem)
else:
ritem.insert(1, wikitext)
ritem.pop()
s = u'<item>'.join(ritem)
s=s+u'\n</channel>\n</rss>'
rsspage.put(s, u'Bot:新闻动态rss更新: [[%s]]' % i.b.a['title'])
else:
print "DYK no update."
except AttributeError, KeyError:
pass
wikipedia.stopme()