# Source: wiki page "User:Lemmey/TheDaily" -- "TheDaily" news-import bot.
# (The original "Appearance" line was wiki page chrome left over from the
# web scrape, not part of the script.)
from BeautifulSoup import BeautifulStoneSoup
import datetime
import urllib
import wikipedia ##replace with whoipedia for IP usage
import time
##import sentance_case
import re
# A single newline, named so the page-splicing code below reads clearly.
# (The original spelled this as a triple-quoted literal spanning two lines.)
rtnln = u'\n'
def AddtoPage(PageName, newtext, checklink, message):
    """Insert a news entry onto the wiki page PageName, unless checklink is already there.

    The entry is spliced in just before the
    '<!-- All news items above this line -->|}' marker and saved (after
    operator confirmation) with `message` as the edit summary.
    """
    Page = wikipedia.Page(wikipedia.getSite(), PageName)
    Pagetext = Page.get()
    oldPageText = Pagetext
    # Skip pages that already carry this story's link, to avoid duplicates.
    if Pagetext.find(checklink) == -1:
        position = Pagetext.find('<!-- All news items above this line -->|}')
        if position == -1:
            # Marker missing: slicing at -1 would splice the entry before the
            # page's last character and corrupt it, so bail out instead.
            wikipedia.output(u'No insertion marker found on %s; skipping.' % PageName)
            return
        # Keep everything unicode throughout: the original encoded newtext to
        # UTF-8 bytes here, which mixes str into the unicode page text and
        # raises UnicodeDecodeError for non-ASCII content on Python 2.
        Pagetext = Pagetext[:position] + newtext + rtnln + Pagetext[position:]
        save_page(Page, oldPageText, Pagetext, message)
def save_page(page,oldtext,newtext,message):
print "Message: ",message
wikipedia.showDiff(oldtext, newtext)
choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No'], ['y', 'N'], 'N')
text = newtext
##choice = 'y' #HARD CODED
if choice == 'y':
try:
# Save the page
page.put(newtext,minorEdit=False,comment=message)
time.sleep(69)
except wikipedia.EditConflict:
wikipedia.output(u'Skipping %s because of edit conflict')
except:
pass
def sortstory(a, b):
    """Comparator for news bullets: compare only alphanumeric characters, case-insensitively.

    Wiki markup ('*', '[[', ']]') and whitespace are ignored so stories sort
    by their visible text.  Returns -1, 0 or 1 like the classic cmp().
    """
    a = ''.join(ch for ch in a.upper() if ch.isalnum())
    b = ''.join(ch for ch in b.upper() if ch.isalnum())
    # (a > b) - (a < b) is exactly cmp(a, b), but also works on Python 3,
    # where cmp() no longer exists.
    return (a > b) - (a < b)
def AlphaBeta(Pagetext):
    """Alphabetically sort the bulleted news items of a current-events page.

    Operates only on the text between the 'below this line' and
    'above this line' markers.  Known limitation (as in the original):
    '**' sub-bullets are protected from splitting but are not kept attached
    to their parent bullet.
    """
    below = '<!-- All news items below this line -->'
    start = Pagetext.find(below) + len(below)
    end = Pagetext.find('<!-- All news items above this line -->|}')
    stories = Pagetext[start:end]
    # Normalise bullets, hide '**' sub-bullets, then mark bullet starts with '~'.
    stories = stories.replace("* ", "*")
    stories = stories.replace("**", "%%")
    stories = stories.replace("*", "~")
    # Split on the '~' markers, keeping them as separate list entries.
    story_list = re.split(r'([\b~+])', stories)
    # Sort by visible text only.  This key yields the identical (stable)
    # ordering as the sortstory() comparator, but key= also works on
    # Python 3, where sorted() has no cmp= argument.
    story_list = sorted(story_list,
                        key=lambda s: ''.join(ch for ch in s.upper() if ch.isalnum()))
    entires = []
    for story in story_list:
        if len(story) > 4:  # drop bare '~' markers and stray newlines
            entires.append('*' + story + '\n')
    stories = ''.join(entires)
    stories = stories.replace("\n\n", "\n")
    # Restore the markup hidden above.
    stories = stories.replace("%%", "**")
    stories = stories.replace("~", "*")
    return Pagetext[:start] + "\n" + stories + Pagetext[end:]
def MakePageName(date):
    """Turn an ISO date string ('2008-05-30') into a current-events portal page name.

    Returns e.g. 'Portal:Current_events/2008 May 30'.  The day is rendered
    without a leading zero, matching the portal's naming scheme.

    The original parsed fixed slice offsets of the space-joined string and
    only worked because int() tolerates trailing whitespace; splitting on
    '-' is the intended parse and gives identical results for ISO input.
    """
    y, m, d = (int(part) for part in date.split("-"))
    # %B produces the full English month name (assumes a C/English locale).
    prefix = datetime.datetime(y, m, d).strftime('%Y %B ')
    return "Portal:Current_events/" + prefix + str(d)
def SentanceCase(oldtext):
    # Convert oldtext to sentence case, then restore any words that were
    # fully upper-case in the original (acronyms such as "VOA" or "UN").
    # NOTE(review): depends on the sentance_case module, whose import is
    # commented out at the top of this file -- calling this currently raises
    # NameError.  Confirm the module is available before use.
    newtext=sentance_case.sentence_caser(oldtext)
    sentance = oldtext.split(" ")
    for word in sentance:
        if word.isupper():
            # Re-capitalise acronyms that the sentence-caser lower-cased.
            # Matching on " "+word avoids touching substrings of longer words.
            #print word,word.lower()
            newtext=newtext.replace(" "+word.lower()," "+word)
    if sentance[0].isupper():
        # The first word carries no leading space, so fix it separately
        # (the caser will have left it as Capitalized, not UPPER).
        newtext=newtext.replace(sentance[0].capitalize(),sentance[0],1)
    return newtext
def WikiLink(oldtext):
    """Wrap title-case words of oldtext in [[...]] wiki links, merging runs of
    consecutive title-case words into one compound link ([[New York]]).

    'The' is never linked on its own, and ALL-CAPS words (acronyms) are
    linked individually.  A trailing '.' or ',' is moved outside the link.
    Note: the result carries a trailing space, as the original always did.

    Fixes two defects of the original: at w == 0 it peeked at sentance[-1]
    (the LAST word) and corrupted its link, and a link still open at the end
    of the sentence was never closed.
    """
    newtext = oldtext
    sentance = oldtext.split(" ")
    ### Build individual links around every title-case word.
    for word in sentance:
        if word.istitle() and word != "The":
            newtext = newtext.replace(word, '[[' + word + ']]')
    ### Merge adjacent links into compound links.
    sentance = newtext.split(" ")
    Cap = False  # True while we are inside an open '[[' link
    for w in range(len(sentance)):
        if sentance[w] == "The":
            sentance[w] = sentance[w].replace(']]', '')
        if Cap == True:
            # Previous word opened a link; strip this word's opening bracket.
            sentance[w] = sentance[w].replace('[[', '')
        if sentance[w].istitle() and sentance[w] != "The":
            Cap = True
            sentance[w] = sentance[w].replace(']]', '')
        else:
            Cap = False
            # A run of title-case words just ended: close the previous link.
            # Guard w > 0 -- the original read sentance[-1] here at w == 0.
            if w > 0 and sentance[w - 1].istitle() and sentance[w - 1] != "The":
                sentance[w - 1] = sentance[w - 1] + ']]'
    if Cap:
        # The sentence ended inside a link; close it (the original left it open).
        sentance[-1] = sentance[-1] + ']]'
    ### Put the words back together, linking ALL-CAPS acronyms as we go.
    nt = ""
    for word in sentance:
        if word.isupper():
            word = word.replace(word, '[[' + word + ']]')
        nt = nt + word + " "
    ### Move trailing punctuation outside the closing brackets.
    nt = nt.replace(".]]", "]].")
    nt = nt.replace(",]]", "]],")
    return nt
def setimes():
    # Fetch the Southeast European Times RSS feed and build a list of
    # [PageName, wikitext, link, date] entries, one per full story.
    array = []
    # Get a file-like object for the SETimes RSS feed.
    f = urllib.urlopen("http://www.setimes.com/cocoon/setimes/rss/en_GB/setimes.rss")
    # Read from the object, storing the feed's contents in 'xml'.
    xml = f.read()
    f.close()
    soup = BeautifulStoneSoup(xml)
    for item in soup('item'):
        ##print item
        # Re-parse the single item so tag lookups below are scoped to it.
        i = BeautifulStoneSoup(str(item))
        # Descriptions ending in '...' are truncated teasers; skip them.
        if '...' not in i.description.string:
            #print i('description')
            # str() of the tag list looks like
            # '[<dc:date>2008-05-30...</dc:date>]'; the strip and the fixed
            # slice offsets below peel off the surrounding markup.
            # NOTE(review): this assumes that exact serialised form -- any
            # change in the tag layout silently yields a wrong date.
            date = str(i('dc:date')).strip('[]')
            date = date[9:-10]
            PageName = MakePageName(date)
            ##print PageName
            link = i.link.string.strip()
            text = i.description.string
            ##MakeWikiLinks(i.title.string)
            # Bullet + wikilinked description + source attribution.
            text = "*"+WikiLink(i.description.string)+ "["+link+" (Southeast European Times)]"
            ##SentanceCase(i.description.string)
            array.append([PageName,text,link,date])
    return array
def VOA():
    """Fetch the Voice of America top-stories RSS feed and return news entries.

    Each entry is a [PageName, wikitext, link, date] list; the wikitext is a
    '*' bullet joining the sentence-cased title to the description, followed
    by a '(VOA)' source link.
    """
    entries = []
    # NB: the trailing space in the feed URL is deliberate (kept from the original).
    feed = urllib.urlopen("http://www.voanews.com/english/customCF/RecentStoriesRSS.cfm?keyword=TopStories ")
    rss = feed.read()
    feed.close()
    for item in BeautifulStoneSoup(rss)('item'):
        headline = SentanceCase(item.title.string) + "; " + item.description.string + "."
        link = item.link.string.replace("?rss=topstories", "")
        # The article date sits in the URL directly after the site prefix.
        date = link[len("http://www.voanews.com/english/"):][:len("2008-05-30")]
        page_name = MakePageName(date)
        entries.append([page_name, "*" + headline + "[" + link + " (VOA)]", link, date])
    return entries
##articlearray=[]
##articlearray=setimes()
##articlearray=articlearray+VOA()
##print articlearray
##articlearray=sorted(articlearray)
##for a in articlearray:
## PageName=a[0]
## text=a[1]
## link=a[2]
## date=a[3]
## ##print date,text
## if date!="2008-05-30":
## ##print text
## AddtoPage(PageName,text,link,date)
####AddtoPage(PageName,text,link,date)
### Driver: alphabetise one current-events page and save it interactively.
Page = wikipedia.Page(wikipedia.getSite(), "Portal:Current events/2008 June 3")
Pagetext = Page.get()
oldPageText = Pagetext
Pagetext = AlphaBeta(Pagetext)
##print Pagetext
# Edit summary: the original read "Sorted Alphabukenly" (typo).
save_page(Page, oldPageText, Pagetext, "Sorted Alphabetically")