Jump to content

User:CobraBot/Code 3

From Wikipedia, the free encyclopedia
#!/usr/bin/env python
# -*- coding: utf-8  -*-

import wikipedia
import pagegenerators
import re
import warnings
from time import sleep
from contextlib import closing
from sys import stdout
from json import dump, load
from itertools import ifilter

# This is required for the text that is shown when you run this script
# with the parameter -help.
# Maps the '&params;' placeholder to the page generators' parameter help
# text. Nothing else in this file reads this dict; presumably the framework
# looks it up by name -- TODO confirm.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp
}

# Site object shared by the helpers and the bot class below. It is fetched
# once, when the module is loaded (presumably the default site from the
# user's framework configuration -- verify).
SITE = wikipedia.getSite()
def pagesUsingTemplate(templateName):
    """Return a generator over the pages that transclude a template.

    templateName is the bare template title; the namespace prefix is taken
    from SITE as the local name of namespace number 10 and joined to the
    title with a colon. Only template inclusions are reported.
    """
    namespacePrefix = unicode(SITE.namespace(10))
    fullTitle = u":".join((namespacePrefix, templateName))
    templatePage = wikipedia.Page(SITE, fullTitle)
    return pagegenerators.ReferringPageGenerator(
        templatePage, onlyTemplateInclusion=True)

def has_disambiguator(page):
    """Tell whether the title of page contains an opening parenthesis."""
    title = page.title()
    return title.find(u'(') >= 0

def list_redirects_to(page):
    """Return page.getReferences() restricted to redirects.

    The call asks for redirects only and does not follow them.
    """
    options = {'follow_redirects': False, 'redirectsOnly': True}
    return page.getReferences(**options)

def wordsRegex(words):
    """Build a regex source string that matches any one of words.

    Each word is wrapped in its own non-capturing group, the groups are
    joined with "|", and the whole alternation is wrapped in one more
    non-capturing group. Words are inserted as-is (no escaping).
    """
    group = "(?:%s)"
    alternation = "|".join([group % word for word in words])
    return group % alternation

class CobraBot(object):
    EDIT_SUMMARY = u'Superfluous disambiguation removed per [[WP:NAMB]] ([[Wikipedia:BOTPOL#Assisted_editing_guidelines|assisted editing]] using [[User:CobraBot|CobraBot]]; [[User talk:Cybercobra]])'
    PERSON_SUMMARY = u'Person disambiguation tweaked ([[Wikipedia:BOTPOL#Assisted_editing_guidelines|assisted editing]] using [[User:CobraBot|CobraBot]]; [[User talk:Cybercobra]])'
    DABLINK = u"Dablink"
    DISAMBIGUATION = re.compile(u"\\{\\{[ \t]*" + wordsRegex("about dablink otheruses for the redirect this twootheruses".split() + ["other uses", "two other uses"]) +"[^}]*\\}\\}(\n?)", re.IGNORECASE)
    DB_MOVE = "{{db-move|%s|Evidently not ambiguous}}\n"
    OFFSET_FILE = 'N.json'


    def __init__(self, debug):
        """
        Constructor. Parameters:
            * generator - The page generator that determines on which pages
                          to work on.
            * debug     - If True, doesn't do any real changes, but only shows
                          what would have been changed.
        """
        
        self.generator = ifilter(has_disambiguator, pagesUsingTemplate(self.DABLINK))
        self.debug = debug
        self.editCount = 0
        self.log = file("skipped.log", 'a')
        self.log.write("BEGIN NEW SESSION\n")
        wikipedia.setAction(self.EDIT_SUMMARY)

    def run(self):
        with closing(file(self.OFFSET_FILE, 'r')) as f:
            N = load(f)
        # Set the edit summary message
        print "Advancing by %s..." % N
        stdout.flush()
        for i in xrange(N):
            next(self.generator)
        print "Done advancing!"
        stdout.flush()
        try:
            for pageIndex, page in enumerate(self.generator):
                wikipedia.setAction(self.EDIT_SUMMARY)
                self.treat(page, pageIndex)
        finally:
            self.log.close()
            with closing(file(self.OFFSET_FILE, 'w')) as f:
                dump(N+pageIndex-5, f)

    #########
    
    def treat(self, page, pageIndex):
        """
        Loads the given page, does some changes, and saves it.
        """
        
        print "=================================================================="
        
        print "PAGE TITLE:", page.title()
        print "PAGE#:", pageIndex+1
        print "EDIT COUNT:", self.editCount
        if page.namespace() != 0:
            wikipedia.output(u"SKIPPING: Non-article namespace!")
            return
        
        try:
            # Load the page
            text = page.get()
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist; skipping." % page.aslink())
            return
        except wikipedia.IsRedirectPage:
            wikipedia.output(u"Page %s is a redirect; skipping." % page.aslink())
            return
            
        disams = list(re.finditer(self.DISAMBIGUATION, text))
        if not disams:
            self.log.write("FALSE POSITIVE: "+page.title().encode('utf8')+"\n")
            print "FALSE POSITIVE:", page.title().encode('utf8')
            return
        print "REDIRECTS:"
        redirects = list(list_redirects_to(page))
        print "   ", "\n    ".join([redirect.title() for redirect in redirects])
        norm_with_caps = page.title().split(u"(")[0].strip()
        normalized_title = norm_with_caps.lower()
        if any(redir.title().lower() == normalized_title for redir in redirects):
            print "***PRIMARY TOPIC REDIRECTS HERE***"
        person = False
        dbmove = False
        while True:
            print "Choose option:"
            print "[0] Skip page"
            for i, disamb in enumerate(disams):
                lineno = text[:disamb.start()].count("\n")
                print "[%s] (line %s): %s" % (i+1, lineno, disamb.group().strip())
            try:
                input = raw_input("Enter number of your choice: ")
                choice = int(input)
            except ValueError:
                if input == "person":
                    person = True
                    choice = 1
                    break
                if input == "dbmove":
                    dbmove = True
                    break
                print "Invalid input; try again."
            else:
                if choice <= len(disams):
                    break
                else:
                    print "Invalid input; try again."
        if dbmove:
            target = wikipedia.Page(SITE, norm_with_caps)
            text = self.DB_MOVE % page.title() + target.get()
            page = target
        elif choice == 0:
            print "SKIPPED"
            return
        else:
            redo = choice < 0
            if choice < 0: choice = -choice
            choice -= 1
            redact = disams[choice]
            if person:
                wikipedia.setAction(self.PERSON_SUMMARY)
                text = text[:redact.start()] + "{{otherpeople|%s}}\n" % norm_with_caps + text[redact.end():]
            else:
                text = text[:redact.start()] + text[redact.end():]
        # only save if something was changed
        if text != page.get():
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            wikipedia.output(u"\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
            # show what was changed
            wikipedia.showDiff(page.get(), text)
            # raw_input("Continue?")
            # sleep(3)
            if dbmove or self.debug:
                choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No'], ['y', 'N'], 'N')
                if choice == 'n':
                    return
            try:
                # Save the page
                page.put(text)
            except wikipedia.LockedPage:
                wikipedia.output(u"Page %s is locked; skipping." % page.aslink())
            except wikipedia.EditConflict:
                wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
            except wikipedia.SpamfilterError, error:
                wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (page.title(), error.url))
            else:
                self.editCount += 1
        if redo:
            self.treat(wikipedia.Page(SITE, page.title()), pageIndex)


def main():
    """Create the bot with debug mode off and run it, silencing warnings.

    Only the run() call happens inside the warnings-suppression context;
    the bot is constructed before it.
    """
    debugMode = False
    cobra = CobraBot(debugMode)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        cobra.run()

# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Called whether main() returns or raises (presumably tells the
        # framework that this script has finished -- confirm in its docs).
        wikipedia.stopme()