Jump to content

User:Stefan2bot/shadowsCommons.py

From Wikipedia, the free encyclopedia
# <nowiki>
import wikipedia
import catlib
import sys
# Site handles used by every lookup below: Wikimedia Commons, and the
# framework's default site (presumably the local Wikipedia taken from the
# user configuration -- confirm against user-config.py).
commons = wikipedia.getSite("commons", "commons")
local = wikipedia.getSite()

# Edit summary attached to every edit the bot saves.
editSummary = "[[WP:B|Robot]]: Marking a file which shadows a file on [[commons:|Wikimedia Commons]]"
# Wikitext prepended to the top of each shadowing file's description page.
shadowTag = "{{ShadowsCommons}}\n"
# The bot will edit at most maxEdits pages, even if more shadows are found.
# The counter is decremented after each tagged file and tagging stops once it
# reaches exactly 0, so set it to a negative number for an unlimited number of
# edits.
# Per the original author, the edit count isn't 100% accurate.
maxEdits = 47

def imageHash(image):
  """Return the hash value reported by the image page's getHash() method.

  image is expected to be an image page object exposing getHash(); the value
  is passed through unchanged (callers here treat it as a SHA1 digest).
  """
  digest = image.getHash()
  return digest

def imageExists(image):
  """Return True if the image exists on its own site.

  The page must be an image page, must exist, and its file must not be
  reported as living on Commons by fileIsOnCommons(). Returns False in every
  other case.
  """
  # Guard clauses keep the original evaluation order: isImage(), then
  # exists(), and only then fileIsOnCommons().
  if not image.isImage():
    return False
  if not image.exists():
    return False
  return not image.fileIsOnCommons()

def isDifferent(filename):
# Returns True if both projects have images with the same file name but with
# different SHA1 hash values. Returns False otherwise.
  commonsImage = wikipedia.ImagePage(commons, filename)
  localImage = wikipedia.ImagePage(local, filename)
  if not imageExists(commonsImage):
    return False
  # It is still necessary to check that the image exists locally: it might have
  # been deleted after the script started.
  if not imageExists(localImage):
    return False
  # All redirects are skipped. Not sure what people wish to do with those.
  if localImage.isRedirectPage():
    print "Redirect which shadows Commons: " + localImage.title().encode("utf-8")
    return False 
  if commonsImage.isRedirectPage():
    print "Commons redirect shadowed by Wikipedia: " + localImage.title().encode("utf-8")
    return False
  return imageHash(localImage) != imageHash(commonsImage)

def tagFile(file):
  if isDifferent(file) and maxEdits != 0:
    localfile = wikipedia.ImagePage(local, file)
    categories = localfile.categories(False, True)
    for cats in categories:
      if cats.title() == "Category:Wikipedia files that shadow a file on Wikimedia Commons":
        return False
    fileInfo = localfile.get()
    fileInfo = shadowTag + fileInfo
    if local.loggedInAs() != "Stefan2bot":
      return False
    print fileInfo
    try:
      localfile.put(fileInfo, editSummary)
    except:
      print "Error: Can't edit " + localfile.title().encode("utf-8")
    sys.stderr.write(file.title().encode("utf-8") + " tagged with {{ShadowsCommons}}\n")
    return True
  return False

# All file names are listed in the file conflicts.txt. One file name per line, without namespace. Designed to support both commercial and non-commercial line breaks.
fileList = open("conflicts.txt", "r")
for line in fileList.readlines():
  print line
  replaced = line.decode("utf-8").replace(u"\n", u"").replace(u"\r", u"")
  if tagFile(u"File:" + replaced):
    maxEdits -= 1
# </nowiki>