# User:Ritchie333/arcsinebot.py

# Open-source emulation of [[User:SineBot]]
# Will currently dump out posts in common talk namespaces in the last five minutes that might need to be signed
# Work in progress - many false positives

import pywikibot
import lxml.etree
import datetime
import re

def RecentChanges(site):
  """Ask *site* for recent changes inside a five-minute window.

  The newest edge of the window is one minute before the server's current
  time; the oldest edge is five minutes before that. The request is made
  with bot=False and is limited to namespaces 1, 3, 5 and 7.
  """
  lag = datetime.timedelta(minutes=1)
  span = datetime.timedelta(minutes=5)
  newest = site.server_time() - lag
  oldest = newest - span
  return site.recentchanges(
    namespaces=u'1|3|5|7',
    bot=False,
    start=newest,
    end=oldest,
  )

def OptedInOrOut(site,user,name):
  """Report whether a namespace-3 page titled *user* transcludes page *name*.

  Looks up the pages in namespace '3' that include *name* as a template and
  returns True as soon as one of them has a namespace-less title equal to
  *user*; returns False when none does.
  """
  marker = pywikibot.Page(site, name)
  includers = marker.getReferences(namespaces='3', only_template_inclusion=True)
  return any(user == includer.title(with_ns=False) for includer in includers)

def OptedIn(site,user):
  """Return OptedInOrOut's verdict for *user* against Template:YesAutosign."""
  opt_in_template = 'Template:YesAutosign'
  return OptedInOrOut(site, user, opt_in_template)

def OptedOut(site,user):
  """Return OptedInOrOut's verdict for *user* against Template:NoAutosign."""
  opt_out_template = u'Template:NoAutosign'
  return OptedInOrOut(site, user, opt_out_template)

def IsNonText(text):
  """Return True when *text* should not be treated as a comment to sign.

  Three shapes of text are recognised:
    * nothing but a template call surrounded by non-word characters
      (template banner creation);
    * text ending in an HTML comment of the form '<!-- ... -->';
    * text ending in the marker left behind by a previous autosign.

  Returns False for everything else.
  """
  # Raw strings: '\w' and '\d' inside ordinary string literals are invalid
  # escape sequences and trigger warnings on current Python versions.
  non_text_patterns = (
    r'^[^\w\d]*{{.*}}[^\w\d]*$',  # Ignore template banner creation
    r'<!-- .* -->$',  # Ends with an HTML comment
    r'<!-- Template:Unsigned.*<!--Autosigned by SineBot-->$',  # Already signed it
  )
  return any(re.search(pattern, text) is not None for pattern in non_text_patterns)

def CreateSearch(user):
  """Build a regex (as a string) that matches a timestamped signature of *user*.

  The pattern looks for a piped link to the user's "User" or "User talk"
  page (optionally with a '#section'), followed later on the same line by a
  timestamp shaped like '12:34, 5 June 2020 (UTC)'.

  The user name is escaped in full, so names containing regex
  metacharacters (e.g. 'C++' or the dots of an IP address) are matched
  literally instead of raising re.error or acting as wildcards.
  """
  # Spaces and underscores are interchangeable in wiki page titles, so accept
  # either one wherever the name contains one of them.
  name = '[ _]'.join(re.escape(part) for part in re.split('[ _]', user))
  link = r'\[\[[uU]ser([ _][tT]alk)?:' + name + r'(#.*)?\|.*\]\]'
  stamp = r'\d+:\d+, \d+ \w+ \d+ \(UTC\)'
  return link + '.*' + stamp

def CreateUnsigned(user,anon,timestamp):
  """Build the wikitext that would be appended to sign a comment for *user*.

  user      -- user name (or IP address) to credit
  anon      -- truthy selects the 'unsignedIP' template, otherwise 'unsigned'
  timestamp -- object with datetime-style strftime() and .day

  Returns the substituted template call wrapped in the marker comments.
  """
  template = 'unsignedIP' if anon else 'unsigned'
  # Signature timestamps show the day of the month without a leading zero
  # ('5 June 2020'); strftime's %d would zero-pad it, so use .day directly.
  stamp = '{0}, {1} {2} (UTC)'.format(
    timestamp.strftime('%H:%M'),
    timestamp.day,
    timestamp.strftime('%B %Y'),
  )
  return ('<!-- Template:' + template + '-->{{subst:' + template + '|' + user
          + '|' + stamp + '}}<!--Autosigned by SineBot-->')

def ProcessTag(line,title,user,anon,timestamp,text):
  """Print a report for added text that carries no signature by *user*.

  Nothing happens when *text* is None, when IsNonText() accepts it, or when
  the pattern from CreateSearch() is found in it (case-insensitively).
  Otherwise the page title, line number, proposed unsigned template and the
  search details are written to standard output.
  """
  if text is None or IsNonText( text ):
    return

  pattern = CreateSearch( user )
  if re.search( pattern, text, re.IGNORECASE ) is not None:
    # A matching signature is already present
    return

  print( title + ' : line ' + str( line ) )
  print( CreateUnsigned(user,anon,timestamp) )
  print('          Searching for : ' + pattern)
  print('          in : ' + text)
  print('--------------------')

def GetFirstTextLine(site,title):
  """Return the 1-based number of the first heading line of page *title*.

  A heading line is one that consists of '==...==' with only non-word
  characters around it. When the page has no such line, the result is the
  number of lines plus one.
  """
  # Raw string: '\w' / '\d' in an ordinary literal are invalid escapes.
  heading = re.compile( r'^[^\w\d]*==.*==[^\w\d]*$' )
  lines = pywikibot.Page(site,title).text.split('\n')
  for number, textLine in enumerate( lines, 1 ):
    if heading.match( textLine ):
      return number
  return len( lines ) + 1

def ProcessDiff(site,title,user,anon,timestamp,diff):
  """Extract the text added by a diff and pass it to ProcessTag().

  diff is the HTML returned for a comparison of two revisions. The line
  number is read from the last 'diff-lineno' cell ("Line N:"); changes at or
  above the page's first heading are ignored. The text of every
  'diff-addedline' cell is concatenated and, when non-empty, handed to
  ProcessTag() together with the other arguments.
  """
  if not diff:
    # Nothing to parse (e.g. no visible change between the revisions)
    return
  dom = lxml.etree.HTML(diff)
  if dom is None:
    # NOTE(review): guard in case the parser yields no tree for odd input
    return

  # Match on a class *token* rather than the whole attribute value, so cells
  # that carry additional classes next to the expected one are still found.
  cell = '//tr/td[contains(concat(" ", normalize-space(@class), " "), " %s ")]'

  tdLines = dom.xpath( cell % 'diff-lineno' )
  if not tdLines:
    return

  # .text is None when the cell starts with a child element
  match = re.match( r'Line (\d+):', tdLines[-1].text or '' )
  if match is None:
    return
  line = int( match.group(1) )

  # If the comment is above the first section, ignore it
  if line <= GetFirstTextLine(site,title):
    return

  tags = dom.xpath( ( cell % 'diff-addedline' ) + '/div' )
  text = ''.join( part for tag in tags for part in tag.itertext() )
  if text:
    ProcessTag(line,title,user,anon,timestamp,text)

def IsUserValid(site,username):
  """Decide whether edits by *username* should be checked for signatures.

  Users with fewer than 800 edits are checked unless OptedOut() says
  otherwise; users with 800 or more edits are checked only when OptedIn()
  says so.
  """
  account = pywikibot.page.User(site,username)
  if account is None:
    return False

  edits = int( account.editCount() )
  if edits < 800:
    return not OptedOut(site,username)
  return OptedIn(site,username)

def ProcessChange(site, rc):
  """Examine one recent-changes entry and, if eligible, process its diff.

  rc is a mapping with (at least) the keys 'tags', 'user', 'title',
  'old_revid', 'revid' and 'timestamp'; an 'anon' key marks an anonymous
  edit. Entries are skipped when they are reverts, when there is no previous
  revision to compare with, or when the (registered) user fails
  IsUserValid(). Otherwise the two revisions are compared and the result is
  passed to ProcessDiff().
  """
  # Ignore reverts / anti-vandalism. Change tags are reported under their
  # internal names ('mw-undo', ...); the display labels are kept as well so
  # anything the old check caught is still caught.
  revert_tags = ('Undo', 'Rollback', 'mw-undo', 'mw-rollback', 'mw-manual-revert')
  tags = rc.get('tags') or ()
  if any(tag in tags for tag in revert_tags):
    return

  # No previous revision (e.g. page creation): nothing to diff against.
  # Checked before the user lookup because it costs nothing.
  old_revid = rc['old_revid']
  if not old_revid > 0:
    return

  user = rc.get('user')
  if user is None:
    # NOTE(review): presumably a hidden user name; nobody to attribute to
    return

  anon = 'anon' in rc
  if not ( anon or IsUserValid(site,user) ):
    return

  timestamp = pywikibot.Timestamp.fromISOformat(rc['timestamp'])
  diff = site.compare(old_revid,rc['revid'])
  ProcessDiff(site,rc['title'],user,anon,timestamp,diff)

def Main():
  """Fetch the recent changes of the default site and process each one."""
  wiki = pywikibot.Site()
  for change in RecentChanges(wiki):
    ProcessChange(wiki, change)

# Run the bot only when this file is executed as a script, so that importing
# it (for reuse or testing) does not start querying the wiki.
if __name__ == '__main__':
  Main()