Jump to content

User:PotatoBot/Code/4

From Wikipedia, the free encyclopedia
#!/usr/bin/python
# -*- coding: utf-8  -*-

import wikipedia as w
import codecs, catlib, re, pagegenerators, time, mysave
from datetime import date

# Stub template families, keyed by ATC code prefix (plus a few keys such as
# 'Nps' or 'mab' that are not ATC codes).  Each value lists template names
# without the "-stub" suffix; main() recognises any of them on a page (first
# letter in either case) and writes back the first one, lower-cased.
stubtypes = {
	# generic pharmacology stub
	'': ('Pharma', 'Pharm', 'Pharmacy', 'Pharmacology', 'Drug'),

	'A': ('Gastrointestinal-drug', 'Gastrointestinal-system-drug'),
	'B': ('Blood-drug',),

	'C': ('Cardiovascular-drug', 'Cardiovascular-system-drug'),
	'C02': ('Antihypertensive',),

	'D': ('Dermatologic-drug',),
	'G': ('Genito-urinary-drug', 'Genito-urinary-system-drug'),
	'H': ('Systemic-hormonal-drug', 'Systemic-hormonal-preparation'),

	'J': ('Antiinfective-drug', 'Antiinfective-agent', 'Anti-infective-agent', 'Antimicrobial'),
	'J01': ('Antibiotic',),
	'J07': ('Vaccine',),

	'L': ('Antineoplastic-drug',),
	'M': ('Musculoskeletal-drug', 'Musculoskeletal-system-drug'),

	'N': ('Nervous-system-drug',),
	'N02': ('Analgesic',),
	'N03': ('Anticonvulsant',),
	'N05C': ('Sedative',),
	'Nps': ('Psychoactive', 'Anxiolytic'),
	'Npsc': ('Cannabinoid',),
	'Npsh': ('Hallucinogen',),

	# main() replaces 'P' by 'J' before any template is written
	'P': ('Antiinfective-drug aux entry',),

	'R': ('Respiratory-system-drug',),
	'S': ('Sensory-organ-drug',),

	'mab': ('Monoclonal-antibody',),
	'str': ('Signaltransduction',),
}

# Navbox template title (without the "Template:" prefix) -> ATC code prefix.
# main() compares these keys with the template names found on a page, so each
# key has to be spelled exactly like the normalised page title; in particular
# it must not contain runs of spaces, which MediaWiki collapses to one.
navboxes = {
	# A
	'Stomatological preparations': 'A01', 'Antacids': 'A02A', 'Drugs for peptic ulcer and GORD': 'A02B',
	'Drugs for functional gastrointestinal disorders': 'A03', 'Antiemetics': 'A04', 'Bile and liver therapy': 'A05',
	'Laxatives': 'A06', 'Antidiarrheals, intestinal anti-inflammatory/anti-infective agents': 'A07',
	'Antiobesity preparations': 'A08', 'Oral hypoglycemics and insulin analogs': 'A10', 'Vitamin': 'A11',
	'Mineral supplements': 'A12', 'Anabolic steroids': 'A14', 'Other alimentary tract and metabolism products': 'A16',
	# B
	'Antithrombotics': 'B01', 'Antihemorrhagics': 'B02', 'B03, B05, B06': 'B',
	# C
	'Cardiac glycosides': 'C01A', 'Antiarrhythmic agents': 'C01B', 'Cardiac stimulants excluding cardiac glycosides': 'C01C',
	'Vasodilators used in cardiac diseases': 'C01D', 'Sympatholytic antihypertensives': 'C02',
	'Nonsympatholytic vasodilatory antihypertensives': 'C02', 'Diuretics': 'C03', 'Peripheral vasodilators': 'C04',
	'Vasoprotectives': 'C05', 'Calcium channel blockers': 'C08', 'Agents acting on the renin-angiotensin system': 'C09',
	'Lipid modifying agents': 'C10',
	# D
	'Emollients and protectives': 'D02', 'Preparations for treatment of wounds and ulcers': 'D03', 'Antipruritics': 'D04',
	'Antipsoriatics': 'D05', 'Antibiotics and chemotherapeutics for dermatological use': 'D06', 'Antiseptics and disinfectants': 'D08',
	'Medicated dressings': 'D09', 'Acne Agents': 'D10', 'Other dermatological preparations': 'D11',
	# G
	'Gynecological anti-infectives and antiseptics': 'G01', 'Uterotonic': 'G02A', 'Birth control methods': 'G02B', 'Labor repressants': 'G02CA',
	'Androgens': 'G03B', 'Gonadotropins and GnRH': 'G03G', 'Urologicals, including antispasmodics': 'G04B',
	'Drugs used in benign prostatic hypertrophy': 'G04C',
	# H
	'Pituitary and hypothalamic hormones and analogues': 'H01', 'Corticosteroids for systemic use': 'H02', 'Thyroid therapy': 'H03',
	'Calcium homeostasis': 'H05',
	# J
	'Protein synthesis inhibitor antibiotics': 'J01', 'Cell wall disruptive antibiotics': 'J01', 'Nucleic acid inhibitors': 'J01',
	'Other antibacterials': 'J01X', 'Antimycobacterials': 'J04', 'DNA antivirals': 'J05', 'RNA antivirals': 'J05', 'Antiretroviral drug': 'J05',
	'Vaccines': 'J07',
	# L
	'Intracellular chemotherapeutic agents': 'L01', 'Immunostimulants': 'L03', 'Immunosuppressants': 'L04',
	# M
	'Anti-inflammatory products': 'M01A', 'Antirheumatic products': 'M01C', 'Topical products for joint and muscular pain': 'M02',
	'Muscle relaxants': 'M03', 'Antigout preparations': 'M04', 'Drugs for treatment of bone diseases': 'M05',
	# N
	'General anesthetics': 'N01A', 'Local anesthetics': 'N01B', 'Analgesics': 'N02', 'Antimigraine preparations': 'N02C',
	'Anticonvulsants': 'N03', 'Antiparkinson': 'N04', 'Antipsychotics': 'N05A', 'Anxiolytics': 'N05B', 'Hypnotics and sedatives': 'N05C',
	'Antidepressants': 'N06A', 'Antihyperkinetics': 'N06B', 'Racetams': 'N06B', 'Anti-dementia drugs': 'N06D', 'Antiaddictives': 'N07B',
	'Antivertigo preparations': 'N07C', 'Other nervous system drugs': 'N07X',
	'Cannabinoids': 'Npsc', 'Hallucinogens': 'Npsh',
	# P
	'Chromalveolate antiparasitics': 'P01', 'Excavata antiparasitics': 'P01', 'Agents against amoebozoa': 'P01', 'Anthelmintics': 'P02',
	'Anti-arthropod medications': 'P03A',
	# R
	'Nasal preparations': 'R01', 'Throat preparations': 'R02', 'Drugs for obstructive airway diseases': 'R03',
	'Cough and cold preparations': 'R05', 'Antihistamines': 'R06', 'Other respiratory system products': 'R07',
	# S
	'Ophthalmological anti-infectives': 'S01A', 'Antiglaucoma preparations and miotics': 'S01E', 'Mydriatics and cycloplegics': 'S01F',
	'Ocular vascular disorder agents': 'S01L', 'Otologicals': 'S02',
	# V
	'Antidotes': 'V03AB', 'Drugs for treatment of hyperkalemia and hyperphosphatemia': 'V03AE',
	'Detoxifying agents for antineoplastic treatment': 'V03AF', 'Other therapeutic products': 'V03A', 'Diagnostic agents': 'V04',
	'Contrast media': 'V08', 'Radiopharmaceuticals': 'V',
}

def main():
	"""Sort the stub templates of all pharmacology stub articles and write a log page.

	Steps:
	1. Register every redirect to a known navbox template in navboxes under
	   the ATC code of its target.
	2. Collect the ATC code of every category that transcludes {{ATC category}}.
	3. For each main-namespace page in Category:Pharmacology stubs (recursive):
	   strip the stub templates, derive the applicable stub types from the
	   infobox ATC codes, navboxes and categories, reduce them to a
	   non-redundant set and save the page with those templates appended.
	4. Save the list of logged problems to User:PotatoBot/Lists/Pharma stubs log.
	"""
	# Approval request number; savepage() appends it to the approval link in the
	# edit summary (this is task 4, see the log header below).
	BRFANo = '4'
	site = w.getSite()
	listout = 'Log for sorting of {{tl|pharma-stub}} and its sub-categories ([[WP:Bots/Requests for approval/PotatoBot 4|Task 4]]). Date: %s.\n'\
		% mysave.fmtdate(date.today())
	sensorystubs, onlysensorystubs = 0, 0

	# Redirs to navboxes (iterate over a copy: navboxes grows inside the loop)
	for navbox in navboxes.copy():
		navboxpage = w.Page(site, 'Template:' + navbox)
		for redir in navboxpage.getReferences(redirectsOnly = True):
			w.output('> T:%s redirects to T:%s, ATC code %s' % (redir.titleWithoutNamespace(), \
				navboxpage.titleWithoutNamespace(), navboxes[navbox]))
			navboxes[redir.titleWithoutNamespace()] = navboxes[navbox]

	# Fetch ATC code categories
	w.output('')
	categories = {}
	for catpage in pagegenerators.ReferringPageGenerator(w.Page(site, 'Template:ATC category'), onlyTemplateInclusion = True):
		if catpage.namespace() == 14:
			w.output('> ' + catpage.title())
			for template in catpage.templatesWithParams():
				if template[0] == 'ATC category' and len(template[1]):
					categories[catpage.titleWithoutNamespace()] = template[1][0]
					w.output('  ATC code ' + template[1][0])
					break

	# Iterate through pharma stubs
	for page in pagegenerators.CategorizedPageGenerator(catlib.Category(site, 'Category:Pharmacology stubs'), recurse = True):
		if page.namespace() != 0:
			continue
		w.output('\n> ' + page.title())
		text = page.get()
		langlinks = w.getLanguageLinks(text)
		newtext = w.removeLanguageLinks(text)

		# Find stub templates: known pharma stubs first ...
		oldATCs = []
		for code, titles in stubtypes.items():
			for title in titles:
				templ = re.search(r'\{\{\s*[%s%s]%s-stub\s*\}\}' % (title[0], title[0].lower(), title[1:]), newtext)
				if templ:
					oldATCs.append(code)
					# Cut the template, plus one directly following newline if there is one
					newtext = newtext[:templ.start()] + \
						newtext[templ.end() + (newtext[templ.end():templ.end() + 1] == '\n'):]
		# ... then whatever other stub templates remain; these are kept and re-appended
		otherstubs = ''
		while True:
			templ = re.search(r'\{\{\s*(\w|-)*-stub\s*\}\}', newtext)
			if not templ:
				break
			otherstubs += newtext[templ.start():templ.end()] + '\n'
			newtext = newtext[:templ.start()] + \
				newtext[templ.end() + (newtext[templ.end():templ.end() + 1] == '\n'):]
		w.output('  ATC stub(s) found: %s' % oldATCs)

		# Search for new stub types via ATC codes and navboxes
		# (templates and categories are fetched once per page, not once per lookup)
		pagetemplates = set(page.templates())
		pagecategories = set([cat.titleWithoutNamespace() for cat in page.categories()])
		newATCs = mysave.findATCs(page, includeVet = False)[0]
		for navbox in navboxes:
			if navbox in pagetemplates or navbox.replace(' ', '_') in pagetemplates:
				newATCs.append(navboxes[navbox])
		for category in categories:
			if category in pagecategories:
				newATCs.append(categories[category])
		# Shorten each code until it names a stub type; ends at '' at the latest
		for n, code in enumerate(newATCs):
			while code not in stubtypes:
				code = code[:-1]
			newATCs[n] = code

		# Get rid of redundant stub types
		newATCset = set(newATCs)
		if 'P' in newATCset:
			newATCset.remove('P')
			newATCset.add('J')
		ATCset = newATCset | set(oldATCs)
		if 'S' in ATCset:
			# Sensory organ stubs are only counted, and sorted as generic stubs
			ATCset.remove('S')
			ATCset.add('')
			sensorystubs += 1
			if ATCset == set(['']):
				onlysensorystubs += 1
		if '' in ATCset and len(ATCset) > 1:
			ATCset.remove('')
		# Two different types sharing a first character are merged into that
		# one-character type ...
		for ATC in ATCset.copy():
			for code in stubtypes:
				if code[0:1] == ATC[0:1] and code != ATC and code in ATCset:
					ATCset.add(ATC[0:1])
		# ... which then makes the longer types redundant
		for ATC in ATCset.copy():
			if len(ATC) > 1 and ATC[0] in ATCset:
				ATCset.remove(ATC)

		# Save page, log problems
		w.output('  updated ATC stub(s): %s' % ATCset)
		if otherstubs:
			w.output('\03{yellow}  Non-pharma stubs found: logging\03{default}')
			listout += '# %s: non-pharmacology stub(s) %s found\n' % (page.aslink(), \
				otherstubs.replace('{{', '{{tl|').replace('\n', ', '))
		# Stub types on the page that neither ATC codes, navboxes nor categories
		# account for; the types listed here are exempt from that check
		strangestubs = ATCset - newATCset - set(('', 'S', 'Nps', 'Npsc', 'Npsh', 'mab', 'str'))
		if strangestubs:
			w.output('\03{yellow}  Unexplained pharma stubs found: logging; %s\03{default}' % strangestubs)
			listout += '# %s: unexplained pharmacology stub(s) %s found\n' % (page.aslink(), \
				', '.join(['{{tl|' + stubtypes[stub][0].lower() + '-stub}}' for stub in strangestubs]))
		if ATCset in (set(oldATCs), set(oldATCs) - set(('', 'S'))):
			w.output('  No changes required')
		elif len(ATCset) > 3:
			w.output('\03{yellow}  More than three stub types apply: logging\03{default}')
			listout += '# %s: too many stub types apply (%s)\n' % (page.aslink(), \
				', '.join(['{{tl|' + stubtypes[ATC][0].lower() + '-stub}}' for ATC in ATCset]))
		elif set(('Link GA', 'Link_GA', 'Link FA', 'Link_FA')) & pagetemplates:
			w.output('\03{yellow}  GA or FA in other language: logging\03{default}')
			listout += '# %s: GA or FA in other language currently can\'t be handled\n' % page.aslink()
		else:
			newtext = newtext.rstrip()
			# The page may be empty once the stub templates are gone
			lines = newtext.splitlines()
			if lines and 'DEFAULTSORT' in lines[-1]:
				w.output('\03{yellow}  DEFAULTSORT below catlinks: logging\03{default}')
				listout += u'# %s: {{tl|DEFAULTSORT}} below category links – please check\n' % page.aslink()
			stubnames = [stubtypes[ATC][0].lower() for ATC in ATCset]
			# savepage() returns '' on success and a log line on failure
			listout += mysave.savepage(page, w.replaceLanguageLinks(newtext + '\n\n\n' + \
				''.join(['{{' + name + '-stub}}\n' for name in stubnames]) + otherstubs, langlinks), \
				BRFANo, 'Stub sorting and placement of stub template(s): ' + \
				', '.join([name + '-' for name in stubnames]) + 'stub', minor = True)

	mysave.savepage(w.Page(site, 'User:PotatoBot/Lists/Pharma stubs log'), listout + \
		"%d potential {{tl|sensory-organ-drug-stub}}s found, of which %d would ''only'' be sorted as such.\n" % \
		(sensorystubs, onlysensorystubs), BRFANo, 'Creating log for pharma stub sorting')

# Script entry point: run the task only when executed directly, not on import.
if __name__ == "__main__":
	try:
		main()
	finally:
		# Runs on normal exit as well as when main() raises
		w.stopme()

mysave.py

[edit]
#!/usr/bin/python
# -*- coding: utf-8  -*-

import pywikibot as w
import re

# Code for saving redirects and other pages

def savepage(page, text, BRFANo, summary = '', minor = False):
	"""Save text to a page and log exceptions.

	page    -- page object to write to
	text    -- new page text
	BRFANo  -- number of the bot approval request (string); used for the
	           approval link that is appended to the edit summary
	summary -- edit summary; if empty, the edit summary is not set here
	minor   -- passed to page.put() as minorEdit

	Returns '' if the page was saved, otherwise a "# ...\n" line describing
	the problem, ready to be appended to a wikitext log list.
	"""
	if summary != '':
		w.setAction(summary + '. See [[Wikipedia:Bots/Requests for approval/PotatoBot ' + BRFANo + '|approval]]. Report errors and suggestions at [[User talk:PotatoBot]].')
	try:
		if '#' not in page.title():
			page.put(text, minorEdit = minor)
			w.output('  \03{green}saving %s -> \03{gray}%s\03{default}' % (page.title(), text))
			return ''
		else:
			w.output('  \03{red}cannot save %s because it is a section\03{default}' % page.title())
			return '# %s: this is a section title\n' % page.title(aslink=True)
	except w.LockedPage:
		w.output('  \03{red}cannot save %s because it is locked\03{default}' % page.title())
		return '# %s: page was locked\n' % page.title(aslink=True)
	except w.EditConflict:
		w.output('  \03{red}cannot save %s because of edit conflict\03{default}' % page.title())
		return '# %s: edit conflict occurred\n' % page.title(aslink=True)
	except w.SpamfilterError as error:
		w.output('  \03{red}cannot save %s because of spam blacklist entry %s\03{default}' % (page.title(), error.url))
		return '# %s: spam blacklist entry\n' % page.title(aslink=True)
	except Exception:
		# Best effort: any other failure is logged and the run continues.
		# KeyboardInterrupt and SystemExit are deliberately not caught.
		w.output('  \03{red}unknown error on saving %s\03{default}' % page.title())
		return '# %s: unknown error occurred\n' % page.title(aslink=True)

def resolveredir(page):
	"""Return target if input is a redirect, else return input.

	If the redirect target cannot be determined, a page object titled
	"<title> (broken redirect)" is returned; if even the redirect check
	fails, one titled "<title> (bad link)".
	"""
	try:
		if page.isRedirectPage():
			try:
				target = page.getRedirectTarget()
				w.output('  \03{gray}resolving redir %s to %s\03{default}'\
					% (page.title(), target.title()))
				return target
			except Exception:
				w.output('  \03{yellow}target %s is a broken redir\03{default}' % page.title())
				return w.Page(w.getSite(), page.title() + ' (broken redirect)')
		else:
			return page
	except Exception:
		# Exception rather than a bare except, so that Ctrl-C still stops the bot
		w.output('  \03{yellow}target %s is a bad link\03{default}' % page.title())
		return w.Page(w.getSite(), page.title() + ' (bad link)') # workaround for wikipedia.py breaking wikiasite: links

def makeredir(redirpage, page, BRFANo, templates = ''):
	"""Create a redirect and log existing page that isn't a redirect to the desired article.

	redirpage -- page that is to become the redirect
	page      -- redirect target; resolved first if it is a redirect itself
	BRFANo    -- passed through to savepage()
	templates -- wikitext appended after the redirect link

	Returns '' if nothing needs to be logged (redirect created, already in
	place, or the target is already linked from the disambiguation page in
	the way), otherwise a "# ...\n" line for the caller's log list.
	"""
	page = resolveredir(page)
	if not redirpage.exists():
		# Create redirect, or write page name to list if an error occurs
		return savepage(redirpage, '#REDIRECT %s %s' % (page.title(aslink=True), templates), BRFANo, 'Redirect to ' + page.title(aslink=True))

	comment = ''
	dab = None
	if redirpage.isDisambig():
		comment = ' (disambiguation)'
		dab = redirpage
	if redirpage.isRedirectPage():
		try:
			target = redirpage.getRedirectTarget()
			if page.title() in (target.title(), target.sectionFreeTitle()):
				# Already a redir to the desired article
				return ''
			elif target.isDisambig():
				comment = ' (redirect to disambiguation)'
				dab = target
			else:
				comment = ' (redirect)'
		except Exception:
			comment = ' (broken redir)'
	# dab is always set when the comment mentions a disambiguation
	if 'disambiguation' in comment and any(page == resolveredir(linked) for linked in dab.linkedPages()):
		w.output('  link to %s already on dab page %s' % (page.title(), dab.title()))
		return ''
	elif redirpage.title() != page.title():
		w.output('  \03{yellow}redir to %s failed, page %s already exists\03{default}' % (page.title(), redirpage.title()))
		return '# %s: redirecting to %s failed, page already exists%s\n' % (redirpage.title(aslink=True), page.title(aslink=True), comment)
	else:
		return ''

def findATCs(page, includeVet = True):
	"""Look for ATC codes in infoboxes.

	Reads the Drugbox and Chembox Identifiers templates returned by
	page.templatesWithParams(), and the ATC / ATCvet templates found in the
	supplemental parameter.

	page       -- page object providing templatesWithParams()
	includeVet -- if false, an "ATCvet = yes" parameter does not add the "Q"
	              prefix to the main code

	Returns (codes, ATCvetpos, prefixpos, suffixpos, supppos): the list of
	codes found, and the positions of the respective parameters in their
	template's parameter list (-1 if not found).  A prefix of "none" (in any
	case) counts as not found.
	"""
	ATCvet, prefix, suffix, supp = False, '', '', ''
	ATCvetpos, prefixpos, suffixpos, supppos = -1, -1, -1, -1
	# Names of the prefix, suffix and supplemental parameters per template
	paramnames = {
		'Drugbox': ('ATC_prefix', 'ATC_suffix', 'ATC_supplemental'),
		'Chembox Identifiers': ('ATCCode_prefix', 'ATCCode_suffix', 'ATC_Supplemental'),
	}
	for template in page.templatesWithParams():
		names = paramnames.get(template[0])
		if names is None:
			continue
		prefixname, suffixname, suppname = names
		# enumerate() yields the true position even if a parameter string occurs twice
		for pos, param in enumerate(template[1]):
			name, sep, value = param.partition('=')
			name, value = name.strip(), value.strip()
			if name == 'ATCvet':
				ATCvet = (value == 'yes') and includeVet
				ATCvetpos = pos
			elif name == prefixname and value.lower() != 'none':
				prefix = value
				prefixpos = pos
			elif name == suffixname:
				suffix = value
				suffixpos = pos
			elif name == suppname:
				supp = value
				supppos = pos
	codes = []
	if prefix != '':
		codes.append(('Q' if ATCvet else '') + prefix + suffix)
	for supptemplate in page.templatesWithParams(supp):
		params = supptemplate[1]
		# Templates lacking the two code parts are skipped instead of raising IndexError
		if supptemplate[0] in ('ATC', 'ATCvet') and len(params) >= 2:
			codes.append(('Q' if supptemplate[0] == 'ATCvet' else '') + params[0] + params[1])
	return (codes, ATCvetpos, prefixpos, suffixpos, supppos)

def addTemplateParam(page, newtemplates, BRFANo, summary = 'Updating template', minor = False):
	"""Write changed template parameter values back to a page and save it.

	page         -- page object providing get() and templatesWithParams()
	newtemplates -- the desired templates, as a list of (name, parameter
	                strings) entries parallel to page.templatesWithParams()
	BRFANo, summary, minor -- passed through to savepage()

	Only the values of named parameters ("name = value") are rewritten, and
	only where the value on the page contains no whitespace, "|" or "}".
	Adding, removing or renaming parameters is not supported: in that case,
	or if a parameter cannot be located in the page text, nothing is saved
	and a log line is returned.

	Returns '' if nothing had to be changed, the result of savepage() if the
	page was changed, or a "# ...\n" log line if the update was abandoned.
	"""
	def giveup(reason):
		w.output('\03{yellow}cannot update templates on %s: %s\03{default}' % (page.title(), reason))
		return '# %s: %s\n' % (page.title(aslink=True), reason)

	original = page.get()
	text = original
	oldtemplates = page.templatesWithParams()
	if len(newtemplates) < len(oldtemplates):
		return giveup('template list did not match templates on page')
	# Position in text from which the next template or parameter is searched
	pointer = 0
	for i, oldtemplate in enumerate(oldtemplates):
		oldname, oldparams = oldtemplate[0], oldtemplate[1]
		name = oldname.strip()
		# Find "{{Name |": first letter in either case, spaces may be underscores
		head = re.compile(r'\{\{\s*(?:%s|%s)%s\s*\|' % (re.escape(name[:1].upper()), re.escape(name[:1].lower()), \
			'[ _]'.join([re.escape(part) for part in name[1:].split(' ')]))).search(text, pointer)
		if head:
			# Continue at the "|" that follows the template name
			pointer = head.end() - 1
		newtemplate = newtemplates[i]
		if newtemplate == oldtemplate:
			continue
		if newtemplate[0].strip() != name:
			w.output('\03{yellow}template list does not match page %s: %s vs. %s\03{default}' % \
				(page.title(), newtemplate[0].strip(), name))
			return '# %s: template list did not match templates on page\n' % page.title(aslink=True)
		newparams = newtemplate[1]
		if len(newparams) != len(oldparams):
			return giveup('adding or removing template parameters is not supported')
		for j, oldparamtext in enumerate(oldparams):
			oldparam = oldparamtext.partition('=')
			newparam = newparams[j].partition('=')
			oldvalue, newvalue = oldparam[2].strip(), newparam[2].strip()
			if newparam[0].strip() != oldparam[0].strip():
				return giveup('template parameters did not match parameters on page')
			if not oldparam[1]:
				# Unnamed parameter, identical according to the check above
				continue
			found = re.compile(r'\|\s*%s\s*=\s*([^|}\s]*)\s*(}|\|)' % re.escape(oldparam[0].strip())).\
				search(text, pointer)
			# The value check keeps a same-named parameter further down the page
			# from being edited by mistake
			if not found or found.group(1) != oldvalue:
				if newvalue != oldvalue:
					return giveup('could not locate parameter %s' % oldparam[0].strip())
				continue
			start, end = found.span(1)
			pointer = end
			if newvalue != oldvalue:
				text = text[:start] + newvalue + text[end:]
				# Keep the pointer at the end of the value just written
				pointer = start + len(newvalue)
	if text != original:
		return savepage(page, text, BRFANo, summary, minor)
	return ''

def fmtdate(date):
	"""Return date as day, English month name and year, e.g. '5 March 2010'."""
	# Index 0 is a placeholder, so that the month number can be used as index
	monthnames = (
		'',
		'January', 'February', 'March', 'April',
		'May', 'June', 'July', 'August',
		'September', 'October', 'November', 'December',
	)
	parts = (date.day, monthnames[date.month], date.year)
	return '%d %s %d' % parts