Jump to content

User:ZackBot/airport cleanup

From Wikipedia, the free encyclopedia
#!/usr/bin/env ruby
# encoding: utf-8

require 'mediawiki_api'
require 'HTTParty'
require 'open-uri'
require './helper'
require 'fileutils'

# Matches a whole {{Infobox airport ...}} template. Capture 1 is the template
# itself; nested {{...}} templates are consumed via the recursive \g<1> call.
INFOBOX_REGEX   = /(?=\{\{[Ii]nfobox\s[Aa]irport)(\{\{(?>[^{}]++|\g<1>)*}})/
# Matches any " pushpin_xxx" parameter name (whitespace, then "pushpin_").
PUSHPIN_REGEX   = /\spushpin_[a-z_]*\s*/
# Matches <center>{{Location map ...}}<small>caption</small></center>.
# Capture 1 is the {{Location map}} template, capture 2 is the caption text.
MAP_REGEX       = /\<center\>(?=\{\{[Ll]ocation\s[Mm]ap)(\{\{(?>[^{}]++|\g<1>)*}})(?:\<small\>)(.*)(?:\<\/small>)\<\/center\>/
# Extracts the first positional argument of {{Location map|NAME|...}}.
# Accepts "map" and "Map" so it agrees with what MAP_REGEX lets through.
MAP_NAME_REGEX  = /\{\{[Ll]ocation\s[Mm]ap\|(?<name>[A-Za-z\-\s:',.]*)\|/
# Extracts the value of "| position = ..." (letters only).
POSITION_REGEX  = /\|\s*position\s*=\s*(?<position>[A-Za-z]*)/
# Extracts the value of "| label = ..." (letters and digits only, so the
# capture stops at the first space or punctuation character).
LABEL_REGEX     = /\|\s*label\s*=\s*(?<label>[A-Za-z0-9]*)/

# Matches "| position = <left|right|center|none>" with the value optional.
# The outer group is non-capturing; it previously read "(:?", which matched an
# optional literal colon instead.
LOCATION_MAP_REGEX = /(?:\|\s*position\s*=\s*(?<position>left|right|center|none)?)/

# Reports whether +regex+ matches +text+ exactly once.
#
# When the pattern is missing or matches more than once, an error line naming
# +param+ is printed to standard output.
#
# @param text [String] text to search
# @param param [String] human-readable name used in the error message
# @param regex [Regexp] pattern whose matches are counted
# @return [Boolean] true only when there is exactly one match
def exactly_one_time(text, param, regex)
  occurrences = text.scan(regex).length

  case occurrences
  when 1
    true
  when 0
    puts "- ERROR: '#{param}' does not appear on the page"
    false
  else
    puts "- ERROR: '#{param}' appears more than one time on the page."
    false
  end
end

# PetScan query whose JSON result lists the pages to process.
QUERY_URL = "https://petscan.wmflabs.org/?psid=600659&format=json"

Helper.read_env_vars

client = MediawikiApi::Client.new 'https://wiki.riteme.site/w/api.php'
client.log_in ENV['USERNAME'], ENV['PASSWORD']

# URI.open (from open-uri) is used because Kernel#open no longer accepts URLs
# on Ruby 3; the block form closes the response when done. JSON.parse is used
# rather than JSON.load since the payload comes from a remote service.
json = URI.open(QUERY_URL) { |response| JSON.parse(response.read) }
# Page titles come back with underscores; convert them to spaces.
titles = json["*"].first["a"]["*"].map { |page| page["title"].tr("_", " ") }
puts titles.size

# For testing
# pages = File.open('test.txt').read
# pages.each_line do |title|
#
# Main loop: for every title, rewrite the deprecated
# <center>{{Location map}}<small>caption</small></center> block inside the
# airport infobox into pushpin_* parameters and save the page. Pages that do
# not fit the expected shape are reported and skipped.
titles.each do |title|
  title.strip!
  puts title
  full_text = client.get_wikitext(title).body

  next unless exactly_one_time(full_text, "Infobox Airport", INFOBOX_REGEX)

  # Get text of just the infobox
  infobox_text = full_text.match(INFOBOX_REGEX)[0]

  # Make sure pushpin_ params not already in the infobox. I am not supporting those cases.
  if infobox_text.match(PUSHPIN_REGEX)
    puts "- ERROR: 'pushpin' param appears in the infobox already."
    next
  end

  # Both {{coords}} and {{location map}} MUST be in the infobox for this to work
  next unless exactly_one_time(infobox_text, "Coords", /\{\{\s*[Cc]oor/)
  next unless exactly_one_time(infobox_text, "Location Map", MAP_REGEX)

  # Get the deprecated text containing the {{location map}} and possible caption
  location_text = infobox_text.match(MAP_REGEX)

  # Get just the {{location map}} part (capture 1), so the field lookups below
  # cannot pick anything up from the caption or the surrounding tags.
  location_map_text = location_text[1]

  # Pull out individual parts. Each match is nil when the field is absent.
  map_name = location_map_text.match(MAP_NAME_REGEX)
  map_position = location_map_text.match(POSITION_REGEX)
  pin_label = location_map_text.match(LABEL_REGEX)

  # Without a map name the new parameters would be useless, so skip the page
  # rather than crash or write an empty pushpin_map.
  unless map_name
    puts "- ERROR: could not determine the location map name."
    next
  end

  # Build the new text. Label and position are optional; a missing one
  # interpolates as an empty value.
  new_text = [
    "| pushpin_map            = #{map_name[:name]}",
    "| pushpin_map_caption    = #{location_text[2]}",
    "| pushpin_label          = #{pin_label && pin_label[:label]}",
    "| pushpin_label_position = #{map_position && map_position[:position]}"
  ].join("\n")

  # Insert the new text into the infobox. The block form inserts the text
  # literally; a string replacement would interpret backslash sequences.
  infobox_text.gsub!(MAP_REGEX) { new_text }

  # Insert the new infobox into the page (block form for the same reason).
  full_text.gsub!(INFOBOX_REGEX) { infobox_text }

  client.edit(title: title, text: full_text, summary: 'Fixing infobox not to use [[:Category:Pages using infobox airport with deprecated syntax|deprecated map syntax]]')
  puts "- SUCCESS"
end

puts "DONE"