Jump to content

User:Aidan9382/dumps/UnorderedArchives/script

From Wikipedia, the free encyclopedia
""" How to use
Run Quarry queries 70503, 70508, and 70509 and download each result as a TSV file
Put those TSV files in the same directory as this Python file (only *.tsv files with "quarry-7050" in their name are read)
Run this Python file from that directory and collect the output from unordered_output.txt
Very lazily done and poorly implemented, but it works, so good enough
"""
import os

def parse_archive_line(line):
    """Split one TSV row into ``(basepage, archive_number)``.

    The row is expected to hold a single page title such as
    ``Talk:Foo/Archive_3``: everything before the last ``/`` is the base
    page, and the text after the last ``_`` of the final path segment is
    the archive number.

    Returns ``None`` for blank lines and for the ``basepage_title`` header.
    Raises ``ValueError`` if the trailing text is not an integer.
    """
    line = line.strip()
    # Blank lines (e.g. a trailing newline at the end of the file) used to
    # crash on line[0]; skip them together with the header row.
    if not line or line == "basepage_title":
        return None

    if line.startswith("\""): #Quote escaping
        line = line[1:-1].replace("\"\"", "\"")

    basepage, _, archive = line.rpartition("/") #Split the archive from the title
    return basepage, int(archive.rpartition("_")[2]) #Get the number of the archive


# Maps each base page title to the list of archive numbers found for it.
collection = {}
for f in os.listdir():
    if f.endswith(".tsv") and "quarry-7050" in f: #Stupid but works
        print("Parsing", f)

        with open(f, encoding="utf-8") as file:
            for line in file: #Stream the file instead of loading every line at once
                parsed = parse_archive_line(line)
                if parsed is None:
                    continue

                basepage, archiveNumber = parsed
                collection.setdefault(basepage, []).append(archiveNumber) #Note said number down for later
print("Part 1 done")

# Maps each base page whose archive numbering has a gap to its statistics:
#   "Archives" - how many archive numbers were recorded for the page
#   "FMI"      - first index in 1..len(archives) that has no archive
#   "IA"       - len(archives) - FMI + 1, i.e. the archives beyond the gap
#   "FIA"      - smallest archive number above the FMI, or -1 if there is none
unordered = {}
for basepage, archives in collection.items():
    present = set(archives) #Constant-time membership instead of rescanning the list
    for i in range(1, len(archives) + 1):
        if i not in present: #If there's a gap anywhere, this'll fail
            # Look at every recorded number rather than only the next 500
            # indices, so far-away archives (e.g. year-numbered ones) are
            # still reported instead of falling back to -1.
            closestIsolated = min((n for n in present if n > i), default=-1)
            unordered[basepage] = {"Archives":len(archives), "FMI":i, "IA":len(archives)-i+1, "FIA":closestIsolated}
            break
print("Unordered pages:", len(unordered))

# Wikitext preamble: the legend followed by the opening of a sortable table.
final = """;Legend
* ''FMI'' - First Missing Index
* ''IA'' - Isolated Archives (amount of archives beyond the FMI)
* ''FIA'' - First Isolated Archive

{{static row numbers}}
{| class="wikitable sortable static-row-numbers static-row-header-text"
|+ Archives
|-
! Page !! Archives !! FMI !! IA !! FIA
|-"""

# One table row per page with a gap, collected first and glued on in one go.
rows = []
for basepage, entry in unordered.items():
    Archives = entry["Archives"]
    FMI = entry["FMI"]
    IA = entry["IA"]
    FIA = entry["FIA"]
    rows.append(f"\n| {{{{User:Aidan9382//ade|1={basepage}}}}} || {Archives} || {FMI} || {IA} || {FIA}\n|-")

# Preamble, then every row, then the table terminator.
final = "".join([final, *rows, "\n|}"])
print("Formed output")
print("Formed output")

# Write the report through a context manager so the file handle is flushed and
# closed deterministically instead of being left for the garbage collector.
with open("unordered_output.txt", "w", encoding="utf-8") as output_file:
    output_file.write(final)
print("Finished")

# Keep the console window open until the user acknowledges the result.
input("Press enter to close...")