# Source wiki page: User:Aidan9382/dumps/UnorderedArchives/script
""" How to use
Run Quarry queries 70503, 70508, and 70509 and download each result as a TSV file.
Put those TSV files in the same directory as this Python file.
Run this Python file, then read the output from unordered_output.txt.
Very lazily done and poorly implemented, but it works, so good enough.
"""
import os
def parse_archive_title(line):
    """Split one TSV row into ``(basepage, archive_number)``.

    The row is a page title such as ``Talk:Foo/Archive_12``: everything
    before the last ``/`` is the base page, and the text after the last
    ``_`` of the final path segment is the archive number.

    Returns ``None`` for rows that carry no title (blank lines and the
    ``basepage_title`` header row).

    Raises ``ValueError`` if the trailing part of the title is not an integer.
    """
    line = line.strip()
    # Blank lines used to crash the quote check below with an IndexError.
    if not line or line == "basepage_title":
        return None
    if line[0] == "\"":  # Quoted field: drop the outer quotes, undouble inner ones
        line = line[1:-1].replace("\"\"", "\"")
    # Only the last "/" separates the archive; base pages may contain "/" too.
    basepage, _, archive = line.rpartition("/")
    return basepage, int(archive.split("_")[-1])


def collect_archives(directory="."):
    """Gather archive numbers from every matching TSV file in *directory*.

    A file is read when its name ends in ``.tsv`` and contains
    ``quarry-7050``. Returns a dict mapping each base page to the list of
    its archive numbers, in the order they were read.
    """
    found = {}
    for name in os.listdir(directory):
        if not (name.endswith(".tsv") and "quarry-7050" in name):
            continue
        print("Parsing", name)
        with open(os.path.join(directory, name), encoding="utf-8") as handle:
            for line in handle:
                parsed = parse_archive_title(line)
                if parsed is None:
                    continue
                basepage, number = parsed
                found.setdefault(basepage, []).append(number)
    return found


def find_unordered(pages):
    """Report every page whose archive numbers do not run 1..N without a gap.

    *pages* maps a base page to its list of archive numbers. For each page
    with a gap the result holds a dict with these keys:

    * ``Archives`` - how many archive entries the page has
    * ``FMI`` - the first index in 1..N that is missing
    * ``IA`` - ``Archives - FMI + 1``, the archives beyond the FMI
    * ``FIA`` - the smallest archive number above the FMI, or -1 if none
    """
    result = {}
    for basepage, archives in pages.items():
        total = len(archives)
        present = set(archives)  # O(1) membership instead of scanning the list
        first_missing = next(
            (index for index in range(1, total + 1) if index not in present),
            None,
        )
        if first_missing is None:  # 1..N all present: nothing to report
            continue
        result[basepage] = {
            "Archives": total,
            "FMI": first_missing,
            "IA": total - first_missing + 1,
            # Search every archive rather than only the next 500 numbers, so
            # that e.g. year-numbered archives (2004, 2005, ...) are found.
            "FIA": min(
                (number for number in present if number > first_missing),
                default=-1,
            ),
        }
    return result


collection = collect_archives()
print("Part 1 done")
unordered = find_unordered(collection)
print("Unordered pages:", len(unordered))
# Opening of the report: a legend for the column abbreviations, then the
# header of a sortable wikitable with one column per value stored for a page.
report_parts = [""";Legend
* ''FMI'' - First Missing Index
* ''IA'' - Isolated Archives (amount of archives beyond the FMI)
* ''FIA'' - First Isolated Archive
{{static row numbers}}
{| class="wikitable sortable static-row-numbers static-row-header-text"
|+ Archives
|-
! Page !! Archives !! FMI !! IA !! FIA
|-"""]
for basepage, data in unordered.items():  # One table row per unordered page
    Archives, FMI, IA, FIA = data["Archives"], data["FMI"], data["IA"], data["FIA"]
    report_parts.append(f"\n| {{{{User:Aidan9382//ade|1={basepage}}}}} || {Archives} || {FMI} || {IA} || {FIA}\n|-")
report_parts.append("\n|}")  # Close the wikitable
# Join once at the end instead of re-copying the growing string for every row.
final = "".join(report_parts)
print("Formed output")
# The context manager closes the file as soon as the write is done, instead
# of leaving that to whenever the file object happens to be garbage collected.
with open("unordered_output.txt", "w", encoding="utf-8") as output_file:
    output_file.write(final)
print("Finished")
input("Press enter to close...")  # Keep the console window open until confirmed