Jump to content

Wikipedia:WikiProject Missing encyclopedic articles/Mw links

From Wikipedia, the free encyclopedia
#!/usr/bin/env perl
# Author: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
# Copyright: 2006, Ævar Arnfjörð Bjarmason
# License: The DWTFYWWI LICENSE, see http://tools.wikimedia.de/~avar/COPYING

use strict;
use warnings;

use LWP::UserAgent;
use HTML::TreeBuilder;

# Main script: fetch the page whose URL is given as the first command line
# argument and print its links as two wikitext sections, nonexisting (red)
# links first, then existing (blue) ones.
my $arg = shift;

# Call with parentheses: the old `&help` form forwards the caller's @_.
help() unless defined $arg;

# NOTE(review): the empty agent string presumably exists to avoid LWP's
# default "libwww-perl/x.y" identifier -- confirm the target site still
# accepts requests that carry no meaningful User-Agent.
my $ua = LWP::UserAgent->new(
	agent => ''
);

my $res = $ua->get($arg);

# Do not try to parse an error page: report the failure and stop.
die "Failed to fetch $arg: " . $res->status_line . "\n"
	unless $res->is_success;

# Parse characters rather than raw bytes so that non-ASCII titles survive;
# fall back to the raw body if the declared charset cannot be decoded.
my $html = $res->decoded_content;
$html = $res->content unless defined $html;

# The titles are character strings now, so encode them on the way out.
binmode STDOUT, ':encoding(UTF-8)';

my %links = getlinks($html);

print "==Nonexisting==\n";
for my $i (@{$links{red}}) {
	print "# [[$i]]\n"
}

print "==Existing==\n";
for my $i (@{$links{blue}}) {
	print "# [[$i]]\n"
}

# Parse an HTML page and split the links of its content area into existing
# ("blue") and nonexisting ("red") ones.
#
# Takes the HTML source as its only argument. Returns a flattened hash with
# the keys `blue` and `red`, each holding a reference to an array of link
# titles as extracted by booya(). Both arrays are empty when the page has no
# <div id="bodyContent">.
sub getlinks
{
	my $tree = HTML::TreeBuilder->new_from_content(shift);

	my %links = (
		blue => [],
		red => []
	);

	my $yummie = $tree->look_down(
		'_tag' => 'div',
		id => 'bodyContent'
	);

	# Without the content div there is nothing to scan, and calling
	# look_down() on undef would die; hand back the empty lists instead.
	unless (defined $yummie) {
		$tree->delete;
		return %links;
	}

	# Blue links: every anchor that is neither a red link (class "new") nor
	# an external/interwiki link. Anchors without a class attribute are
	# treated as having an empty class and therefore count as blue.
	my @blue = $yummie->look_down(
		'_tag' => 'a',
		sub {
			my $class = $_[0]->attr('class');
			$class = '' unless defined $class;
			$class ne 'new' && $class !~ / (?: external | extiw ) /x
		}
	);

	# Red links: anchors whose class attribute is exactly "new".
	my @red = $yummie->look_down(
		'_tag' => 'a',
		class => 'new'
	);

	@{$links{blue}} = booya(\@blue);
	@{$links{red}} = booya(\@red);

	# The titles have been copied out as plain strings, so the parse tree can
	# be released explicitly; HTML::Element setups without weak references
	# do not free it on their own.
	$tree->delete;

	return %links;
}

# Collect the usable `title` attributes from a list of elements.
#
# Takes a reference to an array of objects that answer attr('title').
# Returns the titles in their original order, with one trailing newline
# removed from each; elements whose title is missing or empty are skipped.
sub booya
{
	my ($elements) = @_;

	my @titles =
		grep { $_ ne '' }
		map  { chomp(my $title = $_); $title }
		grep { defined $_ }
		map  { scalar $_->attr('title') } @{$elements};

	return @titles;
}

# Print a one-line usage summary, naming the script as it was invoked, and
# terminate the program with exit status 1.
sub help
{
	print "usage: $0 url\n";
	exit 1;
}