# User:AnomieBOT/source/tasks/ReplaceExternalLinks.pm
#
# Due to breaking changes in AnomieBOT::API, this task will probably not run anymore. If you really must run it, try getting a version from before 2009-03-23.
# Approved 2008-11-11, completed 2008-11-12: Wikipedia:Bots/Requests for approval/AnomieBOT 9
package tasks::ReplaceExternalLinks;
=pod

=for warning
Due to breaking changes in AnomieBOT::API, this task will probably not run
anymore. If you really must run it, try getting a version from before
2009-03-23.

=begin metadata

Bot: AnomieBOT
Task: ReplaceExternalLinks
BRFA: Wikipedia:Bots/Requests for approval/AnomieBOT 9
Status: Completed 2008-11-12
Created: 2008-11-08

Replace links to the domains w*.allmusic.com with just "allmusic.com", as those other domains no longer function.

=end metadata

=cut

use utf8;
use strict;
use AnomieBOT::Task;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;
# Constructor. Builds the object through the parent class's new() (called
# with no extra arguments) and blesses the result into the requested class.
#
# Parameters:
#   $class - class name the constructor was invoked on.
#
# Returns the newly blessed task object.
sub new {
    my ($class) = @_;
    my $self = $class->SUPER::new();
    return bless $self, $class;
}
=pod

=for info
Approved 2008-11-11, completed 2008-11-12<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 9]]

=cut

# Approval status reported for this task: always the constant -1.
# NOTE(review): the file's metadata marks the task as completed, so -1
# presumably tells the framework not to schedule it -- confirm against
# AnomieBOT::Task.
sub approved {
    my $status = -1;
    return $status;
}
# Task entry point: rewrite links to the dead w[cm]NN.allmusic.com hosts (and
# www.allmusic.com) to plain allmusic.com on every page that the exturlusage
# generator reports for the domains listed below.
#
# Parameters:
#   $self - this task object; warn(), strip_nowiki() and replace_nowiki()
#           are called on it.
#   $api  - API handle; task(), read_throttle(), edit_throttle(), query(),
#           fetch(), store(), edittoken() and edit() are called on it.
#
# Returns a number (presumably the delay in seconds before the task should be
# run again, judging by the "10 minutes" remark on the final return --
# confirm against the caller):
#   60  - a usage-list query did not succeed
#   300 - the edit-token request reported 'shutoff'
#   0   - the five-minute budget was used up after finishing a page
#   600 - every query ran to completion
sub run {
    my ($self, $api) = @_;

    $api->task('ReplaceExternalLinks');
    $api->read_throttle(0);
    $api->edit_throttle(10);

    # Work for at most 5 minutes per invocation, then hand control back.
    my $deadline = time() + 300;

    # Domains to look up via exturlusage. The first entry mirrors the literal
    # "wc)(" alternative in the first pattern below (presumably a malformed
    # host that occurs in real links -- TODO confirm).
    my @euqueries = (
        'wc)(.allmusic.com',
        'wc01.allmusic.com',
        'wc02.allmusic.com',
        'wc03.allmusic.com',
        'wc04.allmusic.com',
        'wc05.allmusic.com',
        'wc06.allmusic.com',
        'wc07.allmusic.com',
        'wc08.allmusic.com',
        'wc09.allmusic.com',
        'wc10.allmusic.com',
        'wm01.allmusic.com',
        'wm02.allmusic.com',
        'wm03.allmusic.com',
        'wm04.allmusic.com',
        'wm05.allmusic.com',
        'wm06.allmusic.com',
        'wm07.allmusic.com',
        'wm08.allmusic.com',
        'wm09.allmusic.com',
        'wm10.allmusic.com',
        'wm11.allmusic.com',
    );

    # Each rule is [ pattern, replacement text, edit-summary fragment ].
    my @repl = (
        [ qr{\bhttp://(?:w[cm][0-9][0-9]|wc\)\()\.allmusic\.com(?=[][/<>"\x00-\x20\x7F]|$)}i, 'http://allmusic.com', 'updating broken allmusic.com links' ],
        [ qr{\bhttp://www\.allmusic\.com(?=[][/<>"\x00-\x20\x7F]|$)}i, 'http://allmusic.com', 'changing www.allmusic.com to allmusic.com' ],
    );

    # Appended to every edit summary.
    my $suffix = " per [[WP:BOTREQ#Allmusic links|request]]";

    # Query parameters common to all requests; geuquery and geuoffset are
    # filled in per domain and per batch.
    my %params = (
        generator => 'exturlusage',
        geulimit  => 'max',
        prop      => 'info',
    );

    for my $q (@euqueries) {
        $params{'geuquery'} = $q;
        delete $params{'geuoffset'};

        # Fetch the page list batch by batch until no continuation remains.
        BATCH: while (1) {
            my $res = $api->query(%params);
            if ($res->{'code'} ne 'success') {
                $self->warn("Failed to retrieve usage list for $q: ".$res->{'error'}."\n");
                return 60;
            }

            # Record where the next batch starts, or clear the marker when
            # this was the last one.
            if (!exists($res->{'query-continue'})) {
                delete $params{'geuoffset'};
            } else {
                $params{'geuoffset'} = $res->{'query-continue'}{'exturlusage'}{'geuoffset'};
            }

            PAGE: for my $page (values %{ $res->{'query'}{'pages'} }) {
                my $pageid = $page->{'pageid'};
                my $revid  = $page->{'lastrevid'};

                # Skip pages whose stored "checked" revision is at least as
                # new as the current one.
                my $seen = $api->fetch($pageid);
                next PAGE if defined($seen) && $$seen >= $revid;

                my $title = $page->{'title'};
                $self->warn("Checking external links in $title\n");

                # The token response also carries the page text and revision.
                my $tok    = $api->edittoken($title, EditRedir => 1);
                my $status = $tok->{'code'};
                if ($status eq 'shutoff') {
                    $self->warn("Task disabled: ".$tok->{'content'}."\n");
                    return 300;
                }
                if ($status ne 'success') {
                    $self->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
                    next PAGE;
                }
                next PAGE if exists($tok->{'missing'});

                $revid = $tok->{'lastrevid'};
                my $intxt = $tok->{'revisions'}[0]{'*'};

                # Apply every rule to the text returned by strip_nowiki(),
                # then hand the result back to replace_nowiki() (presumably
                # this shields <nowiki> regions from the substitutions --
                # confirm in AnomieBOT::Task).
                my ($work, $nowiki) = $self->strip_nowiki($intxt);
                my @notes;
                for my $rule (@repl) {
                    my ($re, $repl, $note) = @$rule;
                    my $before = $work;
                    $work =~ s/$re/$repl/g;
                    push @notes, $note if $work ne $before;
                }
                $work = $self->replace_nowiki($work, $nowiki);

                if ($work eq $intxt) {
                    $self->warn("Nothing to do in $title\n");
                } else {
                    if (!@notes) {
                        $self->warn("No summary for $title even though changes were made, WTF?\n");
                        next PAGE;
                    }

                    # Build "A and B" for two fragments, "A, B, and C" style
                    # for more.
                    my $sep = (@notes > 2) ? ', ' : ' ';
                    $notes[-1] = 'and '.$notes[-1] if @notes > 1;
                    my $summary = ucfirst(join($sep, @notes)).$suffix;
                    $self->warn("$summary in $title\n");

                    # NOTE(review): the two trailing 1 arguments are
                    # presumably the minor/bot flags -- confirm against
                    # AnomieBOT::API::edit.
                    my $r = $api->edit($tok, $work, $summary, 1, 1);
                    if ($r->{'code'} ne 'success') {
                        $self->warn("Write failed on $title: ".$r->{'error'}."\n");
                        next PAGE;
                    }
                    $revid = $r->{'edit'}{'newrevid'};
                }

                # Remember the revision that has now been checked.
                $api->store($pageid, \$revid);

                # Out of time: let another task have a go.
                return 0 if time() >= $deadline;
            }

            last BATCH unless exists($params{'geuoffset'});
        }
    }

    # No more pages to check, try again in 10 minutes or so in case of errors.
    return 600;
}
1;