User:XLinkBot/Code/LinkWatcher.pl
Appearance
< User:XLinkBot | Code
#!/usr/bin/perl
#
# LinkWatcher master process.
#
# Reads settings from the "linkwatcher-config" file, spawns one
# POE::Component::IRC session per configured server, starts the LinkParser.pl
# slaves plus the TCP server they talk to, loads the rule lists from the wiki
# and from MySQL, and then hands control to the POE event loop.
use strict;
use POE qw(Component::IRC Component::IRC::Plugin::BotAddressed Component::Server::TCP);
use Socket;
use DBI;
use Data::Dumper;
use perlwikipedia;
use URI::Escape qw(uri_escape_utf8);
use Date::Parse qw(str2time);
use XML::Simple;

my %settings;
my @editQueue;    # edits waiting to be handed to a parser slave
my @months   = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
my $username = "LinkWatcher";
my $editor   = Perlwikipedia->new( $username, $username );
my $editor2  = Perlwikipedia->new( $username, $username );

my $reconnectcounter = 0;
$settings{'nodb'} = 0;    # default; a config line may override it

# Running counters reported by the "!info" command.
my $counter             = 0;
my $editcounter         = 0;
my $watchedspacecounter = 0;
my $totallinks          = 0;
my $whitelinks          = 0;
my $linkadditions       = 0;
my $blacklinks          = 0;
my $redlinks            = 0;
my $responsetime        = time();
my $whiteuser           = 0;
my $IPusers             = 0;
my $ratio1              = 0;
my $ratio2              = 0;
my $ratio3              = 0;
my $ratio4              = 0;
my $ratio5              = 0;
my $ratio6              = 0;
my $ratio7              = 0;
my $ratio8              = 0;
my $ratio9              = 0;
my $ratio10             = 0;
my $starttime           = time();
my $lastparsetime       = time();
$settings{'debug'} = 0;    # default; a config line may override it

# --- configuration -----------------------------------------------------------
# Each non-comment line of the form "key=value" becomes $settings{key}.
print "Reading config file...";
open( my $config_fh, '<', 'linkwatcher-config' )
    or die "Can't open LinkWatcher config: $!";
while ( my $line = <$config_fh> ) {
    next if $line =~ /^#/;
    if ( $line =~ /(.+?)=(.+)/ ) {
        $settings{$1} = $2;
    }
}
close($config_fh);
print "done\n";

# --- IRC ---------------------------------------------------------------------
print "Initializing IRC subsystem...";
my @rcchannels = split( /\s,?/, $settings{'rcchannels'} );
print "RC channels are: " . join( ' ', @rcchannels ) . "\n";
print "done\n";

my $ircsettings = {
    $settings{'rcserver'} => {
        port => $settings{'ircport'},

        # Must be a reference: a bare array here would be flattened into
        # the surrounding hash and corrupt its key/value pairs.
        channels => \@rcchannels,
    },
    $settings{'ircserver'} => {
        port     => $settings{'ircport'},
        channels => [ $settings{'ircreportchannel'}, $settings{'ircbotchannel'} ],
    },
};

# One IRC component per server; the server name doubles as the POE alias.
foreach my $server ( keys %{$ircsettings} ) {
    POE::Component::IRC->spawn(
        alias    => $server,
        nick     => $settings{'ircnick'},
        ircname  => $settings{'ircname'},
        username => $settings{'ircusername'},
    );
}

# --- parser slaves and the TCP server they connect to ------------------------
foreach my $parserNumber ( 1 .. $settings{'numberofparsers'} ) {
    print "Starting slave $parserNumber...";

    # NOTE(review): system() waits for the child to exit, so LinkParser.pl
    # presumably backgrounds itself - confirm.
    system( "perl", "LinkParser.pl", $settings{'serverport'} );
    print "done\n";
}

print "Initializing LinkWatcher2 master server on port $settings{'serverport'}...";
POE::Component::Server::TCP->new(
    Port               => $settings{'serverport'},
    ClientInput        => \&slave_input,
    ClientDisconnected => \&slave_disconnect,
);
print "done\n";

POE::Session->create(
    package_states => [
        'main' => [
            qw(_start irc_registered irc_001 irc_public irc_bot_addressed irc_disconnected)
        ],
    ],
    heap => { config => $ircsettings },
);

# --- database ----------------------------------------------------------------
$settings{'altmysqldb'} = "alt$settings{'mysqldb'}";
print "Connecting to MySQL...";
my $alt   = 0;
my $mysql = DBI->connect(
    "dbi:mysql:$settings{'mysqldb'};$settings{'mysqlhost'}",
    $settings{'mysqluser'},
    $settings{'mysqlpassword'}
) or die "Can't connect to MySQL: $DBI::errstr";
$mysql->{mysql_auto_reconnect} = 1;
print "done\n";

# --- on-wiki blacklist -------------------------------------------------------
# One rule per line; anything after '#' is dropped.  A "resolve a.b.c.d" rule
# is kept whole, otherwise only the first whitespace-delimited word is used.
my $rulespage         = $editor->get_text( $settings{'blacklist'} );
my @tempwikiblacklist = split( /\n/, $rulespage );
my @wikiblacklist     = ();
my $garbage;
foreach my $item (@tempwikiblacklist) {
    $item = lc($item);
    $item .= "#";
    ( $item, $garbage ) = split( /#/, $item );
    $item .= " ";
    if ( $item =~ m/(resolve \d+\.\d+\.\d+\.\d+)/ ) {
        $item = $1;
    }
    else {
        ( $item, $garbage ) = split( /\s/, $item );
    }
    if ( length($item) > 0 ) {
        push( @wikiblacklist, $item );
    }
}

# --- rule lists kept in MySQL ------------------------------------------------
my $query_handle;
my @redlist   = ();
my @whitelist = ();
my @blacklist = ();
foreach my $list (
    [ 'redlist',   \@redlist ],
    [ 'whitelist', \@whitelist ],
    [ 'blacklist', \@blacklist ],
    )
{
    my ( $table, $rules ) = @{$list};
    $query_handle = $mysql->prepare("SELECT rule FROM $settings{'mysqltableprefix'}$table");
    $query_handle->execute;
    while ( my $rule = $query_handle->fetchrow_array ) {
        push( @{$rules}, $rule );
    }
    print( "$table: " . join( " - ", @{$rules} ) . "\n" ) if $settings{'debug'};
}

print "Starting LinkWatcher2...\n";
POE::Kernel->run();
exit 0;

# POE session start: ask every IRC component to register this session for
# all events.
sub _start {
    my ( $kernel, $session ) = @_[ KERNEL, SESSION ];

    # Send a POCOIRC_REGISTER signal to all poco-ircs
    $kernel->signal( $kernel, 'POCOIRC_REGISTER', $session->ID(), 'all' );
    $kernel->signal( $kernel, 'POCOIRC_CONNECT',  $session->ID(), 'all' );
    return;
}

# Fired once per IRC component after registration: attach the BotAddressed
# plugin and connect to the server whose name is the component's alias.
sub irc_registered {
    my ( $kernel, $heap, $sender, $irc_object ) = @_[ KERNEL, HEAP, SENDER, ARG0 ];
    my $alias = $irc_object->session_alias();

    $irc_object->plugin_add( 'BotAddressed',
        POE::Component::IRC::Plugin::BotAddressed->new( eat => 1 ) );

    my %conn_hash = (
        server => $alias,
        port   => $heap->{config}->{$alias}->{port},
    );

    # In any irc_* events SENDER will be the PoCo-IRC session
    $kernel->post( $sender, 'connect', \%conn_hash );
    return;
}

# Server welcome (numeric 001): join the recent-changes channels on the RC
# server, or identify to NickServ and join the report/bot channels on the
# reporting server.
sub irc_001 {
    my ( $kernel, $heap, $sender ) = @_[ KERNEL, HEAP, SENDER ];

    # Get the component's object at any time by accessing the heap of
    # the SENDER
    my $poco_object = $sender->get_heap();
    print "Connected to ", $poco_object->server_name(), "\n" if $settings{'debug'};

    if ( $poco_object->server_name() eq $settings{'rcserver'} ) {
        $kernel->post( $sender => join => $_ ) for @rcchannels;
    }
    elsif ( $poco_object->server_name() eq $settings{'ircserver'} ) {
        $kernel->post( $sender => privmsg => 'NickServ', "identify $settings{'ircpassword'}" );

        # NOTE(review): this sleep blocks the whole POE event loop for four
        # seconds; presumably it gives NickServ time to apply the cloak.
        sleep 4;
        $kernel->post( $sender => join => $settings{'ircreportchannel'} )
            if $settings{'ircreportchannel'};
        $kernel->post( $sender => join => $settings{'ircbotchannel'} )
            if $settings{'ircbotchannel'};
    }
    return;
}

# Handler for every public channel message on either server.  The body
# continues on the following source lines.
sub irc_public {
    my ( $kernel, $sender, $who, $where, $message ) = @_[ KERNEL, SENDER, ARG0, ARG1, ARG2 ];
    my $nick = ( split /!/, $who )[0];
    my
$cloak = ( split /@/, $who )[1];

    # Strip mIRC colour codes and formatting control characters before
    # matching anything against the message.
    $message =~ s/\cC\d{1,2}(?:,\d{1,2})?|[\cC\cB\cI\cU\cR\cO]//g;

    $counter++;
    $reconnectcounter++;

    # Every 500 public messages, re-issue the joins for all channels.
    if ( $reconnectcounter > 500 ) {
        $reconnectcounter = 0;
        $kernel->post( $settings{'rcserver'} => join => $_ ) for @rcchannels;
        $kernel->post( $settings{'ircserver'} => join => $settings{'ircreportchannel'} )
            if $settings{'ircreportchannel'};
        $kernel->post( $settings{'ircserver'} => join => $settings{'ircbotchannel'} )
            if $settings{'ircbotchannel'};
    }

    # Periodic refresh of all rule lists.
    if ( $counter > $settings{'refreshevery'} ) {
        if ( $settings{'source'} eq "Wiki" ) {

            # The original line here read "#wikiblacklist = ..." (a comment),
            # so the freshly fetched list was built and then thrown away.
            @wikiblacklist = _fetch_wiki_blacklist( $settings{'blacklist'} );
        }
        $counter = 0;
        _reload_rule_lists( $settings{'debug'} );
        $kernel->post( $settings{'ircserver'} => privmsg => $settings{'ircreportchannel'} => "Refreshed lists." )
            if $settings{'debug'};
    }

    # The RC feed reported an edit to the on-wiki blacklist page: reload now.
    # \Q..\E keeps characters in the page title from acting as regex syntax.
    if (   $settings{'source'} eq "Wiki"
        && $nick eq "rc"
        && $message =~ m/\[\[\Q$settings{'blacklist'}\E\]\]/ )
    {
        @wikiblacklist = _fetch_wiki_blacklist( $settings{'blacklist'} );
        $counter       = 0;
        $kernel->post( $settings{'ircserver'} => privmsg => $settings{'ircreportchannel'} => "[[$settings{'blacklist'}]] edited." );
        _reload_rule_lists( $settings{'debug'} );
        $kernel->post( $settings{'ircserver'} => privmsg => $settings{'ircreportchannel'} => "Refreshed lists." )
            if $settings{'debug'};
    }

    # "!alt" / "!normal": switch between the alternative and the normal
    # database.  Restricted to trusted cloaks.
    if ( $message eq "!alt" || $message eq "!normal" ) {
        if ( _is_trusted_cloak($cloak) ) {
            my $use_alt = ( $message eq "!alt" ) ? 1 : 0;
            my $dbname  = $use_alt ? $settings{'altmysqldb'} : $settings{'mysqldb'};
            my $label   = $use_alt ? "alternative" : "normal";
            $mysql = DBI->connect(
                "dbi:mysql:$dbname;$settings{'mysqlhost'}",
                $settings{'mysqluser'},
                $settings{'mysqlpassword'}
            ) or die "Can't connect to MySQL: $DBI::errstr";
            $mysql->{mysql_auto_reconnect} = 1;
            $kernel->post( $settings{'ircserver'} => privmsg => $settings{'ircreportchannel'} => "Using $label linkwatcher database ($dbname)!" );
            $alt = $use_alt;
        }
        else {
            $kernel->post( $settings{'ircserver'} => privmsg => $settings{'ircreportchannel'} => "Cloak protected." );
        }
    }

    # "!refresh": manual reload; always prints the lists to STDOUT.
    if ( $message =~ m/^!refresh/ ) {
        if ( $settings{'source'} eq "Wiki" ) {
            @wikiblacklist = _fetch_wiki_blacklist( $settings{'blacklist'} );
            $counter       = 0;
            _reload_rule_lists(1);
            $kernel->post( $settings{'ircserver'} => privmsg => $settings{'ircreportchannel'} => "Refreshed lists." );
        }
        else {
            $kernel->post( $settings{'ircserver'} => privmsg => $settings{'ircreportchannel'} => "Sourcing $settings{'source'}, no need to refresh." );
        }
    }

    # "!info": uptime and throughput statistics.
    if ( $message =~ m/!info/ ) {
        my $theTime;
        my $timedifference = time() - $starttime;
        my $minute         = int( $timedifference / 60 );
        my $second         = $timedifference - $minute * 60;
        my $hour           = int( $minute / 60 );
        $minute = $minute - $hour * 60;
        my $dayOfYear = int( $hour / 24 );
        $hour = $hour - $dayOfYear * 24;

        # Zero-pad to two digits.
        $hour   = sprintf( "%02d", $hour );
        $minute = sprintf( "%02d", $minute );
        $second = sprintf( "%02d", $second );

        if ( $dayOfYear > 1 ) {
            $theTime = "$dayOfYear days, $hour:$minute:$second hours";
        }
        elsif ( $dayOfYear == 1 ) {
            $theTime = "$dayOfYear day, $hour:$minute:$second hours";
        }
        elsif ( $hour > 1 ) {
            $theTime = "$hour hours $minute:$second minutes";
        }
        elsif ( $hour == 1 ) {
            $theTime = "$hour hour $minute:$second minutes";
        }
        elsif ( $minute > 1 ) {
            $theTime = "$minute minutes $second seconds";
        }
        elsif ( $minute == 1 ) {
            $theTime = "$minute minute $second seconds";
        }
        else {
            $theTime = "$second seconds";
        }

        $timedifference = $timedifference / 60;    # now in minutes

        # NOTE(review): 14558 is an unexplained fixed offset in seconds -
        # possibly a timezone correction for str2time(); confirm.
        my $lagtime = time() - $lastparsetime + 14558;

        # Explicit zero defaults instead of "my $x = ... unless cond", whose
        # behaviour perlsyn documents as undefined.
        my $percent = sub {
            my ( $part, $whole ) = @_;
            return $whole == 0 ? 0 : int( 10000 * $part / $whole ) / 100;
        };
        my $per_minute = sub {
            my ($count) = @_;
            return $timedifference == 0 ? 0 : int( $count / $timedifference );
        };

        my $ratio1 = $percent->( $watchedspacecounter, $editcounter );
        my $ratio2 = $percent->( $whiteuser,           $watchedspacecounter );
        my $ratio3 = $percent->( $linkadditions,       $watchedspacecounter );
        my $ratio4 = $percent->( $whitelinks,          $totallinks );
        my $ratio5 = $percent->( $redlinks,            $totallinks );
        my $ratio6 = $percent->( $blacklinks,          $totallinks );
        my $ratio7 = $percent->( $IPusers,             $watchedspacecounter );
        my $ratio8 = $linkadditions == 0 ? 0 : int( 100 * $totallinks / $linkadditions ) / 100;

        my $editsperminute        = $per_minute->($editcounter);
        my $watchededitsperminute = $per_minute->($watchedspacecounter);
        my $linksperminute        = $per_minute->($totallinks);
        my $blacklinksperminute   = $per_minute->($blacklinks);
        my $whitelinksperminute   = $per_minute->($whitelinks);
        my $redlinksperminute     = $per_minute->($redlinks);
        my $lastmessage           = time() - $responsetime;

        $kernel->post( $settings{'ircserver'} => privmsg => $where->[0] => "LW: $theTime active, rc: $lastmessage sec. ago. Lag: $lagtime sec. Total: $editcounter edits ($editsperminute PM); Watched $watchedspacecounter ($ratio1%; $watchededitsperminute PM); $linkadditions with EL ($ratio3%); $IPusers IP users ($ratio7%); $whiteuser wl users ($ratio2%); $totallinks links total ($linksperminute PM; $ratio8 per EL add edit); $whitelinks wl links ($ratio4%; $whitelinksperminute PM); $redlinks rl links ($ratio5%; $redlinksperminute PM); $blacklinks bl links ($ratio6%; $blacklinksperminute PM). Source: $settings{'source'}" );
    }

    # A recent-changes line: "[[Page]] M http://lang.project.org/... * User * (+size)".
    if ( $message =~ m/\[\[(.+?)\]\] M?\s*?http:\/\/(.+?)\.(.+?)\/(.+?) \* (.+?) \* \(([^)]+)\)/ ) {
        $responsetime = time();
        $editcounter++;

        my $page    = $1;
        my $lang    = $2;
        my $domain1 = $3;
        my $diffurl = "http://$lang.$domain1/$4";
        my $user    = $5;
        my $size    = $6;

        # A host without a language part (e.g. "wikisource.org") matches as
        # lang="wikisource", domain1="org".  The original rebuilt the host
        # from the still-undefined $domain instead of $domain1.
        if ( $domain1 eq "org" ) {
            $domain1 = "$lang.$domain1";
            $lang    = "";
        }

        # Namespace is the part before the first colon, if anything follows it.
        my ( $space, $rest ) = split( /:/, $page, 2 );
        if ( !defined $rest || $rest eq "" ) {
            $space = "";
        }

        my %interwiki_prefix_for = (
            "wiktionary.org" => "wikt:",
            "wikibooks.org"  => "b:",
            "wikinews.org"   => "n:",
            "wikisource.org" => "s:",
            "wikiquote.org"  => "q:",
        );
        my $domain = exists $interwiki_prefix_for{$domain1} ? $interwiki_prefix_for{$domain1} : "";

        # NOTE(review): this tests the page namespace, not the host part;
        # presumably $lang was meant - left unchanged.
        if ( $domain1 eq "wikimedia.org" && $space eq "species" ) {
            $domain = "wikispecies:";
        }

        print("Read: $domain1 - $domain - $page - $lang - $space - $diffurl - $user - $size\n")
            if $settings{'debug'};

        # Only main, category and template namespaces (English and Dutch
        # names) are queued for the parser slaves.
        my %watched_space = map { $_ => 1 } ( "", " ", "Category", "Template", "Categorie", "Sjabloon" );
        if ( $watched_space{$space} ) {
            $watchedspacecounter++;
            if ( $user =~ m/(\d)+\.(\d)+\.(\d)+\.(\d)+/ ) {
                $IPusers++;
            }
            unshift(
                @editQueue,
                {
                    pagename => $page,
                    domain   => $domain,
                    lang     => $lang,
                    diffurl  => $diffurl,
                    user     => $user,
                    size     => $size,
                }
            );
            print("Push: $domain1 - $domain - $page - $lang - $space - $diffurl - $user - $size\n")
                if $settings{'debug'};
        }
    }
}

# Fetch a wiki page and return its rules, one per line.  Text after '#' is
# dropped; a "resolve a.b.c.d" rule is kept whole, otherwise only the first
# whitespace-delimited word of the line is used.  Empty results are skipped.
sub _fetch_wiki_blacklist {
    my ($page_title) = @_;
    my $rulespage = $editor->get_text($page_title);
    my @rules;
    foreach my $line ( split( /\n/, $rulespage ) ) {
        my $item = $line . "#";
        ($item) = split( /#/, $item );
        if ( $item =~ m/(resolve \d+\.\d+\.\d+\.\d+)/ ) {
            $item = $1;
        }
        else {
            ($item) = split( /\s/, $item );
        }
        push( @rules, $item ) if length($item) > 0;
    }
    return @rules;
}

# Reload @redlist, @whitelist and @blacklist from their MySQL tables.  When
# $verbose is true, print all four rule lists to STDOUT afterwards.
sub _reload_rule_lists {
    my ($verbose) = @_;
    foreach my $list (
        [ 'redlist',   \@redlist ],
        [ 'whitelist', \@whitelist ],
        [ 'blacklist', \@blacklist ],
        )
    {
        my ( $table, $rules ) = @{$list};
        $query_handle = $mysql->prepare("SELECT rule FROM $settings{'mysqltableprefix'}$table");
        $query_handle->execute;
        my @fresh = ();
        while ( my $rule = $query_handle->fetchrow_array ) {
            push( @fresh, $rule );
        }
        @{$rules} = @fresh;
    }
    if ($verbose) {
        print( "redlist: " .       join( " - ", @redlist ) .       "\n" );
        print( "whitelist: " .     join( " - ", @whitelist ) .     "\n" );
        print( "blacklist: " .     join( " - ", @blacklist ) .     "\n" );
        print( "wikiblacklist: " . join( " - ", @wikiblacklist ) . "\n" );
    }
    return;
}

# True when the host cloak belongs to one of the two operators allowed to
# issue privileged commands.  Compared case-insensitively.
sub _is_trusted_cloak {
    my ($candidate) = @_;
    return 0 unless defined $candidate;
    my $folded = lc($candidate);
    return ( $folded eq 'wikimedia/beetstra' || $folded eq 'wikimedia/versageek' ) ? 1 : 0;
}

# Handler for messages addressed to the bot by nick.  Only messages in the
# report channel are acted upon.
sub irc_bot_addressed {
    my ( $kernel, $sender, $who, $where, $message ) = @_[ KERNEL, SENDER, ARG0, ARG1, ARG2 ];
    my $nick  = ( split /!/, $who )[0];
    my $cloak = ( split /@/, $who )[1];

    return unless $where->[0] eq $settings{'ircreportchannel'};

    # "link <wl|rl|bl> <add|del> <regex>": edit one of the rule tables.
    if ( $message =~ m/link (.+?) (.{1,3}) (.+)/ ) {
        my $list      = $1;
        my $operation = $2;
        my $raw_link  = $3;
        $raw_link =~ s/\s//g;
        my $link     = $mysql->quote($raw_link);
        my $addcloak = $mysql->quote($cloak);

        return unless $operation eq 'add' || $operation eq 'del';

        my %table_for = ( wl => 'whitelist', rl => 'redlist', bl => 'blacklist' );
        return unless exists $table_for{$list};
        my $table = "$settings{'mysqltableprefix'}$table_for{$list} ";

        if ( $operation eq 'add' ) {
            my $query = "INSERT INTO $table(rule,cloak,reason) VALUES ($link,$addcloak,'')";

            # Compile the rule exactly as it will be stored.  The original
            # tested the SQL-quoted string, which can hide a broken pattern.
            eval { my $errorquery = "http://www.pornographicfireparrot.com/" =~ m/$raw_link/; };
            unless ($@) {
                &query($query);
                $kernel->post( $settings{'ircserver'} => privmsg => $where->[0] => "Item $link added to $list ($query)." );
            }
            else {
                $kernel->post( $settings{'ircserver'} => privmsg => $where->[0] => "Your regex appears to be broken: $link." );
            }
        }
        else {
            my $query = "DELETE FROM ${table}WHERE rule=$link";
            &query($query);
            $kernel->post( $settings{'ircserver'} => privmsg => $where->[0] => "Item $link removed from $list ($query)." );
        }
    }

    # "user <add|del> <name>": edit the user whitelist.  Any other operation
    # is ignored instead of producing a malformed query.
    if ( $message =~ m/user (.+?) (.+)/ ) {
        my $operation = $1;
        my $user      = $mysql->quote($2);
        my $table     = "$settings{'mysqltableprefix'}users ";
        if ( $operation eq 'add' ) {
            &query("INSERT INTO $table(username,status) VALUES ($user,'ignore')");
            $kernel->post( $settings{'ircserver'} => privmsg => $where->[0] => "user $user added to whitelist" );
        }
        elsif ( $operation eq 'del' ) {
            &query("DELETE FROM ${table}WHERE username=$user");
            $kernel->post( $settings{'ircserver'} => privmsg => $where->[0] => "user $user removed from whitelist" );
        }
    }

    # "refresh": reload the on-wiki revert list.
    if ( $message =~ m/^refresh/ ) {
        @wikiblacklist = _fetch_wiki_blacklist("User:XLinkBot/RevertList");
        $counter       = 0;
    }

    # "source <SQL|Wiki>": choose where the blacklist comes from.
    if ( $message =~ m/^source (.+)/ ) {
        my $revertfrom = $1;
        if ( _is_trusted_cloak($cloak) ) {

            # Anchored so that only the two exact values are accepted; other
            # code compares $settings{'source'} with eq.
            if ( $revertfrom =~ m/^(?:SQL|Wiki)$/ ) {
                $settings{'source'} = $revertfrom;
                $kernel->post( $settings{'ircserver'} => privmsg => $where->[0] => "Blacklist source is now $revertfrom." );
            }
            else {
                $kernel->post( $settings{'ircserver'} => privmsg => $where->[0] => "'$revertfrom' is not a valid Blacklist source." );
            }
        }
        else {
            $kernel->post( $settings{'ircserver'} => privmsg => $where->[0] => "I only trust Versageek and Beetstra to issue this command." );
        }
        @wikiblacklist = _fetch_wiki_blacklist("User:XLinkBot/RevertList");
        $counter       = 0;
    }

    # "quit": shut the bot down (trusted cloaks only).
    if ( $message =~ m/^quit$/ ) {
        $kernel->post( $settings{'ircserver'} => privmsg => $where->[0] => "Quiting." );
        if ( _is_trusted_cloak($cloak) || lc($cloak) eq 'unaffiliated/nixeagle' ) {
            $kernel->signal( $kernel, 'POCOIRC_SHUTDOWN', "Bye!" );
            die "\nDied after quit!\n";
        }
    }

    # "set <key> <value>": change a runtime setting (trusted cloaks only).
    # The original wrote to the literal key '$1' and confirmed the change
    # even when the sender was not authorised.
    if ( $message =~ m/^set (.+) (.+)/ ) {
        my ( $key, $value ) = ( $1, $2 );
        if ( _is_trusted_cloak($cloak) ) {
            $settings{$key} = $value;
            $kernel->post( $settings{'ircserver'} => privmsg => $where->[0] => "Setting '$key' set to '$value'" );
        }
        else {
            $kernel->post( $settings{'ircserver'} => privmsg => $where->[0] => "I only trust Versageek and Beetstra to issue this command." );
        }
    }

    # "channels": ask for which channels LinkWatcher is on
    if ( $message =~ m/^channels/ ) {
        my $channels = join( ", ", @rcchannels );
        $kernel->post( $sender => privmsg => $where->[0] => "$settings{ircname} listens on irc.wikimedia.org to $channels." );
    }

    # Toggles for reporting whitelisted users / links.
    if ( $message =~ m/^show whitelisted users/ ) {
        $settings{'showwhitelistedusers'} = 1;
        $kernel->post( $sender => privmsg => $where->[0] => "Showing whitelisted users" );
    }
    if ( $message =~ m/^hide whitelisted users/ ) {
        $settings{'showwhitelistedusers'} = 0;
        $kernel->post( $sender => privmsg => $where->[0] => "Hiding whitelisted users" );
    }
    if ( $message =~ m/^show whitelisted links/ ) {
        $settings{'showwhitelistedlinks'} = 1;
        $kernel->post( $sender => privmsg => $where->[0] => "Showing whitelisted links" );
    }
    if ( $message =~ m/^hide whitelisted links/ ) {
        $settings{'showwhitelistedlinks'} = 0;
        $kernel->post( $sender => privmsg => $where->[0] => "Hiding whitelisted links" );
    }

    # "add channel <#chan>" / "join channel <#chan>" (trusted cloaks only).
    # The original check used "=" (assignment), which is always true.
    if ( $message =~ m/^add channel (.+)/ || $message =~ m/^join channel (.+)/ ) {
        my $newchannel = $1;
        if ( _is_trusted_cloak($cloak) ) {
            unless ( $newchannel eq '#en.wikipedia' ) {

                # Exact comparison: a regex match treats '.' as a wildcard
                # and also succeeds on substrings of longer channel names.
                if ( grep { $_ eq $newchannel } @rcchannels ) {
                    $kernel->post( $sender => privmsg => $where->[0] => "Channel $newchannel already joined." );
                }
                else {
                    push( @rcchannels, $newchannel );
                    $kernel->post( $settings{'rcserver'} => join => $newchannel );
                    $kernel->post( $sender => privmsg => $where->[0] => "$settings{ircname} is now also parsing $newchannel." );
                    print "Connected to $newchannel.\n";
                }
            }
        }
    }

    # "part channel <#chan>" (trusted cloaks only).  RC channels live on the
    # RC server, so the part request is posted to that component.
    if ( $message =~ m/^part channel (.+)/ ) {
        my $oldchannel = $1;
        if ( _is_trusted_cloak($cloak) ) {
            if ( grep { $_ eq $oldchannel } @rcchannels ) {
                $kernel->post( $settings{'rcserver'} => part => $oldchannel );
                @rcchannels = grep { $_ ne $oldchannel } @rcchannels;
                $kernel->post( $sender => privmsg => $where->[0] => "$settings{ircname} has parted $oldchannel." );
                print "Parted $oldchannel on freenode.\n";
            }
            else {
                $kernel->post( $sender => privmsg => $where->[0] => "Channel $oldchannel not in list."
);
            }    # end "not in list" branch
        }    # end trusted-cloak check
    }    # end "part channel" command
}    # end sub irc_bot_addressed

# Ask the wiki that produced $diffurl for the page's "touched" timestamp of
# revision $revid.  Returns the raw timestamp string, or undef when the host
# cannot be determined or the API request / XML parsing fails.
sub _fetch_touched_timestamp {
    my ( $diffurl, $revid ) = @_;

    # Use the host exactly as it appears in the diff URL.  The original
    # rebuilt it from the language tag, which can carry an interwiki prefix
    # and then yields a host that does not exist.
    my ($host) = $diffurl =~ m{http://([^/\s]+)};
    return undef unless defined $host;

    $editor2->set_wiki( $host, "w" );
    my $wikitimestamp;
    eval {
        my $response = $editor2->_get_api("action=query&prop=info&revids=$revid&format=xml")->decoded_content;
        my $xml      = XMLin($response);
        $wikitimestamp = $xml->{query}->{pages}->{page}->{touched};
    };
    return $wikitimestamp;
}

# TCP input from a parser slave.
#   "REQUEST"    - hand out the oldest queued edit (or "NOEDIT").
#   "PARSED ..." - a slave reports the links it found in an edit: optionally
#                  log every link, then report the edit on IRC.
sub slave_input {
    my ( $kernel, $heap, $input ) = @_[ KERNEL, HEAP, ARG0 ];

    if ( $input =~ m/REQUEST/ ) {
        my $editref = pop(@editQueue);
        if ($editref) {
            my $message =
                  "EDIT [[" . $editref->{pagename} . "]] [["
                . $editref->{domain} . $editref->{lang} . ":User:" . $editref->{user} . "]] "
                . $editref->{diffurl} . " " . $editref->{size};
            print(" message request: $message\n") if $settings{'debug'};
            $heap->{client}->put($message);
        }
        else {
            $heap->{client}->put("NOEDIT");
        }
    }
    elsif ( $input =~ m{PARSED \[\[(.+)\]\] (http://.+?) (.+) \[\[(.+):User:(.+)\]\] \|(.+)\|} ) {
        my ( $pagename, $diffurl, $size, $lang, $username, $links ) = ( $1, $2, $3, $4, $5, $6 );
        print(" message parsed: $pagename - $diffurl - $size - $lang - $username - $links\n")
            if $settings{'debug'};

        my $revid = 0;
        if ( $diffurl =~ m/diff=(\d+)/ ) {
            $revid = $1;
        }

        # edit_id -2 means "timestamp known", -1 means "timestamp lookup failed".
        my $edit_id = -2;
        my $timestamp;
        my $wikitimestamp = _fetch_touched_timestamp( $diffurl, $revid );
        if ( defined $wikitimestamp
            && $wikitimestamp =~ m/(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d:\d\d:\d\d)Z/ )
        {
            my $mnth = $months[ $2 - 1 ];
            $lastparsetime = str2time("$4, $3 $mnth $1");
            $timestamp     = "$1-$2-$3 $4";
        }
        else {
            $edit_id = -1;
        }

        if ( $settings{'loglinks'} ) {
            my @alllinks = split( / /, $links );

            my $wikidomain;
            if ( $diffurl =~ m/http:\/\/(.*?)\.(.*?)\.org/ ) {
                my $wikilang1 = $1;
                $wikidomain = $2;
                if ( $wikidomain eq "wikimedia" ) {
                    $wikidomain = $wikilang1;
                }
            }

            # Split off the namespace.  The limit of 2 keeps titles that
            # contain further colons intact.
            my $namespace = "";
            if ( $pagename =~ m/:/ ) {
                ( $namespace, $pagename ) = split( /:/, $pagename, 2 );
            }
            my %logged_namespace = map { $_ => 1 } ( "", "Category", "Template", "Categorie", "Sjabloon" );
            unless ( $logged_namespace{$namespace} ) {
                $pagename  = "$namespace:$pagename";
                $namespace = "";
            }

            my $isIP = ( $username =~ m/\d+\.\d+\.\d+\.\d+/ ) ? 1 : 0;

            my $oldid = 0;
            if ( $diffurl =~ m/oldid=(\d+)/ ) {
                $oldid = $1;
            }

            print("LOADED TIMESTAMP: $lang.$wikidomain.org - $revid - $wikitimestamp - $edit_id - $timestamp\n")
                if $settings{'debug'};

            foreach my $thislink (@alllinks) {
                my $garbage;
                my $garbage2;
                my $splitlink = $thislink;

                # Reduce the URL to its host: drop scheme, path and port.
                ( $garbage,   $splitlink ) = split( /:\/\//, $splitlink, 2 );
                ( $splitlink, $garbage )   = split( /\//,    $splitlink, 2 );
                ( $splitlink, $garbage2 )  = split( /:/,     $splitlink, 2 );
                my $ip = &resolve($splitlink);
                $splitlink =~ s/www\.//;
                $splitlink =~ s/www\d\.//;

                # Reversed, dot-terminated host, e.g. "com.example.".
                my $domain = "";
                foreach my $linkpart ( split( /\./, $splitlink ) ) {
                    $domain = $linkpart . "." . $domain;
                }
                my $index = $domain . "/" . $garbage;

                my @columns = qw(edit_id lang pagename namespace diff revid oldid wikidomain user fullurl domain indexedlink resolved ip);
                my @values  = (
                    $edit_id, $lang,       $pagename, $namespace, $diffurl, $revid, $oldid,
                    $wikidomain, $username, $thislink, $domain,    $index,   $ip,    $isIP
                );
                if ( $edit_id == -2 ) {
                    unshift( @columns, 'timestamp' );
                    unshift( @values,  $timestamp );
                }
                my $query =
                      "INSERT INTO $settings{'mysqltableprefix'}newlinklog ("
                    . join( ",", @columns )
                    . ") VALUES ("
                    . join( ",", map { $mysql->quote($_) } @values ) . ")";
                print("QUERY: $query\n") if $settings{'debug'};
                &query($query) unless ( $settings{'nodb'} );
            }
        }

        # Is the editing user whitelisted?
        my $whitelisted = 0;
        my $rows = &query( "SELECT status FROM $settings{'mysqltableprefix'}users WHERE username=" . $mysql->quote($username) );
        if ( defined $rows && $rows->[0]->{status} eq 'ignore' ) {
            $whitelisted = 1;
            $whiteuser++;
        }

        # Report on IRC; whitelisted users only when explicitly requested.
        if ( !$whitelisted || $settings{'showwhitelistedusers'} ) {
            my $message = &build_output( $kernel, $pagename, $diffurl, $size, $lang, $username, $whitelisted, $links );
            print("$message\n") if $settings{'debug'} && !$whitelisted;
            unless ( length($message) < 2 ) {
                $kernel->post( $settings{'ircserver'} => privmsg => $settings{'ircreportchannel'} => $message )
                    unless ( $settings{'spambotfeeder'} );
            }
        }

        print("Found: [[$lang:$pagename]] $diffurl [[$lang:User:$username]] $links\n")
            if $settings{'debug'};

        my @split_links = split( /\s/, $links );
        if (@split_links) {
            $linkadditions++;
        }
    }
}

# A parser slave dropped its TCP connection: start a replacement.
sub slave_disconnect {
    system( "perl", "LinkParser.pl", $settings{'serverport'} );
}

# Run one SQL statement against the current database handle.
#   SELECT    - returns an array ref of row hashes, or undef when there are
#               no rows or the statement fails.
#   otherwise - returns the result of DBI's do(), or undef on failure.
sub query {
    my $query = shift;
    print("QR: $query\n") if $settings{'debug'};

    if ( $query !~ m/^select/i ) {

        # The original declared a second "my $status" inside the eval, so
        # the value returned to the caller was always 0.
        my $status = eval { $mysql->do($query) };
        if ($@) {
            return undef;
        }
        return $status;
    }

    my $results = eval {
        my $query_handle = $mysql->prepare($query);
        $query_handle->execute;
        my $rows = $query_handle->fetchall_arrayref( {} );
        $query_handle->rows > 0 ? $rows : undef;
    };
    if ($@) {
        warn "query failed: $@";
        return undef;
    }
    return $results;
}

# Build the IRC report line for one parsed edit.  Only the beginning of this
# sub is on these source lines; it is re-laid-out here without any change to
# its statements and continues on the following source line.
sub build_output {
    my ( $kernel, $pagename, $diffurl, $size, $lang, $username, $whitelisted, $links_pre ) = @_;
    my $message="\x034[[$lang:$pagename]]\x03 \x033$diffurl\x03 \x0312[[$lang:User:$username]]\x03 ";
    my @links = split( /\s/, $links_pre );
    my %links_hash;
    foreach (@links) {
        $links_hash{$_}='';
    }
    my $query_handle;
    my $blacklistrule;
    my $rule;
    my $blacklisted;
    my @resolved_ips = ();
    my $fulloutput = "";
    my $usercount = 0;
    my $linkcount = 0;
    my $userlinkcount = 0;
    my $userlinklangcount;
    my $query;
    $query="SELECT COUNT(*) as total_record FROM $settings{'mysqltableprefix'}newlinklog WHERE user=" . $mysql->quote($username);
    unless ($settings{'spambotfeeder'} or $settings{'nodb'}) {
        $usercount = &query($query);
        if (defined $usercount) {
            $usercount = @{ $usercount }[0]->{total_record};
        }
    }
    foreach my $link ( @links) {
        $totallinks++;
        my $garbage;
        my $garbage2;
        my $parselink = $link;
        my $output = $link;
        my $splitlink = $link;
        ($garbage,$splitlink) = split(/:\/\//,$splitlink,2);
        ($splitlink,$garbage) = split(/\//,$splitlink,2);
        ($splitlink,$garbage2) = split(/:/,$splitlink,2);
        my $ip = &resolve( $splitlink );
        $splitlink =~ s/www\.//;
        $splitlink =~ s/www\d\.//;
        my @thislink = split(/\./,$splitlink);
        my $domain = "";
        foreach my $linkpart (@thislink) {
            $domain = $linkpart . "." . $domain;
        }
        my $index = $domain . "/" .
$garbage; my $whitelisted = 0; my $addoutput = ""; unless ($settings{'spambotfeeder'}) { foreach $rule (@whitelist) { if ($rule =~ m/resolve (\d+\.\d+\.\d+\.\d+)/i) { my $resolved = $1; if ($ip eq $resolved) { print ("WL resolve match: $ip = $resolved\n") if $settings{'debug'}; $addoutput.=" \x039(WL: $rule)\x03 "; $whitelinks++; $whitelisted = 1; } } else { if ($link=~m/$rule/i) { $addoutput.=" \x039(WL: $rule)\x03 "; $whitelinks++; $whitelisted = 1; } } } foreach $rule (@redlist) { if ($rule =~ m/resolve (\d+\.\d+\.\d+\.\d+)/i) { my $resolved = $1; if ($ip eq $resolved) { print ("RL resolve match: $ip = $resolved\n") if $settings{'debug'}; $addoutput.=" \x034(RL: $rule)\x03 "; $redlinks++ } } else { if ($link =~ m/$rule/i) { $addoutput .=" \x034(RL: $rule)\x03 "; $redlinks++; } } } unless ($settings{'nodb'}) { $query="SELECT COUNT(*) as total_record FROM $settings{'mysqltableprefix'}newlinklog WHERE domain=" . $mysql->quote($domain); $linkcount = &query($query) ; if (defined $linkcount) { $linkcount = @{ $linkcount }[0]->{total_record}; } $query="SELECT COUNT(*) as total_record FROM $settings{'mysqltableprefix'}newlinklog WHERE domain=" . $mysql->quote($domain) . " AND user=" . $mysql->quote($username); $userlinkcount = &query($query); if (defined $userlinkcount) { $userlinkcount = @{ $userlinkcount }[0]->{total_record}; } $query="select count(distinct concat(lang,wikidomain)) as total_record from linkwatcher_newlinklog where user=" . $mysql->quote($username) . "AND domain= ". $mysql->quote($domain) ." 
group by concat(user)"; $userlinklangcount = &query($query); if (defined $userlinklangcount) { $userlinklangcount = @{ $userlinklangcount }[0]->{total_record}; } } print ("STATISTICS $domain - $ip -> $usercount = $linkcount = $userlinkcount = $userlinklangcount\n") if $settings{'debug'}; if ($usercount eq "") { $usercount = 0; } if ($usercount > 3) { $addoutput .= " (\x034 $usercount \x03"; } else { $addoutput .= " ( $usercount"; } if ($linkcount eq "") { $linkcount = 0; } if ($linkcount > 250) { $addoutput .= ",\x034 $linkcount \x03"; } else { $addoutput .= ", $linkcount"; } if ($userlinkcount eq "") { $userlinkcount = 0; } if ($userlinkcount > 25) { $addoutput .= ",\x034 $userlinkcount \x03"; } elsif ($userlinkcount > 5) { $addoutput .= ",\x0312 $userlinkcount \x03"; } else { $addoutput .= ", $userlinkcount"; } unless ($linkcount eq 0) { if ((($userlinkcount / $linkcount) > 0.9) && ($linkcount > 2) && ($userlinkcount > 2)) { $addoutput .= "\x034!!\x03"; } elsif ((($userlinkcount / $linkcount) > 0.66) && ($linkcount > 2) && ($userlinkcount > 2)) { $addoutput .= "\x034!\x03"; } elsif ((($userlinkcount / $linkcount) > 0.33) && ($linkcount > 2) && ($userlinkcount > 2)) { $addoutput .= "?"; } } if ($userlinklangcount eq "") { $userlinklangcount = 0; } if ($userlinklangcount > 5) { $addoutput .= ",\x034 $userlinklangcount \x03"; } elsif ($userlinklangcount > 2) { $addoutput .= ",\x0312 $userlinklangcount \x03"; } else { $addoutput .= ", $userlinklangcount"; } unless ($linkcount eq 0) { if ((($userlinklangcount / $linkcount) > 0.75) && ($linkcount > 1) && ($userlinklangcount > 1)) { $addoutput .= "\x034!!\x03"; } elsif ((($userlinklangcount / $linkcount) > 0.5) && ($linkcount > 1) && ($userlinklangcount > 1)) { $addoutput .= "\x034!\x03"; } elsif ((($userlinklangcount / $linkcount) > 0.33) && ($linkcount > 1) && ($userlinklangcount > 1)) { $addoutput .= "?"; } unless ($whitelisted == 1) { if ((($userlinklangcount / $linkcount) > 0.90) && ($linkcount > 2) && 
($userlinklangcount > 4)) { my $url1 = $link; ($garbage,$url1) = split(/:\/\//,$url1); if ($url1=~ m/^www\.(.*)/) { $url1 = $1; } if ($url1=~ m/^www\d\.(.*)/) { $url1 = $1; } ($url1,$garbage) = split(/\//,$url1); $url1 = lc($url1); my $url2 = $url1; $url1 = "\\b$url1"; $url1 =~ s/\./\\\./g; $url1 =~ s/ //g; eval { my $errorquery = "http://www.pornographicfireparrot.com/" =~ m/$url1/; }; unless ($@) { $kernel->post($settings{'ircserver'}=>privmsg=>$settings{'ircreportchannel'}=>"LWCOIBot ml add $url1 Link has been added to more than 5 wikipedia by $username") unless ($settings{'spambotfeeder'}); } } if ((($userlinkcount / $linkcount) > 0.66) && (($userlinklangcount / $linkcount) > 0.66 ) && ($userlinklangcount > 2)) { my $url1 = $link; ($garbage,$url1) = split(/:\/\//,$url1); if ($url1=~ m/^www.(.*)/) { $url1 = $1; } if ($url1=~ m/^www\d\.(.*)/) { $url1 = $1; } ($url1,$garbage) = split(/\//,$url1); $url1 = lc($url1); my $url2 = $url1; $url1 = "\\b$url1"; $url1 =~ s/\./\\\./g; $url1 =~ s/ //g; $url1 =~ s/[\)\(\}\{\[\]]//g; eval { my $errorquery = "http://www.pornographicfireparrot.com/" =~ m/$url1/; }; unless ($@) { my $query; my $linkcount; $query="SELECT COUNT(*) as total_record FROM $settings{'mysqltableprefix'}redlist WHERE rule=" . $mysql->quote($url1); $linkcount = &query($query); if (defined $linkcount) { $linkcount = @{ $linkcount }[0]->{total_record}; } unless ($linkcount > 0) { $query = "INSERT INTO $settings{'mysqltableprefix'}redlist (rule,cloak,reason) VALUES (" . 
$mysql->quote($url1) .",'AUTOREDLIST','Autoredlist, user $username uses this link crosswiki.')"; &query($query); $kernel->post($settings{'ircserver'}=>privmsg=>$settings{'ircreportchannel'}=>"\x034WARNING: Link $url1 added to redlist, [[$lang:User:$username]] uses link crosswiki.\x03") unless ($settings{'spambotfeeder'}); } $kernel->post($settings{'ircserver'}=>privmsg=>$settings{'ircreportchannel'}=>"LWCOIBot report xwiki-spam $url2 $username") unless ($settings{'spambotfeeder'}); } } if ((($userlinkcount / $linkcount) > 0.90) && ($linkcount > 4) && ($userlinkcount > 4)) { my $url1 = $link; ($garbage,$url1) = split(/:\/\//,$url1); if ($url1=~ m/^www\.(.*)/) { $url1 = $1; } if ($url1=~ m/^www\d\.(.*)/) { $url1 = $1; } ($url1,$garbage) = split(/\//,$url1); $url1 = lc($url1); $url1 = "\\b$url1"; $url1 =~ s/\./\\\./g; $url1 =~ s/ //g; $url1 =~ s/[\)\(\}\{\[\]]//g; eval { my $errorquery = "http://www.pornographicfireparrot.com/" =~ m/$url1/; }; unless ($@) { my $query; my $linkcount; $query = "SELECT COUNT(*) as total_record FROM $settings{'mysqltableprefix'}redlist WHERE rule=" . $mysql->quote($url1); $linkcount = &query($query); if (defined $linkcount) { $linkcount = @{ $linkcount }[0]->{total_record}; } unless ($linkcount > 0) { $query = "INSERT INTO $settings{'mysqltableprefix'}redlist (rule,cloak,reason) VALUES (" . $mysql->quote($url1) . 
",'AUTOREDLIST','Autoredlist, this link is only used by user $username.')"; &query($query); $kernel->post($settings{'ircserver'}=>privmsg=>$settings{'ircreportchannel'}=>"\x034WARNING: Link $url1 added to redlist, [[$lang:User:$username]] is only user adding this link.\x03") unless ($settings{'spambotfeeder'}); } } } } } $addoutput .=") "; } if ($settings{source} eq "SQL") { my $blacklisted=0; foreach $rule (@blacklist) { if ($rule =~ m/resolve (\d+\.\d+\.\d+\.\d+)/i) { my $resolved = $1; if ($ip eq $resolved) { print ("BL resolve match: $ip = $resolved\n") if $settings{'debug'}; unless ($whitelisted) { &generate_alert( $pagename, $diffurl, $size, $lang, $username, $rule, $kernel ) unless $blacklisted; } $output.=" \x035(BL: $rule)\x03 "; $blacklisted=1; $blacklinks++; } } else { if ($link=~m/$rule/i) { unless ($whitelisted) { &generate_alert( $pagename, $diffurl, $size, $lang, $username, $rule, $kernel ) unless $blacklisted; } $output .=" \x035(BL: $rule)\x03 "; $blacklisted=1; $blacklinks++; } } } } elsif ($settings{source} eq "Wiki") { foreach $rule (@wikiblacklist) { if (length($rule) > 2) { if ($rule =~ m/resolve (\d+\.\d+\.\d+\.\d+)/i) { my $resolved = $1; if ($ip eq $resolved) { print ("BL resolve match: $ip = $resolved\n") if $settings{'debug'}; unless ($whitelisted) { &generate_alert( $pagename, $diffurl, $size, $lang, $username, $rule, $kernel ) unless $blacklisted; } $output.=" \x035(BL: $rule)\x03 "; $blacklisted=1; $blacklinks++; } } else { if ($link=~m/$rule/i) { unless ($whitelisted) { &generate_alert( $pagename, $diffurl, $size, $lang, $username, $rule, $kernel ) unless $blacklisted; } $output.=" \x035(BL: $rule)\x03 "; $blacklisted=1; $blacklinks++; } } } } } else { $kernel->post($settings{'ircserver'}=>privmsg=>$settings{'ircreportchannel'}=>"Incorrect setting for $settings{source}"); } $output .= $addoutput; unless ($output =~ m/(BL:|RL:)/) { if ($output =~ m/WL:/) { unless ($settings{'showwhitelistedlinks'}) { $output = ""; } } } $fulloutput .= 
"$output";    # completes the `$fulloutput .=` statement begun on the previous source line
    }

    # (Tail of build_output, unchanged.) With five characters or fewer of
    # per-link output the message is blanked instead of being extended.
    if (length($fulloutput)>5) { $message .= $fulloutput; } else { $message = ""; }
    return $message;
}

# Post a blacklist alert for one edit to the bot channel.
# Arguments: page title, diff URL, size, language (unused here), user name,
# the matching rule, and the POE kernel used for posting.
sub generate_alert {
    my ( $pagename, $diffurl, $size, $lang, $username, $rule, $kernel ) = @_;
    my $message = "diff=<$diffurl> user=<$username> title=<$pagename> size=<$size> rule=<$rule>";
    return $kernel->post( $settings{'ircserver'} => privmsg => $settings{'ircbotchannel'} => $message );
}

# Resolve a host name (or dotted quad) to a dotted-quad IPv4 string.
# Returns 0 when the name is missing, empty, or does not resolve to a
# four-byte address.
sub resolve {
    my $link = shift;

    # Callers derive the host by splitting a URL, which yields undef for a
    # link without "://"; there is nothing to look up in that case.
    unless ( defined $link and length $link ) {
        print "Empty host name, nothing to resolve\n" if $settings{'debug'};
        return 0;
    }

    # inet_aton() returns undef on failure, so test definedness before length.
    my $packed = inet_aton($link);
    unless ( defined $packed and length($packed) == 4 ) {
        print "$link didn't resolve properly, bailing out\n" if $settings{'debug'};
        return 0;
    }

    my $ip_address = inet_ntoa($packed);
    print "Resolved $link -> $ip_address\n" if $settings{'debug'};
    return $ip_address;
}

# POE handler for irc_disconnected: reconnect to whichever configured server
# dropped the connection. Both servers use the 'ircport' setting. Events for
# any other server name are ignored.
sub irc_disconnected {
    my ( $kernel, $sender, $server_name ) = @_[ KERNEL, SENDER, ARG0 ];

    foreach my $known ( $settings{'ircserver'}, $settings{'rcserver'} ) {
        next unless $server_name eq $known;
        $kernel->post( $sender => 'connect' => { server => $known, port => $settings{'ircport'} } );
        last;    # first match only, as with an if/elsif chain
    }
    return;
}