use MediaWiki::API; # Gently pruned from the standard exclusion code to hardcode $user and $opt sub allowBots { my($text) = @_; my $user = "Joe's Null Bot"; return 0 if $text =~ /{{[nN]obots}}/; return 1 if $text =~ /{{[bB]ots}}/; if($text =~ /{{[bB]ots\s*\|\s*allow\s*=\s*(.*?)\s*}}/s){ return 1 if $1 eq 'all'; return 0 if $1 eq 'none'; my @bots = split(/\s*,\s*/, $1); return (grep $_ eq $user, @bots)?1:0; } if($text =~ /{{[bB]ots\s*\|\s*deny\s*=\s*(.*?)\s*}}/s){ return 0 if $1 eq 'all'; return 1 if $1 eq 'none'; my @bots = split(/\s*,\s*/, $1); return (grep $_ eq $user, @bots)?0:1; } return 1; } # Have the bot check in to see if it's run past it's "expiration date", typically of 86400 seconds # (that is, one day). Mostly here to avoid ten copies of the bot running if nothing can run for # ten days. $epoch = time(); $listcount =0; $purgecount = 0; sub check_expirations() { my $secs = time() - $epoch; if ($secs > 86400) { die "Bot expired of old age.\n"; } if ($purgecount > 250) { die "This category is looking disturbingly large. Quitting.\n"; } } # Within a single MediaWiki call, we ask the API to make up to 5 attempts, 10 s apart, until # the worst-case server lag is better than 5s. my $mw = MediaWiki::API->new(); $mw->{config}->{api_url} = 'http://en.wikipedia.org/w/api.php'; # Delay/retry parameters $mw->{config}->{max_lag} = 5; # Tell MediaWiki to put us off it there's a 5s+ db lag out there $mw->{config}->{max_lag_delay} = 10; # ..and to wait 10s between retries $mw->{config}->{max_lag_retries} = 4; # ..and to only make 4 retries before dropping back to our code # Our own delay parameters $standardelay = 15; # Wait 15s or more between purge calls.... $longdelay = 900; # ...if the API puts us off several times in a row, take a 15-minute break my $articles = null; # login while (1) { if ($mw->login( { lgname => "Joe's Null Bot", lgpassword => 'REDACTED' } )) { last; } check_expirations(); if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) { sleep $longdelay; } else { die $mw->{error}->{code} . ': ' . $mw->{error}->{details}; } } # Get list of articles while (1) { check_expirations(); $articles = $mw->list ( { action => 'query', list => 'categorymembers', cmtitle => 'Category:BLP articles proposed for deletion by days left', cmlimit => 'max'} ); if ($articles) { last; } if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) { sleep $longdelay; } else { die $mw->{error}->{code} . ': ' . $mw->{error}->{details}; } } # scan through the articles... foreach (@{$articles}) { my $thistitle = $_->{title}; $listcount++; print "T: " . $thistitle . "\n"; while (1) { check_expirations(); my $pagehash = $mw->get_page( { title => $thistitle } ); if ($pagehash) { last; } if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) { sleep $longdelay; } else { die $mw->{error}->{code} . ': ' . $mw->{error}->{details}; } } sleep $standardelay; # There's no hurry! if (allowBots($pagehash->{'*'})) { $purgecount++; while (1) { check_expirations(); # …and purge each one my $apires = $mw->api( { action => 'purge', titles => $thistitle, forcelinkupdate => 1} ); if ($apires) { last; } if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) { sleep $longdelay; } else { die $mw->{error}->{code} . ': ' . $mw->{error}->{details}; } } } else { print "….DENIED\n"; } } print $purgecount . " from a total list of " . $listcount . " articles in " . (time()-$epoch) . "seconds.\n";