use strict;
use warnings;
use Perlwikipedia;
use URI::Escape;

# polbot alternate-name pass.
#
# Reads two wiki pages:
#   User:Polbot/altnames/needingdab - entries already waiting for manual
#                                     disambiguation, one "* [[Name]]..." per line
#   User:Polbot/altnames/inprocess  - lines "* [[Main]]|alt1|alt2|...*jobtitle"
#
# For every alternate name of every in-process entry the script either
#   - drops it (the page at that title already leads to the main article),
#   - creates a redirect to the main article (no page exists at that title), or
#   - keeps it, annotated with what was found, for manual disambiguation.
# Entries that still have alternate names left are merged into the needingdab
# list (existing needingdab entries win) and that page is rewritten.

# Minimum number of seconds between two redirect-creating edits.
my $EDIT_INTERVAL_SECS = 9;

# Earliest time (epoch seconds) at which the next redirect may be written.
my $soonest_next_op = time;

# Unbuffered STDOUT so progress messages appear before we sleep.
$| = 1;

my $pw = Perlwikipedia->new();
#$pw->{debug} = 1;
$pw->{mech}->agent('Bot/WP/EN/Quadell/polbot');

print "\nStarting polbot, logging in.\n";
# NOTE(review): the credentials below look like placeholders; presumably the
# real ones are substituted before running - confirm they are never committed.
my $login_status = $pw->login('bot name', 'bot password');
die "I can't log in." unless (defined $login_status and $login_status eq 0);

print "Reading needingdab file\n";
my %needingdab_names = ();
my $needingdab_list = _fetch_text('User:Polbot/altnames/needingdab', $pw);
# This page is rewritten from %needingdab_names at the end, so carrying on
# after a failed read would replace it with a truncated list.
die "Can't read the needingdab list; refusing to overwrite it.\n"
    unless defined $needingdab_list;
foreach my $line (split(/\n/, $needingdab_list)) {
    if ($line =~ m/^\* \[\[([^]]*)\]\].*$/) {
        $needingdab_names{$1} = $line;
    }
}

print "Reading in-process file\n";
my %inprocess_names = ();
my $inprocess_list = _fetch_text('User:Polbot/altnames/inprocess', $pw);
die "Can't read the in-process list.\n" unless defined $inprocess_list;
foreach my $line (split(/\n/, $inprocess_list)) {
    next unless $line =~ m/^\* \[\[([^]]*)\]\]\|([^*]*)\*(.*)$/;
    my ($main_name, $altnames, $jobtitle) = ($1, $2, $3);

    my $mention_re = _build_mention_regex($main_name);

    print "\nReading $main_name ($jobtitle)\n";

    my @kept;
    foreach my $entry (split(/\|/, $altnames)) {
        next unless $entry =~ /\S/;    # ignore blank names ("a||b")
        my $descr = _process_altname($entry, $main_name, $mention_re, $pw);
        push @kept, $descr if defined $descr;
    }

    # Only entries that still have unresolved alternate names are carried on.
    if (@kept) {
        $inprocess_names{$main_name} =
            "* [[$main_name]]|" . join('|', @kept) . "*$jobtitle";
    }
}

print "Merging old and new needingdab lists\n";
foreach my $inprocess_key (keys %inprocess_names) {
    # An entry already present in needingdab is left untouched.
    $needingdab_names{$inprocess_key} = $inprocess_names{$inprocess_key}
        unless ($needingdab_names{$inprocess_key});
}

print "Writing needingdab list\n";
my $wiki_code = join '',
    map { $needingdab_names{$_} . "\n" } sort keys %needingdab_names;
$pw->edit('User:Polbot/altnames/needingdab', $wiki_code,
    "Auto-updating based on input at inprocess list");

print "Finis!";

# wikiread($article, $connection)
#
# Returns the wikitext of $article fetched through $connection (an object
# with a get_text method). A result of "0" is retried up to five times with
# a growing pause; if every attempt yields "0", or no text comes back, the
# empty string is returned.
sub wikiread {
    my $article    = shift;
    my $connection = shift;

    my $wiki = _fetch_text($article, $connection);
    return defined $wiki ? $wiki : '';
}

# _fetch_text($article, $connection)
#
# Same retry policy as wikiread, but reports exhaustion of the retries as
# undef so callers can tell "could not read" apart from "page is empty".
# NOTE(review): assumes get_text returns "0" only for a failed fetch, as the
# retry loop implies - confirm against the installed Perlwikipedia version.
sub _fetch_text {
    my ($article, $connection) = @_;

    my $wiki    = $connection->get_text($article);
    my $attempt = 0;
    while (defined $wiki and $wiki eq "0") {
        $attempt++;
        return if $attempt > 5;
        sleep $attempt;    # back off a little longer each time
        print " retry. . .\n";
        $wiki = $connection->get_text($article);
    }
    return defined $wiki ? $wiki : '';
}

# _build_mention_regex($main_name)
#
# Returns a compiled pattern matching wikitext that mentions $main_name either
# as a link target ("[[Name]]" / "[[Name|") or as the last template argument
# ("|Name}}"). Spaces in the name also match underscores; every other
# character is matched literally.
sub _build_mention_regex {
    my ($main_name) = @_;

    # quotemeta each space-separated piece so titles containing regex
    # metacharacters (+ ? * [ $ ...) cannot alter or break the pattern.
    my $name_re = join '[ _]', map { quotemeta } split(/ /, $main_name, -1);

    return qr/(?:\[\[\s*$name_re\s*(?:\||\]\])|\|\s*$name_re\s*\}\})/;
}

# _wait_for_edit_slot()
#
# Sleeps until $soonest_next_op, then reserves the next slot
# $EDIT_INTERVAL_SECS seconds from now.
sub _wait_for_edit_slot {
    my $delay = $soonest_next_op - time;
    $delay = 0 if $delay < 0;
    print "Waiting $delay secs... ";

    # sleep can return early, so re-check the clock.
    while ((my $left = $soonest_next_op - time) > 0) {
        sleep $left;
    }
    $soonest_next_op = time + $EDIT_INTERVAL_SECS;
    return;
}

# _process_altname($entry, $main_name, $mention_re, $connection)
#
# Decides what to do with one alternate name. Returns nothing when the name
# needs no further attention (it already leads to the main article, or a
# redirect was just created), otherwise the annotated description to keep.
#
# The rules, in order:
#   1. If the page mentions $main_name, drop the name.
#   2. If the page is a redirect, follow it and apply rule 1 again.
#   3. If the page links to an "... (disambiguation)" page, follow it and
#      apply rule 1 again.
#   4. Otherwise keep the name; if the last page read is a dab page, note it.
# Known gaps:
#   A. the page points at a dab page that mentions $main_name but is not
#      titled "x (disambiguation)";
#   B. the page is a dab page linking to a redirect to $main_name.
sub _process_altname {
    my ($entry, $main_name, $mention_re, $connection) = @_;

    my $link_descr = $entry;
    (my $article = $entry) =~ s/\[\[(.*)\]\]/$1/;
    my $final_article_name = $article;

    my $wiki = _fetch_text($article, $connection);
    if (!defined $wiki) {
        # A failed read is not evidence that the page is missing; creating a
        # redirect here could overwrite a real article.
        print "Could not read [[$article]]; leaving it for manual review\n";
        return $link_descr;
    }

    if ($wiki !~ /\S/) {
        # No article exists. Make a rd!
        _wait_for_edit_slot();
        print "rd [[$article]] to [[$main_name]]\n";
        $connection->edit($article, "#Redirect [[$main_name]]",
            "Redirecting to [[$main_name]], auto-generated by [[User:polbot]]");
        return;
    }

    # 1. Does it mention the main article (perhaps even rd-ing there)?
    return if $wiki =~ $mention_re;

    # 2. Is it a rd? If so, follow. Non-greedy so a second [[link]] on the
    #    same line is not swallowed into the target.
    if ($wiki =~ /\#\s*redirect\s*\[\[(.*?)\]\]/i) {
        $final_article_name = $1;
        $link_descr .= " r [[$final_article_name]]";
        $wiki = wikiread($final_article_name, $connection);
        return if $wiki =~ $mention_re;
    }

    # 3. Does it link to a dab page? If so, follow.
    my $dab_page;
    if (   $wiki =~ m/\[\[([^]]* \(disambiguation\))\]\]/
        or $wiki =~ m/\{\{\s*(?:otherpersons2|otherpeople2)\s*\|\s*(.*?)\s*\}\}/)
    {
        $dab_page = $1;
    }
    elsif ($wiki =~ m/\{\{\s*(?:otherpeople|otherpersons|othernames)\s*\}\}/) {
        $dab_page = "$final_article_name (disambiguation)";
    }
    elsif ($wiki =~ m/\{\{\s*(?:otherpeople|otherpersons|othernames)\s*\|\s*(.*?)\s*\}\}/) {
        $dab_page = "$1 (disambiguation)";
    }

    if (defined $dab_page) {
        $final_article_name = $dab_page;
        $link_descr .= " f [[$final_article_name]]";
        $wiki = wikiread($final_article_name, $connection);
        return if $wiki =~ $mention_re;
    }

    # 4. Not dropping it. Flag the entry if the last page read is a dab page.
    if ($wiki =~ m/\{\{(?:dab|disambig|disamb|disambiguation|hndis|namedab|bio-dab|hndisambig)(?:\}\}|\|)/i) {
        $link_descr .= " d";
    }

    return $link_descr;
}