User:Bot1058/unsynced-dab-redirects.php

<?php
/** unsynced-dab-redirects.php - Sync unsynchronized disambiguation talk pages and redirects
 *                             - Clear [[Category:Long monitored short pages]]
 *
 *  (c) 2017-2023 Bill - http://en.wikipedia.org/wiki/User:Wbm1058
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *   
 *  Developers (add yourself here if you worked on the code):
 *    WBM - [[User:Wbm1058]] - November 2017–January 2018 - initial release
 *                           - December 2018 - Clear [[Category:Long monitored short pages]]
 **/
// Show runtime problems on the console, reporting everything except notices.
ini_set('display_errors', 1);
error_reporting(E_ALL & ~E_NOTICE);

// Botclasses.php was written by User:Chris_G and is available under the GNU General Public License
require_once 'botclasses.php';
// NOTE(review): presumably defines the $user and $pass passed to login() further down - confirm.
include 'logininfo.php';

// Version string printed in the start-up banner.
const bot_version = "3.72";
// Pipe-separated alternatives spliced into a regular expression by Task 3, which
// looks for any of these names wrapped in {{ }} in the subject page's wikitext.
const anthroponymy = "first name|forename|given name|nickname|surname|surnames|surname page";

/*
 * Known spellings of the "long comment" block. Task 5 removes each of them from
 * over-long pages with an exact str_replace(), so every literal below must stay
 * byte-identical (including the padding spaces and embedded newlines).
 */

// Task 5 also substitutes this variant for long_comment2 and long_comment5 when
// removing the comment outright would leave the page too short.
const long_comment = "{{Short pages monitor}}<!-- This long comment was added to the page to prevent it from being listed on Special:Shortpages. " .
	"It and the accompanying monitoring template were generated via Template:Long comment. " .
	"Please do not remove the monitor template without removing the comment as well.-->";

// Same as long_comment except for the last sentence ("monitoring template ... this comment").
const long_comment1 = "{{Short pages monitor}}<!-- This long comment was added to the page to prevent it from being listed on Special:Shortpages. " .
	"It and the accompanying monitoring template were generated via Template:Long comment. " .
	"Please do not remove the monitor template without removing the comment as well.-->" === "" ? "" : "{{Short pages monitor}}<!-- This long comment was added to the page to prevent it from being listed on Special:Shortpages. " .
	"It and the accompanying monitoring template were generated via Template:Long comment. " .
	"Please do not remove the monitoring template without removing this comment as well.-->";

// "prevent it being listed" / "Template:Longcomment" wording, ending in a space,
// a newline and a long run of padding spaces before the closing "-->".
const long_comment2 = "{{Short pages monitor}}<!-- This long comment was added to the page to prevent it being listed on Special:Shortpages. " .
	"It and the accompanying monitoring template were generated via Template:Longcomment. " .
	"Please do not remove the monitor template without removing the comment as well. \n" .
	"                                                                                                                                               -->";
// Same wording as long_comment2 but closed immediately, with no padding.
const long_comment3 = "{{Short pages monitor}}<!-- This long comment was added to the page to prevent it being listed on Special:Shortpages. " .
	"It and the accompanying monitoring template were generated via Template:Longcomment. " .
	"Please do not remove the monitor template without removing the comment as well.-->";

// Same wording as long_comment3 with a single space before the closing "-->".
const long_comment4 = "{{Short pages monitor}}<!-- This long comment was added to the page to prevent it being listed on Special:Shortpages. " .
	"It and the accompanying monitoring template were generated via Template:Longcomment. " .
	"Please do not remove the monitor template without removing the comment as well. -->";

// Like long_comment2, but with no space before the newline that precedes the padding.
const long_comment5 = "{{Short pages monitor}}<!-- This long comment was added to the page to prevent it being listed on Special:Shortpages. " .
	"It and the accompanying monitoring template were generated via Template:Longcomment. " .
	"Please do not remove the monitor template without removing the comment as well.\n" .
	"                                                                                                                                               -->";

// Start-up banner: wall-clock time, interpreter version and bot version.
printf("Current time: %s\n", date("Y-m-d H:i:s"));
printf("PHP version: %s\n", PHP_VERSION);
#phpinfo();
printf("Bot version: %s\n", bot_version);

// Create the API client, identify the bot in the user agent, and log in with the
// credentials held in $user / $pass.
print "Logging in...\n";
$objwiki = new wikipedia();
$objwiki->http->useragent = '[[User:Bot1058]] php wikibot classes';
$objwiki->login($user, $pass);
print "...done.\n";

/** TASK 3 **/
/*
 * For every member of "Category:Unsynchronized disambiguation talk pages", fetch
 * the page's talk page ("Talk:" + title). When that talk page is a redirect, the
 * redirect and everything after it is replaced by a WikiProject banner:
 * {{WikiProject Anthroponymy}} when the subject page carries one of the
 * anthroponymy templates, {{WikiProject Disambiguation}} otherwise.
 */
$dabs = $objwiki->categorymembers("Category:Unsynchronized disambiguation talk pages");
print_r($dabs);

foreach ($dabs as $a => $dab) {
	$talkpage = "Talk:" . $dab;
	echo "\n__________\n" . $a . " Retrieving " . $talkpage . " contents...\n";
	$contents = $objwiki->getpage($talkpage);
	echo "\nOLD: $contents\n";
	$subjectcontents = $objwiki->getpage($dab);
	#echo "\n$subjectcontents\n";

	if (preg_match("/^\n*\#REDIRECT(\s*|:)\[{2}.*\]{2}/i", $contents)) {
		if (preg_match("/\{{2}(" . anthroponymy . ")\}{2}/iu", $subjectcontents)) {
			$banner = "{{WikiProject Anthroponymy}}";
			$kind = "anthroponymy";
		}
		else {
			$banner = "{{WikiProject Disambiguation}}";
			$kind = "disambiguation";
		}

		// The detection above is case-insensitive, so the replacement must be too;
		// otherwise "#redirect [[X]]" is detected but never replaced and the page
		// is saved unchanged under a misleading summary. ".*" with the "s" flag
		// replaces "(.|\n)*", which matches the same text but can exhaust PCRE's
		// backtracking/recursion limits on large pages.
		$updated = preg_replace("/#REDIRECT.*/is", $banner, $contents);

		if ($updated === null) {
			// preg_replace() returns null on a PCRE error; saving that would blank the page.
			echo "\n*** preg_replace failed (PCRE error " . preg_last_error() . "), page not edited ***\n";
		}
		else {
			echo "\nNEW: $updated\n";
			// NOTE(review): the trailing false,true are presumably the minor/bot flags of
			// botclasses' edit() - confirm against botclasses.php.
			$objwiki->edit($talkpage, $updated,
			    "[[User:Bot1058|Task 3]]: Sync unsynchronized " . $kind . " talk page: replaced #REDIRECT with " . $banner,
			    false, true);
		}
	}
	else {
		echo "\n*** Not a redirect ***\n";
	}

	// Purge the subject page after every iteration, edited or not, then pause.
	$objwiki->purgeCache($dab);
	sleep(4);
}

echo "\nTask 3 done.\n\n";

/** TASK 4 **/
/*
 * "Category:Unsynchronized disambiguation talk page redirects": each member is a
 * page that redirects somewhere while its existing talk page is out of step.
 * If the redirect target has a talk page, the member's talk page is overwritten
 * with a redirect to it; otherwise it is overwritten with
 * {{Talk page of a redirect}} plus the disambiguation banner.
 */
$page_exists = 0;   // talk pages rewritten as a redirect to the target's talk page
$no_talkpage = 0;   // talk pages rewritten as {{Talk page of a redirect}}
$not_redirect = 0;  // members that are not (usable) redirects

/*
 * Returns the title named by the first [[...]] link in $redirectText, without
 * any "#section" or "|label" suffix and without surrounding whitespace, or ""
 * when no usable title is present.
 * The match is non-greedy on purpose: a greedy ".+" runs to the LAST "]]" on the
 * line, so "#REDIRECT [[Foo]] [[Category:X]]" used to yield "Foo]] [[Category:X".
 */
$firstLinkTarget = function ($redirectText) {
	if (!preg_match('/\[\[(.*?)\]\]/', $redirectText, $link)) {
		return "";
	}
	return trim(substr($link[1], 0, strcspn($link[1], "#|")));
};

$dabs = $objwiki->categorymembers("Category:Unsynchronized disambiguation talk page redirects");

print_r($dabs);

foreach ($dabs as $a => $dab) {
	echo "\n__________\n" . $a . " Retrieving $dab contents...\n";
	$contents = $objwiki->getpage($dab);
	$talkcontents = $objwiki->getpage("Talk:" . $dab);
	echo "\n$contents\n";
	echo "\n$talkcontents\n";

	// Loose comparison kept deliberately: it also catches null/false results.
	if ($talkcontents == "") {
		echo "\n? Talk page is NULL or deleted!\n";
		$objwiki->purgeCache($dab);
		continue;
	}

	if (!preg_match("/^\n*\#REDIRECT(\s*|:)\[{2}.*\]{2}/i", $contents, $redirect)) {
		$not_redirect += 1;
		echo "\n? Not a redirect!";
		continue;
	}

	$target = $firstLinkTarget($redirect[0]);
	if ($target === "") {
		// e.g. "#REDIRECT [[]]" or a section-only link: nothing to sync to.
		$not_redirect += 1;
		echo "\n? Redirect has no usable target!";
		continue;
	}

	echo "Target: " . $target . "\n";
	// NOTE(review): assumes the target is a main-namespace title, so its talk page is "Talk:" + title.
	$targettalk = "Talk:" . $target;
	$redrtalkcontents = $objwiki->getpage($targettalk);
	#echo "\n$redrtalkcontents\n";

	if ($redrtalkcontents != "") {
		$page_exists += 1;
		$newredr = "#REDIRECT [[" . $targettalk . "]]\n";
		$objwiki->edit("Talk:" . $dab, $newredr,
			"[[User:Bot1058|Task 4]]: Syncing unsynchronized disambiguation talk page redirect", false, true);
	}
	else {
		$no_talkpage += 1;
		echo "\nNo talk page: " . $targettalk . "\n";
		$objwiki->edit("Talk:" . $dab, "{{Talk page of a redirect}}\n{{WikiProject Disambiguation}}",
			"[[User:Bot1058|Task 4]]: Fixing unsynchronized disambiguation {{Talk page of a redirect}}", false, true);
	}

	$objwiki->purgeCache($dab);
	sleep(4);
}

echo "\nPages synced: " . $page_exists;
echo "\nNo talk page: " . $no_talkpage;
echo "\nNot redirect: " . $not_redirect;
echo "\nDone: Unsynchronized disambiguation talk page redirects\n\n";

/*
 * "Category:Unsynchronized ambiguous term talk page redirects": same procedure
 * as for the disambiguation talk page redirects, except that a member whose
 * target has no talk page gets a bare {{Talk page of a redirect}}.
 */
$page_exists = 0;   // talk pages rewritten as a redirect to the target's talk page
$no_talkpage = 0;   // talk pages rewritten as {{Talk page of a redirect}}
$not_redirect = 0;  // members that are not (usable) redirects

/*
 * Returns the title named by the first [[...]] link in $redirectText, without
 * any "#section" or "|label" suffix and without surrounding whitespace, or ""
 * when no usable title is present.
 * The match is non-greedy on purpose: a greedy ".+" runs to the LAST "]]" on the
 * line, so "#REDIRECT [[Foo]] [[Category:X]]" used to yield "Foo]] [[Category:X".
 */
$firstLinkTarget = function ($redirectText) {
	if (!preg_match('/\[\[(.*?)\]\]/', $redirectText, $link)) {
		return "";
	}
	return trim(substr($link[1], 0, strcspn($link[1], "#|")));
};

$dabs = $objwiki->categorymembers("Category:Unsynchronized ambiguous term talk page redirects");

print_r($dabs);

foreach ($dabs as $a => $dab) {
	echo "\n__________\n" . $a . " Retrieving $dab contents...\n";
	$contents = $objwiki->getpage($dab);
	$talkcontents = $objwiki->getpage("Talk:" . $dab);
	echo "\n$contents\n";
	echo "\n$talkcontents\n";

	// Loose comparison kept deliberately: it also catches null/false results.
	if ($talkcontents == "") {
		echo "\n? Talk page is NULL or deleted!\n";
		$objwiki->purgeCache($dab);
		continue;
	}

	if (!preg_match("/^\n*\#REDIRECT(\s*|:)\[{2}.*\]{2}/i", $contents, $redirect)) {
		$not_redirect += 1;
		echo "\n? Not a redirect!";
		continue;
	}

	$target = $firstLinkTarget($redirect[0]);
	if ($target === "") {
		// e.g. "#REDIRECT [[]]" or a section-only link: nothing to sync to.
		$not_redirect += 1;
		echo "\n? Redirect has no usable target!";
		continue;
	}

	echo "Target: " . $target . "\n";
	// NOTE(review): assumes the target is a main-namespace title, so its talk page is "Talk:" + title.
	$targettalk = "Talk:" . $target;
	$redrtalkcontents = $objwiki->getpage($targettalk);
	#echo "\n$redrtalkcontents\n";

	if ($redrtalkcontents != "") {
		$page_exists += 1;
		$newredr = "#REDIRECT [[" . $targettalk . "]]\n";
		$objwiki->edit("Talk:" . $dab, $newredr,
			"[[User:Bot1058|Task 4]]: Syncing unsynchronized ambiguous term talk page redirect", false, true);
	}
	else {
		$no_talkpage += 1;
		echo "\nNo talk page: " . $targettalk . "\n";
		$objwiki->edit("Talk:" . $dab, "{{Talk page of a redirect}}",
			"[[User:Bot1058|Task 4]]: Fixing unsynchronized ambiguous term {{Talk page of a redirect}}", false, true);
	}

	$objwiki->purgeCache($dab);
	sleep(4);
}

echo "\nPages synced: " . $page_exists;
echo "\nNo talk page: " . $no_talkpage;
echo "\nNot redirect: " . $not_redirect;
echo "\nDone: Unsynchronized ambiguous term talk page redirects\n\n";

/*
 * "Category:Unsynchronized talk page redirects": each member is a talk page that
 * redirects somewhere. Its subject page is expected to be a redirect as well;
 * when the two targets disagree, the talk redirect is pointed at the talk page
 * of the subject's target (or replaced by {{Talk page of a redirect}} when that
 * talk page does not exist). Cases where the destination talk page is itself a
 * redirect are only reported, never edited.
 */
$synced = 0;           // talk and subject redirects already agree
$not_synced = 0;       // talk and subject redirects disagree
$crossed = 0;          // destination talk page redirects back to the current talk target
$complex_crossed = 0;  // destination talk page redirects somewhere else again
$page_exists = 0;      // destination talk page exists and is not a redirect
$no_talkpage = 0;      // destination talk page missing
$not_redirect = 0;     // member is not a (usable) redirect
$unexpected = 0;       // subject page is not a (usable) redirect

// A page whose first non-blank line is "#REDIRECT [[...]]" (any letter case).
$redirectPattern = "/^\n*\#REDIRECT(\s*|:)\[{2}.*\]{2}/i";
// Maps "<Namespace> talk:Title" to "<Namespace>:Title"; plain "Talk:Title" becomes ":Title".
$talkPrefixPattern = "/^(User|Wikipedia|File|MediaWiki|Template|Help|Category|Portal|Book|Draft|)( |)talk:/i";

/*
 * Returns the title named by the first [[...]] link in $redirectText, without
 * any "#section" or "|label" suffix and without surrounding whitespace, or ""
 * when no usable title is present.
 * The match is non-greedy on purpose: a greedy ".+" runs to the LAST "]]" on the
 * line, so "#REDIRECT [[Foo]] [[Category:X]]" used to yield "Foo]] [[Category:X".
 */
$firstLinkTarget = function ($redirectText) {
	if (!preg_match('/\[\[(.*?)\]\]/', $redirectText, $link)) {
		return "";
	}
	return trim(substr($link[1], 0, strcspn($link[1], "#|")));
};

/*
 * Returns the talk page title belonging to a subject page title.
 * "Template:X" becomes "Template talk:X" (same namespace list as
 * $talkPrefixPattern); anything else is treated as a main-namespace title and
 * becomes "Talk:X". Blindly prefixing "Talk:" produced titles such as
 * "Talk:Template:X", which do not exist, so the bot wrongly concluded that the
 * destination had no talk page.
 */
$talkTitleFor = function ($subjectTitle) {
	$subjectTitle = ltrim($subjectTitle, ":"); // "[[:Category:X]]" style links
	if (preg_match('/^(User|Wikipedia|File|MediaWiki|Template|Help|Category|Portal|Book|Draft):(.*)$/is', $subjectTitle, $parts)) {
		return $parts[1] . " talk:" . $parts[2];
	}
	return "Talk:" . $subjectTitle;
};

$dabs = $objwiki->categorymembers("Category:Unsynchronized talk page redirects");

print_r($dabs);

foreach ($dabs as $a => $talkpage) {
	echo "\n__________\n" . $a . " Retrieving $talkpage contents...\n";
	$contents = $objwiki->getpage($talkpage);
	$subjectpagename = preg_replace($talkPrefixPattern, '$1:', $talkpage);
	$subjectcontents = $objwiki->getpage($subjectpagename);
	echo "\n$contents\n";
	echo "\n$subjectcontents\n";

	if (!preg_match($redirectPattern, $contents, $redirect)) {
		$not_redirect += 1;
		echo "\n? Not a redirect!\n";
		continue;
	}

	$target = $firstLinkTarget($redirect[0]);
	if ($target === "") {
		$not_redirect += 1;
		echo "\n? Redirect has no usable target!\n";
		continue;
	}

	$basepagename = preg_replace($talkPrefixPattern, '$1:', $target);
	echo "Target: " . $target . " (" . $basepagename . ")\n";

	if (!preg_match($redirectPattern, $subjectcontents, $redirect)) {
		$unexpected += 1;
		echo "\n? Not expected\n";
		continue;
	}

	$subjecttarget = $firstLinkTarget($redirect[0]);
	if ($subjecttarget === "") {
		$unexpected += 1;
		echo "\n? Not expected\n";
		continue;
	}

	echo "Subject target: " . $subjecttarget;

	// Strict comparison: with "==" PHP compares numeric-looking titles as numbers,
	// so e.g. "007" and "7" would count as the same page.
	// The ":"-prefixed form covers main-namespace titles, whose $basepagename starts with ":".
	if ($subjecttarget === $basepagename or ":" . $subjecttarget === $basepagename) {
		$synced += 1;
		echo " already in sync\n";
		$objwiki->purgeCache($talkpage); // nulledit
		sleep(4);
		continue;
	}

	$not_synced += 1;
	echo " sync needed\n";

	$talkname = $talkTitleFor($subjecttarget);
	$talk_contents = $objwiki->getpage($talkname);

	if (preg_match($redirectPattern, $talk_contents, $redirect)) {
		// The destination talk page is itself a redirect: report only, no edit.
		$xtarget = $firstLinkTarget($redirect[0]);
		echo "Cross target: " . $xtarget . "\n";
		if ($xtarget === $target) {
			$crossed += 1;
			echo "*** Pages crossed ***\n";
		}
		else {
			$complex_crossed += 1;
			echo "*** Pages crossed *** " . $xtarget . " / " . $target . "\n";
		}
	}
	elseif ($talk_contents != "") {
		$page_exists += 1;
		// Swap the old target for the new one inside the existing wikitext so that any
		// trailing templates on the redirect are kept; exactly one occurrence is required.
		$newredr = str_replace($target, $talkname, $contents, $count);
		if ($count != 1) {
			echo "\nERROR: str_replace [[" . $target . "]] with [[" . $talkname . "]] failed\nContents:\n" . $contents . "\n";
		}
		else {
			echo "\nChange " . $talkpage . " contents to: " . $newredr . "\n";
			$objwiki->edit($talkpage, $newredr,
				"[[User:Bot1058|Task 4]]: Syncing unsynchronized talk page redirect – double redirect bypass from [[" .
				$target . "]] to [[" . $talkname . "]]", false, true);
		}
	}
	else {
		$no_talkpage += 1;
		echo "\nNo talk page: " . $talkname . "\n";
		$objwiki->edit($talkpage, "{{Talk page of a redirect}}",
			"[[User:Bot1058|Task 4]]: Fixing unsynchronized {{Talk page of a redirect}}", false, true);
	}

	sleep(4);
}

echo "\nAlready sync: " . $synced;
echo "\nNeeds synced: " . $not_synced;
echo "\nPage crossed: " . $crossed;
echo "\nComplexcross: " . $complex_crossed;
echo "\nPages synced: " . $page_exists;
echo "\nNo talk page: " . $no_talkpage;
echo "\nNot redirect: " . $not_redirect;
echo "\nNot expected: " . $unexpected;

echo "\nDone: Unsynchronized talk page redirects\n\nTask 4 done.\n\n";

/** TASK 5 **/
/*
 * Clear "Category:Long monitored short pages". Pages no longer over the allowed
 * length only need a purge. Pages over it have every known long-comment variant
 * removed; if that would leave the page too short, the padded variants are
 * instead swapped for the shorter standard comment.
 */
$lc_cleared = 0;   // pages edited
$not_cleared = 0;  // over-long pages that could not be fixed
$null_edits = 0;   // pages that only needed a purge

// The limit is stored as page text. Parse it to an integer once: comparing
// strlen() against a raw string gives a meaningless (and PHP-version dependent)
// result when the fetch fails or the page holds anything but a bare number.
$maximum_length = (int) trim((string) $objwiki->getpage("Template:Short pages monitor/maximum length"));
echo "\nMaximum allowed monitored short page length is " . $maximum_length . " bytes\n";

if ($maximum_length <= 0) {
	// Without a trustworthy limit every page would look "too long"; do nothing instead.
	echo "\nCould not read a positive maximum length, skipping Task 5\n";
	$lmsp = array();
}
else {
	$lmsp = $objwiki->categorymembers("Category:Long monitored short pages");
}
print_r($lmsp);

// NOTE(review): 200 bytes is the hard-coded floor below which a page is not saved;
// presumably tied to the Special:Shortpages threshold - confirm.
$minimum_length = 200;
$all_long_comments = array(long_comment, long_comment1, long_comment2, long_comment3, long_comment4, long_comment5);
$padded_long_comments = array(long_comment2, long_comment5);
$summary = "[[User:Bot1058|Task 5]]: remove this page from [[Category:Long monitored short pages]]";

foreach ($lmsp as $a => $title) {
	// Any title with a colon after its first character is skipped
	// (presumably to leave non-article namespaces alone - confirm).
	if (strpos($title, ":")) {
		echo "\nSkipping " . $title;
		continue;
	}

	echo "\n__________\n" . $a . " Retrieving $title contents...\n";
	$contents = $objwiki->getpage($title);
	#echo "\n$contents\n";
	echo strlen($contents) . " bytes ";

	if (strlen($contents) <= $maximum_length) {
		echo "\nNull edit to clear the category\n";
		$objwiki->purgeCache($title);
		$null_edits += 1;
		continue;
	}

	$new_contents = str_replace($all_long_comments, "", $contents);
	echo "new length = " . strlen($new_contents) . " bytes ";

	if ($new_contents === $contents) {
		// No known variant found: dump the page so the log shows what is there.
		echo "\n$new_contents\n";
		$not_cleared += 1;
		continue;
	}

	if (strlen($new_contents) <= $minimum_length) {
		// Removing the comment would make the page too short; fall back to
		// replacing the padded variants with the shorter standard comment.
		$new_contents = str_replace($padded_long_comments, long_comment, $contents);
		echo "new length = " . strlen($new_contents) . " bytes ";

		if ($new_contents !== $contents) {
			echo "\nReplace with shorter, standard long comment\n";
		}
		if ($new_contents === $contents or strlen($new_contents) <= $minimum_length) {
			echo "\nNew length is too short\n";
			$not_cleared += 1;
			continue;
		}
	}

	$objwiki->edit($title, $new_contents, $summary, true, true);
	$lc_cleared += 1;
	sleep(4);
}

echo "\nLong comments cleared: " . $lc_cleared;
echo "\nNot cleared: " . $not_cleared;
echo "\nNull edits: " . $null_edits;
echo "\nTask 5 done.\n\n";

/** Purge every non-category member of Category:Pages with incorrectly transcluded templates **/
print "Null edits to clear Category:Pages with incorrectly transcluded templates\n";
$dabs = $objwiki->categorymembers("Category:Pages with incorrectly transcluded templates");

#print_r($dabs);

for ($i = 0, $total = count($dabs); $i < $total; ++$i) {
	$title = $dabs[$i];

	// Sub-categories are left alone (no purge, no pause).
	if (substr($title, 0, 9) === "Category:") {
		continue;
	}

	print "\n" . $title . "\n";
	// The fetched text is not used; the request itself is kept as in the original flow.
	$contents = $objwiki->getpage($title);
	#echo "\n$contents\n";
	$objwiki->purgeCache($title);
	sleep(2);
}

/** Purge every page returned by getTalkTransclusions() for Template:Error **/
print "\n\nNull edits to clear pages that link to Template:Error in Talk: namespace\n";
$dabs = $objwiki->getTalkTransclusions("Template:Error");

#print_r($dabs);

for ($i = 0, $total = count($dabs); $i < $total; ++$i) {
	$title = $dabs[$i];

	print "\n" . $title . "\n";
	// The fetched text is not used; the request itself is kept as in the original flow.
	$contents = $objwiki->getpage($title);
	#echo "\n$contents\n";
	$objwiki->purgeCache($title);
	sleep(2);
}

/** Purge every non-category member of Category:Articles with talk page redirects **/
print "\n\nNull edits to clear Category:Articles with talk page redirects\n";
$dabs = $objwiki->categorymembers("Category:Articles with talk page redirects");

#print_r($dabs);

for ($i = 0, $total = count($dabs); $i < $total; ++$i) {
	$title = $dabs[$i];

	// Sub-categories are left alone (no purge, no pause).
	if (substr($title, 0, 9) === "Category:") {
		continue;
	}

	print "\n" . $title . "\n";
	// The fetched text is not used; the request itself is kept as in the original flow.
	$contents = $objwiki->getpage($title);
	#echo "\n$contents\n";
	$objwiki->purgeCache($title);
	sleep(2);
}

// Closing banner with the finish time.
print "\nMission accomplished.\n";
printf("Current time: %s\n\n", date("Y-m-d H:i:s"));

?>