User:DustyBot/dustylib.php

<?php
/* Given a string containing only a template expression surrounded by curly brackets,
 * returns a structure that can be modified and then converted back into a string.
 *
 * The returned array has the keys:
 *   "title" - everything before the first "|", exactly as written
 *   "args"  - named parameters keyed by their trimmed name; positional parameters
 *             are appended with automatic integer keys. Values are not trimmed.
 *   "pmap"  - only present when some parameter name had surrounding whitespace;
 *             maps the trimmed name back to the name as it was written
 *
 * Raises E_USER_ERROR if $text is not a single {{...}} expression, or if it
 * contains nested braces.
 * Status: unstable, parameters and return value may change */
function wp_parse_template($text) {
    /* ereg() no longer exists in PHP 7+. The "s" modifier keeps the POSIX behaviour
     * of "." matching newlines and "\z" anchors at the real end of the string. */
    if (!preg_match('/^\{\{(.*)\}\}\z/s', $text, $regs))
        trigger_error("Incomplete template", E_USER_ERROR);
    $text = $regs[1];
    if (strpos($text, "{") !== false)
        trigger_error("No support for complex templates", E_USER_ERROR);
    $args = explode("|", $text);
    $ret = array();
    $ret["title"] = array_shift($args);
    $rargs = array();
    $pmap = array();
    foreach ($args as $a) {
        /* Split on the first "=" only, so that values which themselves contain
         * "=" (URLs, for example) are kept instead of being discarded */
        $pair = explode("=", $a, 2);
        if (count($pair) == 2) {
            $name = trim($pair[0]);
            $rargs[$name] = $pair[1];
            /* Strict comparison: " 1" and "1" are loosely equal in PHP */
            if ($name !== $pair[0])
                $pmap[$name] = $pair[0];
        }
        else
            $rargs[] = $pair[0];
    }
    $ret["args"] = $rargs;
    if (!empty($pmap))
        $ret["pmap"] = $pmap;
    return $ret;
}

/* Builds a fresh template structure that carries the given title and has no
 * parameters yet.
 * Status: unstable, parameters may change */
function wptmpl_create($title) {
    $tmpl = array();
    $tmpl["title"] = $title;
    $tmpl["args"] = array();
    return $tmpl;
}

/* Sets a named template parameter, adding it if it is not present yet and
 * overwriting the stored value otherwise. $tmpl is modified in place.
 * Status: unstable, parameters may change */
function wptmpl_set_arg(&$tmpl, $param, $val) {
    $tmpl["args"][$param] = $val;
}

/* Removes a named template parameter. The key is kept in the argument array with
 * a null value rather than being deleted; wptmpl_has_arg() and wp_build_template()
 * both treat a null value as "not present". $tmpl is modified in place.
 * Status: unstable, parameters may change */
function wptmpl_unset_arg(&$tmpl, $param) {
    $tmpl["args"][$param] = null;
}

/* Gets the value of a named template parameter, or null when the template has
 * no parameter of that name.
 * Status: unstable, parameters may change */
function wptmpl_get_arg($tmpl, $param) {
    return array_key_exists($param, $tmpl["args"]) ? $tmpl["args"][$param] : null;
}

/* Tells whether the template has a parameter with the given name. A parameter
 * whose stored value is null counts as absent.
 * Status: unstable, parameters may change */
function wptmpl_has_arg($tmpl, $param) {
    return array_key_exists($param, $tmpl["args"]) && isset($tmpl["args"][$param]);
}

/* Changes the template name stored in the structure. $tmpl is modified in place.
 * Status: unstable, parameters may change */
function wptmpl_set_title(&$tmpl, $title) {
    $tmpl["title"] = $title;
}

/* Returns the template name stored in the structure
 * Status: unstable, parameters may change */
function wptmpl_get_title($tmpl) {
    $title = $tmpl["title"];
    return $title;
}

/* Serialises a template structure back into "{{title|...}}" wikitext.
 * Parameters with string keys are written as "name=value", using the spelling
 * recorded in "pmap" when there is one; all other parameters are written as a
 * bare value. Parameters whose value is null are left out.
 * Status: unstable, parameters may change */
function wp_build_template($tmpl) {
    $pieces = array($tmpl["title"]);
    if (array_key_exists("args", $tmpl)) {
        $spelling = null;
        if (array_key_exists("pmap", $tmpl))
            $spelling = $tmpl["pmap"];
        foreach ($tmpl["args"] as $name => $value) {
            if ($value === null)
                continue;
            if (!is_string($name))
                $pieces[] = $value;
            else if (isset($spelling) && array_key_exists($name, $spelling))
                $pieces[] = $spelling[$name]."=".$value;
            else
                $pieces[] = $name."=".$value;
        }
    }
    return "{{".implode("|", $pieces)."}}";
}

/* Finds the first full template with the given name in the text and returns it
 * as a string, from the opening "{{" up to the matching closing braces (nested
 * braces are counted). Returns null if there is no such template or if its
 * braces are never closed.
 *
 * Unless $ignore_case is true, the name must match exactly apart from the case
 * of its first letter.
 * Status: unstable, parameters may change */
function wp_find_template($title, $text, $ignore_case = false) {
    /* eregi() no longer exists in PHP 7+; preg_quote() escapes every regex
     * metacharacter in the title, not just a hand-picked subset */
    $pattern = '/\{\{\s*('.preg_quote($title, '/').')\s*[|}]/i';
    $len = strlen($text);
    $offset = 0;
    while (preg_match($pattern, $text, $m, PREG_OFFSET_CAPTURE, $offset)) {
        $start = $m[0][1];
        if (!$ignore_case && ucfirst($m[1][0]) !== ucfirst($title)) {
            /* Only the case differs: keep looking after this candidate. The
             * old recursive call here dropped the $title argument. */
            $offset = $start + strlen($m[0][0]);
            continue;
        }
        /* Walk forward counting braces until the opening "{{" is balanced */
        $depth = 0;
        for ($i = $start; $i < $len; $i++) {
            if ($text[$i] == "{")
                $depth++;
            else if ($text[$i] == "}" && --$depth <= 0)
                return substr($text, $start, $i - $start + 1);
        }
        return null;
    }
    return null;
}

/* Bot exclusion detector: returns false if the text contains a directive that
 * disallows this bot, true otherwise.
 *
 * $context  - optional context; its "username" entry, when present, is the bot
 *             name looked for in a "deny" list
 * $messages - optional message type (string) or list of message types (array)
 *             checked against an "optout" list
 *
 * Rules applied, in order: any {{Nobots}} template denies; no {{Bots}} template
 * allows; "deny" containing "all" or the bot name denies; "allow" containing
 * "none" denies; "optout" containing "all" or one of $messages denies.
 * Status: unstable, parameters may change */
function wp_page_allows_bot($text, $context = null, $messages = null) {
    $nobots = wp_find_template("Nobots", $text, true);
    if (!empty($nobots))
        return false;

    $bots = wp_find_template("Bots", $text, true);
    if (empty($bots))
        return true;

    $parsed = wp_parse_template($bots);
    $params = $parsed["args"];

    $botname = null;
    if (isset($context) && array_key_exists("username", $context))
        $botname = $context["username"];

    if (array_key_exists("deny", $params)) {
        foreach (explode(",", $params["deny"]) as $entry) {
            $entry = trim($entry);
            if (strtolower($entry) == "all")
                return false;
            if (isset($botname) && $entry == $botname)
                return false;
        }
    }

    if (array_key_exists("allow", $params)) {
        foreach (explode(",", $params["allow"]) as $entry) {
            if (strtolower(trim($entry)) == "none")
                return false;
        }
    }

    if (array_key_exists("optout", $params) && isset($messages)) {
        $single = is_string($messages);
        $several = is_array($messages);
        foreach (explode(",", $params["optout"]) as $entry) {
            $entry = trim($entry);
            $lowered = strtolower($entry);
            if ($lowered == "all")
                return false;
            if ($single && $lowered == strtolower($messages))
                return false;
            if ($several && in_array($entry, $messages))
                return false;
        }
    }

    return true;
}

/* Returns an integer UNIX timestamp for the date the comment was signed, or null
 * if no signature was found. A signature is a "[[User..." link followed, later in
 * the text, by a timestamp of the form "12:34, 5 January 2009 (UTC)"; when there
 * are several timestamps the last one is used. Dates that cannot be parsed or
 * that lie in the future also yield null. */
function wp_date_comment($text) {
    /* eregi() no longer exists in PHP 7+. "s" lets "." span newlines as it did
     * under POSIX matching, and "i" keeps the match case-insensitive. */
    $pattern = '/\[\[user.* ([0-9]+:[0-9]+, [0-9]+ [a-z]+ [0-9]+ \(utc\))/is';
    if (!preg_match($pattern, $text, $regs))
        return null;
    $time = strtotime($regs[1]);
    /* strtotime() returns false on failure; callers are promised null */
    if ($time === false || $time > time())
        return null;
    return $time;
}

/* Formats the given UNIX timestamp, in UTC, as the "YYYY-MM-DDTHH:MM:SSZ" string
 * used when POSTing dates. */
function wp_format_post_date($ts) {
    $format = "Y-m-d\TH:i:s\Z";
    $formatted = gmdate($format, $ts);
    return $formatted;
}

/* Returns an array of the subpages of $title that are transcluded in $page as
 * {{title/subpage}}. The subpage names are the keys, and the values are the name
 * of the "==section==" each one appears under (null for transclusions that come
 * before the first heading). Underscores are treated as spaces in both the page
 * text and the title. */
function wp_list_subpages($title, $page) {
    $subpages = array();
    $section = null;
    /* Built once, outside the loop. ereg() no longer exists in PHP 7+, and
     * preg_quote() escapes every metacharacter in the title. The title gets the
     * same underscore-to-space treatment as the lines it is matched against. */
    $pattern = '/\{\{'.preg_quote(str_replace("_", " ", $title), '/').'\/(.*)\}\}/';
    foreach (explode("\n", $page) as $line) {
        $line = trim($line);
        if (preg_match('/==(.*)==/', $line, $regs)) {
            $section = trim($regs[1]);
            continue;
        }
        $line = str_replace("_", " ", $line);
        if (preg_match($pattern, $line, $regs))
            $subpages[rtrim($regs[1])] = $section;
    }
    return $subpages;
}


/* POSTs the array of data to the wiki API and returns the raw response body
 * (false if the request itself failed).
 *
 * $context may carry:
 *   "api url" - endpoint to use instead of the English Wikipedia default
 *   "cookies" - name => value pairs sent in a Cookie header
 *
 * While the API answers with a "maxlag" error the request is repeated every
 * five seconds.
 * Status: stable */
function wp_post($post, $context = null) {
    /* NOTE(review): default switched from http:// to https://, on the
     * understanding that Wikimedia sites are HTTPS-only - confirm */
    $url = "https://en.wikipedia.org/w/api.php";
    if (isset($context) && array_key_exists("api url", $context))
        $url = $context["api url"];
    /* HTTP header lines are terminated by CRLF */
    $header = "Content-type: application/x-www-form-urlencoded\r\n";
    if (isset($context) && array_key_exists("cookies", $context))
        $header .= "Cookie: ".http_build_query($context["cookies"], "", "; ")."\r\n";
    $http_opts = array(
        "http" => array(
            "method" => "POST",
            "header" => $header,
            "content" => http_build_query($post)
        )
    );
    $sctx = stream_context_create($http_opts);
    while (true) {
        $ret = file_get_contents($url, false, $sctx);
        $lagged = false;
        if (is_string($ret) && strpos($ret, "maxlag") !== false) {
            /* The word "maxlag" alone proves nothing: page content may simply
             * mention it, which used to make this loop spin forever. Decode the
             * reply and look at the actual error code. The response comes from
             * the configured API endpoint; every caller in this file
             * unserializes it as well. */
            $decoded = @unserialize($ret);
            if (is_array($decoded))
                $lagged = isset($decoded["error"]["code"])
                    && $decoded["error"]["code"] == "maxlag";
            else
                /* Not a format=php reply: fall back to the substring heuristic */
                $lagged = true;
        }
        if (!$lagged)
            break;
        sleep(5);
    }
    return $ret;
}

/* Downloads the contents of a single page from the wiki. Returns the page text,
 * or null if the page could not be retrieved. When the caller passes $timestamp
 * it receives the UNIX timestamp of the revision returned (null on failure).
 * Status: stable */
function wp_get($title, $context = null, &$timestamp = null) {
    $timestamps = array();
    $pages = wp_get_multiple(array($title), $context, $timestamps);
    $key = $title;
    if (!array_key_exists($key, $pages) && count($pages) == 1) {
        /* The API reports pages under their normalised title ("foo_bar" comes
         * back as "Foo bar"). Only one page was asked for, so the single page
         * returned is the one we want. */
        $keys = array_keys($pages);
        $key = $keys[0];
    }
    $timestamp = isset($timestamps[$key]) ? $timestamps[$key] : null;
    return isset($pages[$key]) ? $pages[$key] : null;
}

/* Returns an associative array of the contents of all the specified pages, keyed
 * by the page title as reported by the API. Pages without revisions (missing
 * pages, for instance) are left out. When the caller passes $timestamps it is
 * reset and filled with title => UNIX timestamp of the revision returned.
 *
 * Titles are requested in batches of 50; a batch whose response cannot be
 * decoded contributes nothing to the result.
 * Status: stable */
function wp_get_multiple($titles, $context = null, &$timestamps = null) {
    $timestamps = array();
    $pages = array();
    if (empty($titles))
        return $pages;
    /* 50 is taken to be the API's per-request cap on "titles" for ordinary
     * accounts; anything beyond the cap would be ignored by the server */
    foreach (array_chunk($titles, 50) as $batch) {
        $post = array(
            "action" => "query",
            "format" => "php",
            "prop" => "revisions",
            "titles" => implode("|", $batch),
            "rvprop" => "timestamp|content",
        );
        if (isset($context) && array_key_exists("maxlag", $context))
            $post["maxlag"] = (string)$context["maxlag"];
        $ret = unserialize(wp_post($post, $context));
        /* Failed request, undecodable reply or API error: nothing to collect */
        if (!is_array($ret) || !isset($ret["query"]["pages"])
                || !is_array($ret["query"]["pages"]))
            continue;
        foreach ($ret["query"]["pages"] as $p) {
            if (empty($p["revisions"]) || !isset($p["title"]))
                continue;
            $t = $p["title"];
            /* Only the first revision listed is used */
            $a = array_shift($p["revisions"]);
            $timestamps[$t] = strtotime($a["timestamp"]);
            $pages[$t] = $a["*"];
        }
    }
    return $pages;
}

/* Creates the context structure that the other functions take, recording the
 * options given. An option passed as null is simply not stored:
 *   $maxlag  -> "maxlag"
 *   $bot     -> "bot"
 *   $api_url -> "api url"
 * Status: unstable, parameters and return value may change */
function wp_create_context($maxlag = null, $bot = false, $api_url = null) {
    $options = array(
        "maxlag" => $maxlag,
        "bot" => $bot,
        "api url" => $api_url,
    );
    $context = array();
    foreach ($options as $key => $value) {
        if ($value !== null)
            $context[$key] = $value;
    }
    return $context;
}

/* Sets the number of items to be returned for each query. The value is stored
 * under "qlimit" in $context, which is modified in place.
 * Status: stable */
function wp_context_set_query_limit($limit, &$context) {
    $context["qlimit"] = $limit;
}

/* Logs the bot into the wiki associated with the given context. Returns true on
 * success and false if the API did not report a successful login. On success
 * $context (created if it was not set) receives:
 *   "username" - the name that was logged in
 *   "cookies"  - the session cookies to send with later requests
 *
 * Raises E_USER_ERROR if the username is empty or the password is not set.
 * Status: unstable, parameters and return value may change */
function wp_login($username, $password, &$context) {
    if (!isset($username) || $username == "")
        trigger_error("Username not set", E_USER_ERROR);
    if (!isset($password))
        trigger_error("Password not set", E_USER_ERROR);
    $login_post = array(
        "action" => "login",
        "format" => "php",
        "lgname" => $username,
        "lgpassword" => $password,
    );
    $ret = unserialize(wp_post($login_post, $context));
    /* unserialize() gives false when the request failed or the reply was not in
     * the expected format */
    if (!is_array($ret) || !array_key_exists("login", $ret))
        return false;
    $login = $ret["login"];
    if (!isset($login["result"]) || $login["result"] != "Success")
        return false;
    $prefix = isset($login["cookieprefix"]) ? $login["cookieprefix"] : "";
    /* This used to read an undefined $settings["username"], so the UserName
     * cookie was always sent empty. Prefer the name as reported by the wiki. */
    $cookie_name = isset($login["lgusername"]) ? $login["lgusername"] : $username;
    $cookies = array($prefix."UserName" => $cookie_name);
    if (array_key_exists("lguserid", $login))
        $cookies[$prefix."UserID"] = $login["lguserid"];
    if (array_key_exists("lgtoken", $login))
        $cookies[$prefix."Token"] = $login["lgtoken"];
    if (array_key_exists("sessionid", $login))
        $cookies[$prefix."_session"] = $login["sessionid"];
    if (!isset($context))
        $context = array();
    $context["username"] = $username;
    $context["cookies"] = $cookies;
    return true;
}

/* Sends a logout request to the wiki for the given context. The response is
 * discarded and $context itself is left unchanged.
 * Status: stable */
function wp_logout($context) {
    wp_post(array("action" => "logout", "format" => "php"), $context);
}

/* Returns an edit token to be used for all edits in the session, obtained by
 * querying the given page. Returns "" if no token could be obtained.
 *
 * Raises E_USER_ERROR if the context carries no login cookies.
 * Status: stable */
function wp_get_edit_token($title, $context) {
    if (!isset($context) || !isset($context["cookies"]))
        trigger_error("Must be logged in to get edit token", E_USER_ERROR);
    $post = array(
        "action" => "query",
        "format" => "php",
        "prop" => "info",
        "intoken" => "edit",
        "titles" => $title,
    );
    if (isset($context) && array_key_exists("maxlag", $context))
        $post["maxlag"] = (string)$context["maxlag"];
    $ret = unserialize(wp_post($post, $context));
    /* Failed request, undecodable reply or API error */
    if (!is_array($ret) || !isset($ret["query"]["pages"])
            || !is_array($ret["query"]["pages"]))
        return "";
    $pages = $ret["query"]["pages"];
    foreach ($pages as $p) {
        if (isset($p["title"]) && $p["title"] == $title && isset($p["edittoken"]))
            return $p["edittoken"];
    }
    /* The API reports the normalised title, which may differ from $title. Only
     * one page was asked for, so a single page in the reply is that page. */
    if (count($pages) == 1) {
        $only = reset($pages);
        if (isset($only["edittoken"]))
            return $only["edittoken"];
    }
    return "";
}

/* Uploads $content over an existing page, or over one section of it.
 *
 * $section   - section to replace; null replaces the whole page
 * $edtoken   - edit token for the session
 * $context   - must hold login cookies; its "maxlag" and "bot" entries are
 *              passed on to the API when present
 * $timestamp - optional UNIX timestamp of the revision the edit is based on,
 *              sent as "basetimestamp"
 *
 * Returns true if the API reports success. An API error raises E_USER_NOTICE
 * with the error code and returns false; any other reply raises E_USER_WARNING
 * and returns false. Raises E_USER_ERROR if not logged in or if $content itself
 * carries a bot exclusion directive.
 * Status: stable */
function wp_edit_section($title, $content, $summary, $section, $edtoken, $context,
        $timestamp = null) {
    $logged_in = isset($context) && isset($context["cookies"]);
    if (!$logged_in)
        trigger_error("Must be logged in to edit pages", E_USER_ERROR);
    $permitted = wp_page_allows_bot($content, $context);
    if (!$permitted)
        trigger_error($title." excludes bot edits", E_USER_ERROR);

    $request = array(
        "action" => "edit",
        "format" => "php",
        "title" => $title,
        "text" => $content,
        "token" => $edtoken,
        "summary" => $summary,
    );
    if (array_key_exists("maxlag", $context))
        $request["maxlag"] = (string)$context["maxlag"];
    if (array_key_exists("bot", $context) && $context["bot"])
        $request["bot"] = "yes";
    if (isset($timestamp))
        $request["basetimestamp"] = wp_format_post_date($timestamp);
    if (isset($section))
        $request["section"] = $section;

    $reply = unserialize(wp_post($request, $context));
    if (isset($reply["error"])) {
        trigger_error($reply["error"]["code"], E_USER_NOTICE);
        return false;
    }
    $succeeded = isset($reply["edit"]) && $reply["edit"]["result"] == "Success";
    if (!$succeeded)
        trigger_error("Unhandled query return status", E_USER_WARNING);
    return $succeeded;
}

/* Replaces the whole page with $content. Same as wp_edit_section() with no
 * section selected; returns what that call returns.
 * Status: stable */
function wp_edit_page($title, $content, $summary, $edtoken, $context,
        $timestamp = null) {
    $whole_page = null;
    return wp_edit_section($title, $content, $summary, $whole_page, $edtoken,
        $context, $timestamp);
}

/* Adds a new section to page $ptitle. The section title $stitle is sent as the
 * edit summary and $content as the section text; returns what
 * wp_edit_section() returns.
 * Status: stable */
function wp_append_section($ptitle, $stitle, $content, $edtoken, $context) {
    $section = "new";
    return wp_edit_section($ptitle, $content, $stitle, $section, $edtoken, $context);
}

/* Keeps trying to modify the page until it is successful. The modifications are
 * made by the passed in function, with these parameters:
 * $new_page = $modify($old_page, $data);
 *
 * $token     - edit token; fetched for $title when not given
 * $old_page,
 * $old_ts    - page text and its timestamp, if the caller already has both; they
 *              are used for the first attempt only, later attempts download the
 *              page again
 * $max_tries - give up after this many failed uploads (null: never give up)
 *
 * Nothing is uploaded when $modify returns the page unchanged. Raises
 * E_USER_ERROR if the page excludes this bot. Returns nothing. */
function wp_edit_war($title, $summary, $modify, $data, $ctx, $token = null,
        $old_page = null, $old_ts = null, $max_tries = null) {
    if (!isset($token))
        $token = wp_get_edit_token($title, $ctx);
    $attempts = 0;
    do {
        $base_ts = null;
        if (isset($old_page) && isset($old_ts)) {
            /* Consume the caller-supplied copy so it is only used once */
            $current = $old_page;
            $base_ts = $old_ts;
            $old_page = null;
            $old_ts = null;
        }
        else {
            $current = wp_get($title, $ctx, $base_ts);
        }
        if (!wp_page_allows_bot($current, $ctx))
            trigger_error($title." excludes bot edits", E_USER_ERROR);
        $updated = $modify($current, $data);
        if ($updated == $current)
            break;
        $attempts++;
        if (wp_edit_page($title, $updated, $summary, $token, $ctx, $base_ts))
            break;
    } while (!isset($max_tries) || $attempts < $max_tries);
}

/* Dry run of an edit: downloads the page, applies $modify($old_page, $data) to
 * it and returns the result without uploading anything. */
function wp_edit_test($title, $modify, $data, $ctx) {
    $current = wp_get($title, $ctx);
    $result = $modify($current, $data);
    return $result;
}

/* Looks up, for every file title given, the image repository the API reports
 * for it. Returns an associative array of title => repository name, or null
 * when $files is empty.
 * Status: stable */
function wp_locate_files($files, $context) {
    if (empty($files))
        return null;
    $request = array(
        "action" => "query",
        "format" => "php",
        "prop" => "imageinfo",
        "titles" => implode("|", $files),
    );
    if (isset($context) && array_key_exists("maxlag", $context))
        $request["maxlag"] = (string)$context["maxlag"];
    $reply = unserialize(wp_post($request, $context));
    $repositories = array();
    foreach ($reply["query"]["pages"] as $page) {
        $repositories[$page["title"]] = $page["imagerepository"];
    }
    return $repositories;
}

/* Returns a list of the titles of the pages in the category, following the API's
 * continuation until every member has been collected. $category may be given
 * with or without the "Category:" prefix. The context's "maxlag" and "qlimit"
 * entries are passed on when present.
 *
 * Returns null, after raising E_USER_NOTICE, if the API reports an error or its
 * reply cannot be decoded.
 * Status: stable */
function wp_get_category_members($category, $context = null) {
    $ctitle = $category;
    /* eregi() no longer exists in PHP 7+ */
    if (!preg_match('/^Category:/i', $category))
        $ctitle = "Category:".$category;
    $mlist = array();
    /* Continuation parameters to add to the next request */
    $continue = array();
    while (true) {
        $post = array(
            "action" => "query",
            "format" => "php",
            "list" => "categorymembers",
            "cmtitle" => $ctitle,
        );
        if (isset($context) && array_key_exists("maxlag", $context))
            $post["maxlag"] = (string)$context["maxlag"];
        if (isset($context) && array_key_exists("qlimit", $context))
            $post["cmlimit"] = (string)$context["qlimit"];
        foreach ($continue as $k => $v)
            $post[$k] = (string)$v;
        $ret = unserialize(wp_post($post, $context));
        if (!is_array($ret)) {
            trigger_error("Could not decode API response", E_USER_NOTICE);
            return null;
        }
        if (array_key_exists("error", $ret)) {
            trigger_error($ret["error"]["info"], E_USER_NOTICE);
            return null;
        }
        if (isset($ret["query"]["categorymembers"])
                && is_array($ret["query"]["categorymembers"])) {
            foreach ($ret["query"]["categorymembers"] as $m)
                $mlist[] = $m["title"];
        }
        /* Current API versions return a top-level "continue" block whose entries
         * must all be sent back; older ones used "query-continue". Without the
         * first branch only the first batch would ever be returned. */
        if (isset($ret["continue"]) && is_array($ret["continue"]))
            $continue = $ret["continue"];
        else if (isset($ret["query-continue"]["categorymembers"])
                && is_array($ret["query-continue"]["categorymembers"]))
            $continue = $ret["query-continue"]["categorymembers"];
        else
            break;
    }
    return $mlist;
}

/* Goes through the page history to find when subpages were transcluded. The list
 * function returns an array of transcluded subpages and takes the arguments:
 * list_fn($title, $contents);
 * Pass in an array of the subpages to look for as $current_tcs.
 *
 * Returns an array of subpage => earliest revision timestamp at which the
 * subpage was seen. Revisions are processed in the order the API returns them.
 * Once a subpage from $current_tcs is absent from a revision, revisions
 * processed after that one no longer update its date, and the walk stops when
 * that has happened for every subpage in $current_tcs. The context's "qlimit"
 * and "maxlag" entries are passed on when present. */
function wp_transcluded_dates($title, $list_fn, $current_tcs, $context = null) {
    $tc_ts = array();
    $found_missing = array();
    /* Continuation parameters to add to the next request */
    $continue = array();
    while (true) {
        $post = array(
            "action" => "query",
            "format" => "php",
            "prop" => "revisions",
            "titles" => $title,
            "rvprop" => "timestamp|content",
        );
        if (isset($context) && array_key_exists("qlimit", $context))
            $post["rvlimit"] = (string)$context["qlimit"];
        if (isset($context) && array_key_exists("maxlag", $context))
            $post["maxlag"] = (string)$context["maxlag"];
        foreach ($continue as $k => $v)
            $post[$k] = (string)$v;
        $ret = unserialize(wp_post($post, $context));
        /* Failed request or undecodable reply: return what was gathered so far */
        if (!is_array($ret))
            break;
        $revs = array();
        if (isset($ret["query"]["pages"]) && is_array($ret["query"]["pages"])) {
            foreach ($ret["query"]["pages"] as $p) {
                if (!isset($p["title"]) || $p["title"] != $title)
                    continue;
                if (isset($p["revisions"]) && is_array($p["revisions"]))
                    $revs = $p["revisions"];
                break;
            }
        }
        foreach ($revs as $r) {
            $time = strtotime($r["timestamp"]);
            $tcs = $list_fn($title, isset($r["*"]) ? $r["*"] : null);
            if (empty($tcs))
                continue;
            if (isset($current_tcs)) {
                /* A wanted subpage that is absent here is frozen from now on */
                foreach ($current_tcs as $tc) {
                    if (!in_array($tc, $tcs))
                        $found_missing[$tc] = true;
                }
            }
            foreach ($tcs as $tc) {
                if (array_key_exists($tc, $found_missing))
                    continue;
                /* Start from "now" and keep the smallest timestamp seen */
                if (!array_key_exists($tc, $tc_ts))
                    $tc_ts[$tc] = time();
                if ($tc_ts[$tc] > $time)
                    $tc_ts[$tc] = $time;
            }
        }
        if (isset($current_tcs)) {
            $all_missing = true;
            foreach ($current_tcs as $tc) {
                if (array_key_exists($tc, $found_missing))
                    continue;
                $all_missing = false;
                break;
            }
            if ($all_missing)
                break;
        }
        /* Current API versions return a top-level "continue" block whose entries
         * must all be sent back; older ones used "query-continue". A reply with
         * neither ends the walk - the old code could ask for the first batch
         * again forever when the expected key was missing. */
        if (isset($ret["continue"]) && is_array($ret["continue"]))
            $continue = $ret["continue"];
        else if (isset($ret["query-continue"]["revisions"])
                && is_array($ret["query-continue"]["revisions"]))
            $continue = $ret["query-continue"]["revisions"];
        else
            break;
    }
    return $tc_ts;
}
?>