User:Rick Bot/scripts/listcat

#!/bin/bash
#
# listcat -- print the title of every member of an English Wikipedia
# category, one title per line on stdout.
#
# Usage: listcat CATEGORY
#   CATEGORY  ($1) category name without the "Category:" prefix. Spaces and
#             other special characters are percent-encoded automatically;
#             existing %XX escapes are passed through unchanged.
#
# The API returns the members one page at a time. Each response that has more
# data carries a cmcontinue token; the script sends that token back with the
# next request and stops when a response has no token.

WGET=(/usr/bin/curl -sS)   # on a mac OS X
# WGET=(wget -q -O -)      # on a linux box with wget

API_URL="https://en.wikipedia.org/w/api.php"

#######################################
# Percent-encode a string for use as a URL query value.
# A "%" that is already followed by two hex digits is kept as-is so that
# callers who pre-encode the category name keep working.
# Arguments: $1 - text to encode
# Outputs:   encoded text on stdout
#######################################
urlencode() (
  # Subshell body: the locale change below cannot leak into the caller.
  LC_ALL=C   # iterate byte by byte so UTF-8 sequences are encoded per byte
  text=$1
  encoded=
  for (( i = 0; i < ${#text}; i++ )); do
    ch=${text:i:1}
    case "$ch" in
      [a-zA-Z0-9._~-])
        encoded+=$ch
        ;;
      %)
        if [[ ${text:i+1:2} == [0-9A-Fa-f][0-9A-Fa-f] ]]; then
          encoded+=%
        else
          encoded+=%25
        fi
        ;;
      *)
        printf -v code '%d' "'$ch"
        # Mask to one byte in case printf reports a high byte as negative.
        printf -v ch '%%%02X' "$(( code & 0xFF ))"
        encoded+=$ch
        ;;
    esac
  done
  printf '%s\n' "$encoded"
)

#######################################
# Extract data from one API response (XML on stdin).
# Arguments: $1 - "titles":   print the title attribute of every <cm> element,
#                             exactly as it appears in the XML (entities are
#                             not decoded)
#                 "continue": print "&cmcontinue=TOKEN" (TOKEN percent-encoded)
#                             if the response has a continuation, else nothing
# Outputs:   see above, on stdout
#######################################
parse_page() {
  # The response is one long line; sed puts <query-continue and every <cm
  # at the start of its own line so awk can match them with ^ anchors.
  sed -e 's/<query-continue/\
<query-continue/' -e 's/<cm/\
<cm/g' | awk -v mode="$1" '

# Shape of a response (a single line of XML):
# <?xml version="1.0" encoding="utf-8"?><api><query-continue><categorymembers cmcontinue="A Raider Like Indiana|" /></query-continue><query><categorymembers><cm pageid="18818413" ns="2" title="User:007fan28" /><cm pageid="11799961" ns="2" title="User:10014derek" /></categorymembers></query></api>

# Return "%XX" (two upper-case hex digits) for one byte value.
function pct(byte) {
  return sprintf("%%%02X", byte)
}

# Return the percent-encoded UTF-8 byte sequence for code point cp.
function utf8_pct(cp) {
  if (cp < 128)
    return pct(cp)
  if (cp < 2048)
    return pct(192 + int(cp / 64)) pct(128 + cp % 64)
  if (cp < 65536)
    return pct(224 + int(cp / 4096)) pct(128 + int(cp / 64) % 64) pct(128 + cp % 64)
  return pct(240 + int(cp / 262144)) pct(128 + int(cp / 4096) % 64) pct(128 + int(cp / 64) % 64) pct(128 + cp % 64)
}

mode == "continue" && /^<query-continue/ {
  # Keep only the value of the cmcontinue attribute; skip the line if absent.
  if (!sub(/^.*cmcontinue="/, "")) next
  sub(/".*/, "")

  # Escape literal percent signs first so later output is not re-escaped.
  gsub(/%/, "%25")

  # Decimal character references (&#NNN;) become UTF-8 percent escapes.
  while (match($0, /&#[0-9]+;/)) {
    cp = substr($0, RSTART + 2, RLENGTH - 3) + 0
    $0 = substr($0, 1, RSTART - 1) utf8_pct(cp) substr($0, RSTART + RLENGTH)
  }

  # Named XML entities.
  gsub(/&amp;/, "%26")
  gsub(/&quot;/, "%22")
  gsub(/&apos;/, "%27")
  gsub(/&lt;/, "%3C")
  gsub(/&gt;/, "%3E")

  # Characters that would otherwise change the meaning of the URL.
  gsub(/#/, "%23")
  gsub(/[+]/, "%2B")
  gsub(/ /, "%20")

  print "&cmcontinue=" $0
}

mode == "titles" && /^<cm / {
  sub(/<cm.*title="/, "")
  sub(/".*/, "")
  print
}
'
}

#######################################
# Print every member title of a category, following continuation tokens.
# Globals:   WGET (read), API_URL (read)
# Arguments: $1 - category name without the "Category:" prefix
# Outputs:   one title per line on stdout; diagnostics on stderr
# Returns:   0 on success, 1 if a request fails
#######################################
list_category() {
  local category continuation="" query page
  category=$(urlencode "$1")

  while :; do
    # rawcontinue asks the API for the <query-continue> element that
    # parse_page looks for.
    query="action=query&list=categorymembers&cmtitle=Category:${category}&cmlimit=max&format=xml&rawcontinue=1${continuation}"
    # echo "$query"
    if ! page=$("${WGET[@]}" "${API_URL}?${query}"); then
      printf 'listcat: request failed: %s\n' "${API_URL}?${query}" >&2
      return 1
    fi

    printf '%s\n' "$page" | parse_page titles
    continuation=$(printf '%s\n' "$page" | parse_page continue)
    [[ -n "$continuation" ]] || break
  done
}

if (( $# < 1 )); then
  printf 'usage: %s CATEGORY\n' "${0##*/}" >&2
  false   # finish with a non-zero status when no category was given
else
  list_category "$1"
fi