MediaWiki
REL1_19
|
<?php

/**
 * Forward-only reader that pulls revision text out of one or more XML dump
 * files, so a caller can reuse text from an earlier dump.
 *
 * The reader walks <mediawiki>/<page>/<revision> elements with XMLReader and
 * never rewinds: pages and revisions must be requested in ascending id order.
 * A request for something the reader has already passed yields null.
 */
class BaseDump {
	/** XMLReader positioned somewhere inside the file currently being read. */
	public $reader = null;

	/** True once every input file has been exhausted (or none could be opened). */
	public $atEnd = false;

	/** True once the closing </page> of the current page has been consumed. */
	public $atPageEnd = false;

	/** Id of the page most recently entered; 0 before the first page. */
	public $lastPage = 0;

	/** Id of the revision most recently entered on the current page; 0 if none. */
	public $lastRev = 0;

	/** Input files that have not been opened yet, in reading order. */
	public $infiles = null;

	/**
	 * @param $infile String: one file name/URI, or several separated by ';'.
	 *   The files are read one after another in the order given.
	 */
	public function __construct( $infile ) {
		$this->infiles = explode( ';', $infile );
		$this->reader = new XMLReader();
		$this->openNextFile();
	}

	/**
	 * Attempts to fetch the text of a particular page revision from the
	 * dump stream.
	 *
	 * @param $page Integer: id number of the page to read
	 * @param $rev Integer: id number of the revision to read
	 * @return string|null The revision text, or null when the revision is not
	 *   available: the reader is already past it, the input is exhausted, or
	 *   the revision carries no <text> element.
	 */
	public function prefetch( $page, $rev ) {
		$page = intval( $page );
		$rev = intval( $rev );

		// Advance page by page until we reach (or overshoot) the wanted page.
		while ( $this->lastPage < $page && !$this->atEnd ) {
			$this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" );
			$this->nextPage();
		}
		if ( $this->lastPage > $page || $this->atEnd ) {
			$this->debug( "BaseDump::prefetch already past page $page looking for rev $rev [$this->lastPage, $this->lastRev]" );
			return null;
		}

		// Same again for revisions, but never read beyond the current page.
		while ( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) {
			$this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, looking for $page, $rev" );
			$this->nextRev();
		}
		if ( $this->lastRev === $rev && !$this->atEnd ) {
			$this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" );
			return $this->nextText();
		}

		$this->debug( "BaseDump::prefetch already past rev $rev on page $page [$this->lastPage, $this->lastRev]" );
		return null;
	}

	/**
	 * Sends a message to the debug log via wfDebug().
	 *
	 * @param $str String: message, without trailing newline
	 */
	public function debug( $str ) {
		wfDebug( $str . "\n" );
	}

	/**
	 * Moves to the next <page> and records its id. When the current file has
	 * no further pages, it is closed and the next input file (if any) is
	 * opened; the caller is expected to call this method again to enter the
	 * first page of that file.
	 */
	public function nextPage() {
		if ( $this->skipTo( 'page', 'mediawiki' ) ) {
			if ( $this->skipTo( 'id' ) ) {
				$this->lastPage = intval( $this->nodeContents() );
				$this->lastRev = 0;
				$this->atPageEnd = false;
			}
			return;
		}

		$this->close();
		$this->openNextFile();
	}

	/**
	 * Moves to the next <revision> of the current page and records its id,
	 * or flags the end of the page when there is none left.
	 */
	public function nextRev() {
		if ( $this->skipTo( 'revision' ) ) {
			if ( $this->skipTo( 'id' ) ) {
				$this->lastRev = intval( $this->nodeContents() );
			}
		} else {
			$this->atPageEnd = true;
		}
	}

	/**
	 * Reads the <text> element of the revision the reader is currently in.
	 *
	 * The search stops at the closing </revision>, so a revision without a
	 * <text> element yields null instead of text belonging to a later node.
	 *
	 * @return string|null
	 */
	public function nextText() {
		if ( !$this->skipTo( 'text', 'revision' ) ) {
			return null;
		}
		return strval( $this->nodeContents() );
	}

	/**
	 * Reads forward until an opening element called $name is found.
	 *
	 * @param $name String: element to look for
	 * @param $parent String: give up when the closing tag of this element is
	 *   read first
	 * @return bool|null True when positioned on <$name>; false when already at
	 *   the end or when </$parent> was read first; null (the result of close())
	 *   when the file ran out.
	 */
	public function skipTo( $name, $parent = 'page' ) {
		if ( $this->atEnd ) {
			return false;
		}
		while ( $this->reader->read() ) {
			$nodeType = $this->reader->nodeType;
			$nodeName = $this->reader->name;
			if ( $nodeType == XMLReader::ELEMENT && $nodeName == $name ) {
				return true;
			}
			if ( $nodeType == XMLReader::END_ELEMENT && $nodeName == $parent ) {
				$this->debug( "BaseDump::skipTo found </$parent> searching for <$name>" );
				return false;
			}
		}
		return $this->close();
	}

	/**
	 * Collects the text inside the element the reader is positioned on,
	 * reading up to and including the first closing tag encountered.
	 *
	 * @return string|null The concatenated text ("" for an empty element), or
	 *   null when already at the end or when the file ran out before a
	 *   closing tag was read.
	 */
	public function nodeContents() {
		if ( $this->atEnd ) {
			return null;
		}
		if ( $this->reader->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->reader->read() ) {
			switch ( $this->reader->nodeType ) {
				// Plain XMLReader::WHITESPACE nodes are not collected.
				case XMLReader::TEXT:
				case XMLReader::SIGNIFICANT_WHITESPACE:
					$buffer .= $this->reader->value;
					break;
				case XMLReader::END_ELEMENT:
					return $buffer;
			}
		}
		return $this->close();
	}

	/**
	 * Closes the current input and marks the reader as finished.
	 *
	 * @return null Always null, so callers can write "return $this->close();"
	 */
	public function close() {
		$this->reader->close();
		$this->atEnd = true;
		return null;
	}

	/**
	 * Opens the next pending input file, skipping entries that are empty or
	 * cannot be opened. Every file is opened with LIBXML_PARSEHUGE when the
	 * constant is defined. Clears atEnd when a file was opened and sets it
	 * when no file is left.
	 *
	 * @return bool Whether a file was opened
	 */
	private function openNextFile() {
		$flags = defined( 'LIBXML_PARSEHUGE' ) ? LIBXML_PARSEHUGE : 0;
		while ( count( $this->infiles ) > 0 ) {
			$infile = array_shift( $this->infiles );
			if ( $infile !== '' && $this->reader->open( $infile, null, $flags ) ) {
				$this->atEnd = false;
				return true;
			}
			$this->debug( "BaseDump::openNextFile unable to open '$infile'" );
		}
		$this->atEnd = true;
		return false;
	}
}