MediaWiki
REL1_24
|
<?php

/**
 * Forward-only reader that pulls revision text out of one or more existing
 * XML dump files, so a caller can reuse text from a previous dump.
 *
 * The input is a single string of file names separated by ';'. Files are
 * consumed in the order given, and the underlying XMLReader never rewinds:
 * once the reader has moved past a page or revision, asking for it again
 * yields null.
 */
class BaseDump {
	/** @var XMLReader Reader positioned somewhere inside the current dump file */
	protected $reader = null;

	/** @var bool True once there is nothing left to read from the current file */
	protected $atEnd = false;

	/** @var bool True once the revisions of the current page are exhausted */
	protected $atPageEnd = false;

	/** @var int Id of the last page seen in the dump */
	protected $lastPage = 0;

	/** @var int Id of the last revision seen on the current page */
	protected $lastRev = 0;

	/** @var array Dump files that have not been opened yet */
	protected $infiles = null;

	/**
	 * @param string $infile One or more dump file names, separated by ';'
	 */
	public function __construct( $infile ) {
		$this->infiles = explode( ';', $infile );
		$this->reader = new XMLReader();
		$this->openInfile( array_shift( $this->infiles ) );
	}

	/**
	 * Open a dump file on the shared reader.
	 *
	 * Every file is opened with LIBXML_PARSEHUGE when the constant exists, so
	 * that large text nodes are handled the same way in the first file and in
	 * all following ones. On failure the object is flagged as being at the end
	 * of input, which makes the other methods return misses instead of reading
	 * from a reader that has no data loaded.
	 *
	 * @param string $infile File name or URI to open
	 * @return bool True if the file was opened
	 */
	protected function openInfile( $infile ) {
		if ( strval( $infile ) === '' ) {
			// XMLReader rejects an empty URI; treat it as a failed open
			$opened = false;
		} elseif ( defined( 'LIBXML_PARSEHUGE' ) ) {
			$opened = (bool)$this->reader->open( $infile, null, LIBXML_PARSEHUGE );
		} else {
			$opened = (bool)$this->reader->open( $infile );
		}

		if ( !$opened ) {
			$this->debug( "BaseDump::openInfile was unable to open '$infile'" );
		}
		$this->atEnd = !$opened;

		return $opened;
	}

	/**
	 * Try to fetch the text of a given revision from the dump stream.
	 *
	 * The reader is advanced until it reaches the requested page and revision.
	 * If the stream is already past either of them, or runs out, the request
	 * is a miss.
	 *
	 * @param int $page Page id to look for
	 * @param int $rev Revision id to look for
	 * @return string|null Revision text on a hit, null on a miss
	 */
	function prefetch( $page, $rev ) {
		$page = intval( $page );
		$rev = intval( $rev );

		while ( $this->lastPage < $page && !$this->atEnd ) {
			$this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" );
			$this->nextPage();
		}
		if ( $this->lastPage > $page || $this->atEnd ) {
			$this->debug( "BaseDump::prefetch already past page $page "
				. "looking for rev $rev  [$this->lastPage, $this->lastRev]" );

			return null;
		}

		while ( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) {
			$this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, "
				. "looking for $page, $rev" );
			$this->nextRev();
		}
		if ( $this->lastRev === $rev && !$this->atEnd ) {
			$this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" );

			return $this->nextText();
		}

		$this->debug( "BaseDump::prefetch already past rev $rev on page $page "
			. "[$this->lastPage, $this->lastRev]" );

		return null;
	}

	/**
	 * Send a line to the debug log.
	 *
	 * @param string $str Message, without trailing newline
	 */
	function debug( $str ) {
		wfDebug( $str . "\n" );
	}

	/**
	 * Advance to the next <page> element and record its id.
	 *
	 * When the current file has no further pages it is closed and, if another
	 * input file is queued, that file is opened so the next call continues
	 * there. $lastPage is left untouched in that case.
	 */
	function nextPage() {
		if ( $this->skipTo( 'page', 'mediawiki' ) ) {
			if ( $this->skipTo( 'id' ) ) {
				$this->lastPage = intval( $this->nodeContents() );
				$this->lastRev = 0;
				$this->atPageEnd = false;
			}

			return;
		}

		// close() flags end of input; openInfile() clears the flag again
		// only when the next file could actually be opened.
		$this->close();
		if ( count( $this->infiles ) ) {
			$this->openInfile( array_shift( $this->infiles ) );
		}
	}

	/**
	 * Advance to the next <revision> element of the current page and record
	 * its id, or flag the end of the page if there is none.
	 */
	function nextRev() {
		if ( $this->skipTo( 'revision' ) ) {
			if ( $this->skipTo( 'id' ) ) {
				$this->lastRev = intval( $this->nodeContents() );
			}
		} else {
			$this->atPageEnd = true;
		}
	}

	/**
	 * Advance to the next <text> element and return its contents.
	 *
	 * @return string Text contents; empty string if none could be read
	 */
	function nextText() {
		$this->skipTo( 'text' );

		return strval( $this->nodeContents() );
	}

	/**
	 * Read forward until an opening element with the given name is found.
	 *
	 * The search stops early when the closing tag of $parent is reached. If
	 * the input runs out, the reader is closed and the end-of-input flag set.
	 *
	 * @param string $name Element name to look for
	 * @param string $parent Element name whose closing tag ends the search
	 * @return bool True if the reader now sits on the requested element
	 */
	function skipTo( $name, $parent = 'page' ) {
		if ( $this->atEnd ) {
			return false;
		}
		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::ELEMENT
				&& $this->reader->name == $name
			) {
				return true;
			}
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT
				&& $this->reader->name == $parent
			) {
				$this->debug( "BaseDump::skipTo found </$parent> searching for <$name>" );

				return false;
			}
		}

		// Ran out of input (or hit a read error) before finding the element
		$this->close();

		return false;
	}

	/**
	 * Collect the text contents of the element the reader is positioned on.
	 *
	 * Reading stops at the first closing tag encountered, so this is meant
	 * for leaf elements; a nested child element would end collection at the
	 * child's closing tag.
	 *
	 * @return string|null Collected text, "" for an empty element, or null
	 *   when the input ended before a closing tag was seen
	 */
	function nodeContents() {
		if ( $this->atEnd ) {
			return null;
		}
		if ( $this->reader->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->reader->read() ) {
			switch ( $this->reader->nodeType ) {
				// Plain XMLReader::WHITESPACE nodes are deliberately not collected
				case XMLReader::TEXT:
				case XMLReader::SIGNIFICANT_WHITESPACE:
					$buffer .= $this->reader->value;
					break;
				case XMLReader::END_ELEMENT:
					return $buffer;
			}
		}

		return $this->close();
	}

	/**
	 * Close the reader and flag the end of input.
	 *
	 * @return null Always null, so callers can write "return $this->close();"
	 */
	function close() {
		$this->reader->close();
		$this->atEnd = true;

		return null;
	}
}