[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * BackupDumper that postprocesses XML dumps from dumpBackup.php to add page text
 *
 * Copyright (C) 2005 Brion Vibber <[email protected]>
 * https://www.mediawiki.org/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance
 */

require_once __DIR__ . '/backup.inc';

/**
 * Reads a stub XML dump, replaces every <text id="..."> placeholder with the
 * revision text (taken from a prefetch dump, the database, or a spawned
 * fetchText.php subprocess) and streams the result to the configured sinks.
 *
 * @ingroup Maintenance
 */
class TextPassDumper extends BackupDumper {
	/** @var BaseDump|null Previous dump used as a text source; set by the 'prefetch' option */
	public $prefetch = null;

	// when we spend more than maxTimeAllowed seconds on this run, we continue
	// processing until we write out the next complete page, then save output file(s),
	// rename it/them and open new one(s)
	public $maxTimeAllowed = 0; // 0 = no limit

	protected $input = "php://stdin";
	protected $history = WikiExporter::FULL;
	protected $fetchCount = 0;
	protected $prefetchCount = 0;
	protected $prefetchCountLast = 0;
	protected $fetchCountLast = 0;

	protected $maxFailures = 5;
	protected $maxConsecutiveFailedTextRetrievals = 200;
	protected $failureTimeout = 5; // Seconds to sleep after db failure

	protected $php = "php";
	protected $spawn = false;

	/**
	 * @var bool|resource
	 */
	protected $spawnProc = false;

	/**
	 * @var bool|resource
	 */
	protected $spawnWrite = false;

	/**
	 * @var bool|resource
	 */
	protected $spawnRead = false;

	/**
	 * @var bool|resource
	 */
	protected $spawnErr = false;

	protected $xmlwriterobj = false;

	protected $timeExceeded = false;
	protected $firstPageWritten = false;
	protected $lastPageWritten = false;
	protected $checkpointJustWritten = false;
	protected $checkpointFiles = array();

	/**
	 * @var DatabaseBase
	 */
	protected $db;

	/**
	 * Drop the database connection $this->db and try to get a new one.
	 *
	 * This function tries to get a /different/ connection if this is
	 * possible. Hence, (if this is possible) it switches to a different
	 * failover upon each call.
	 *
	 * This function resets $this->lb and closes all connections on it.
	 *
	 * @throws MWException
	 */
	public function rotateDb() {
		// Cleaning up old connections
		if ( isset( $this->lb ) ) {
			$this->lb->closeAll();
			unset( $this->lb );
		}

		if ( $this->forcedDb !== null ) {
			$this->db = $this->forcedDb;

			return;
		}

		if ( isset( $this->db ) && $this->db->isOpen() ) {
			throw new MWException( 'DB is set and has not been closed by the Load Balancer' );
		}

		unset( $this->db );

		// Trying to set up new connection.
		// We do /not/ retry upon failure, but delegate to encapsulating logic, to avoid
		// individually retrying at different layers of code.

		// 1. The LoadBalancer.
		try {
			$this->lb = wfGetLBFactory()->newMainLB();
		} catch ( Exception $e ) {
			throw new MWException( __METHOD__
				. " rotating DB failed to obtain new load balancer (" . $e->getMessage() . ")" );
		}

		// 2. The Connection, through the load balancer.
		try {
			$this->db = $this->lb->getConnection( DB_SLAVE, 'dump' );
		} catch ( Exception $e ) {
			throw new MWException( __METHOD__
				. " rotating DB failed to obtain new database (" . $e->getMessage() . ")" );
		}
	}

	/**
	 * Initialise progress tracking and start the checkpoint timer at the
	 * run's start time.
	 *
	 * @param int $history Accepted for signature compatibility; not used here
	 */
	public function initProgress( $history = WikiExporter::FULL ) {
		parent::initProgress();
		$this->timeOfCheckpoint = $this->startTime;
	}

	/**
	 * Run the text pass: read the stub dump from $this->input and write the
	 * filled-in dump to the configured sinks.
	 *
	 * @param int $history Not used; $this->history (set via options) is used instead
	 * @param int $text Not used
	 * @throws MWException If the options are inconsistent, the input cannot be
	 *  opened, or the input is not parseable XML
	 */
	public function dump( $history, $text = WikiExporter::TEXT ) {
		// Notice messages will foul up your XML output even if they're
		// relatively harmless.
		if ( ini_get( 'display_errors' ) ) {
			ini_set( 'display_errors', 'stderr' );
		}

		$this->initProgress( $this->history );

		// We are trying to get an initial database connection to avoid that the
		// first try of this request's first call to getText fails. However, if
		// obtaining a good DB connection fails it's not a serious issue, as
		// getText does retry upon failure and can start without having a working
		// DB connection.
		try {
			$this->rotateDb();
		} catch ( Exception $e ) {
			// We do not even count this as failure. Just let eventual
			// watchdogs know.
			$this->progress( "Getting initial DB connection failed (" .
				$e->getMessage() . ")" );
		}

		$this->egress = new ExportProgressFilter( $this->sink, $this );

		// it would be nice to do it in the constructor, oh well. need egress set
		$this->finalOptionCheck();

		// we only want this so we know how to close a stream :-P
		$this->xmlwriterobj = new XmlDumpWriter();

		$input = fopen( $this->input, "rt" );
		if ( $input === false ) {
			// Without this check an unreadable stub would silently produce an
			// empty dump that looks like a successful run.
			throw new MWException( __METHOD__ . ": unable to open input '{$this->input}'" );
		}
		$this->readDump( $input );
		fclose( $input );

		if ( $this->spawnProc ) {
			$this->closeSpawn();
		}

		$this->report( true );
	}

	/**
	 * Handle one command line option.
	 *
	 * @param string $opt Option name
	 * @param string $val Option value (for file options: the compression type)
	 * @param string $param Option parameter (for file options: ';'-separated URIs)
	 */
	public function processOption( $opt, $val, $param ) {
		global $IP;
		$url = $this->processFileOpt( $val, $param );

		switch ( $opt ) {
			case 'prefetch':
				require_once "$IP/maintenance/backupPrefetch.inc";
				$this->prefetch = new BaseDump( $url );
				break;
			case 'stub':
				$this->input = $url;
				break;
			case 'maxtime':
				// Given in minutes, stored in seconds
				$this->maxTimeAllowed = intval( $val ) * 60;
				break;
			case 'checkpointfile':
				$this->checkpointFiles[] = $val;
				break;
			case 'current':
				$this->history = WikiExporter::CURRENT;
				break;
			case 'full':
				$this->history = WikiExporter::FULL;
				break;
			case 'spawn':
				$this->spawn = true;
				if ( $val ) {
					$this->php = $val;
				}
				break;
		}
	}

	/**
	 * Prefix each URI in a ';'-separated list with the stream wrapper that
	 * matches the given compression type.
	 *
	 * @param string $val Compression type: "file", "gzip", "bzip2" or "7zip";
	 *  anything else leaves the URIs untouched
	 * @param string $param ';'-separated list of URIs
	 * @return string ';'-separated list of rewritten URIs
	 */
	public function processFileOpt( $val, $param ) {
		$newFileURIs = array();
		$fileURIs = explode( ';', $param );
		foreach ( $fileURIs as $URI ) {
			switch ( $val ) {
				case "gzip":
					$newURI = "compress.zlib://$URI";
					break;
				case "bzip2":
					$newURI = "compress.bzip2://$URI";
					break;
				case "7zip":
					$newURI = "mediawiki.compress.7z://$URI";
					break;
				case "file":
				default:
					$newURI = $URI;
			}
			$newFileURIs[] = $newURI;
		}

		return implode( ';', $newFileURIs );
	}

	/**
	 * Overridden to include prefetch ratio if enabled.
	 */
	public function showReport() {
		if ( !$this->prefetch ) {
			parent::showReport();

			return;
		}

		if ( $this->reporting ) {
			$now = wfTimestamp( TS_DB );
			$nowts = microtime( true );
			$deltaAll = $nowts - $this->startTime;
			$deltaPart = $nowts - $this->lastTime;
			$this->pageCountPart = $this->pageCount - $this->pageCountLast;
			$this->revCountPart = $this->revCount - $this->revCountLast;

			if ( $deltaAll ) {
				// An ETA can only be extrapolated once at least one revision has
				// been processed and the total is known; otherwise we would
				// divide by zero.
				if ( $this->maxCount && $this->revCount ) {
					$portion = $this->revCount / $this->maxCount;
					$eta = $this->startTime + $deltaAll / $portion;
					$etats = wfTimestamp( TS_DB, intval( $eta ) );
				} else {
					$etats = '-';
				}
				if ( $this->fetchCount ) {
					$fetchRate = 100.0 * $this->prefetchCount / $this->fetchCount;
				} else {
					$fetchRate = '-';
				}
				$pageRate = $this->pageCount / $deltaAll;
				$revRate = $this->revCount / $deltaAll;
			} else {
				$pageRate = '-';
				$revRate = '-';
				$etats = '-';
				$fetchRate = '-';
			}
			if ( $deltaPart ) {
				// "curr" figures cover only the interval since the previous
				// report, so work on the counter deltas, like the page and
				// revision rates do.
				$fetchCountPart = $this->fetchCount - $this->fetchCountLast;
				$prefetchCountPart = $this->prefetchCount - $this->prefetchCountLast;
				if ( $fetchCountPart ) {
					$fetchRatePart = 100.0 * $prefetchCountPart / $fetchCountPart;
				} else {
					$fetchRatePart = '-';
				}
				$pageRatePart = $this->pageCountPart / $deltaPart;
				$revRatePart = $this->revCountPart / $deltaPart;
			} else {
				$fetchRatePart = '-';
				$pageRatePart = '-';
				$revRatePart = '-';
			}
			$this->progress( sprintf(
				"%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), "
					. "%d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% "
					. "prefetched (all|curr), ETA %s [max %d]",
				$now, wfWikiID(), $this->ID, $this->pageCount, $pageRate,
				$pageRatePart, $this->revCount, $revRate, $revRatePart,
				$fetchRate, $fetchRatePart, $etats, $this->maxCount
			) );
			$this->lastTime = $nowts;
			// NOTE(review): pageCountLast is not updated here; presumably the
			// parent class maintains it - confirm against backup.inc.
			$this->revCountLast = $this->revCount;
			$this->prefetchCountLast = $this->prefetchCount;
			$this->fetchCountLast = $this->fetchCount;
		}
	}

	/**
	 * Flag that the time budget is used up; the next completed page triggers
	 * a checkpoint (see endElement()).
	 */
	public function setTimeExceeded() {
		$this->timeExceeded = true;
	}

	/**
	 * @return bool Whether a time limit is set and more than maxTimeAllowed
	 *  seconds lie between the last checkpoint and the last progress report
	 */
	public function checkIfTimeExceeded() {
		return $this->maxTimeAllowed
			&& ( $this->lastTime - $this->timeOfCheckpoint > $this->maxTimeAllowed );
	}

	/**
	 * Validate the combination of the checkpointfile and maxtime options.
	 * Needs $this->egress to be set.
	 *
	 * @throws MWException If the options are inconsistent
	 */
	public function finalOptionCheck() {
		if ( ( $this->checkpointFiles && !$this->maxTimeAllowed )
			|| ( $this->maxTimeAllowed && !$this->checkpointFiles )
		) {
			throw new MWException( "Options checkpointfile and maxtime must be specified together.\n" );
		}
		foreach ( $this->checkpointFiles as $checkpointFile ) {
			$count = substr_count( $checkpointFile, "%s" );
			if ( $count != 2 ) {
				throw new MWException( "Option checkpointfile must contain two '%s' "
					. "for substitution of first and last pageids, count is $count instead, "
					. "file is $checkpointFile.\n" );
			}
		}

		if ( $this->checkpointFiles ) {
			$filenameList = (array)$this->egress->getFilenames();
			if ( count( $filenameList ) != count( $this->checkpointFiles ) ) {
				throw new MWException( "One checkpointfile must be specified "
					. "for each output option, if maxtime is used.\n" );
			}
		}
	}

	/**
	 * Build the final names of the checkpoint files: each checkpointfile
	 * pattern with both page ids (zero-padded to 9 digits) filled in, placed
	 * in the directory of the corresponding current output file.
	 *
	 * @param array $filenameList Current output file names
	 * @param string|int $firstPageID First page id contained in the files
	 * @param string|int $lastPageID Last page id contained in the files
	 * @return array New file names, in the order of $filenameList
	 */
	private function buildCheckpointFilenames( array $filenameList, $firstPageID, $lastPageID ) {
		$firstPadded = str_pad( $firstPageID, 9, "0", STR_PAD_LEFT );
		$lastPadded = str_pad( $lastPageID, 9, "0", STR_PAD_LEFT );

		$newFilenames = array();
		$filenameCount = count( $filenameList );
		for ( $i = 0; $i < $filenameCount; $i++ ) {
			$checkpointNameFilledIn = sprintf( $this->checkpointFiles[$i], $firstPadded, $lastPadded );
			$fileinfo = pathinfo( $filenameList[$i] );
			$newFilenames[] = $fileinfo['dirname'] . '/' . $checkpointNameFilledIn;
		}

		return $newFilenames;
	}

	/**
	 * Feed the stub dump through the XML parser in chunks; the element and
	 * character data handlers of this class do the actual rewriting.
	 *
	 * @throws MWException Failure to parse XML input
	 * @param resource $input Readable stream holding the stub dump
	 * @return bool Always true
	 */
	public function readDump( $input ) {
		$this->buffer = "";
		$this->openElement = false;
		$this->atStart = true;
		$this->state = "";
		$this->lastName = "";
		$this->thisPage = 0;
		$this->thisRev = 0;

		$parser = xml_parser_create( "UTF-8" );
		xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

		xml_set_element_handler(
			$parser,
			array( $this, 'startElement' ),
			array( $this, 'endElement' )
		);
		xml_set_character_data_handler( $parser, array( $this, 'characterData' ) );

		$offset = 0; // for context extraction on error reporting
		$bufferSize = 512 * 1024;
		do {
			if ( $this->checkIfTimeExceeded() ) {
				$this->setTimeExceeded();
			}
			$chunk = fread( $input, $bufferSize );
			if ( !xml_parse( $parser, $chunk, feof( $input ) ) ) {
				wfDebug( __METHOD__ . " encountered XML parsing error\n" );

				$byte = xml_get_current_byte_index( $parser );
				// fread() signals failure with false; only quote context if we
				// actually have a chunk to quote from.
				$context = ( $chunk === false )
					? ''
					: ( '; "' . substr( $chunk, $byte - $offset, 16 ) . '"' );
				$msg = wfMessage( 'xml-error-string',
					'XML import parse failure',
					xml_get_current_line_number( $parser ),
					xml_get_current_column_number( $parser ),
					$byte . $context,
					xml_error_string( xml_get_error_code( $parser ) ) )->escaped();

				xml_parser_free( $parser );

				throw new MWException( $msg );
			}
			$offset += strlen( $chunk );
		} while ( $chunk !== false && !feof( $input ) );
		if ( $this->maxTimeAllowed ) {
			$filenameList = (array)$this->egress->getFilenames();
			// we wrote some stuff after last checkpoint that needs renamed
			if ( file_exists( $filenameList[0] ) ) {
				# we might have just written the header and footer and had no
				# pages or revisions written... perhaps they were all deleted
				# there's no pageID 0 so we use that. the caller is responsible
				# for deciding what to do with a file containing only the
				# siteinfo information and the mw tags.
				if ( !$this->firstPageWritten ) {
					$newFilenames = $this->buildCheckpointFilenames( $filenameList, 0, 0 );
				} else {
					$newFilenames = $this->buildCheckpointFilenames(
						$filenameList, $this->firstPageWritten, $this->lastPageWritten );
				}
				$this->egress->closeAndRename( $newFilenames );
			}
		}
		xml_parser_free( $parser );

		return true;
	}

	/**
	 * Tries to get the revision text for a revision id.
	 *
	 * Upon errors, retries (Up to $this->maxFailures tries each call).
	 * If still no good revision get could be found even after this retrying, "" is returned.
	 * If no good revision text could be returned for
	 * $this->maxConsecutiveFailedTextRetrievals consecutive calls to getText, MWException
	 * is thrown.
	 *
	 * @param string $id The revision id to get the text for
	 *
	 * @return string The revision text for $id, or ""
	 * @throws MWException
	 */
	public function getText( $id ) {
		global $wgContentHandlerUseDB;

		$prefetchNotTried = true; // Whether or not we already tried to get the text via prefetch.
		$tryIsPrefetch = false; // Whether the current candidate text came from the prefetch dump.
		$text = false; // The candidate for a good text. false if no proper value.
		$failures = 0; // The number of times, this invocation of getText already failed.

		// The number of times getText failed without yielding a good text in between.
		static $consecutiveFailedTextRetrievals = 0;

		$this->fetchCount++;

		// To allow to simply return on success and do not have to worry about book keeping,
		// we assume, this fetch works (possible after some retries). Nevertheless, we keep
		// the old value, so we can restore it, if problems occur (See after the while loop).
		$oldConsecutiveFailedTextRetrievals = $consecutiveFailedTextRetrievals;
		$consecutiveFailedTextRetrievals = 0;

		while ( $failures < $this->maxFailures ) {

			// As soon as we found a good text for the $id, we will return immediately.
			// Hence, if we make it past the try catch block, we know that we did not
			// find a good text.

			try {
				// Step 1: Get some text (or reuse from previous iteration if checking
				// for plausibility failed)

				// Trying to get prefetch, if it has not been tried before
				if ( $text === false && isset( $this->prefetch ) && $prefetchNotTried ) {
					$prefetchNotTried = false;
					$tryIsPrefetch = true;
					$text = $this->prefetch->prefetch( intval( $this->thisPage ),
						intval( $this->thisRev ) );
					if ( $text === null ) {
						$text = false;
					}
				}

				if ( $text === false ) {
					// Fallback to asking the database
					$tryIsPrefetch = false;
					if ( $this->spawn ) {
						$text = $this->getTextSpawned( $id );
					} else {
						$text = $this->getTextDb( $id );
					}

					// No more checks for texts from DB for now.
					// If we received something that is not false,
					// We treat it as good text, regardless of whether it actually is or is not
					if ( $text !== false ) {
						return $text;
					}
				}

				if ( $text === false ) {
					throw new MWException( "Generic error while obtaining text for id " . $id );
				}

				// We received a good candidate for the text of $id via some method

				// Step 2: Checking for plausibility and return the text if it is
				// plausible
				$revID = intval( $this->thisRev );
				if ( !isset( $this->db ) ) {
					throw new MWException( "No database available" );
				}

				$revLength = strlen( $text );
				if ( $wgContentHandlerUseDB ) {
					$row = $this->db->selectRow(
						'revision',
						array( 'rev_len', 'rev_content_model' ),
						array( 'rev_id' => $revID ),
						__METHOD__
					);
					if ( $row ) {
						// only check the length for the wikitext content handler,
						// it's a wasted (and failed) check otherwise
						if ( $row->rev_content_model == CONTENT_MODEL_WIKITEXT ) {
							$revLength = $row->rev_len;
						}
					}
				} else {
					$revLength = $this->db->selectField(
						'revision',
						'rev_len',
						array( 'rev_id' => $revID ),
						__METHOD__
					);
				}

				if ( strlen( $text ) == $revLength ) {
					if ( $tryIsPrefetch ) {
						$this->prefetchCount++;
					}

					return $text;
				}

				$text = false;
				throw new MWException( "Received text is unplausible for id " . $id );
			} catch ( Exception $e ) {
				$msg = "getting/checking text " . $id . " failed (" . $e->getMessage() . ")";
				if ( $failures + 1 < $this->maxFailures ) {
					$msg .= " (Will retry " . ( $this->maxFailures - $failures - 1 ) . " more times)";
				}
				$this->progress( $msg );
			}

			// Something went wrong; we did not get a text that was plausible :(
			$failures++;

			// A failure in a prefetch hit does not warrant resetting db connection etc.
			if ( !$tryIsPrefetch ) {
				// After backing off for some time, we try to reboot the whole process as
				// much as possible to not carry over failures from one part to the other
				// parts
				sleep( $this->failureTimeout );
				try {
					$this->rotateDb();
					if ( $this->spawn ) {
						$this->closeSpawn();
						$this->openSpawn();
					}
				} catch ( Exception $e ) {
					$this->progress( "Rebooting getText infrastructure failed (" . $e->getMessage() . ")" .
						" Trying to continue anyways" );
				}
			}
		}

		// Retrieving a good text for $id failed (at least) maxFailures times.
		// We abort for this $id.

		// Restoring the consecutive failures, and maybe aborting, if the dump
		// is too broken.
		$consecutiveFailedTextRetrievals = $oldConsecutiveFailedTextRetrievals + 1;
		if ( $consecutiveFailedTextRetrievals > $this->maxConsecutiveFailedTextRetrievals ) {
			throw new MWException( "Graceful storage failure" );
		}

		return "";
	}

	/**
	 * Fetch a text row directly through $this->db.
	 *
	 * May throw a database error if, say, the server dies during query.
	 * @param int $id Id of the row in the text table (old_id)
	 * @return bool|string Normalized text with "\r" removed, or false if the
	 *  text could not be extracted from the row
	 * @throws MWException
	 */
	private function getTextDb( $id ) {
		global $wgContLang;
		if ( !isset( $this->db ) ) {
			throw new MWException( __METHOD__ . ": No database available" );
		}
		$row = $this->db->selectRow( 'text',
			array( 'old_text', 'old_flags' ),
			array( 'old_id' => $id ),
			__METHOD__ );
		$text = Revision::getRevisionText( $row );
		if ( $text === false ) {
			return false;
		}
		$stripped = str_replace( "\r", "", $text );

		return $wgContLang->normalize( $stripped );
	}

	/**
	 * Fetch a text through the fetchText.php subprocess, starting the
	 * subprocess first if it is not running yet. PHP warnings are suppressed
	 * for the duration of the call.
	 *
	 * @param int $id Text id to fetch
	 * @return bool|string The text, or false on failure
	 */
	private function getTextSpawned( $id ) {
		wfSuppressWarnings();
		if ( !$this->spawnProc ) {
			// First time?
			$this->openSpawn();
		}
		$text = $this->getTextSpawnedOnce( $id );
		wfRestoreWarnings();

		return $text;
	}

	/**
	 * Start the fetchText.php subprocess and keep the pipes to its stdin and
	 * stdout. If a multiversion MWScript.php wrapper exists next to $IP, the
	 * script is launched through it. The child's stderr goes to /dev/null.
	 *
	 * @return bool Whether the subprocess could be started
	 */
	public function openSpawn() {
		global $IP;

		if ( file_exists( "$IP/../multiversion/MWScript.php" ) ) {
			$args = array(
				$this->php,
				"$IP/../multiversion/MWScript.php",
				"fetchText.php",
				'--wiki', wfWikiID() );
		} else {
			$args = array(
				$this->php,
				"$IP/maintenance/fetchText.php",
				'--wiki', wfWikiID() );
		}
		$cmd = implode( " ", array_map( 'wfEscapeShellArg', $args ) );

		$spec = array(
			0 => array( "pipe", "r" ),
			1 => array( "pipe", "w" ),
			2 => array( "file", "/dev/null", "a" ) );
		$pipes = array();

		$this->progress( "Spawning database subprocess: $cmd" );
		$this->spawnProc = proc_open( $cmd, $spec, $pipes );
		if ( !$this->spawnProc ) {
			$this->progress( "Subprocess spawn failed." );

			return false;
		}
		$this->spawnWrite = $pipes[0]; // -> stdin
		$this->spawnRead = $pipes[1]; // <- stdout

		return true;
	}

	/**
	 * Close the pipes to the subprocess and then the subprocess itself.
	 * Safe to call when nothing is open.
	 */
	private function closeSpawn() {
		wfSuppressWarnings();
		if ( $this->spawnRead ) {
			fclose( $this->spawnRead );
		}
		$this->spawnRead = false;
		if ( $this->spawnWrite ) {
			fclose( $this->spawnWrite );
		}
		$this->spawnWrite = false;
		if ( $this->spawnErr ) {
			fclose( $this->spawnErr );
		}
		$this->spawnErr = false;
		if ( $this->spawnProc ) {
			// The handle comes from proc_open(), so it must be released with
			// proc_close(); pclose() only accepts popen() handles.
			proc_close( $this->spawnProc );
		}
		$this->spawnProc = false;
		wfRestoreWarnings();
	}

	/**
	 * Do one request/response round trip with the subprocess: write the id,
	 * then read back the echoed id, the byte length and the text itself.
	 *
	 * @param int $id Text id to fetch
	 * @return bool|string Normalized text with "\r" removed, or false on any
	 *  I/O failure, id mismatch, negative length or short read
	 */
	private function getTextSpawnedOnce( $id ) {
		global $wgContLang;

		$ok = fwrite( $this->spawnWrite, "$id\n" );
		// $this->progress( ">> $id" );
		if ( !$ok ) {
			return false;
		}

		$ok = fflush( $this->spawnWrite );
		// $this->progress( ">> [flush]" );
		if ( !$ok ) {
			return false;
		}

		// check that the text id they are sending is the one we asked for
		// this avoids out of sync revision text errors we have encountered in the past
		$newId = fgets( $this->spawnRead );
		if ( $newId === false ) {
			return false;
		}
		if ( $id != intval( $newId ) ) {
			return false;
		}

		$len = fgets( $this->spawnRead );
		// $this->progress( "<< " . trim( $len ) );
		if ( $len === false ) {
			return false;
		}

		$nbytes = intval( $len );
		// actual error, not zero-length text
		if ( $nbytes < 0 ) {
			return false;
		}

		$text = "";

		// Subprocess may not send everything at once, we have to loop.
		while ( $nbytes > strlen( $text ) ) {
			$buffer = fread( $this->spawnRead, $nbytes - strlen( $text ) );
			// At end of stream fread() returns "" rather than false; without
			// the EOF check a dead subprocess would make us spin forever.
			if ( $buffer === false || ( $buffer === '' && feof( $this->spawnRead ) ) ) {
				break;
			}
			$text .= $buffer;
		}

		$gotbytes = strlen( $text );
		if ( $gotbytes != $nbytes ) {
			$this->progress( "Expected $nbytes bytes from database subprocess, got $gotbytes " );

			return false;
		}

		// Do normalization in the dump thread...
		$stripped = str_replace( "\r", "", $text );

		return $wgContLang->normalize( $stripped );
	}

	/**
	 * XML parser callback for an opening tag. Flushes buffered output at
	 * revision/page boundaries and, for <text id="...">, fetches the text and
	 * emits it as the element's content.
	 *
	 * @param resource $parser
	 * @param string $name Element name
	 * @param array $attribs Element attributes
	 */
	public function startElement( $parser, $name, $attribs ) {
		$this->checkpointJustWritten = false;

		$this->clearOpenElement( null );
		$this->lastName = $name;

		if ( $name == 'revision' ) {
			$this->state = $name;
			$this->egress->writeOpenPage( null, $this->buffer );
			$this->buffer = "";
		} elseif ( $name == 'page' ) {
			$this->state = $name;
			if ( $this->atStart ) {
				$this->egress->writeOpenStream( $this->buffer );
				$this->buffer = "";
				$this->atStart = false;
			}
		}

		if ( $name == "text" && isset( $attribs['id'] ) ) {
			$text = $this->getText( $attribs['id'] );
			// The stub's attributes (the id) are replaced, not carried over
			$this->openElement = array( $name, array( 'xml:space' => 'preserve' ) );
			if ( strlen( $text ) > 0 ) {
				$this->characterData( $parser, $text );
			}
		} else {
			$this->openElement = array( $name, $attribs );
		}
	}

	/**
	 * XML parser callback for a closing tag. Hands finished revisions and
	 * pages to the egress filter and, if the time limit was exceeded, closes
	 * the current output files as a checkpoint after a completed page.
	 *
	 * @param resource $parser
	 * @param string $name Element name
	 */
	public function endElement( $parser, $name ) {
		$this->checkpointJustWritten = false;

		if ( $this->openElement ) {
			$this->clearOpenElement( "" );
		} else {
			$this->buffer .= "</$name>";
		}

		if ( $name == 'revision' ) {
			$this->egress->writeRevision( null, $this->buffer );
			$this->buffer = "";
			$this->thisRev = "";
		} elseif ( $name == 'page' ) {
			if ( !$this->firstPageWritten ) {
				$this->firstPageWritten = trim( $this->thisPage );
			}
			$this->lastPageWritten = trim( $this->thisPage );
			if ( $this->timeExceeded ) {
				$this->egress->writeClosePage( $this->buffer );
				// nasty hack, we can't just write the chardata after the
				// page tag, it will include leading blanks from the next line
				$this->egress->sink->write( "\n" );

				$this->buffer = $this->xmlwriterobj->closeStream();
				$this->egress->writeCloseStream( $this->buffer );

				$this->buffer = "";
				$this->thisPage = "";

				// this could be more than one file if we had more than one output arg
				$filenameList = (array)$this->egress->getFilenames();
				$newFilenames = $this->buildCheckpointFilenames(
					$filenameList, $this->firstPageWritten, $this->lastPageWritten );
				$this->egress->closeRenameAndReopen( $newFilenames );
				$this->buffer = $this->xmlwriterobj->openStream();
				$this->timeExceeded = false;
				$this->timeOfCheckpoint = $this->lastTime;
				$this->firstPageWritten = false;
				$this->checkpointJustWritten = true;
			} else {
				$this->egress->writeClosePage( $this->buffer );
				$this->buffer = "";
				$this->thisPage = "";
			}
		} elseif ( $name == 'mediawiki' ) {
			$this->egress->writeCloseStream( $this->buffer );
			$this->buffer = "";
		}
	}

	/**
	 * XML parser callback for character data. Collects the current page and
	 * revision id and appends the escaped data to the output buffer.
	 *
	 * @param resource $parser
	 * @param string $data
	 */
	public function characterData( $parser, $data ) {
		$this->clearOpenElement( null );
		if ( $this->lastName == "id" ) {
			if ( $this->state == "revision" ) {
				$this->thisRev .= $data;
			} elseif ( $this->state == "page" ) {
				$this->thisPage .= $data;
			}
		}
		// have to skip the newline left over from closepagetag line of
		// end of checkpoint files. nasty hack!!
		if ( $this->checkpointJustWritten ) {
			if ( $data !== '' && $data[0] == "\n" ) {
				$data = substr( $data, 1 );
			}
			$this->checkpointJustWritten = false;
		}
		$this->buffer .= htmlspecialchars( $data );
	}

	/**
	 * Write the pending opening tag, if any, to the output buffer.
	 *
	 * @param string|null $style Passed to Xml::element() as the element's
	 *  contents argument
	 */
	public function clearOpenElement( $style ) {
		if ( $this->openElement ) {
			$this->buffer .= Xml::element( $this->openElement[0], $this->openElement[1], $style );
			$this->openElement = false;
		}
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |