MediaWiki
REL1_23
|
<?php

/**
 * Reads a MediaWiki XML dump with XMLReader and hands the pages, revisions,
 * uploads and log items it finds to configurable callbacks.
 */
class WikiImporter {
	private $reader = null;
	private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
	private $mSiteInfoCallback, $mTargetNamespace, $mTargetRootPage, $mPageOutCallback;
	private $mNoticeCallback, $mDebug;
	private $mImportUploads, $mImageBasePath;
	private $mNoUpdates = false;

	/**
	 * Opens the dump through the "uploadsource" stream wrapper and installs
	 * the default revision, upload, log item and page-out callbacks.
	 *
	 * @param object $source Import source; it is registered with
	 *   UploadSourceAdapter, which calls atEnd() and readChunk() on it
	 */
	function __construct( $source ) {
		$this->reader = new XMLReader();

		// Registering an already known protocol raises a warning, so only
		// register the wrapper the first time an importer is constructed.
		if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
			stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
		}
		$id = UploadSourceAdapter::registerSource( $source );
		if ( defined( 'LIBXML_PARSEHUGE' ) ) {
			$this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
		} else {
			$this->reader->open( "uploadsource://$id" );
		}

		// Default callbacks
		$this->setRevisionCallback( array( $this, "importRevision" ) );
		$this->setUploadCallback( array( $this, 'importUpload' ) );
		$this->setLogItemCallback( array( $this, 'importLogItem' ) );
		$this->setPageOutCallback( array( $this, 'finishImportPage' ) );
	}

	/**
	 * Logs an XML error. Despite the name, nothing is thrown here.
	 *
	 * @param string $err
	 */
	private function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
		wfDebug( "WikiImporter XML error: $err\n" );
	}

	/**
	 * Writes a message to the debug log, but only when debugging is enabled.
	 *
	 * @param string $data
	 */
	private function debug( $data ) {
		if ( $this->mDebug ) {
			wfDebug( "IMPORT: $data\n" );
		}
	}

	/**
	 * Writes a message to the debug log regardless of the debug setting.
	 *
	 * @param string $data
	 */
	private function warn( $data ) {
		wfDebug( "IMPORT: $data\n" );
	}

	/**
	 * Reports a message to the notice callback, or echoes it when no
	 * callable notice callback is set.
	 *
	 * @param string $msg Message key; any further arguments are passed on
	 *   as message parameters
	 */
	private function notice( $msg /*, $param, ...*/ ) {
		$params = func_get_args();
		array_shift( $params );

		if ( is_callable( $this->mNoticeCallback ) ) {
			call_user_func( $this->mNoticeCallback, $msg, $params );
		} else { # No ImportReporter -> CLI
			echo wfMessage( $msg, $params )->text() . "\n";
		}
	}

	/**
	 * Turns debug logging on or off.
	 *
	 * @param bool $debug
	 */
	function setDebug( $debug ) {
		$this->mDebug = $debug;
	}

	/**
	 * Sets the flag that is copied onto every WikiRevision via setNoUpdates().
	 *
	 * @param bool $noupdates
	 */
	function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * Sets the callback used by notice().
	 *
	 * @param callable $callback Called with the message key and an array of parameters
	 * @return mixed Whatever wfSetVar() returns (presumably the previous callback)
	 */
	public function setNoticeCallback( $callback ) {
		return wfSetVar( $this->mNoticeCallback, $callback );
	}

	/**
	 * Sets the callback invoked when a page title has been read.
	 *
	 * @param callable $callback
	 * @return callable|null The previous callback
	 */
	public function setPageCallback( $callback ) {
		$previous = $this->mPageCallback;
		$this->mPageCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked once a <page> element has been processed.
	 * It receives the title, the original title, the revision count, the
	 * successful revision count and the page info array.
	 *
	 * @param callable $callback
	 * @return callable|null The previous callback
	 */
	public function setPageOutCallback( $callback ) {
		$previous = $this->mPageOutCallback;
		$this->mPageOutCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for every revision.
	 *
	 * @param callable $callback Called with the WikiRevision and this importer
	 * @return callable|null The previous callback
	 */
	public function setRevisionCallback( $callback ) {
		$previous = $this->mRevisionCallback;
		$this->mRevisionCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for every upload.
	 *
	 * @param callable $callback Called with the WikiRevision
	 * @return callable|null The previous callback
	 */
	public function setUploadCallback( $callback ) {
		$previous = $this->mUploadCallback;
		$this->mUploadCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for every log item.
	 *
	 * @param callable $callback Called with the WikiRevision and this importer
	 * @return callable|null The previous callback
	 */
	public function setLogItemCallback( $callback ) {
		$previous = $this->mLogItemCallback;
		$this->mLogItemCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the site info callback. Note that handleSiteInfo() throws as soon
	 * as a <siteinfo> element is met while a callback is set.
	 *
	 * @param callable $callback
	 * @return callable|null The previous callback
	 */
	public function setSiteInfoCallback( $callback ) {
		$previous = $this->mSiteInfoCallback;
		$this->mSiteInfoCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the namespace all pages are imported into.
	 *
	 * @param int|null $namespace Null keeps the namespaces found in the dump
	 * @return bool False when $namespace is negative, true otherwise
	 */
	public function setTargetNamespace( $namespace ) {
		if ( is_null( $namespace ) ) {
			// Don't override namespaces
			$this->mTargetNamespace = null;
		} elseif ( $namespace >= 0 ) {
			// @todo FIXME: Check for validity
			$this->mTargetNamespace = intval( $namespace );
		} else {
			return false;
		}

		return true;
	}

	/**
	 * Sets the page below which all pages are imported as subpages.
	 *
	 * @param string|null $rootpage Null clears the root page; an empty
	 *   string leaves the current setting untouched
	 * @return Status Fatal when the root page is not a usable title or its
	 *   namespace has no subpages
	 */
	public function setTargetRootPage( $rootpage ) {
		$status = Status::newGood();
		if ( is_null( $rootpage ) ) {
			// No rootpage
			$this->mTargetRootPage = null;
		} elseif ( $rootpage !== '' ) {
			$rootpage = rtrim( $rootpage, '/' ); //avoid double slashes
			$title = Title::newFromText(
				$rootpage,
				!is_null( $this->mTargetNamespace ) ? $this->mTargetNamespace : NS_MAIN
			);
			if ( !$title || $title->isExternal() ) {
				$status->fatal( 'import-rootpage-invalid' );
			} else {
				if ( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
					global $wgContLang;

					$displayNSText = $title->getNamespace() == NS_MAIN
						? wfMessage( 'blanknamespace' )->text()
						: $wgContLang->getNsText( $title->getNamespace() );
					$status->fatal( 'import-rootpage-nosubpage', $displayNSText );
				} else {
					// set namespace to 'all', so the namespace check in processTitle() can pass
					$this->setTargetNamespace( null );
					$this->mTargetRootPage = $title->getPrefixedDBkey();
				}
			}
		}
		return $status;
	}

	/**
	 * Sets the directory in which handleUpload() looks up the "rel" path of an upload.
	 *
	 * @param string $dir
	 */
	public function setImageBasePath( $dir ) {
		$this->mImageBasePath = $dir;
	}

	/**
	 * Sets whether <upload> elements are passed on to the upload callback.
	 *
	 * @param bool $import
	 */
	public function setImportUploads( $import ) {
		$this->mImportUploads = $import;
	}

	/**
	 * Default revision callback: imports one revision inside a deadlock loop.
	 *
	 * @param WikiRevision $revision
	 * @return mixed False when the revision was rejected or could not be
	 *   unserialized, otherwise the result of the deadlock loop
	 */
	public function importRevision( $revision ) {
		try {
			// getContent() is first called here; it presumably can raise the
			// same serialization exception as the import itself (TODO confirm),
			// so the check is kept inside the try block to report it as a notice.
			if ( !$revision->getContent()->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
				$this->notice( 'import-error-bad-location',
					$revision->getTitle()->getPrefixedText(),
					$revision->getID(),
					$revision->getModel(),
					$revision->getFormat() );

				return false;
			}

			$dbw = wfGetDB( DB_MASTER );
			return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
		} catch ( MWContentSerializationException $ex ) {
			$this->notice( 'import-error-unserialize',
				$revision->getTitle()->getPrefixedText(),
				$revision->getID(),
				$revision->getModel(),
				$revision->getFormat() );
		}

		return false;
	}

	/**
	 * Default log item callback: imports one log item inside a deadlock loop.
	 *
	 * @param WikiRevision $rev
	 * @return mixed Result of the deadlock loop
	 */
	public function importLogItem( $rev ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
	}

	/**
	 * Default upload callback: imports one upload inside a deadlock loop.
	 *
	 * @param WikiRevision $revision
	 * @return mixed Result of the deadlock loop
	 */
	public function importUpload( $revision ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
	}

	/**
	 * Default page-out callback: runs the AfterImportPage hook with all
	 * arguments it was given.
	 *
	 * @param Title|null $title
	 * @param Title|null $origTitle
	 * @param int $revCount
	 * @param int $sRevCount
	 * @param array $pageInfo
	 * @return mixed Result of wfRunHooks()
	 */
	public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
		$args = func_get_args();
		return wfRunHooks( 'AfterImportPage', $args );
	}

	/**
	 * Alternate revision callback that only writes the revision to the debug log.
	 *
	 * @param WikiRevision $revision
	 */
	public function debugRevisionHandler( &$revision ) {
		$this->debug( "Got revision:" );
		if ( is_object( $revision->title ) ) {
			$this->debug( "-- Title: " . $revision->title->getPrefixedText() );
		} else {
			$this->debug( "-- Title: <invalid>" );
		}
		$this->debug( "-- User: " . $revision->user_text );
		$this->debug( "-- Timestamp: " . $revision->timestamp );
		$this->debug( "-- Comment: " . $revision->comment );
		$this->debug( "-- Text: " . $revision->text );
	}

	/**
	 * Invokes the page callback, if one is set.
	 *
	 * @param array|bool $title Result of processTitle()
	 */
	function pageCallback( $title ) {
		if ( isset( $this->mPageCallback ) ) {
			call_user_func( $this->mPageCallback, $title );
		}
	}

	/**
	 * Invokes the page-out callback, if one is set, with all arguments given.
	 *
	 * @param Title|null $title
	 * @param Title|null $origTitle
	 * @param int $revCount
	 * @param int $sucCount
	 * @param array $pageInfo
	 */
	private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
		if ( isset( $this->mPageOutCallback ) ) {
			$args = func_get_args();
			call_user_func_array( $this->mPageOutCallback, $args );
		}
	}

	/**
	 * Invokes the revision callback with the revision and this importer.
	 *
	 * @param WikiRevision $revision
	 * @return mixed The callback's result, or false when no callback is set
	 */
	private function revisionCallback( $revision ) {
		if ( isset( $this->mRevisionCallback ) ) {
			return call_user_func_array( $this->mRevisionCallback,
				array( $revision, $this ) );
		} else {
			return false;
		}
	}

	/**
	 * Invokes the log item callback with the revision and this importer.
	 *
	 * @param WikiRevision $revision
	 * @return mixed The callback's result, or false when no callback is set
	 */
	private function logItemCallback( $revision ) {
		if ( isset( $this->mLogItemCallback ) ) {
			return call_user_func_array( $this->mLogItemCallback,
				array( $revision, $this ) );
		} else {
			return false;
		}
	}

	/**
	 * Collects the text of the current element, advancing the reader up to
	 * the next end tag.
	 *
	 * @return string Empty for an empty element; also empty when the input
	 *   ends before an end tag is found (the reader is closed in that case)
	 */
	private function nodeContents() {
		if ( $this->reader->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->reader->read() ) {
			switch ( $this->reader->nodeType ) {
				case XmlReader::TEXT:
				case XmlReader::SIGNIFICANT_WHITESPACE:
					$buffer .= $this->reader->value;
					break;
				case XmlReader::END_ELEMENT:
					return $buffer;
			}
		}

		$this->reader->close();
		return '';
	}

	# --------------

	/**
	 * Debugging aid: dumps the type name, name and value of the current node.
	 */
	private function dumpElement() {
		static $lookup = null;
		if ( !$lookup ) {
			$xmlReaderConstants = array(
				"NONE",
				"ELEMENT",
				"ATTRIBUTE",
				"TEXT",
				"CDATA",
				"ENTITY_REF",
				"ENTITY",
				"PI",
				"COMMENT",
				"DOC",
				"DOC_TYPE",
				"DOC_FRAGMENT",
				"NOTATION",
				"WHITESPACE",
				"SIGNIFICANT_WHITESPACE",
				"END_ELEMENT",
				"END_ENTITY",
				"XML_DECLARATION",
			);
			$lookup = array();

			// Map each node type number back to its constant name
			foreach ( $xmlReaderConstants as $name ) {
				$lookup[constant( "XmlReader::$name" )] = $name;
			}
		}

		print var_dump(
			$lookup[$this->reader->nodeType],
			$this->reader->name,
			$this->reader->value
		) . "\n\n";
	}

	/**
	 * Runs the import over the whole dump.
	 *
	 * @throws MWException When the document does not start with <mediawiki>
	 * @return bool Always true
	 */
	public function doImport() {
		// Calls to reader->read need to be wrapped in calls to
		// libxml_disable_entity_loader() to avoid local file
		// inclusion attacks (bug 46932).
		$oldDisable = libxml_disable_entity_loader( true );

		try {
			$this->readDump();
		} catch ( Exception $ex ) {
			// Do not leave the process-wide setting changed when a handler
			// or callback throws.
			libxml_disable_entity_loader( $oldDisable );
			throw $ex;
		}

		libxml_disable_entity_loader( $oldDisable );
		return true;
	}

	/**
	 * Checks the root element and dispatches every top-level element of the
	 * dump to its handler.
	 *
	 * @throws MWException When the document does not start with <mediawiki>
	 */
	private function readDump() {
		$this->reader->read();

		if ( $this->reader->name != 'mediawiki' ) {
			throw new MWException( "Expected <mediawiki> tag, got " .
				$this->reader->name );
		}
		$this->debug( "<mediawiki> tag is correct." );

		$this->debug( "Starting primary dump processing loop." );

		$keepReading = $this->reader->read();
		$skip = false;
		while ( $keepReading ) {
			$tag = $this->reader->name;
			$type = $this->reader->nodeType;

			if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
				// Do nothing
			} elseif ( $tag == 'mediawiki' && $type == XmlReader::END_ELEMENT ) {
				break;
			} elseif ( $tag == 'siteinfo' ) {
				$this->handleSiteInfo();
			} elseif ( $tag == 'page' ) {
				$this->handlePage();
			} elseif ( $tag == 'logitem' ) {
				$this->handleLogItem();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled top-level XML tag $tag" );

				$skip = true;
			}

			if ( $skip ) {
				// Jump over the whole subtree of the unhandled element
				$keepReading = $this->reader->next();
				$skip = false;
				$this->debug( "Skip" );
			} else {
				$keepReading = $this->reader->read();
			}
		}
	}

	/**
	 * Skips the <siteinfo> element.
	 *
	 * @throws MWException When a site info callback has been set
	 * @return bool True when the element was skipped
	 */
	private function handleSiteInfo() {
		// Site info is useful, but not actually used for dump imports.
		// Includes a quick short-circuit to save performance.
		if ( ! $this->mSiteInfoCallback ) {
			$this->reader->next();
			return true;
		}
		throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
	}

	/**
	 * Reads one <logitem> element and passes the collected data to processLogItem().
	 */
	private function handleLogItem() {
		$this->debug( "Enter log item handler." );
		$logInfo = array();

		// Fields that can just be stuffed in the pageInfo object
		$normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
			'logtitle', 'params' );

		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
					$this->reader->name == 'logitem' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( !wfRunHooks( 'ImportHandleLogItemXMLTag', array(
				$this, $logInfo
			) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$logInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$logInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled log-item XML tag $tag" );
			}
		}

		$this->processLogItem( $logInfo );
	}

	/**
	 * Builds a WikiRevision from log item data and passes it to the log item callback.
	 *
	 * @param array $logInfo Reads the keys id, type, action, timestamp,
	 *   params and logtitle, and optionally comment and contributor
	 * @return mixed Result of the log item callback
	 */
	private function processLogItem( $logInfo ) {
		$revision = new WikiRevision;

		$revision->setID( $logInfo['id'] );
		$revision->setType( $logInfo['type'] );
		$revision->setAction( $logInfo['action'] );
		$revision->setTimestamp( $logInfo['timestamp'] );
		$revision->setParams( $logInfo['params'] );
		$revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
		$revision->setNoUpdates( $this->mNoUpdates );

		if ( isset( $logInfo['comment'] ) ) {
			$revision->setComment( $logInfo['comment'] );
		}

		if ( isset( $logInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $logInfo['contributor']['ip'] );
		}
		if ( isset( $logInfo['contributor']['username'] ) ) {
			$revision->setUserName( $logInfo['contributor']['username'] );
		}

		return $this->logItemCallback( $revision );
	}

	/**
	 * Reads one <page> element, dispatching its revisions and uploads, and
	 * finally invokes the page-out callback.
	 */
	private function handlePage() {
		// Handle page data.
		$this->debug( "Enter page handler." );
		$pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );

		// Fields that can just be stuffed in the pageInfo object
		$normalFields = array( 'title', 'id', 'redirect', 'restrictions' );

		$skip = false;
		$badTitle = false;
		// Stays null unless a <title> is processed successfully
		$origTitle = null;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
					$this->reader->name == 'page' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( $badTitle ) {
				// The title is invalid, bail out of this page
				$skip = true;
			} elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this,
						&$pageInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$pageInfo[$tag] = $this->nodeContents();
				if ( $tag == 'title' ) {
					$title = $this->processTitle( $pageInfo['title'] );

					// The page callback is invoked for rejected titles as well
					$this->pageCallback( $title );

					if ( is_array( $title ) ) {
						list( $pageInfo['_title'], $origTitle ) = $title;
					} else {
						// processTitle() returned false: record explicit nulls
						// instead of destructuring a boolean
						$pageInfo['_title'] = null;
						$origTitle = null;
						$badTitle = true;
						$skip = true;
					}
				}
			} elseif ( $tag == 'revision' ) {
				$this->handleRevision( $pageInfo );
			} elseif ( $tag == 'upload' ) {
				$this->handleUpload( $pageInfo );
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled page XML tag $tag" );
				$skip = true;
			}
		}

		// The callback still runs for pages without a usable title; it then
		// receives null for both titles.
		$this->pageOutCallback(
			isset( $pageInfo['_title'] ) ? $pageInfo['_title'] : null,
			$origTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'],
			$pageInfo );
	}

	/**
	 * Reads one <revision> element and updates the revision counters of the page.
	 *
	 * @param array $pageInfo Page data; revisionCount and
	 *   successfulRevisionCount are incremented in place
	 */
	private function handleRevision( &$pageInfo ) {
		$this->debug( "Enter revision handler" );
		$revisionInfo = array();

		$normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
					$this->reader->name == 'revision' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( !wfRunHooks( 'ImportHandleRevisionXMLTag', array(
				$this, $pageInfo, $revisionInfo
			) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$revisionInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$revisionInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled revision XML tag $tag" );
				$skip = true;
			}
		}

		$pageInfo['revisionCount']++;
		if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
			$pageInfo['successfulRevisionCount']++;
		}
	}

	/**
	 * Builds a WikiRevision from revision data and passes it to the revision callback.
	 *
	 * @param array $pageInfo Must contain the target title under "_title"
	 * @param array $revisionInfo Fields read from the <revision> element
	 * @return mixed Result of the revision callback
	 */
	private function processRevision( $pageInfo, $revisionInfo ) {
		$revision = new WikiRevision;

		if ( isset( $revisionInfo['id'] ) ) {
			$revision->setID( $revisionInfo['id'] );
		}
		if ( isset( $revisionInfo['text'] ) ) {
			$revision->setText( $revisionInfo['text'] );
		}
		if ( isset( $revisionInfo['model'] ) ) {
			$revision->setModel( $revisionInfo['model'] );
		}
		if ( isset( $revisionInfo['format'] ) ) {
			$revision->setFormat( $revisionInfo['format'] );
		}
		$revision->setTitle( $pageInfo['_title'] );

		if ( isset( $revisionInfo['timestamp'] ) ) {
			$revision->setTimestamp( $revisionInfo['timestamp'] );
		} else {
			$revision->setTimestamp( wfTimestampNow() );
		}

		if ( isset( $revisionInfo['comment'] ) ) {
			$revision->setComment( $revisionInfo['comment'] );
		}

		// The mere presence of the <minor> element marks a minor edit
		if ( isset( $revisionInfo['minor'] ) ) {
			$revision->setMinor( true );
		}
		if ( isset( $revisionInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $revisionInfo['contributor']['ip'] );
		}
		if ( isset( $revisionInfo['contributor']['username'] ) ) {
			$revision->setUserName( $revisionInfo['contributor']['username'] );
		}
		$revision->setNoUpdates( $this->mNoUpdates );

		return $this->revisionCallback( $revision );
	}

	/**
	 * Reads one <upload> element and, when upload import is enabled, passes
	 * the collected data to processUpload().
	 *
	 * @param array $pageInfo
	 * @return mixed Result of processUpload(), or null when uploads are not imported
	 */
	private function handleUpload( &$pageInfo ) {
		$this->debug( "Enter upload handler" );
		$uploadInfo = array();

		$normalFields = array( 'timestamp', 'comment', 'filename', 'text',
			'src', 'size', 'sha1base36', 'archivename', 'rel' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
					$this->reader->name == 'upload' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( !wfRunHooks( 'ImportHandleUploadXMLTag', array(
				$this, $pageInfo
			) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$uploadInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$uploadInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag == 'contents' ) {
				$contents = $this->nodeContents();
				$encoding = $this->reader->getAttribute( 'encoding' );
				if ( $encoding === 'base64' ) {
					// Embedded file data is written to a temporary file
					$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
					$uploadInfo['isTempSrc'] = true;
				}
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled upload XML tag $tag" );
				$skip = true;
			}
		}

		// A file found below the image base path replaces any embedded contents
		if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
			$path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
			if ( file_exists( $path ) ) {
				$uploadInfo['fileSrc'] = $path;
				$uploadInfo['isTempSrc'] = false;
			}
		}

		if ( $this->mImportUploads ) {
			return $this->processUpload( $pageInfo, $uploadInfo );
		}
	}

	/**
	 * Writes data to a new temporary file.
	 *
	 * @param string $contents
	 * @return string Name of the temporary file
	 */
	private function dumpTemp( $contents ) {
		$filename = tempnam( wfTempDir(), 'importupload' );
		file_put_contents( $filename, $contents );
		return $filename;
	}

	/**
	 * Builds a WikiRevision from upload data and passes it to the upload callback.
	 *
	 * @param array $pageInfo Reads the keys _title and id
	 * @param array $uploadInfo Reads the keys timestamp, filename, src, size
	 *   and comment, plus several optional ones
	 * @return mixed Result of the upload callback
	 */
	private function processUpload( $pageInfo, $uploadInfo ) {
		$revision = new WikiRevision;
		$text = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';

		$revision->setTitle( $pageInfo['_title'] );
		$revision->setID( $pageInfo['id'] );
		$revision->setTimestamp( $uploadInfo['timestamp'] );
		$revision->setText( $text );
		$revision->setFilename( $uploadInfo['filename'] );
		if ( isset( $uploadInfo['archivename'] ) ) {
			$revision->setArchiveName( $uploadInfo['archivename'] );
		}
		$revision->setSrc( $uploadInfo['src'] );
		if ( isset( $uploadInfo['fileSrc'] ) ) {
			$revision->setFileSrc( $uploadInfo['fileSrc'],
				!empty( $uploadInfo['isTempSrc'] ) );
		}
		if ( isset( $uploadInfo['sha1base36'] ) ) {
			$revision->setSha1Base36( $uploadInfo['sha1base36'] );
		}
		$revision->setSize( intval( $uploadInfo['size'] ) );
		$revision->setComment( $uploadInfo['comment'] );

		if ( isset( $uploadInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $uploadInfo['contributor']['ip'] );
		}
		if ( isset( $uploadInfo['contributor']['username'] ) ) {
			$revision->setUserName( $uploadInfo['contributor']['username'] );
		}
		$revision->setNoUpdates( $this->mNoUpdates );

		return call_user_func( $this->mUploadCallback, $revision );
	}

	/**
	 * Reads one <contributor> element.
	 *
	 * @return array Whichever of the keys id, ip and username were present
	 */
	private function handleContributor() {
		$fields = array( 'id', 'ip', 'username' );
		$info = array();

		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
					$this->reader->name == 'contributor' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( in_array( $tag, $fields ) ) {
				$info[$tag] = $this->nodeContents();
			}
		}

		return $info;
	}

	/**
	 * Maps a title from the dump to the title it is imported as, applying
	 * the target namespace or root page, and checks that it may be imported.
	 *
	 * @param string $text Title text from the dump
	 * @return array|bool array( target Title, original Title|null ), or
	 *   false after a notice when the page must not be imported
	 */
	private function processTitle( $text ) {
		global $wgCommandLineMode;

		$workTitle = $text;
		$origTitle = Title::newFromText( $workTitle );

		if ( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
			# makeTitleSafe, because $origTitle can have an interwiki (different setting of interwiki map)
			# and then dbKey can begin with a lowercase char
			$title = Title::makeTitleSafe( $this->mTargetNamespace,
				$origTitle->getDBkey() );
		} else {
			if ( !is_null( $this->mTargetRootPage ) ) {
				$workTitle = $this->mTargetRootPage . '/' . $workTitle;
			}
			$title = Title::newFromText( $workTitle );
		}

		if ( is_null( $title ) ) {
			# Invalid page title? Ignore the page
			$this->notice( 'import-error-invalid', $workTitle );
			return false;
		} elseif ( $title->isExternal() ) {
			$this->notice( 'import-error-interwiki', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->canExist() ) {
			$this->notice( 'import-error-special', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->userCan( 'edit' ) && !$wgCommandLineMode ) {
			# Do not import if the importing wiki user cannot edit this page
			$this->notice( 'import-error-edit', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->exists() && !$title->userCan( 'create' ) && !$wgCommandLineMode ) {
			# Do not import if the importing wiki user cannot create this page
			$this->notice( 'import-error-create', $title->getPrefixedText() );
			return false;
		}

		return array( $title, $origTitle );
	}
}

/**
 * Stream wrapper that exposes a registered import source as a read-only
 * stream, so that XMLReader can open it by URL.
 */
class UploadSourceAdapter {
	static $sourceRegistrations = array();

	private $mSource;
	// Start with well-defined values instead of relying on null being
	// treated as an empty string and as zero.
	private $mBuffer = '';
	private $mPosition = 0;

	/**
	 * Registers a source and returns the id under which it can be opened.
	 *
	 * @param object $source Must provide atEnd() and readChunk()
	 * @return string Id to use as the host part of the stream URL
	 */
	static function registerSource( $source ) {
		$id = wfRandomString();

		self::$sourceRegistrations[$id] = $source;

		return $id;
	}

	/**
	 * Opens the source whose id is the host part of $path.
	 *
	 * @param string $path
	 * @param string $mode Unused
	 * @param int $options Unused
	 * @param string &$opened_path Unused
	 * @return bool False when the path has no host part or the id is not registered
	 */
	function stream_open( $path, $mode, $options, &$opened_path ) {
		$url = parse_url( $path );
		if ( !is_array( $url ) || !isset( $url['host'] ) ) {
			// Malformed path: nothing that could be looked up
			return false;
		}
		$id = $url['host'];

		if ( !isset( self::$sourceRegistrations[$id] ) ) {
			return false;
		}

		$this->mSource = self::$sourceRegistrations[$id];

		return true;
	}

	/**
	 * Returns up to $count bytes, reading further chunks from the source
	 * while the buffer holds fewer bytes than requested.
	 *
	 * @param int $count
	 * @return string
	 */
	function stream_read( $count ) {
		$return = '';
		$leave = false;

		while ( !$leave && !$this->mSource->atEnd() &&
				strlen( $this->mBuffer ) < $count ) {
			$read = $this->mSource->readChunk();

			// An empty chunk ends the loop even if the source is not at its end
			if ( !strlen( $read ) ) {
				$leave = true;
			}

			$this->mBuffer .= $read;
		}

		if ( strlen( $this->mBuffer ) ) {
			$return = substr( $this->mBuffer, 0, $count );
			// substr() may yield false once the buffer is used up; keep a string
			$this->mBuffer = (string)substr( $this->mBuffer, $count );
		}

		$this->mPosition += strlen( $return );

		return $return;
	}

	/**
	 * Writing is not supported.
	 *
	 * @param string $data
	 * @return bool Always false
	 */
	function stream_write( $data ) {
		return false;
	}

	/**
	 * @return int Number of bytes handed out so far
	 */
	function stream_tell() {
		return $this->mPosition;
	}

	/**
	 * @return bool True once the source is at its end and no buffered bytes
	 *   are left to hand out
	 */
	function stream_eof() {
		return $this->mSource->atEnd() && !strlen( $this->mBuffer );
	}

	/**
	 * @return array Stat structure with every field set to zero
	 */
	function url_stat() {
		$result = array();

		$result['dev'] = $result[0] = 0;
		$result['ino'] = $result[1] = 0;
		$result['mode'] = $result[2] = 0;
		$result['nlink'] = $result[3] = 0;
		$result['uid'] = $result[4] = 0;
		$result['gid'] = $result[5] = 0;
		$result['rdev'] = $result[6] = 0;
		$result['size'] = $result[7] = 0;
		$result['atime'] = $result[8] = 0;
		$result['mtime'] = $result[9] = 0;
		$result['ctime'] = $result[10] = 0;
		$result['blksize'] = $result[11] = 0;
		$result['blocks'] = $result[12] = 0;

		return $result;
	}
}

/**
 * XMLReader with a helper for collecting the text of the current element.
 */
class XMLReader2 extends XMLReader {

	/**
	 * Collects the text of the current element, advancing up to the next end tag.
	 *
	 * @return string Empty for an empty element; also empty when the input
	 *   ends before an end tag is found (the reader is closed in that case)
	 */
	function nodeContents() {
		if ( $this->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->read() ) {
			switch ( $this->nodeType ) {
				case XmlReader::TEXT:
				case XmlReader::SIGNIFICANT_WHITESPACE:
					$buffer .= $this->value;
					break;
				case XmlReader::END_ELEMENT:
					return $buffer;
			}
		}

		// Ran off the end of the input: return a string, as
		// WikiImporter::nodeContents() does, rather than the result of close()
		$this->close();
		return '';
	}
}

/**
 * Holds the data of a single imported revision, upload or log item.
 */
class WikiRevision {
	var $importer = null;

	var $title = null;
	var $id = 0;
	var $timestamp = "20010115000000";
	var $user = 0;
	var $user_text = "";
	var $model = null;
	var $format = null;
	var $text = "";
	var $content = null;
	var $comment = "";
	var $minor = false;
	var $type = "";
	var $action = "";
	var $params = "";
	var $fileSrc = '';
	var $sha1base36 = false;
	var $isTemp = false;
	var $archiveName = '';
	var $fileIsTemp;
	// Written by setSrc(), setFilename() and setSize(); declared here so
	// they exist before the setters are called
	var $src = null;
	var $filename = null;
	var $size = null;
	private $mNoUpdates = false;

	/**
	 * @param object $title Title object
	 * @throws MWException When $title is not an object
	 */
	function setTitle( $title ) {
		if ( is_object( $title ) ) {
			$this->title = $title;
		} elseif ( is_null( $title ) ) {
			throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
		} else {
			throw new MWException( "WikiRevision given non-object title in import." );
		}
	}

	/**
	 * @param int $id
	 */
	function setID( $id ) {
		$this->id = $id;
	}

	/**
	 * @param string $ts Timestamp in any format wfTimestamp() accepts;
	 *   it is stored in TS_MW format
	 */
	function setTimestamp( $ts ) {
		# 2003-08-05T18:30:02Z
		$this->timestamp = wfTimestamp( TS_MW, $ts );
	}

	/**
	 * @param string $user User name; stored in the same field as the IP
	 */
	function setUsername( $user ) {
		$this->user_text = $user;
	}

	/**
	 * @param string $ip IP address; stored in the same field as the user name
	 */
	function setUserIP( $ip ) {
		$this->user_text = $ip;
	}

	/**
	 * @param string $model Content model
	 */
	function setModel( $model ) {
		$this->model = $model;
	}

	/**
	 * @param string $format Content format
	 */
	function setFormat( $format ) {
		$this->format = $format;
	}

	/**
	 * @param string $text
	 */
	function setText( $text ) {
		$this->text = $text;
	}

	/**
	 * @param string $text
	 */
	function setComment( $text ) {
		$this->comment = $text;
	}

	/**
	 * @param mixed $minor Cast to bool
	 */
	function setMinor( $minor ) {
		$this->minor = (bool)$minor;
	}

	/**
	 * @param mixed $src
	 */
	function setSrc( $src ) {
		$this->src = $src;
	}

	/**
	 * @param string $src Path of the file to import
	 * @param bool $isTemp Whether that file is a temporary one
	 */
	function setFileSrc( $src, $isTemp ) {
		$this->fileSrc = $src;
		$this->fileIsTemp = $isTemp;
	}

	/**
	 * @param string $sha1base36
	 */
	function setSha1Base36( $sha1base36 ) {
		$this->sha1base36 = $sha1base36;
	}

	/**
	 * @param string $filename
	 */
	function setFilename( $filename ) {
		$this->filename = $filename;
	}

	/**
	 * @param string $archiveName
	 */
	function setArchiveName( $archiveName ) {
		$this->archiveName = $archiveName;
	}

	/**
	 * @param mixed $size Converted with intval()
	 */
	function setSize( $size ) {
		$this->size = intval( $size );
	}

	/**
	 * @param string $type
	 */
	function setType( $type ) {
		$this->type = $type;
	}

	/**
	 * @param string $action
	 */
	function setAction( $action ) {
		$this->action = $action;
	}

	/**
	 * @param mixed $params
	 */
	function setParams( $params ) {
		$this->params = $params;
	}

	/**
	 * @param bool $noupdates
	 */
	public function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * @return object|null The title given to setTitle()
	 */
	function getTitle() {
		return $this->title;
	}

	/**
	 * @return int
	 */
	function getID() {
		return $this->id;
	}

	/**
	 * @return string Timestamp as stored by setTimestamp()
	 */
	function getTimestamp() {
		return $this->timestamp;
	}

	/**
	 * @return string User name or IP, whichever was set last
	 */
	function getUser() {
		return $this->user_text;
	}

function getText() { 01248 ContentHandler::deprecated( __METHOD__, '1.21' ); 01249 01250 return $this->text; 01251 } 01252 01256 function getContent() { 01257 if ( is_null( $this->content ) ) { 01258 $this->content = 01259 ContentHandler::makeContent( 01260 $this->text, 01261 $this->getTitle(), 01262 $this->getModel(), 01263 $this->getFormat() 01264 ); 01265 } 01266 01267 return $this->content; 01268 } 01269 01273 function getModel() { 01274 if ( is_null( $this->model ) ) { 01275 $this->model = $this->getTitle()->getContentModel(); 01276 } 01277 01278 return $this->model; 01279 } 01280 01284 function getFormat() { 01285 if ( is_null( $this->model ) ) { 01286 $this->format = ContentHandler::getForTitle( $this->getTitle() )->getDefaultFormat(); 01287 } 01288 01289 return $this->format; 01290 } 01291 01295 function getComment() { 01296 return $this->comment; 01297 } 01298 01302 function getMinor() { 01303 return $this->minor; 01304 } 01305 01309 function getSrc() { 01310 return $this->src; 01311 } 01312 01316 function getSha1() { 01317 if ( $this->sha1base36 ) { 01318 return wfBaseConvert( $this->sha1base36, 36, 16 ); 01319 } 01320 return false; 01321 } 01322 01326 function getFileSrc() { 01327 return $this->fileSrc; 01328 } 01329 01333 function isTempSrc() { 01334 return $this->isTemp; 01335 } 01336 01340 function getFilename() { 01341 return $this->filename; 01342 } 01343 01347 function getArchiveName() { 01348 return $this->archiveName; 01349 } 01350 01354 function getSize() { 01355 return $this->size; 01356 } 01357 01361 function getType() { 01362 return $this->type; 01363 } 01364 01368 function getAction() { 01369 return $this->action; 01370 } 01371 01375 function getParams() { 01376 return $this->params; 01377 } 01378 01382 function importOldRevision() { 01383 $dbw = wfGetDB( DB_MASTER ); 01384 01385 # Sneak a single revision into place 01386 $user = User::newFromName( $this->getUser() ); 01387 if ( $user ) { 01388 $userId = intval( $user->getId() ); 01389 
$userText = $user->getName(); 01390 $userObj = $user; 01391 } else { 01392 $userId = 0; 01393 $userText = $this->getUser(); 01394 $userObj = new User; 01395 } 01396 01397 // avoid memory leak...? 01398 $linkCache = LinkCache::singleton(); 01399 $linkCache->clear(); 01400 01401 $page = WikiPage::factory( $this->title ); 01402 if ( !$page->exists() ) { 01403 # must create the page... 01404 $pageId = $page->insertOn( $dbw ); 01405 $created = true; 01406 $oldcountable = null; 01407 } else { 01408 $pageId = $page->getId(); 01409 $created = false; 01410 01411 $prior = $dbw->selectField( 'revision', '1', 01412 array( 'rev_page' => $pageId, 01413 'rev_timestamp' => $dbw->timestamp( $this->timestamp ), 01414 'rev_user_text' => $userText, 01415 'rev_comment' => $this->getComment() ), 01416 __METHOD__ 01417 ); 01418 if ( $prior ) { 01419 // @todo FIXME: This could fail slightly for multiple matches :P 01420 wfDebug( __METHOD__ . ": skipping existing revision for [[" . 01421 $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" ); 01422 return false; 01423 } 01424 $oldcountable = $page->isCountable(); 01425 } 01426 01427 # @todo FIXME: Use original rev_id optionally (better for backups) 01428 # Insert the row 01429 $revision = new Revision( array( 01430 'title' => $this->title, 01431 'page' => $pageId, 01432 'content_model' => $this->getModel(), 01433 'content_format' => $this->getFormat(), 01434 'text' => $this->getContent()->serialize( $this->getFormat() ), //XXX: just set 'content' => $this->getContent()? 01435 'comment' => $this->getComment(), 01436 'user' => $userId, 01437 'user_text' => $userText, 01438 'timestamp' => $this->timestamp, 01439 'minor_edit' => $this->minor, 01440 ) ); 01441 $revision->insertOn( $dbw ); 01442 $changed = $page->updateIfNewerOn( $dbw, $revision ); 01443 01444 if ( $changed !== false && !$this->mNoUpdates ) { 01445 wfDebug( __METHOD__ . 
": running updates\n" ); 01446 $page->doEditUpdates( $revision, $userObj, array( 'created' => $created, 'oldcountable' => $oldcountable ) ); 01447 } 01448 01449 return true; 01450 } 01451 01455 function importLogItem() { 01456 $dbw = wfGetDB( DB_MASTER ); 01457 # @todo FIXME: This will not record autoblocks 01458 if ( !$this->getTitle() ) { 01459 wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " . 01460 $this->timestamp . "\n" ); 01461 return; 01462 } 01463 # Check if it exists already 01464 // @todo FIXME: Use original log ID (better for backups) 01465 $prior = $dbw->selectField( 'logging', '1', 01466 array( 'log_type' => $this->getType(), 01467 'log_action' => $this->getAction(), 01468 'log_timestamp' => $dbw->timestamp( $this->timestamp ), 01469 'log_namespace' => $this->getTitle()->getNamespace(), 01470 'log_title' => $this->getTitle()->getDBkey(), 01471 'log_comment' => $this->getComment(), 01472 #'log_user_text' => $this->user_text, 01473 'log_params' => $this->params ), 01474 __METHOD__ 01475 ); 01476 // @todo FIXME: This could fail slightly for multiple matches :P 01477 if ( $prior ) { 01478 wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " . 01479 $this->timestamp . 
"\n" ); 01480 return; 01481 } 01482 $log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' ); 01483 $data = array( 01484 'log_id' => $log_id, 01485 'log_type' => $this->type, 01486 'log_action' => $this->action, 01487 'log_timestamp' => $dbw->timestamp( $this->timestamp ), 01488 'log_user' => User::idFromName( $this->user_text ), 01489 #'log_user_text' => $this->user_text, 01490 'log_namespace' => $this->getTitle()->getNamespace(), 01491 'log_title' => $this->getTitle()->getDBkey(), 01492 'log_comment' => $this->getComment(), 01493 'log_params' => $this->params 01494 ); 01495 $dbw->insert( 'logging', $data, __METHOD__ ); 01496 } 01497 01501 function importUpload() { 01502 # Construct a file 01503 $archiveName = $this->getArchiveName(); 01504 if ( $archiveName ) { 01505 wfDebug( __METHOD__ . "Importing archived file as $archiveName\n" ); 01506 $file = OldLocalFile::newFromArchiveName( $this->getTitle(), 01507 RepoGroup::singleton()->getLocalRepo(), $archiveName ); 01508 } else { 01509 $file = wfLocalFile( $this->getTitle() ); 01510 wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" ); 01511 if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) { 01512 $archiveName = $file->getTimestamp() . '!' . $file->getName(); 01513 $file = OldLocalFile::newFromArchiveName( $this->getTitle(), 01514 RepoGroup::singleton()->getLocalRepo(), $archiveName ); 01515 wfDebug( __METHOD__ . "File already exists; importing as $archiveName\n" ); 01516 } 01517 } 01518 if ( !$file ) { 01519 wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" ); 01520 return false; 01521 } 01522 01523 # Get the file source or download if necessary 01524 $source = $this->getFileSrc(); 01525 $flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0; 01526 if ( !$source ) { 01527 $source = $this->downloadSource(); 01528 $flags |= File::DELETE_SOURCE; 01529 } 01530 if ( !$source ) { 01531 wfDebug( __METHOD__ . 
": Could not fetch remote file.\n" ); 01532 return false; 01533 } 01534 $sha1 = $this->getSha1(); 01535 if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) { 01536 if ( $flags & File::DELETE_SOURCE ) { 01537 # Broken file; delete it if it is a temporary file 01538 unlink( $source ); 01539 } 01540 wfDebug( __METHOD__ . ": Corrupt file $source.\n" ); 01541 return false; 01542 } 01543 01544 $user = User::newFromName( $this->user_text ); 01545 01546 # Do the actual upload 01547 if ( $archiveName ) { 01548 $status = $file->uploadOld( $source, $archiveName, 01549 $this->getTimestamp(), $this->getComment(), $user, $flags ); 01550 } else { 01551 $status = $file->upload( $source, $this->getComment(), $this->getComment(), 01552 $flags, false, $this->getTimestamp(), $user ); 01553 } 01554 01555 if ( $status->isGood() ) { 01556 wfDebug( __METHOD__ . ": Successful\n" ); 01557 return true; 01558 } else { 01559 wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" ); 01560 return false; 01561 } 01562 } 01563 01567 function downloadSource() { 01568 global $wgEnableUploads; 01569 if ( !$wgEnableUploads ) { 01570 return false; 01571 } 01572 01573 $tempo = tempnam( wfTempDir(), 'download' ); 01574 $f = fopen( $tempo, 'wb' ); 01575 if ( !$f ) { 01576 wfDebug( "IMPORT: couldn't write to temp file $tempo\n" ); 01577 return false; 01578 } 01579 01580 // @todo FIXME! 
01581 $src = $this->getSrc(); 01582 $data = Http::get( $src ); 01583 if ( !$data ) { 01584 wfDebug( "IMPORT: couldn't fetch source $src\n" ); 01585 fclose( $f ); 01586 unlink( $tempo ); 01587 return false; 01588 } 01589 01590 fwrite( $f, $data ); 01591 fclose( $f ); 01592 01593 return $tempo; 01594 } 01595 01596 } 01597 01602 class ImportStringSource { 01603 function __construct( $string ) { 01604 $this->mString = $string; 01605 $this->mRead = false; 01606 } 01607 01611 function atEnd() { 01612 return $this->mRead; 01613 } 01614 01618 function readChunk() { 01619 if ( $this->atEnd() ) { 01620 return false; 01621 } 01622 $this->mRead = true; 01623 return $this->mString; 01624 } 01625 } 01626 01631 class ImportStreamSource { 01632 function __construct( $handle ) { 01633 $this->mHandle = $handle; 01634 } 01635 01639 function atEnd() { 01640 return feof( $this->mHandle ); 01641 } 01642 01646 function readChunk() { 01647 return fread( $this->mHandle, 32768 ); 01648 } 01649 01654 static function newFromFile( $filename ) { 01655 wfSuppressWarnings(); 01656 $file = fopen( $filename, 'rt' ); 01657 wfRestoreWarnings(); 01658 if ( !$file ) { 01659 return Status::newFatal( "importcantopen" ); 01660 } 01661 return Status::newGood( new ImportStreamSource( $file ) ); 01662 } 01663 01668 static function newFromUpload( $fieldname = "xmlimport" ) { 01669 $upload =& $_FILES[$fieldname]; 01670 01671 if ( $upload === null || !$upload['name'] ) { 01672 return Status::newFatal( 'importnofile' ); 01673 } 01674 if ( !empty( $upload['error'] ) ) { 01675 switch ( $upload['error'] ) { 01676 case 1: # The uploaded file exceeds the upload_max_filesize directive in php.ini. 01677 return Status::newFatal( 'importuploaderrorsize' ); 01678 case 2: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form. 
01679 return Status::newFatal( 'importuploaderrorsize' ); 01680 case 3: # The uploaded file was only partially uploaded 01681 return Status::newFatal( 'importuploaderrorpartial' ); 01682 case 6: #Missing a temporary folder. 01683 return Status::newFatal( 'importuploaderrortemp' ); 01684 # case else: # Currently impossible 01685 } 01686 01687 } 01688 $fname = $upload['tmp_name']; 01689 if ( is_uploaded_file( $fname ) ) { 01690 return ImportStreamSource::newFromFile( $fname ); 01691 } else { 01692 return Status::newFatal( 'importnofile' ); 01693 } 01694 } 01695 01701 static function newFromURL( $url, $method = 'GET' ) { 01702 wfDebug( __METHOD__ . ": opening $url\n" ); 01703 # Use the standard HTTP fetch function; it times out 01704 # quicker and sorts out user-agent problems which might 01705 # otherwise prevent importing from large sites, such 01706 # as the Wikimedia cluster, etc. 01707 $data = Http::request( $method, $url, array( 'followRedirects' => true ) ); 01708 if ( $data !== false ) { 01709 $file = tmpfile(); 01710 fwrite( $file, $data ); 01711 fflush( $file ); 01712 fseek( $file, 0 ); 01713 return Status::newGood( new ImportStreamSource( $file ) ); 01714 } else { 01715 return Status::newFatal( 'importcantopen' ); 01716 } 01717 } 01718 01727 public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) { 01728 if ( $page == '' ) { 01729 return Status::newFatal( 'import-noarticle' ); 01730 } 01731 $link = Title::newFromText( "$interwiki:Special:Export/$page" ); 01732 if ( is_null( $link ) || !$link->isExternal() ) { 01733 return Status::newFatal( 'importbadinterwiki' ); 01734 } else { 01735 $params = array(); 01736 if ( $history ) { 01737 $params['history'] = 1; 01738 } 01739 if ( $templates ) { 01740 $params['templates'] = 1; 01741 } 01742 if ( $pageLinkDepth ) { 01743 $params['pagelink-depth'] = $pageLinkDepth; 01744 } 01745 $url = $link->getFullURL( $params ); 01746 # For interwikis, use POST to 
avoid redirects. 01747 return ImportStreamSource::newFromURL( $url, "POST" ); 01748 } 01749 } 01750 }