MediaWiki
REL1_21
|
<?php

/**
 * Streaming reader for MediaWiki XML dumps.
 *
 * The importer pulls nodes from an XMLReader and turns <page>, <revision>,
 * <upload> and <logitem> elements into WikiRevision objects, which are then
 * handed to configurable callbacks. By default the callbacks are the
 * import*() methods of this class.
 */
class WikiImporter {
	/** XMLReader positioned on the dump being imported */
	private $reader = null;
	private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
	private $mSiteInfoCallback, $mTargetNamespace, $mTargetRootPage, $mPageOutCallback;
	private $mNoticeCallback, $mDebug;
	private $mImportUploads, $mImageBasePath;
	private $mNoUpdates = false;

	/**
	 * Creates the importer and opens the XML reader on the given source.
	 *
	 * @param $source object Import source; it is registered with
	 *   UploadSourceAdapter, which calls atEnd() and readChunk() on it.
	 */
	function __construct( $source ) {
		$this->reader = new XMLReader();

		// Registering a protocol that is already registered raises a PHP
		// warning, so only do it for the first importer created in a request.
		if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
			stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
		}
		$id = UploadSourceAdapter::registerSource( $source );
		if ( defined( 'LIBXML_PARSEHUGE' ) ) {
			$this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
		} else {
			$this->reader->open( "uploadsource://$id" );
		}

		// Default callbacks
		$this->setRevisionCallback( array( $this, "importRevision" ) );
		$this->setUploadCallback( array( $this, 'importUpload' ) );
		$this->setLogItemCallback( array( $this, 'importLogItem' ) );
		$this->setPageOutCallback( array( $this, 'finishImportPage' ) );
	}

	/**
	 * Records an XML error in the debug log.
	 * Note: despite its name this method only logs; it does not throw.
	 *
	 * @param $err string Error description
	 */
	private function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
		wfDebug( "WikiImporter XML error: $err\n" );
	}

	/**
	 * Writes a line to the debug log, but only when setDebug() enabled it.
	 *
	 * @param $data string
	 */
	private function debug( $data ) {
		if ( $this->mDebug ) {
			wfDebug( "IMPORT: $data\n" );
		}
	}

	/**
	 * Writes a line to the debug log regardless of the debug flag.
	 *
	 * @param $data string
	 */
	private function warn( $data ) {
		wfDebug( "IMPORT: $data\n" );
	}

	/**
	 * Reports a message to the notice callback, or echoes it when no
	 * callable notice callback is set.
	 *
	 * @param $msg string Message key; any further arguments are message parameters
	 */
	private function notice( $msg /*, $param, ...*/ ) {
		$params = func_get_args();
		array_shift( $params ); // drop $msg, keep only the parameters

		if ( is_callable( $this->mNoticeCallback ) ) {
			call_user_func( $this->mNoticeCallback, $msg, $params );
		} else { # No ImportReporter -> CLI
			echo wfMessage( $msg, $params )->text() . "\n";
		}
	}

	/**
	 * Enables or disables debug output from debug().
	 *
	 * @param $debug bool
	 */
	function setDebug( $debug ) {
		$this->mDebug = $debug;
	}

	/**
	 * Sets the "no updates" flag that is copied onto every WikiRevision
	 * this importer builds.
	 *
	 * @param $noupdates bool
	 */
	function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * Sets the callback used by notice().
	 *
	 * @param $callback callback Invoked as ( $msg, array $params )
	 * @return mixed Result of wfSetVar() (presumably the previous callback)
	 */
	public function setNoticeCallback( $callback ) {
		return wfSetVar( $this->mNoticeCallback, $callback );
	}

	/**
	 * Sets the callback invoked when a page title has been read.
	 *
	 * @param $callback callback
	 * @return callback|null The previously set callback
	 */
	public function setPageCallback( $callback ) {
		$previous = $this->mPageCallback;
		$this->mPageCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked when a <page> element has been fully read.
	 * It receives ( $title, $origTitle, $revCount, $successfulRevCount, $pageInfo ).
	 *
	 * @param $callback callback
	 * @return callback|null The previously set callback
	 */
	public function setPageOutCallback( $callback ) {
		$previous = $this->mPageOutCallback;
		$this->mPageOutCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for every revision; it receives
	 * ( $revision, $importer ).
	 *
	 * @param $callback callback
	 * @return callback|null The previously set callback
	 */
	public function setRevisionCallback( $callback ) {
		$previous = $this->mRevisionCallback;
		$this->mRevisionCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for every upload; it receives the revision.
	 *
	 * @param $callback callback
	 * @return callback|null The previously set callback
	 */
	public function setUploadCallback( $callback ) {
		$previous = $this->mUploadCallback;
		$this->mUploadCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for every log item; it receives
	 * ( $revision, $importer ).
	 *
	 * @param $callback callback
	 * @return callback|null The previously set callback
	 */
	public function setLogItemCallback( $callback ) {
		$previous = $this->mLogItemCallback;
		$this->mLogItemCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the site-info callback. Note that the importer currently throws
	 * when it meets <siteinfo> while such a callback is set.
	 *
	 * @param $callback callback
	 * @return callback|null The previously set callback
	 */
	public function setSiteInfoCallback( $callback ) {
		$previous = $this->mSiteInfoCallback;
		$this->mSiteInfoCallback = $callback;
		return $previous;
	}

	/**
	 * Sets a namespace into which all pages are imported.
	 *
	 * @param $namespace int|null Namespace index, or null to keep the
	 *   namespaces found in the dump
	 * @return bool True when the value was accepted, false for a negative index
	 */
	public function setTargetNamespace( $namespace ) {
		if ( is_null( $namespace ) ) {
			// Don't override namespaces
			$this->mTargetNamespace = null;
			return true;
		}
		if ( $namespace >= 0 ) {
			// @todo FIXME: Check for validity
			$this->mTargetNamespace = intval( $namespace );
			return true;
		}
		return false;
	}

	/**
	 * Sets a page under which all imported pages are placed as subpages.
	 * An empty string leaves the current setting untouched.
	 *
	 * @param $rootpage string|null Title text, or null for no root page
	 * @return Status Fatal when the title is invalid, external, or in a
	 *   namespace without subpages
	 */
	public function setTargetRootPage( $rootpage ) {
		$status = Status::newGood();
		if ( is_null( $rootpage ) ) {
			// No rootpage
			$this->mTargetRootPage = null;
		} elseif ( $rootpage !== '' ) {
			$rootpage = rtrim( $rootpage, '/' ); //avoid double slashes
			$title = Title::newFromText( $rootpage, !is_null( $this->mTargetNamespace ) ? $this->mTargetNamespace : NS_MAIN );
			if ( !$title || $title->isExternal() ) {
				$status->fatal( 'import-rootpage-invalid' );
			} else {
				if ( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
					global $wgContLang;

					$displayNSText = $title->getNamespace() == NS_MAIN
						? wfMessage( 'blanknamespace' )->text()
						: $wgContLang->getNsText( $title->getNamespace() );
					$status->fatal( 'import-rootpage-nosubpage', $displayNSText );
				} else {
					// set namespace to 'all', so the namespace check in processTitle() can pass
					$this->setTargetNamespace( null );
					$this->mTargetRootPage = $title->getPrefixedDBkey();
				}
			}
		}
		return $status;
	}

	/**
	 * Sets the directory in which upload files named by a <rel> element
	 * are looked up.
	 *
	 * @param $dir string
	 */
	public function setImageBasePath( $dir ) {
		$this->mImageBasePath = $dir;
	}

	/**
	 * Enables or disables processing of <upload> elements.
	 *
	 * @param $import bool
	 */
	public function setImportUploads( $import ) {
		$this->mImportUploads = $import;
	}

	/**
	 * Default revision callback: runs the revision's importOldRevision()
	 * through the master database's deadlockLoop().
	 *
	 * @param $revision WikiRevision
	 * @return mixed Result of the import, or false when the content could
	 *   not be unserialized (a notice is emitted in that case)
	 */
	public function importRevision( $revision ) {
		try {
			$dbw = wfGetDB( DB_MASTER );
			return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
		} catch ( MWContentSerializationException $ex ) {
			$this->notice( 'import-error-unserialize',
				$revision->getTitle()->getPrefixedText(),
				$revision->getID(),
				$revision->getModel(),
				$revision->getFormat() );
		}

		// Explicit failure value so callers counting successes see a boolean
		return false;
	}

	/**
	 * Default log item callback: runs the item's importLogItem() through
	 * the master database's deadlockLoop().
	 *
	 * @param $rev WikiRevision
	 * @return mixed
	 */
	public function importLogItem( $rev ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
	}

	/**
	 * Default upload callback: runs the revision's importUpload() through
	 * the master database's deadlockLoop().
	 *
	 * @param $revision WikiRevision
	 * @return mixed
	 */
	public function importUpload( $revision ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
	}

	/**
	 * Default page-out callback: runs the 'AfterImportPage' hook with all
	 * arguments it was given.
	 *
	 * @param $title Title|null Title under which the page was imported
	 * @param $origTitle Title|null Title as parsed from the dump
	 * @param $revCount int Number of revisions seen
	 * @param $sRevCount int Number of revisions imported successfully
	 * @param $pageInfo array Collected page fields
	 * @return bool Hook result
	 */
	public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
		$args = func_get_args();
		return wfRunHooks( 'AfterImportPage', $args );
	}

	/**
	 * Alternative revision callback that only writes the revision's fields
	 * to the debug log.
	 *
	 * @param $revision WikiRevision
	 */
	public function debugRevisionHandler( &$revision ) {
		$this->debug( "Got revision:" );
		if ( is_object( $revision->title ) ) {
			$this->debug( "-- Title: " . $revision->title->getPrefixedText() );
		} else {
			$this->debug( "-- Title: <invalid>" );
		}
		$this->debug( "-- User: " . $revision->user_text );
		$this->debug( "-- Timestamp: " . $revision->timestamp );
		$this->debug( "-- Comment: " . $revision->comment );
		$this->debug( "-- Text: " . $revision->text );
	}

	/**
	 * Notifies the page callback, if one is set.
	 *
	 * @param $title mixed Value produced by processTitle()
	 */
	function pageCallback( $title ) {
		if ( isset( $this->mPageCallback ) ) {
			call_user_func( $this->mPageCallback, $title );
		}
	}

	/**
	 * Notifies the page-out callback, if one is set, forwarding all arguments.
	 *
	 * @param $title Title|null
	 * @param $origTitle Title|null
	 * @param $revCount int
	 * @param $sucCount int
	 * @param $pageInfo array
	 */
	private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
		if ( isset( $this->mPageOutCallback ) ) {
			$args = func_get_args();
			call_user_func_array( $this->mPageOutCallback, $args );
		}
	}

	/**
	 * Passes a revision to the revision callback.
	 *
	 * @param $revision WikiRevision
	 * @return mixed Callback result, or false when no callback is set
	 */
	private function revisionCallback( $revision ) {
		if ( !isset( $this->mRevisionCallback ) ) {
			return false;
		}
		return call_user_func_array( $this->mRevisionCallback,
			array( $revision, $this ) );
	}

	/**
	 * Passes a log item to the log item callback.
	 *
	 * @param $revision WikiRevision
	 * @return mixed Callback result, or false when no callback is set
	 */
	private function logItemCallback( $revision ) {
		if ( !isset( $this->mLogItemCallback ) ) {
			return false;
		}
		return call_user_func_array( $this->mLogItemCallback,
			array( $revision, $this ) );
	}

	/**
	 * Collects the text of the element the reader is positioned on.
	 * Reading stops at the first END_ELEMENT node, so this is meant for
	 * elements that contain text only.
	 *
	 * @return string The text; '' for an empty element or when the input
	 *   ends early (the reader is closed in the latter case)
	 */
	private function nodeContents() {
		if ( $this->reader->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->reader->read() ) {
			switch ( $this->reader->nodeType ) {
				case XMLReader::TEXT:
				case XMLReader::SIGNIFICANT_WHITESPACE:
					$buffer .= $this->reader->value;
					break;
				case XMLReader::END_ELEMENT:
					return $buffer;
			}
		}

		$this->reader->close();
		return '';
	}

	#
# --------------

	/**
	 * Debugging aid: dumps type, name and value of the current node.
	 */
	private function dumpElement() {
		static $lookup = null;
		if ( !$lookup ) {
			$xmlReaderConstants = array(
				"NONE",
				"ELEMENT",
				"ATTRIBUTE",
				"TEXT",
				"CDATA",
				"ENTITY_REF",
				"ENTITY",
				"PI",
				"COMMENT",
				"DOC",
				"DOC_TYPE",
				"DOC_FRAGMENT",
				"NOTATION",
				"WHITESPACE",
				"SIGNIFICANT_WHITESPACE",
				"END_ELEMENT",
				"END_ENTITY",
				"XML_DECLARATION",
			);
			$lookup = array();

			// Map numeric node types back to their constant names
			foreach ( $xmlReaderConstants as $name ) {
				$lookup[constant( "XmlReader::$name" )] = $name;
			}
		}

		// var_dump() prints by itself and returns nothing, so the trailing
		// blank lines are printed separately.
		var_dump(
			$lookup[$this->reader->nodeType],
			$this->reader->name,
			$this->reader->value
		);
		print( "\n\n" );
	}

	/**
	 * Primary entry point: reads the whole dump and dispatches its
	 * top-level elements.
	 *
	 * @throws MWException When the document element is not <mediawiki>, or
	 *   when a handler throws
	 * @return bool Always true when the loop finishes
	 */
	public function doImport() {

		// Calls to reader->read need to be wrapped in calls to
		// libxml_disable_entity_loader() to avoid local file
		// inclusion attacks (bug 46932).
		$oldDisable = libxml_disable_entity_loader( true );

		// Written without "finally" so it also runs on PHP versions lacking
		// it: remember the exception, restore the loader state, then rethrow.
		$rethrow = null;
		try {
			$this->reader->read();

			if ( $this->reader->name != 'mediawiki' ) {
				throw new MWException( "Expected <mediawiki> tag, got ".
					$this->reader->name );
			}
			$this->debug( "<mediawiki> tag is correct." );

			$this->debug( "Starting primary dump processing loop." );

			$keepReading = $this->reader->read();
			$skip = false;
			while ( $keepReading ) {
				$tag = $this->reader->name;
				$type = $this->reader->nodeType;

				// Hook arguments must be passed as one array
				if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
					// Do nothing
				} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
					break;
				} elseif ( $tag == 'siteinfo' ) {
					$this->handleSiteInfo();
					// Step over the <siteinfo> subtree here, so that the node
					// following it is dispatched by the next iteration instead
					// of being read past.
					$skip = true;
				} elseif ( $tag == 'page' ) {
					$this->handlePage();
				} elseif ( $tag == 'logitem' ) {
					$this->handleLogItem();
				} elseif ( $tag != '#text' ) {
					$this->warn( "Unhandled top-level XML tag $tag" );

					$skip = true;
				}

				if ( $skip ) {
					$keepReading = $this->reader->next();
					$skip = false;
					$this->debug( "Skip" );
				} else {
					$keepReading = $this->reader->read();
				}
			}
		} catch ( Exception $ex ) {
			$rethrow = $ex;
		}

		libxml_disable_entity_loader( $oldDisable );
		if ( $rethrow ) {
			throw $rethrow;
		}
		return true;
	}

	/**
	 * Handles a <siteinfo> element. The element is not interpreted; the
	 * caller is responsible for skipping its subtree.
	 *
	 * @throws MWException When a site-info callback has been set
	 * @return bool
	 */
	private function handleSiteInfo() {
		// Site info is useful, but not actually used for dump imports.
		// Includes a quick short-circuit to save performance.
		if ( !$this->mSiteInfoCallback ) {
			return true;
		}
		throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
	}

	/**
	 * Reads a <logitem> element and passes the collected fields on to
	 * processLogItem().
	 */
	private function handleLogItem() {
		$this->debug( "Enter log item handler." );
		$logInfo = array();

		// Fields that can just be stuffed in the pageInfo object
		$normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
			'logtitle', 'params' );

		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'logitem' ) {
				break;
			}

			$tag = $this->reader->name;

			// Hook arguments must be passed as one array
			if ( !wfRunHooks( 'ImportHandleLogItemXMLTag',
				array( $this, $logInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$logInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$logInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled log-item XML tag $tag" );
			}
		}

		$this->processLogItem( $logInfo );
	}

	/**
	 * Builds a WikiRevision from log item fields and hands it to the log
	 * item callback.
	 *
	 * @param $logInfo array Fields collected by handleLogItem(); 'id',
	 *   'type', 'action', 'timestamp', 'params' and 'logtitle' are read
	 *   unconditionally
	 * @return mixed Callback result, or false when no callback is set
	 */
	private function processLogItem( $logInfo ) {
		$revision = new WikiRevision;

		$revision->setID( $logInfo['id'] );
		$revision->setType( $logInfo['type'] );
		$revision->setAction( $logInfo['action'] );
		$revision->setTimestamp( $logInfo['timestamp'] );
		$revision->setParams( $logInfo['params'] );
		$revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
		$revision->setNoUpdates( $this->mNoUpdates );

		if ( isset( $logInfo['comment'] ) ) {
			$revision->setComment( $logInfo['comment'] );
		}

		if ( isset( $logInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $logInfo['contributor']['ip'] );
		}
		if ( isset( $logInfo['contributor']['username'] ) ) {
			$revision->setUserName( $logInfo['contributor']['username'] );
		}

		return $this->logItemCallback( $revision );
	}

	/**
	 * Reads a <page> element: collects the plain fields, resolves the
	 * title, delegates <revision> and <upload> children, and finally
	 * notifies the page-out callback.
	 */
	private function handlePage() {
		// Handle page data.
		$this->debug( "Enter page handler." );
		$pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );

		// Fields that can just be stuffed in the pageInfo object
		$normalFields = array( 'title', 'id', 'redirect', 'restrictions' );

		$skip = false;
		$badTitle = false;
		// Defined up front so the page-out callback below never reads an
		// undefined variable (page without <title>, or with an invalid one)
		$origTitle = null;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'page' ) {
				break;
			}

			// A skip request only applies to the node that asked for it
			$skip = false;

			$tag = $this->reader->name;

			if ( $badTitle ) {
				// The title is invalid, bail out of this page
				$skip = true;
			} elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this,
				&$pageInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$pageInfo[$tag] = $this->nodeContents();
				if ( $tag == 'title' ) {
					$title = $this->processTitle( $pageInfo['title'] );

					// The page callback is notified even for a rejected title
					$this->pageCallback( $title );

					if ( !$title ) {
						$badTitle = true;
						$skip = true;
						$pageInfo['_title'] = null;
						$origTitle = null;
					} else {
						list( $pageInfo['_title'], $origTitle ) = $title;
					}
				}
			} elseif ( $tag == 'revision' ) {
				$this->handleRevision( $pageInfo );
			} elseif ( $tag == 'upload' ) {
				$this->handleUpload( $pageInfo );
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled page XML tag $tag" );
				$skip = true;
			}
		}

		$this->pageOutCallback(
			isset( $pageInfo['_title'] ) ? $pageInfo['_title'] : null,
			$origTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'],
			$pageInfo );
	}

	/**
	 * Reads a <revision> element and updates the page's revision counters.
	 *
	 * @param $pageInfo array Page fields; 'revisionCount' and
	 *   'successfulRevisionCount' are incremented in place
	 */
	private function handleRevision( &$pageInfo ) {
		$this->debug( "Enter revision handler" );
		$revisionInfo = array();

		$normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'revision' ) {
				break;
			}

			// A skip request only applies to the node that asked for it
			$skip = false;

			$tag = $this->reader->name;

			// Hook arguments must be passed as one array
			if ( !wfRunHooks( 'ImportHandleRevisionXMLTag',
				array( $this, $pageInfo, $revisionInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$revisionInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$revisionInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled revision XML tag $tag" );
				$skip = true;
			}
		}

		$pageInfo['revisionCount']++;
		if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
			$pageInfo['successfulRevisionCount']++;
		}
	}

	/**
	 * Builds a WikiRevision from revision fields and hands it to the
	 * revision callback.
	 *
	 * @param $pageInfo array Page fields; '_title' is used as the title
	 * @param $revisionInfo array Fields collected by handleRevision()
	 * @return mixed Callback result, or false when no callback is set
	 */
	private function processRevision( $pageInfo, $revisionInfo ) {
		$revision = new WikiRevision;

		if ( isset( $revisionInfo['id'] ) ) {
			$revision->setID( $revisionInfo['id'] );
		}
		if ( isset( $revisionInfo['text'] ) ) {
			$revision->setText( $revisionInfo['text'] );
		}
		if ( isset( $revisionInfo['model'] ) ) {
			$revision->setModel( $revisionInfo['model'] );
		}
		if ( isset( $revisionInfo['format'] ) ) {
			$revision->setFormat( $revisionInfo['format'] );
		}
		$revision->setTitle( $pageInfo['_title'] );

		if ( isset( $revisionInfo['timestamp'] ) ) {
			$revision->setTimestamp( $revisionInfo['timestamp'] );
		} else {
			// No timestamp in the dump: fall back to the current time
			$revision->setTimestamp( wfTimestampNow() );
		}

		if ( isset( $revisionInfo['comment'] ) ) {
			$revision->setComment( $revisionInfo['comment'] );
		}

		// The mere presence of <minor> marks the edit as minor
		if ( isset( $revisionInfo['minor'] ) ) {
			$revision->setMinor( true );
		}
		if ( isset( $revisionInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $revisionInfo['contributor']['ip'] );
		}
		if ( isset( $revisionInfo['contributor']['username'] ) ) {
			$revision->setUserName( $revisionInfo['contributor']['username'] );
		}
		$revision->setNoUpdates( $this->mNoUpdates );

		return $this->revisionCallback( $revision );
	}

	/**
	 * Reads an <upload> element and, when upload import is enabled, passes
	 * it on to processUpload().
	 *
	 * @param $pageInfo array Page fields
	 * @return mixed Result of processUpload(), or nothing when uploads are
	 *   not being imported
	 */
	private function handleUpload( &$pageInfo ) {
		$this->debug( "Enter upload handler" );
		$uploadInfo = array();

		$normalFields = array( 'timestamp', 'comment', 'filename', 'text',
			'src', 'size', 'sha1base36', 'archivename', 'rel' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'upload' ) {
				break;
			}

			// A skip request only applies to the node that asked for it
			$skip = false;

			$tag = $this->reader->name;

			// Hook arguments must be passed as one array
			if ( !wfRunHooks( 'ImportHandleUploadXMLTag',
				array( $this, $pageInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$uploadInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$uploadInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag == 'contents' ) {
				$contents = $this->nodeContents();
				$encoding = $this->reader->getAttribute( 'encoding' );
				if ( $encoding === 'base64' ) {
					// Embedded file data: decode it into a temporary file
					$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
					$uploadInfo['isTempSrc'] = true;
				}
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled upload XML tag $tag" );
				$skip = true;
			}
		}

		// A file found under the image base path takes precedence over
		// embedded contents
		if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
			$path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
			if ( file_exists( $path ) ) {
				$uploadInfo['fileSrc'] = $path;
				$uploadInfo['isTempSrc'] = false;
			}
		}

		if ( $this->mImportUploads ) {
			return $this->processUpload( $pageInfo, $uploadInfo );
		}
	}

	/**
	 * Writes data to a new temporary file.
	 *
	 * @param $contents string Raw file contents
	 * @return string Path of the temporary file
	 */
	private function dumpTemp( $contents ) {
		$filename = tempnam( wfTempDir(), 'importupload' );
		file_put_contents( $filename, $contents );
		return $filename;
	}

	/**
	 * Builds a WikiRevision from upload fields and hands it to the upload
	 * callback.
	 *
	 * @param $pageInfo array Page fields; '_title' and 'id' are read
	 * @param $uploadInfo array Fields collected by handleUpload();
	 *   'timestamp', 'filename', 'src', 'size' and 'comment' are read
	 *   unconditionally
	 * @return mixed Result of the upload callback
	 */
	private function processUpload( $pageInfo, $uploadInfo ) {
		$revision = new WikiRevision;
		$text = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';

		$revision->setTitle( $pageInfo['_title'] );
		$revision->setID( $pageInfo['id'] );
		$revision->setTimestamp( $uploadInfo['timestamp'] );
		$revision->setText( $text );
		$revision->setFilename( $uploadInfo['filename'] );
		if ( isset( $uploadInfo['archivename'] ) ) {
			$revision->setArchiveName( $uploadInfo['archivename'] );
		}
		$revision->setSrc( $uploadInfo['src'] );
		if ( isset( $uploadInfo['fileSrc'] ) ) {
			$revision->setFileSrc( $uploadInfo['fileSrc'],
				!empty( $uploadInfo['isTempSrc'] ) );
		}
		if ( isset( $uploadInfo['sha1base36'] ) ) {
			$revision->setSha1Base36( $uploadInfo['sha1base36'] );
		}
		$revision->setSize( intval( $uploadInfo['size'] ) );
		$revision->setComment( $uploadInfo['comment'] );

		if ( isset( $uploadInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $uploadInfo['contributor']['ip'] );
		}
		if ( isset( $uploadInfo['contributor']['username'] ) ) {
			$revision->setUserName( $uploadInfo['contributor']['username'] );
		}
		$revision->setNoUpdates( $this->mNoUpdates );

		return call_user_func( $this->mUploadCallback, $revision );
	}

	/**
	 * Reads a <contributor> element.
	 *
	 * @return array Any of the keys 'id', 'ip' and 'username' that were present
	 */
	private function handleContributor() {
		$fields = array( 'id', 'ip', 'username' );
		$info = array();

		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'contributor' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( in_array( $tag, $fields ) ) {
				$info[$tag] = $this->nodeContents();
			}
		}

		return $info;
	}

// Last method of WikiImporter; the class is closed right after it.

	/**
	 * Resolves the title text found in the dump to the title the page is
	 * imported under, applying the target namespace or the target root
	 * page when one is set.
	 *
	 * @param $text string Title text from the dump
	 * @return array|bool array( Title to import to, Title as parsed from
	 *   the dump ), or false when the page must not be imported (a notice
	 *   is emitted in that case)
	 */
	private function processTitle( $text ) {
		global $wgCommandLineMode;

		$workTitle = $text;
		$origTitle = Title::newFromText( $workTitle );

		if ( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
			# makeTitleSafe, because $origTitle can have an interwiki prefix (different
			# setting of the interwiki map) and then the dbKey can begin with a lowercase char
			$title = Title::makeTitleSafe( $this->mTargetNamespace,
				$origTitle->getDBkey() );
		} else {
			if ( !is_null( $this->mTargetRootPage ) ) {
				$workTitle = $this->mTargetRootPage . '/' . $workTitle;
			}
			$title = Title::newFromText( $workTitle );
		}

		if ( is_null( $title ) ) {
			# Invalid page title? Ignore the page
			$this->notice( 'import-error-invalid', $workTitle );
			return false;
		} elseif ( $title->isExternal() ) {
			$this->notice( 'import-error-interwiki', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->canExist() ) {
			$this->notice( 'import-error-special', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->userCan( 'edit' ) && !$wgCommandLineMode ) {
			# Do not import if the importing wiki user cannot edit this page
			$this->notice( 'import-error-edit', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->exists() && !$title->userCan( 'create' ) && !$wgCommandLineMode ) {
			# Do not import if the importing wiki user cannot create this page
			$this->notice( 'import-error-create', $title->getPrefixedText() );
			return false;
		}

		return array( $title, $origTitle );
	}
}

/**
 * Stream wrapper that exposes a registered import source object as a
 * readable stream. The source must provide atEnd() and readChunk().
 */
class UploadSourceAdapter {
	/** Registered sources, keyed by the random id used as the URL host */
	static $sourceRegistrations = array();

	private $mSource;
	/** Data read from the source but not yet returned by stream_read() */
	private $mBuffer = '';
	/** Number of bytes returned by stream_read() so far */
	private $mPosition = 0;

	/**
	 * Registers a source and returns the id under which it can be opened.
	 *
	 * @param $source object
	 * @return string Id to be used as the host part of the stream URL
	 */
	static function registerSource( $source ) {
		$id = wfRandomString();

		self::$sourceRegistrations[$id] = $source;

		return $id;
	}

	/**
	 * Opens the stream by looking up the source named by the URL's host part.
	 *
	 * @param $path string
	 * @param $mode string Unused
	 * @param $options int Unused
	 * @param $opened_path string Unused
	 * @return bool False when no source is registered under that id
	 */
	function stream_open( $path, $mode, $options, &$opened_path ) {
		$url = parse_url( $path );
		$id = $url['host'];

		if ( !isset( self::$sourceRegistrations[$id] ) ) {
			return false;
		}

		$this->mSource = self::$sourceRegistrations[$id];

		return true;
	}

	/**
	 * Returns up to $count bytes, pulling chunks from the source until
	 * enough data is buffered, the source ends, or it yields an empty chunk.
	 *
	 * @param $count int
	 * @return string
	 */
	function stream_read( $count ) {
		$return = '';
		$leave = false;

		while ( !$leave && !$this->mSource->atEnd() &&
			strlen( $this->mBuffer ) < $count ) {
			$read = $this->mSource->readChunk();

			if ( !strlen( $read ) ) {
				$leave = true;
			}

			$this->mBuffer .= $read;
		}

		if ( strlen( $this->mBuffer ) ) {
			$return = substr( $this->mBuffer, 0, $count );
			// substr() may yield false once the buffer is used up; keep
			// the buffer a string in every case
			$this->mBuffer = (string)substr( $this->mBuffer, $count );
		}

		$this->mPosition += strlen( $return );

		return $return;
	}

	/**
	 * Writing is not supported.
	 *
	 * @param $data string
	 * @return bool Always false
	 */
	function stream_write( $data ) {
		return false;
	}

	/**
	 * @return int Number of bytes handed out so far
	 */
	function stream_tell() {
		return $this->mPosition;
	}

	/**
	 * @return bool Whether the underlying source reports its end
	 */
	function stream_eof() {
		return $this->mSource->atEnd();
	}

	/**
	 * Returns a stat array in which every field is zero.
	 *
	 * @return array
	 */
	function url_stat() {
		$result = array();

		$result['dev'] = $result[0] = 0;
		$result['ino'] = $result[1] = 0;
		$result['mode'] = $result[2] = 0;
		$result['nlink'] = $result[3] = 0;
		$result['uid'] = $result[4] = 0;
		$result['gid'] = $result[5] = 0;
		$result['rdev'] = $result[6] = 0;
		$result['size'] = $result[7] = 0;
		$result['atime'] = $result[8] = 0;
		$result['mtime'] = $result[9] = 0;
		$result['ctime'] = $result[10] = 0;
		$result['blksize'] = $result[11] = 0;
		$result['blocks'] = $result[12] = 0;

		return $result;
	}
}

/**
 * XMLReader with a helper for reading the text of the current element.
 */
class XMLReader2 extends XMLReader {

	/**
	 * Collects the text of the element the reader is positioned on,
	 * stopping at the first END_ELEMENT node.
	 *
	 * @return string The text; '' for an empty element or when the input
	 *   ends early (the reader is closed in the latter case)
	 */
	function nodeContents() {
		if ( $this->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->read() ) {
			switch ( $this->nodeType ) {
				case XMLReader::TEXT:
				case XMLReader::SIGNIFICANT_WHITESPACE:
					$buffer .= $this->value;
					break;
				case XMLReader::END_ELEMENT:
					return $buffer;
			}
		}

		// Input ended before the closing tag: always hand back a string
		// rather than the boolean result of close()
		$this->close();
		return '';
	}
}

/**
 * Holder for the data of one imported revision, upload or log item.
 */
class WikiRevision {
	var $importer = null;

	/** Title object set through setTitle() */
	var $title = null;
	var $id = 0;
	var $timestamp = "20010115000000";
	var $user = 0;
	var $user_text = "";
	var $model = null;
	var $format = null;
	var $text = "";
	var $content = null;
	var $comment = "";
	var $minor = false;
	var $type = "";
	var $action = "";
	var $params = "";
	var $fileSrc = '';
	var $sha1base36 = false;
	var $isTemp = false;
	var $archiveName = '';
	var $fileIsTemp;
	// Assigned by setFilename(), setSrc() and setSize(); declared so that
	// reading them before they are set does not touch an undefined property
	var $filename;
	var $src;
	var $size;
	private $mNoUpdates = false;

	/**
	 * @param $title Title
	 * @throws MWException When $title is not an object
	 */
	function setTitle( $title ) {
		if ( is_object( $title ) ) {
			$this->title = $title;
		} elseif ( is_null( $title ) ) {
			throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
		} else {
			throw new MWException( "WikiRevision given non-object title in import." );
		}
	}

	/**
	 * @param $id int
	 */
	function setID( $id ) {
		$this->id = $id;
	}

	/**
	 * Stores the timestamp converted to TS_MW format.
	 *
	 * @param $ts string Timestamp as accepted by wfTimestamp()
	 */
	function setTimestamp( $ts ) {
		# 2003-08-05T18:30:02Z
		$this->timestamp = wfTimestamp( TS_MW, $ts );
	}

	/**
	 * @param $user string User name
	 */
	function setUsername( $user ) {
		$this->user_text = $user;
	}

	/**
	 * Stores the IP address as the user text.
	 *
	 * @param $ip string
	 */
	function setUserIP( $ip ) {
		$this->user_text = $ip;
	}

	/**
	 * @param $model string Content model
	 */
	function setModel( $model ) {
		$this->model = $model;
	}

	/**
	 * @param $format string Content format
	 */
	function setFormat( $format ) {
		$this->format = $format;
	}

	/**
	 * @param $text string
	 */
	function setText( $text ) {
		$this->text = $text;
	}

	/**
	 * @param $text string
	 */
	function setComment( $text ) {
		$this->comment = $text;
	}

	/**
	 * @param $minor bool
	 */
	function setMinor( $minor ) {
		$this->minor = (bool)$minor;
	}

	/**
	 * @param $src mixed
	 */
	function setSrc( $src ) {
		$this->src = $src;
	}

	/**
	 * @param $src string Path of the file to import
	 * @param $isTemp bool Whether that file is a temporary one
	 */
	function setFileSrc( $src, $isTemp ) {
		$this->fileSrc = $src;
		$this->fileIsTemp = $isTemp;
		// isTempSrc() reports $isTemp, so keep both flags in sync
		$this->isTemp = $isTemp;
	}

	/**
	 * @param $sha1base36 string
	 */
	function setSha1Base36( $sha1base36 ) {
		$this->sha1base36 = $sha1base36;
	}

	/**
	 * @param $filename string
	 */
	function setFilename( $filename ) {
		$this->filename = $filename;
	}

	/**
	 * @param $archiveName string
	 */
	function setArchiveName( $archiveName ) {
		$this->archiveName = $archiveName;
	}

	/**
	 * @param $size int
	 */
	function setSize( $size ) {
		$this->size = intval( $size );
	}

	/**
	 * @param $type string
	 */
	function setType( $type ) {
		$this->type = $type;
	}

	/**
	 * @param $action string
	 */
	function setAction( $action ) {
		$this->action = $action;
	}

	/**
	 * @param $params mixed
	 */
	function setParams( $params ) {
		$this->params = $params;
	}

	/**
	 * @param $noupdates bool
	 */
	public function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * @return Title|null
	 */
	function getTitle() {
		return $this->title;
	}

	/**
	 * @return int
	 */
	function getID() {
		return $this->id;
	}

	/**
	 * @return string Timestamp in TS_MW format
	 */
	function getTimestamp() {
		return $this->timestamp;
	}

	/**
	 * @return string User name or IP address
	 */
	function getUser() {
		return $this->user_text;
	}

function getText() { 01233 ContentHandler::deprecated( __METHOD__, '1.21' ); 01234 01235 return $this->text; 01236 } 01237 01241 function getContent() { 01242 if ( is_null( $this->content ) ) { 01243 $this->content = 01244 ContentHandler::makeContent( 01245 $this->text, 01246 $this->getTitle(), 01247 $this->getModel(), 01248 $this->getFormat() 01249 ); 01250 } 01251 01252 return $this->content; 01253 } 01254 01258 function getModel() { 01259 if ( is_null( $this->model ) ) { 01260 $this->model = $this->getTitle()->getContentModel(); 01261 } 01262 01263 return $this->model; 01264 } 01265 01269 function getFormat() { 01270 if ( is_null( $this->model ) ) { 01271 $this->format = ContentHandler::getForTitle( $this->getTitle() )->getDefaultFormat(); 01272 } 01273 01274 return $this->format; 01275 } 01276 01280 function getComment() { 01281 return $this->comment; 01282 } 01283 01287 function getMinor() { 01288 return $this->minor; 01289 } 01290 01294 function getSrc() { 01295 return $this->src; 01296 } 01297 01301 function getSha1() { 01302 if ( $this->sha1base36 ) { 01303 return wfBaseConvert( $this->sha1base36, 36, 16 ); 01304 } 01305 return false; 01306 } 01307 01311 function getFileSrc() { 01312 return $this->fileSrc; 01313 } 01314 01318 function isTempSrc() { 01319 return $this->isTemp; 01320 } 01321 01325 function getFilename() { 01326 return $this->filename; 01327 } 01328 01332 function getArchiveName() { 01333 return $this->archiveName; 01334 } 01335 01339 function getSize() { 01340 return $this->size; 01341 } 01342 01346 function getType() { 01347 return $this->type; 01348 } 01349 01353 function getAction() { 01354 return $this->action; 01355 } 01356 01360 function getParams() { 01361 return $this->params; 01362 } 01363 01367 function importOldRevision() { 01368 $dbw = wfGetDB( DB_MASTER ); 01369 01370 # Sneak a single revision into place 01371 $user = User::newFromName( $this->getUser() ); 01372 if( $user ) { 01373 $userId = intval( $user->getId() ); 01374 
$userText = $user->getName(); 01375 $userObj = $user; 01376 } else { 01377 $userId = 0; 01378 $userText = $this->getUser(); 01379 $userObj = new User; 01380 } 01381 01382 // avoid memory leak...? 01383 $linkCache = LinkCache::singleton(); 01384 $linkCache->clear(); 01385 01386 $page = WikiPage::factory( $this->title ); 01387 if( !$page->exists() ) { 01388 # must create the page... 01389 $pageId = $page->insertOn( $dbw ); 01390 $created = true; 01391 $oldcountable = null; 01392 } else { 01393 $pageId = $page->getId(); 01394 $created = false; 01395 01396 $prior = $dbw->selectField( 'revision', '1', 01397 array( 'rev_page' => $pageId, 01398 'rev_timestamp' => $dbw->timestamp( $this->timestamp ), 01399 'rev_user_text' => $userText, 01400 'rev_comment' => $this->getComment() ), 01401 __METHOD__ 01402 ); 01403 if( $prior ) { 01404 // @todo FIXME: This could fail slightly for multiple matches :P 01405 wfDebug( __METHOD__ . ": skipping existing revision for [[" . 01406 $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" ); 01407 return false; 01408 } 01409 $oldcountable = $page->isCountable(); 01410 } 01411 01412 # @todo FIXME: Use original rev_id optionally (better for backups) 01413 # Insert the row 01414 $revision = new Revision( array( 01415 'title' => $this->title, 01416 'page' => $pageId, 01417 'content_model' => $this->getModel(), 01418 'content_format' => $this->getFormat(), 01419 'text' => $this->getContent()->serialize( $this->getFormat() ), //XXX: just set 'content' => $this->getContent()? 01420 'comment' => $this->getComment(), 01421 'user' => $userId, 01422 'user_text' => $userText, 01423 'timestamp' => $this->timestamp, 01424 'minor_edit' => $this->minor, 01425 ) ); 01426 $revision->insertOn( $dbw ); 01427 $changed = $page->updateIfNewerOn( $dbw, $revision ); 01428 01429 if ( $changed !== false && !$this->mNoUpdates ) { 01430 wfDebug( __METHOD__ . 
": running updates\n" ); 01431 $page->doEditUpdates( $revision, $userObj, array( 'created' => $created, 'oldcountable' => $oldcountable ) ); 01432 } 01433 01434 return true; 01435 } 01436 01440 function importLogItem() { 01441 $dbw = wfGetDB( DB_MASTER ); 01442 # @todo FIXME: This will not record autoblocks 01443 if( !$this->getTitle() ) { 01444 wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " . 01445 $this->timestamp . "\n" ); 01446 return; 01447 } 01448 # Check if it exists already 01449 // @todo FIXME: Use original log ID (better for backups) 01450 $prior = $dbw->selectField( 'logging', '1', 01451 array( 'log_type' => $this->getType(), 01452 'log_action' => $this->getAction(), 01453 'log_timestamp' => $dbw->timestamp( $this->timestamp ), 01454 'log_namespace' => $this->getTitle()->getNamespace(), 01455 'log_title' => $this->getTitle()->getDBkey(), 01456 'log_comment' => $this->getComment(), 01457 #'log_user_text' => $this->user_text, 01458 'log_params' => $this->params ), 01459 __METHOD__ 01460 ); 01461 // @todo FIXME: This could fail slightly for multiple matches :P 01462 if( $prior ) { 01463 wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " . 01464 $this->timestamp . 
"\n" ); 01465 return; 01466 } 01467 $log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' ); 01468 $data = array( 01469 'log_id' => $log_id, 01470 'log_type' => $this->type, 01471 'log_action' => $this->action, 01472 'log_timestamp' => $dbw->timestamp( $this->timestamp ), 01473 'log_user' => User::idFromName( $this->user_text ), 01474 #'log_user_text' => $this->user_text, 01475 'log_namespace' => $this->getTitle()->getNamespace(), 01476 'log_title' => $this->getTitle()->getDBkey(), 01477 'log_comment' => $this->getComment(), 01478 'log_params' => $this->params 01479 ); 01480 $dbw->insert( 'logging', $data, __METHOD__ ); 01481 } 01482 01486 function importUpload() { 01487 # Construct a file 01488 $archiveName = $this->getArchiveName(); 01489 if ( $archiveName ) { 01490 wfDebug( __METHOD__ . "Importing archived file as $archiveName\n" ); 01491 $file = OldLocalFile::newFromArchiveName( $this->getTitle(), 01492 RepoGroup::singleton()->getLocalRepo(), $archiveName ); 01493 } else { 01494 $file = wfLocalFile( $this->getTitle() ); 01495 wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" ); 01496 if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) { 01497 $archiveName = $file->getTimestamp() . '!' . $file->getName(); 01498 $file = OldLocalFile::newFromArchiveName( $this->getTitle(), 01499 RepoGroup::singleton()->getLocalRepo(), $archiveName ); 01500 wfDebug( __METHOD__ . "File already exists; importing as $archiveName\n" ); 01501 } 01502 } 01503 if( !$file ) { 01504 wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" ); 01505 return false; 01506 } 01507 01508 # Get the file source or download if necessary 01509 $source = $this->getFileSrc(); 01510 $flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0; 01511 if ( !$source ) { 01512 $source = $this->downloadSource(); 01513 $flags |= File::DELETE_SOURCE; 01514 } 01515 if( !$source ) { 01516 wfDebug( __METHOD__ . 
": Could not fetch remote file.\n" ); 01517 return false; 01518 } 01519 $sha1 = $this->getSha1(); 01520 if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) { 01521 if ( $flags & File::DELETE_SOURCE ) { 01522 # Broken file; delete it if it is a temporary file 01523 unlink( $source ); 01524 } 01525 wfDebug( __METHOD__ . ": Corrupt file $source.\n" ); 01526 return false; 01527 } 01528 01529 $user = User::newFromName( $this->user_text ); 01530 01531 # Do the actual upload 01532 if ( $archiveName ) { 01533 $status = $file->uploadOld( $source, $archiveName, 01534 $this->getTimestamp(), $this->getComment(), $user, $flags ); 01535 } else { 01536 $status = $file->upload( $source, $this->getComment(), $this->getComment(), 01537 $flags, false, $this->getTimestamp(), $user ); 01538 } 01539 01540 if ( $status->isGood() ) { 01541 wfDebug( __METHOD__ . ": Successful\n" ); 01542 return true; 01543 } else { 01544 wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" ); 01545 return false; 01546 } 01547 } 01548 01552 function downloadSource() { 01553 global $wgEnableUploads; 01554 if( !$wgEnableUploads ) { 01555 return false; 01556 } 01557 01558 $tempo = tempnam( wfTempDir(), 'download' ); 01559 $f = fopen( $tempo, 'wb' ); 01560 if( !$f ) { 01561 wfDebug( "IMPORT: couldn't write to temp file $tempo\n" ); 01562 return false; 01563 } 01564 01565 // @todo FIXME! 
01566 $src = $this->getSrc(); 01567 $data = Http::get( $src ); 01568 if( !$data ) { 01569 wfDebug( "IMPORT: couldn't fetch source $src\n" ); 01570 fclose( $f ); 01571 unlink( $tempo ); 01572 return false; 01573 } 01574 01575 fwrite( $f, $data ); 01576 fclose( $f ); 01577 01578 return $tempo; 01579 } 01580 01581 } 01582 01587 class ImportStringSource { 01588 function __construct( $string ) { 01589 $this->mString = $string; 01590 $this->mRead = false; 01591 } 01592 01596 function atEnd() { 01597 return $this->mRead; 01598 } 01599 01603 function readChunk() { 01604 if( $this->atEnd() ) { 01605 return false; 01606 } 01607 $this->mRead = true; 01608 return $this->mString; 01609 } 01610 } 01611 01616 class ImportStreamSource { 01617 function __construct( $handle ) { 01618 $this->mHandle = $handle; 01619 } 01620 01624 function atEnd() { 01625 return feof( $this->mHandle ); 01626 } 01627 01631 function readChunk() { 01632 return fread( $this->mHandle, 32768 ); 01633 } 01634 01639 static function newFromFile( $filename ) { 01640 wfSuppressWarnings(); 01641 $file = fopen( $filename, 'rt' ); 01642 wfRestoreWarnings(); 01643 if( !$file ) { 01644 return Status::newFatal( "importcantopen" ); 01645 } 01646 return Status::newGood( new ImportStreamSource( $file ) ); 01647 } 01648 01653 static function newFromUpload( $fieldname = "xmlimport" ) { 01654 $upload =& $_FILES[$fieldname]; 01655 01656 if( $upload === null || !$upload['name'] ) { 01657 return Status::newFatal( 'importnofile' ); 01658 } 01659 if( !empty( $upload['error'] ) ) { 01660 switch( $upload['error'] ) { 01661 case 1: # The uploaded file exceeds the upload_max_filesize directive in php.ini. 01662 return Status::newFatal( 'importuploaderrorsize' ); 01663 case 2: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form. 
01664 return Status::newFatal( 'importuploaderrorsize' ); 01665 case 3: # The uploaded file was only partially uploaded 01666 return Status::newFatal( 'importuploaderrorpartial' ); 01667 case 6: #Missing a temporary folder. 01668 return Status::newFatal( 'importuploaderrortemp' ); 01669 # case else: # Currently impossible 01670 } 01671 01672 } 01673 $fname = $upload['tmp_name']; 01674 if( is_uploaded_file( $fname ) ) { 01675 return ImportStreamSource::newFromFile( $fname ); 01676 } else { 01677 return Status::newFatal( 'importnofile' ); 01678 } 01679 } 01680 01686 static function newFromURL( $url, $method = 'GET' ) { 01687 wfDebug( __METHOD__ . ": opening $url\n" ); 01688 # Use the standard HTTP fetch function; it times out 01689 # quicker and sorts out user-agent problems which might 01690 # otherwise prevent importing from large sites, such 01691 # as the Wikimedia cluster, etc. 01692 $data = Http::request( $method, $url, array( 'followRedirects' => true ) ); 01693 if( $data !== false ) { 01694 $file = tmpfile(); 01695 fwrite( $file, $data ); 01696 fflush( $file ); 01697 fseek( $file, 0 ); 01698 return Status::newGood( new ImportStreamSource( $file ) ); 01699 } else { 01700 return Status::newFatal( 'importcantopen' ); 01701 } 01702 } 01703 01712 public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) { 01713 if( $page == '' ) { 01714 return Status::newFatal( 'import-noarticle' ); 01715 } 01716 $link = Title::newFromText( "$interwiki:Special:Export/$page" ); 01717 if( is_null( $link ) || $link->getInterwiki() == '' ) { 01718 return Status::newFatal( 'importbadinterwiki' ); 01719 } else { 01720 $params = array(); 01721 if ( $history ) $params['history'] = 1; 01722 if ( $templates ) $params['templates'] = 1; 01723 if ( $pageLinkDepth ) $params['pagelink-depth'] = $pageLinkDepth; 01724 $url = $link->getFullUrl( $params ); 01725 # For interwikis, use POST to avoid redirects. 
01726 return ImportStreamSource::newFromURL( $url, "POST" ); 01727 } 01728 } 01729 }