MediaWiki
REL1_24
|
<?php

/**
 * XML dump reader for the page importer.
 *
 * Streams a MediaWiki XML export through XMLReader and turns each <page>,
 * <revision>, <upload> and <logitem> element into a WikiRevision object that
 * is handed to a configurable callback.
 */
class WikiImporter {
	private $reader = null;
	private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
	private $mSiteInfoCallback, $mTargetNamespace, $mTargetRootPage, $mPageOutCallback;
	private $mNoticeCallback, $mDebug;
	private $mImportUploads, $mImageBasePath;
	private $mNoUpdates = false;

	/**
	 * Opens the given source through the "uploadsource://" stream wrapper and
	 * installs the default revision/upload/log-item/page-out callbacks.
	 *
	 * @param ImportStreamSource $source
	 */
	function __construct( ImportStreamSource $source ) {
		$this->reader = new XMLReader();

		// The wrapper may only be registered once per process.
		if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
			stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
		}
		$id = UploadSourceAdapter::registerSource( $source );
		if ( defined( 'LIBXML_PARSEHUGE' ) ) {
			$this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
		} else {
			$this->reader->open( "uploadsource://$id" );
		}

		// Default callbacks
		$this->setRevisionCallback( array( $this, "importRevision" ) );
		$this->setUploadCallback( array( $this, 'importUpload' ) );
		$this->setLogItemCallback( array( $this, 'importLogItem' ) );
		$this->setPageOutCallback( array( $this, 'finishImportPage' ) );
	}

	/**
	 * @return XMLReader The underlying reader (null only before construction)
	 */
	public function getReader() {
		return $this->reader;
	}

	/**
	 * Logs an XML error. Despite the name this does not throw.
	 *
	 * @param string $err
	 */
	public function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
		wfDebug( "WikiImporter XML error: $err\n" );
	}

	/**
	 * Writes to the debug log, but only when debugging was enabled via setDebug().
	 *
	 * @param string $data
	 */
	public function debug( $data ) {
		if ( $this->mDebug ) {
			wfDebug( "IMPORT: $data\n" );
		}
	}

	/**
	 * Writes to the debug log unconditionally.
	 *
	 * @param string $data
	 */
	public function warn( $data ) {
		wfDebug( "IMPORT: $data\n" );
	}

	/**
	 * Reports a message to the notice callback, or echoes it when no callable
	 * notice callback is set.
	 *
	 * @param string $msg Message key; any further arguments are message parameters
	 */
	public function notice( $msg /*, $param, ...*/ ) {
		$params = func_get_args();
		array_shift( $params );

		if ( is_callable( $this->mNoticeCallback ) ) {
			call_user_func( $this->mNoticeCallback, $msg, $params );
		} else { # No ImportReporter -> CLI
			echo wfMessage( $msg, $params )->text() . "\n";
		}
	}

	/**
	 * Enables or disables debug() output.
	 *
	 * @param bool $debug
	 */
	function setDebug( $debug ) {
		$this->mDebug = $debug;
	}

	/**
	 * Sets the "no updates" flag that is forwarded to every WikiRevision
	 * created by this importer.
	 *
	 * @param bool $noupdates
	 */
	function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * Sets the callback used by notice().
	 *
	 * @param callable $callback Called as ( $msg, array $params )
	 * @return mixed Whatever wfSetVar() returns
	 */
	public function setNoticeCallback( $callback ) {
		return wfSetVar( $this->mNoticeCallback, $callback );
	}

	/**
	 * Sets the callback invoked when a page's <title> has been processed.
	 *
	 * @param callable $callback
	 * @return callable|null The previously registered callback
	 */
	public function setPageCallback( $callback ) {
		$previous = $this->mPageCallback;
		$this->mPageCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked after a whole <page> element has been handled.
	 * It receives ( $title, $origTitle, $revCount, $sucCount, $pageInfo ).
	 *
	 * @param callable $callback
	 * @return callable|null The previously registered callback
	 */
	public function setPageOutCallback( $callback ) {
		$previous = $this->mPageOutCallback;
		$this->mPageOutCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for every <revision>; it receives
	 * ( WikiRevision $revision, WikiImporter $importer ).
	 *
	 * @param callable $callback
	 * @return callable|null The previously registered callback
	 */
	public function setRevisionCallback( $callback ) {
		$previous = $this->mRevisionCallback;
		$this->mRevisionCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for every <upload>; it receives the WikiRevision.
	 *
	 * @param callable $callback
	 * @return callable|null The previously registered callback
	 */
	public function setUploadCallback( $callback ) {
		$previous = $this->mUploadCallback;
		$this->mUploadCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the callback invoked for every <logitem>; it receives
	 * ( WikiRevision $revision, WikiImporter $importer ).
	 *
	 * @param callable $callback
	 * @return callable|null The previously registered callback
	 */
	public function setLogItemCallback( $callback ) {
		$previous = $this->mLogItemCallback;
		$this->mLogItemCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the site-info callback. Note that handleSiteInfo() throws when a
	 * callback is set, because <siteinfo> processing is not implemented.
	 *
	 * @param callable $callback
	 * @return callable|null The previously registered callback
	 */
	public function setSiteInfoCallback( $callback ) {
		$previous = $this->mSiteInfoCallback;
		$this->mSiteInfoCallback = $callback;
		return $previous;
	}

	/**
	 * Forces all imported pages into one namespace, or (with null) keeps the
	 * namespaces found in the dump.
	 *
	 * @param int|null $namespace
	 * @return bool True when the value was accepted, false for a negative namespace
	 */
	public function setTargetNamespace( $namespace ) {
		if ( is_null( $namespace ) ) {
			// Don't override namespaces
			$this->mTargetNamespace = null;
			return true;
		}

		if ( $namespace >= 0 ) {
			// @todo FIXME: Check for validity
			$this->mTargetNamespace = intval( $namespace );
			return true;
		}

		return false;
	}

	/**
	 * Sets a page under which all imported pages are created as subpages.
	 * An empty string leaves the current setting untouched.
	 *
	 * @param string|null $rootpage
	 * @return Status Fatal when the title is invalid, external, or lives in a
	 *   namespace without subpages
	 */
	public function setTargetRootPage( $rootpage ) {
		$status = Status::newGood();
		if ( is_null( $rootpage ) ) {
			// No rootpage
			$this->mTargetRootPage = null;
		} elseif ( $rootpage !== '' ) {
			$rootpage = rtrim( $rootpage, '/' ); //avoid double slashes
			$title = Title::newFromText( $rootpage, !is_null( $this->mTargetNamespace )
				? $this->mTargetNamespace
				: NS_MAIN
			);

			if ( !$title || $title->isExternal() ) {
				$status->fatal( 'import-rootpage-invalid' );
			} else {
				if ( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
					global $wgContLang;

					$displayNSText = $title->getNamespace() == NS_MAIN
						? wfMessage( 'blanknamespace' )->text()
						: $wgContLang->getNsText( $title->getNamespace() );
					$status->fatal( 'import-rootpage-nosubpage', $displayNSText );
				} else {
					// set namespace to 'all', so the namespace check in processTitle() can pass
					$this->setTargetNamespace( null );
					$this->mTargetRootPage = $title->getPrefixedDBkey();
				}
			}
		}
		return $status;
	}

	/**
	 * Sets the directory that <rel> paths of uploads are resolved against.
	 *
	 * @param string $dir
	 */
	public function setImageBasePath( $dir ) {
		$this->mImageBasePath = $dir;
	}

	/**
	 * Enables or disables processing of <upload> elements.
	 *
	 * @param bool $import
	 */
	public function setImportUploads( $import ) {
		$this->mImportUploads = $import;
	}

	/**
	 * Default per-revision callback: stores the revision through
	 * WikiRevision::importOldRevision() inside a deadlock-retry loop.
	 *
	 * @param WikiRevision $revision
	 * @return bool|mixed False when the content model cannot be used on the
	 *   title or the content cannot be unserialized, otherwise the result of
	 *   the deadlock loop
	 */
	public function importRevision( $revision ) {
		if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
			$this->notice( 'import-error-bad-location',
				$revision->getTitle()->getPrefixedText(),
				$revision->getID(),
				$revision->getModel(),
				$revision->getFormat() );

			return false;
		}

		try {
			$dbw = wfGetDB( DB_MASTER );
			return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
		} catch ( MWContentSerializationException $ex ) {
			$this->notice( 'import-error-unserialize',
				$revision->getTitle()->getPrefixedText(),
				$revision->getID(),
				$revision->getModel(),
				$revision->getFormat() );
		}

		return false;
	}

	/**
	 * Default per-log-item callback.
	 *
	 * @param WikiRevision $revision
	 * @return mixed Result of the deadlock loop around WikiRevision::importLogItem()
	 */
	public function importLogItem( $revision ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $revision, 'importLogItem' ) );
	}

	/**
	 * Default per-upload callback.
	 *
	 * @param WikiRevision $revision
	 * @return mixed Result of the deadlock loop around WikiRevision::importUpload()
	 */
	public function importUpload( $revision ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
	}

	/**
	 * Default page-out callback: runs the 'AfterImportPage' hook with exactly
	 * the arguments it was given.
	 *
	 * @param Title|null $title
	 * @param Title|null $origTitle
	 * @param int $revCount
	 * @param int $sRevCount
	 * @param array $pageInfo
	 * @return mixed Result of wfRunHooks()
	 */
	public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
		$args = func_get_args();
		return wfRunHooks( 'AfterImportPage', $args );
	}

	/**
	 * Alternative revision callback that only dumps the revision to the debug log.
	 *
	 * @param WikiRevision $revision
	 */
	public function debugRevisionHandler( &$revision ) {
		$this->debug( "Got revision:" );
		if ( is_object( $revision->title ) ) {
			$this->debug( "-- Title: " . $revision->title->getPrefixedText() );
		} else {
			$this->debug( "-- Title: <invalid>" );
		}
		$this->debug( "-- User: " . $revision->user_text );
		$this->debug( "-- Timestamp: " . $revision->timestamp );
		$this->debug( "-- Comment: " . $revision->comment );
		$this->debug( "-- Text: " . $revision->text );
	}

	/**
	 * Invokes the page callback, if one is set.
	 *
	 * @param array|bool $title Result of processTitle(): array( Title, Title|null )
	 *   or false for a rejected title
	 */
	function pageCallback( $title ) {
		if ( isset( $this->mPageCallback ) ) {
			call_user_func( $this->mPageCallback, $title );
		}
	}

	/**
	 * Invokes the page-out callback, if one is set, with the same arguments.
	 *
	 * @param Title|null $title
	 * @param Title|null $origTitle
	 * @param int $revCount
	 * @param int $sucCount
	 * @param array $pageInfo
	 */
	private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
		if ( isset( $this->mPageOutCallback ) ) {
			$args = func_get_args();
			call_user_func_array( $this->mPageOutCallback, $args );
		}
	}

	/**
	 * Invokes the revision callback.
	 *
	 * @param WikiRevision $revision
	 * @return bool|mixed Callback result, or false when no callback is set
	 */
	private function revisionCallback( $revision ) {
		if ( isset( $this->mRevisionCallback ) ) {
			return call_user_func_array( $this->mRevisionCallback,
				array( $revision, $this ) );
		} else {
			return false;
		}
	}

	/**
	 * Invokes the log-item callback.
	 *
	 * @param WikiRevision $revision
	 * @return bool|mixed Callback result, or false when no callback is set
	 */
	private function logItemCallback( $revision ) {
		if ( isset( $this->mLogItemCallback ) ) {
			return call_user_func_array( $this->mLogItemCallback,
				array( $revision, $this ) );
		} else {
			return false;
		}
	}

	/**
	 * Reads an attribute of the node the reader is currently positioned on.
	 *
	 * @param string $attr
	 * @return string|null
	 */
	public function nodeAttribute( $attr ) {
		return $this->reader->getAttribute( $attr );
	}

	/**
	 * Collects the character data of the current element and leaves the reader
	 * on the first END_ELEMENT node it meets.
	 *
	 * Text, CDATA and significant-whitespace nodes are concatenated. CDATA is
	 * included so that content wrapped in <![CDATA[ ... ]]> is not lost.
	 *
	 * @return string Element text; '' for an empty element or when the input
	 *   ends before an END_ELEMENT is seen (the reader is then closed)
	 */
	public function nodeContents() {
		if ( $this->reader->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->reader->read() ) {
			switch ( $this->reader->nodeType ) {
				case XMLReader::TEXT:
				case XMLReader::CDATA:
				case XMLReader::SIGNIFICANT_WHITESPACE:
					$buffer .= $this->reader->value;
					break;
				case XMLReader::END_ELEMENT:
					return $buffer;
			}
		}

		$this->reader->close();
		return '';
	}

	/**
	 * Runs the import: verifies the <mediawiki> root and dispatches every
	 * top-level element to its handler.
	 *
	 * @throws MWException When the root element is not <mediawiki>
	 * @throws Exception Anything thrown by a handler or callback is re-thrown
	 * @return bool Always true when no exception was thrown
	 */
	public function doImport() {
		// Calls to reader->read need to be wrapped in calls to
		// libxml_disable_entity_loader() to avoid local file
		// inclusion attacks (bug 46932).
		$oldDisable = libxml_disable_entity_loader( true );
		$rethrow = null;

		// The entity-loader switch is process-wide, so it has to be restored
		// on every exit path. "finally" is avoided because the rest of this
		// file sticks to pre-5.5 syntax.
		try {
			$this->reader->read();

			if ( $this->reader->name != 'mediawiki' ) {
				throw new MWException( "Expected <mediawiki> tag, got " .
					$this->reader->name );
			}
			$this->debug( "<mediawiki> tag is correct." );

			$this->debug( "Starting primary dump processing loop." );

			$keepReading = $this->reader->read();
			$skip = false;
			while ( $keepReading ) {
				$tag = $this->reader->name;
				$type = $this->reader->nodeType;

				if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
					// Do nothing
				} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
					break;
				} elseif ( $tag == 'siteinfo' ) {
					$this->handleSiteInfo();
				} elseif ( $tag == 'page' ) {
					$this->handlePage();
				} elseif ( $tag == 'logitem' ) {
					$this->handleLogItem();
				} elseif ( $tag != '#text' ) {
					$this->warn( "Unhandled top-level XML tag $tag" );

					$skip = true;
				}

				if ( $skip ) {
					// next() jumps over the unknown element's whole subtree
					$keepReading = $this->reader->next();
					$skip = false;
					$this->debug( "Skip" );
				} else {
					$keepReading = $this->reader->read();
				}
			}
		} catch ( Exception $ex ) {
			$rethrow = $ex;
		}

		libxml_disable_entity_loader( $oldDisable );

		if ( $rethrow !== null ) {
			throw $rethrow;
		}

		return true;
	}

	/**
	 * Skips the <siteinfo> element.
	 *
	 * @throws MWException When a site-info callback has been set
	 * @return bool
	 */
	private function handleSiteInfo() {
		// Site info is useful, but not actually used for dump imports.
		// Includes a quick short-circuit to save performance.
		if ( !$this->mSiteInfoCallback ) {
			$this->reader->next();
			return true;
		}
		throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
	}

	/**
	 * Reads one <logitem> element and passes the collected fields to
	 * processLogItem().
	 */
	private function handleLogItem() {
		$this->debug( "Enter log item handler." );
		$logInfo = array();

		// Fields that can just be stuffed in the pageInfo object
		$normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
			'logtitle', 'params' );

		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'logitem' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( !wfRunHooks( 'ImportHandleLogItemXMLTag', array(
				$this, $logInfo
			) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$logInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$logInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled log-item XML tag $tag" );
			}
		}

		$this->processLogItem( $logInfo );
	}

	/**
	 * Builds a WikiRevision from log-item fields and hands it to the log-item
	 * callback.
	 *
	 * @param array $logInfo Fields gathered by handleLogItem()
	 * @return bool|mixed Result of the log-item callback
	 */
	private function processLogItem( $logInfo ) {
		$revision = new WikiRevision;

		$revision->setID( $logInfo['id'] );
		$revision->setType( $logInfo['type'] );
		$revision->setAction( $logInfo['action'] );
		$revision->setTimestamp( $logInfo['timestamp'] );
		$revision->setParams( $logInfo['params'] );
		$revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
		$revision->setNoUpdates( $this->mNoUpdates );

		if ( isset( $logInfo['comment'] ) ) {
			$revision->setComment( $logInfo['comment'] );
		}

		if ( isset( $logInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $logInfo['contributor']['ip'] );
		}
		if ( isset( $logInfo['contributor']['username'] ) ) {
			$revision->setUserName( $logInfo['contributor']['username'] );
		}

		return $this->logItemCallback( $revision );
	}

	/**
	 * Reads one <page> element, dispatching its <revision> and <upload>
	 * children, and finally fires the page-out callback.
	 */
	private function handlePage() {
		// Handle page data.
		$this->debug( "Enter page handler." );
		$pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );

		// Fields that can just be stuffed in the pageInfo object
		$normalFields = array( 'title', 'id', 'redirect', 'restrictions' );

		$skip = false;
		$badTitle = false;
		// Stays null when the page has no usable <title>
		$origTitle = null;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'page' ) {
				break;
			}

			// A skip request only applies to the node that asked for it;
			// without this reset one unknown tag would switch the whole
			// remainder of the page to next()-style traversal.
			$skip = false;

			$tag = $this->reader->name;

			if ( $badTitle ) {
				// The title is invalid, bail out of this page
				$skip = true;
			} elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this,
				&$pageInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				// An XML snippet:
				// <page>
				//     <id>123</id>
				//     <title>Page</title>
				//     <redirect title="NewTitle"/>
				//     ...
				// Because the redirect tag is built differently, we need special handling for that case.
				if ( $tag == 'redirect' ) {
					$pageInfo[$tag] = $this->nodeAttribute( 'title' );
				} else {
					$pageInfo[$tag] = $this->nodeContents();
					if ( $tag == 'title' ) {
						$title = $this->processTitle( $pageInfo['title'] );

						if ( !$title ) {
							$badTitle = true;
							$skip = true;
						}

						$this->pageCallback( $title );

						if ( $title ) {
							list( $pageInfo['_title'], $origTitle ) = $title;
						} else {
							// Rejected title: the page-out callback still
							// runs, with null titles, as before.
							$pageInfo['_title'] = null;
							$origTitle = null;
						}
					}
				}
			} elseif ( $tag == 'revision' ) {
				$this->handleRevision( $pageInfo );
			} elseif ( $tag == 'upload' ) {
				$this->handleUpload( $pageInfo );
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled page XML tag $tag" );
				$skip = true;
			}
		}

		// '_title' is only present once a <title> element has been seen.
		// Without one there is nothing to report, and reading the key would
		// raise an undefined-index notice.
		if ( array_key_exists( '_title', $pageInfo ) ) {
			$this->pageOutCallback( $pageInfo['_title'], $origTitle,
				$pageInfo['revisionCount'],
				$pageInfo['successfulRevisionCount'],
				$pageInfo );
		}
	}

	/**
	 * Reads one <revision> element and updates the page's revision counters.
	 *
	 * @param array $pageInfo Page state; 'revisionCount' is always incremented,
	 *   'successfulRevisionCount' only when processRevision() returns a truthy value
	 */
	private function handleRevision( &$pageInfo ) {
		$this->debug( "Enter revision handler" );
		$revisionInfo = array();

		$normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'revision' ) {
				break;
			}

			// Skip requests are per node; see handlePage()
			$skip = false;

			$tag = $this->reader->name;

			if ( !wfRunHooks( 'ImportHandleRevisionXMLTag', array(
				$this, $pageInfo, $revisionInfo
			) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$revisionInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$revisionInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled revision XML tag $tag" );
				$skip = true;
			}
		}

		$pageInfo['revisionCount']++;
		if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
			$pageInfo['successfulRevisionCount']++;
		}
	}

	/**
	 * Builds a WikiRevision from revision fields and hands it to the revision
	 * callback.
	 *
	 * @param array $pageInfo Must contain '_title'
	 * @param array $revisionInfo Fields gathered by handleRevision()
	 * @return bool|mixed Result of the revision callback
	 */
	private function processRevision( $pageInfo, $revisionInfo ) {
		$revision = new WikiRevision;

		if ( isset( $revisionInfo['id'] ) ) {
			$revision->setID( $revisionInfo['id'] );
		}
		if ( isset( $revisionInfo['model'] ) ) {
			$revision->setModel( $revisionInfo['model'] );
		}
		if ( isset( $revisionInfo['format'] ) ) {
			$revision->setFormat( $revisionInfo['format'] );
		}
		$revision->setTitle( $pageInfo['_title'] );

		if ( isset( $revisionInfo['text'] ) ) {
			$handler = $revision->getContentHandler();
			$text = $handler->importTransform(
				$revisionInfo['text'],
				$revision->getFormat() );

			$revision->setText( $text );
		}
		if ( isset( $revisionInfo['timestamp'] ) ) {
			$revision->setTimestamp( $revisionInfo['timestamp'] );
		} else {
			$revision->setTimestamp( wfTimestampNow() );
		}

		if ( isset( $revisionInfo['comment'] ) ) {
			$revision->setComment( $revisionInfo['comment'] );
		}

		// Presence of the <minor/> element is what marks a minor edit
		if ( isset( $revisionInfo['minor'] ) ) {
			$revision->setMinor( true );
		}
		if ( isset( $revisionInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $revisionInfo['contributor']['ip'] );
		}
		if ( isset( $revisionInfo['contributor']['username'] ) ) {
			$revision->setUserName( $revisionInfo['contributor']['username'] );
		}
		$revision->setNoUpdates( $this->mNoUpdates );

		return $this->revisionCallback( $revision );
	}

	/**
	 * Reads one <upload> element. The upload is only processed when uploads
	 * were enabled through setImportUploads().
	 *
	 * @param array $pageInfo
	 * @return mixed|null Result of processUpload(), or null when uploads are disabled
	 */
	private function handleUpload( &$pageInfo ) {
		$this->debug( "Enter upload handler" );
		$uploadInfo = array();

		$normalFields = array( 'timestamp', 'comment', 'filename', 'text',
			'src', 'size', 'sha1base36', 'archivename', 'rel' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'upload' ) {
				break;
			}

			// Skip requests are per node; see handlePage()
			$skip = false;

			$tag = $this->reader->name;

			if ( !wfRunHooks( 'ImportHandleUploadXMLTag', array(
				$this, $pageInfo
			) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$uploadInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$uploadInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag == 'contents' ) {
				$contents = $this->nodeContents();
				$encoding = $this->reader->getAttribute( 'encoding' );
				if ( $encoding === 'base64' ) {
					$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
					$uploadInfo['isTempSrc'] = true;
				}
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled upload XML tag $tag" );
				$skip = true;
			}
		}

		// A file found under the image base path takes precedence over
		// inline <contents>
		if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
			$path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
			if ( file_exists( $path ) ) {
				$uploadInfo['fileSrc'] = $path;
				$uploadInfo['isTempSrc'] = false;
			}
		}

		if ( $this->mImportUploads ) {
			return $this->processUpload( $pageInfo, $uploadInfo );
		}
	}

	/**
	 * Writes data to a new temporary file.
	 *
	 * @param string $contents
	 * @return string Path of the temporary file
	 */
	private function dumpTemp( $contents ) {
		$filename = tempnam( wfTempDir(), 'importupload' );
		file_put_contents( $filename, $contents );
		return $filename;
	}

	/**
	 * Builds a WikiRevision from upload fields and hands it to the upload
	 * callback.
	 *
	 * @param array $pageInfo Must contain '_title' and 'id'
	 * @param array $uploadInfo Fields gathered by handleUpload()
	 * @return mixed Result of the upload callback
	 */
	private function processUpload( $pageInfo, $uploadInfo ) {
		$revision = new WikiRevision;
		$text = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';

		$revision->setTitle( $pageInfo['_title'] );
		$revision->setID( $pageInfo['id'] );
		$revision->setTimestamp( $uploadInfo['timestamp'] );
		$revision->setText( $text );
		$revision->setFilename( $uploadInfo['filename'] );
		if ( isset( $uploadInfo['archivename'] ) ) {
			$revision->setArchiveName( $uploadInfo['archivename'] );
		}
		$revision->setSrc( $uploadInfo['src'] );
		if ( isset( $uploadInfo['fileSrc'] ) ) {
			$revision->setFileSrc( $uploadInfo['fileSrc'],
				!empty( $uploadInfo['isTempSrc'] ) );
		}
		if ( isset( $uploadInfo['sha1base36'] ) ) {
			$revision->setSha1Base36( $uploadInfo['sha1base36'] );
		}
		$revision->setSize( intval( $uploadInfo['size'] ) );
		$revision->setComment( $uploadInfo['comment'] );

		if ( isset( $uploadInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $uploadInfo['contributor']['ip'] );
		}
		if ( isset( $uploadInfo['contributor']['username'] ) ) {
			$revision->setUserName( $uploadInfo['contributor']['username'] );
		}
		$revision->setNoUpdates( $this->mNoUpdates );

		return call_user_func( $this->mUploadCallback, $revision );
	}

	/**
	 * Reads one <contributor> element.
	 *
	 * @return array Any of the keys 'id', 'ip', 'username' that were present
	 */
	private function handleContributor() {
		$fields = array( 'id', 'ip', 'username' );
		$info = array();

		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'contributor' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( in_array( $tag, $fields ) ) {
				$info[$tag] = $this->nodeContents();
			}
		}

		return $info;
	}

	/**
	 * Maps a title from the dump onto the local wiki, applying the target
	 * namespace or root page, and checks that the page may be imported.
	 *
	 * @param string $text Title text from the dump
	 * @return array|bool array( Title $title, Title|null $origTitle ), or false
	 *   after issuing a notice when the title is rejected
	 */
	private function processTitle( $text ) {
		global $wgCommandLineMode;

		$workTitle = $text;
		$origTitle = Title::newFromText( $workTitle );

		if ( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
			# makeTitleSafe, because $origTitle can have an interwiki (different setting of interwiki map)
			# and then dbKey can begin with a lowercase char
			$title = Title::makeTitleSafe( $this->mTargetNamespace,
				$origTitle->getDBkey() );
		} else {
			if ( !is_null( $this->mTargetRootPage ) ) {
				$workTitle = $this->mTargetRootPage . '/' . $workTitle;
			}
			$title = Title::newFromText( $workTitle );
		}

		if ( is_null( $title ) ) {
			# Invalid page title? Ignore the page
			$this->notice( 'import-error-invalid', $workTitle );
			return false;
		} elseif ( $title->isExternal() ) {
			$this->notice( 'import-error-interwiki', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->canExist() ) {
			$this->notice( 'import-error-special', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->userCan( 'edit' ) && !$wgCommandLineMode ) {
			# Do not import if the importing wiki user cannot edit this page
			$this->notice( 'import-error-edit', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->exists() && !$title->userCan( 'create' ) && !$wgCommandLineMode ) {
			# Do not import if the importing wiki user cannot create this page
			$this->notice( 'import-error-create', $title->getPrefixedText() );
			return false;
		}

		return array( $title, $origTitle );
	}
}

/**
 * Stream wrapper that exposes a registered ImportStreamSource under an
 * "uploadsource://<id>" URL.
 */
class UploadSourceAdapter {
	/** @var array Registered sources, keyed by the id returned from registerSource() */
	public static $sourceRegistrations = array();

	/** @var ImportStreamSource Source backing this stream; set by stream_open() */
	private $mSource;

/** @var string Bytes read from the source but not yet handed to the stream layer */
	private $mBuffer = '';

	/** @var int Number of bytes handed out so far */
	private $mPosition = 0;

	/**
	 * Registers a source and returns the id to use as the host part of an
	 * "uploadsource://" URL.
	 *
	 * @param ImportStreamSource $source
	 * @return string
	 */
	static function registerSource( ImportStreamSource $source ) {
		$id = wfRandomString();

		self::$sourceRegistrations[$id] = $source;

		return $id;
	}

	/**
	 * Stream-wrapper open handler: looks up the source named by the URL host.
	 *
	 * @param string $path
	 * @param string $mode Unused
	 * @param int $options Unused
	 * @param string &$opened_path Unused
	 * @return bool False when the URL cannot be parsed or names no registered source
	 */
	function stream_open( $path, $mode, $options, &$opened_path ) {
		$url = parse_url( $path );

		// parse_url() returns false for seriously malformed URLs and omits
		// 'host' when there is none.
		if ( !is_array( $url ) || !isset( $url['host'] ) ) {
			return false;
		}

		$id = $url['host'];

		if ( !isset( self::$sourceRegistrations[$id] ) ) {
			return false;
		}

		$this->mSource = self::$sourceRegistrations[$id];

		return true;
	}

	/**
	 * Stream-wrapper read handler. Fills the internal buffer from the source
	 * until it holds at least $count bytes, the source is at its end, or a
	 * chunk comes back empty, and then returns up to $count bytes.
	 *
	 * @param int $count
	 * @return string
	 */
	function stream_read( $count ) {
		$return = '';
		$leave = false;

		while ( !$leave && !$this->mSource->atEnd() &&
			strlen( $this->mBuffer ) < $count ) {
			$read = $this->mSource->readChunk();

			if ( !strlen( $read ) ) {
				$leave = true;
			}

			$this->mBuffer .= $read;
		}

		if ( strlen( $this->mBuffer ) ) {
			$return = substr( $this->mBuffer, 0, $count );
			// Before PHP 8, substr() yields false rather than '' once the
			// whole buffer has been consumed; keep the buffer a string.
			$this->mBuffer = (string)substr( $this->mBuffer, $count );
		}

		$this->mPosition += strlen( $return );

		return $return;
	}

	/**
	 * Stream-wrapper write handler; the stream is read-only.
	 *
	 * @param string $data
	 * @return bool Always false
	 */
	function stream_write( $data ) {
		return false;
	}

	/**
	 * @return int Number of bytes returned by stream_read() so far
	 */
	function stream_tell() {
		return $this->mPosition;
	}

	/**
	 * Stream-wrapper EOF handler.
	 *
	 * The source can already be at its end while stream_read() still holds
	 * bytes it has not handed out, so both conditions are checked.
	 *
	 * @return bool
	 */
	function stream_eof() {
		return $this->mSource->atEnd() && !strlen( $this->mBuffer );
	}

	/**
	 * Stream-wrapper stat handler: returns a stat array with every field zero.
	 *
	 * @return array
	 */
	function url_stat() {
		$result = array();

		$result['dev'] = $result[0] = 0;
		$result['ino'] = $result[1] = 0;
		$result['mode'] = $result[2] = 0;
		$result['nlink'] = $result[3] = 0;
		$result['uid'] = $result[4] = 0;
		$result['gid'] = $result[5] = 0;
		$result['rdev'] = $result[6] = 0;
		$result['size'] = $result[7] = 0;
		$result['atime'] = $result[8] = 0;
		$result['mtime'] = $result[9] = 0;
		$result['ctime'] = $result[10] = 0;
		$result['blksize'] = $result[11] = 0;
		$result['blocks'] = $result[12] = 0;

		return $result;
	}
}

/**
 * Plain data holder for one imported revision, upload or log item, filled in
 * by WikiImporter through the setters below.
 */
class WikiRevision {
	public $importer = null;

	public $title = null;

	public $id = 0;

	public $timestamp = "20010115000000";

	public $user = 0;

	public $user_text = "";

	public $model = null;

	public $format = null;

	public $text = "";

	protected $size;

	public $content = null;

	protected $contentHandler = null;

	public $comment = "";

	public $minor = false;

	public $type = "";

	public $action = "";

	public $params = "";

	public $fileSrc = '';

	public $sha1base36 = false;

	/** @var bool Value returned by isTempSrc(); kept in sync with $fileIsTemp */
	public $isTemp = false;

	public $archiveName = '';

	protected $filename;

	protected $src;

	public $fileIsTemp;

	private $mNoUpdates = false;

	/**
	 * @param Title $title
	 * @throws MWException When $title is null or not an object
	 */
	function setTitle( $title ) {
		if ( is_object( $title ) ) {
			$this->title = $title;
		} elseif ( is_null( $title ) ) {
			throw new MWException( "WikiRevision given a null title in import. "
				. "You may need to adjust \$wgLegalTitleChars." );
		} else {
			throw new MWException( "WikiRevision given non-object title in import." );
		}
	}

	/**
	 * @param int $id
	 */
	function setID( $id ) {
		$this->id = $id;
	}

	/**
	 * Stores the timestamp converted to TS_MW format.
	 *
	 * @param string $ts E.g. 2003-08-05T18:30:02Z
	 */
	function setTimestamp( $ts ) {
		# 2003-08-05T18:30:02Z
		$this->timestamp = wfTimestamp( TS_MW, $ts );
	}

	/**
	 * @param string $user
	 */
	function setUsername( $user ) {
		$this->user_text = $user;
	}

	/**
	 * Stores the IP in the same field as setUsername(); whichever is called
	 * last wins.
	 *
	 * @param string $ip
	 */
	function setUserIP( $ip ) {
		$this->user_text = $ip;
	}

	/**
	 * @param string $model
	 */
	function setModel( $model ) {
		$this->model = $model;
	}

	/**
	 * @param string $format
	 */
	function setFormat( $format ) {
		$this->format = $format;
	}

	/**
	 * @param string $text
	 */
	function setText( $text ) {
		$this->text = $text;
	}

	/**
	 * @param string $text
	 */
	function setComment( $text ) {
		$this->comment = $text;
	}

	/**
	 * @param bool $minor Cast to bool before storing
	 */
	function setMinor( $minor ) {
		$this->minor = (bool)$minor;
	}

	/**
	 * @param mixed $src
	 */
	function setSrc( $src ) {
		$this->src = $src;
	}

	/**
	 * Sets the local file to import and whether it is a temporary file.
	 *
	 * The flag is written to both $fileIsTemp and $isTemp: isTempSrc()
	 * returns $isTemp, which previously was never updated, so the flag passed
	 * here was lost.
	 *
	 * @param string $src
	 * @param bool $isTemp
	 */
	function setFileSrc( $src, $isTemp ) {
		$this->fileSrc = $src;
		$this->fileIsTemp = $isTemp;
		$this->isTemp = $isTemp;
	}

	/**
	 * @param string $sha1base36
	 */
	function setSha1Base36( $sha1base36 ) {
		$this->sha1base36 = $sha1base36;
	}

	/**
	 * @param string $filename
	 */
	function setFilename( $filename ) {
		$this->filename = $filename;
	}

	/**
	 * @param string $archiveName
	 */
	function setArchiveName( $archiveName ) {
		$this->archiveName = $archiveName;
	}

	/**
	 * @param int $size Stored through intval()
	 */
	function setSize( $size ) {
		$this->size = intval( $size );
	}

	/**
	 * @param string $type
	 */
	function setType( $type ) {
		$this->type = $type;
	}

	/**
	 * @param string $action
	 */
	function setAction( $action ) {
		$this->action = $action;
	}

	/**
	 * @param mixed $params
	 */
	function setParams( $params ) {
		$this->params = $params;
	}

	/**
	 * @param bool $noupdates
	 */
	public function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * @return Title|null
	 */
	function getTitle() {
		return $this->title;
	}

	/**
	 * @return int
	 */
	function getID() {
		return $this->id;
	}

	/**
	 * @return string Timestamp in TS_MW format
	 */
	function getTimestamp() {
		return $this->timestamp;
	}

	/**
	 * @return string The user name or IP stored by setUsername()/setUserIP()
	 */
	function getUser() {
		return $this->user_text;
	}

function getText() { 01281 ContentHandler::deprecated( __METHOD__, '1.21' ); 01282 01283 return $this->text; 01284 } 01285 01289 function getContentHandler() { 01290 if ( is_null( $this->contentHandler ) ) { 01291 $this->contentHandler = ContentHandler::getForModelID( $this->getModel() ); 01292 } 01293 01294 return $this->contentHandler; 01295 } 01296 01300 function getContent() { 01301 if ( is_null( $this->content ) ) { 01302 $handler = $this->getContentHandler(); 01303 $this->content = $handler->unserializeContent( $this->text, $this->getFormat() ); 01304 } 01305 01306 return $this->content; 01307 } 01308 01312 function getModel() { 01313 if ( is_null( $this->model ) ) { 01314 $this->model = $this->getTitle()->getContentModel(); 01315 } 01316 01317 return $this->model; 01318 } 01319 01323 function getFormat() { 01324 if ( is_null( $this->format ) ) { 01325 $this->format = $this->getContentHandler()->getDefaultFormat(); 01326 } 01327 01328 return $this->format; 01329 } 01330 01334 function getComment() { 01335 return $this->comment; 01336 } 01337 01341 function getMinor() { 01342 return $this->minor; 01343 } 01344 01348 function getSrc() { 01349 return $this->src; 01350 } 01351 01355 function getSha1() { 01356 if ( $this->sha1base36 ) { 01357 return wfBaseConvert( $this->sha1base36, 36, 16 ); 01358 } 01359 return false; 01360 } 01361 01365 function getFileSrc() { 01366 return $this->fileSrc; 01367 } 01368 01372 function isTempSrc() { 01373 return $this->isTemp; 01374 } 01375 01379 function getFilename() { 01380 return $this->filename; 01381 } 01382 01386 function getArchiveName() { 01387 return $this->archiveName; 01388 } 01389 01393 function getSize() { 01394 return $this->size; 01395 } 01396 01400 function getType() { 01401 return $this->type; 01402 } 01403 01407 function getAction() { 01408 return $this->action; 01409 } 01410 01414 function getParams() { 01415 return $this->params; 01416 } 01417 01421 function importOldRevision() { 01422 $dbw = wfGetDB( 
DB_MASTER ); 01423 01424 # Sneak a single revision into place 01425 $user = User::newFromName( $this->getUser() ); 01426 if ( $user ) { 01427 $userId = intval( $user->getId() ); 01428 $userText = $user->getName(); 01429 $userObj = $user; 01430 } else { 01431 $userId = 0; 01432 $userText = $this->getUser(); 01433 $userObj = new User; 01434 } 01435 01436 // avoid memory leak...? 01437 $linkCache = LinkCache::singleton(); 01438 $linkCache->clear(); 01439 01440 $page = WikiPage::factory( $this->title ); 01441 $page->loadPageData( 'fromdbmaster' ); 01442 if ( !$page->exists() ) { 01443 # must create the page... 01444 $pageId = $page->insertOn( $dbw ); 01445 $created = true; 01446 $oldcountable = null; 01447 } else { 01448 $pageId = $page->getId(); 01449 $created = false; 01450 01451 $prior = $dbw->selectField( 'revision', '1', 01452 array( 'rev_page' => $pageId, 01453 'rev_timestamp' => $dbw->timestamp( $this->timestamp ), 01454 'rev_user_text' => $userText, 01455 'rev_comment' => $this->getComment() ), 01456 __METHOD__ 01457 ); 01458 if ( $prior ) { 01459 // @todo FIXME: This could fail slightly for multiple matches :P 01460 wfDebug( __METHOD__ . ": skipping existing revision for [[" . 01461 $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" ); 01462 return false; 01463 } 01464 $oldcountable = $page->isCountable(); 01465 } 01466 01467 # @todo FIXME: Use original rev_id optionally (better for backups) 01468 # Insert the row 01469 $revision = new Revision( array( 01470 'title' => $this->title, 01471 'page' => $pageId, 01472 'content_model' => $this->getModel(), 01473 'content_format' => $this->getFormat(), 01474 //XXX: just set 'content' => $this->getContent()? 
01475 'text' => $this->getContent()->serialize( $this->getFormat() ), 01476 'comment' => $this->getComment(), 01477 'user' => $userId, 01478 'user_text' => $userText, 01479 'timestamp' => $this->timestamp, 01480 'minor_edit' => $this->minor, 01481 ) ); 01482 $revision->insertOn( $dbw ); 01483 $changed = $page->updateIfNewerOn( $dbw, $revision ); 01484 01485 if ( $changed !== false && !$this->mNoUpdates ) { 01486 wfDebug( __METHOD__ . ": running updates\n" ); 01487 $page->doEditUpdates( 01488 $revision, 01489 $userObj, 01490 array( 'created' => $created, 'oldcountable' => $oldcountable ) 01491 ); 01492 } 01493 01494 return true; 01495 } 01496 01497 function importLogItem() { 01498 $dbw = wfGetDB( DB_MASTER ); 01499 # @todo FIXME: This will not record autoblocks 01500 if ( !$this->getTitle() ) { 01501 wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " . 01502 $this->timestamp . "\n" ); 01503 return; 01504 } 01505 # Check if it exists already 01506 // @todo FIXME: Use original log ID (better for backups) 01507 $prior = $dbw->selectField( 'logging', '1', 01508 array( 'log_type' => $this->getType(), 01509 'log_action' => $this->getAction(), 01510 'log_timestamp' => $dbw->timestamp( $this->timestamp ), 01511 'log_namespace' => $this->getTitle()->getNamespace(), 01512 'log_title' => $this->getTitle()->getDBkey(), 01513 'log_comment' => $this->getComment(), 01514 #'log_user_text' => $this->user_text, 01515 'log_params' => $this->params ), 01516 __METHOD__ 01517 ); 01518 // @todo FIXME: This could fail slightly for multiple matches :P 01519 if ( $prior ) { 01520 wfDebug( __METHOD__ 01521 . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " 01522 . $this->timestamp . 
"\n" ); 01523 return; 01524 } 01525 $log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' ); 01526 $data = array( 01527 'log_id' => $log_id, 01528 'log_type' => $this->type, 01529 'log_action' => $this->action, 01530 'log_timestamp' => $dbw->timestamp( $this->timestamp ), 01531 'log_user' => User::idFromName( $this->user_text ), 01532 #'log_user_text' => $this->user_text, 01533 'log_namespace' => $this->getTitle()->getNamespace(), 01534 'log_title' => $this->getTitle()->getDBkey(), 01535 'log_comment' => $this->getComment(), 01536 'log_params' => $this->params 01537 ); 01538 $dbw->insert( 'logging', $data, __METHOD__ ); 01539 } 01540 01544 function importUpload() { 01545 # Construct a file 01546 $archiveName = $this->getArchiveName(); 01547 if ( $archiveName ) { 01548 wfDebug( __METHOD__ . "Importing archived file as $archiveName\n" ); 01549 $file = OldLocalFile::newFromArchiveName( $this->getTitle(), 01550 RepoGroup::singleton()->getLocalRepo(), $archiveName ); 01551 } else { 01552 $file = wfLocalFile( $this->getTitle() ); 01553 wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" ); 01554 if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) { 01555 $archiveName = $file->getTimestamp() . '!' . $file->getName(); 01556 $file = OldLocalFile::newFromArchiveName( $this->getTitle(), 01557 RepoGroup::singleton()->getLocalRepo(), $archiveName ); 01558 wfDebug( __METHOD__ . "File already exists; importing as $archiveName\n" ); 01559 } 01560 } 01561 if ( !$file ) { 01562 wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" ); 01563 return false; 01564 } 01565 01566 # Get the file source or download if necessary 01567 $source = $this->getFileSrc(); 01568 $flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0; 01569 if ( !$source ) { 01570 $source = $this->downloadSource(); 01571 $flags |= File::DELETE_SOURCE; 01572 } 01573 if ( !$source ) { 01574 wfDebug( __METHOD__ . 
": Could not fetch remote file.\n" ); 01575 return false; 01576 } 01577 $sha1 = $this->getSha1(); 01578 if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) { 01579 if ( $flags & File::DELETE_SOURCE ) { 01580 # Broken file; delete it if it is a temporary file 01581 unlink( $source ); 01582 } 01583 wfDebug( __METHOD__ . ": Corrupt file $source.\n" ); 01584 return false; 01585 } 01586 01587 $user = User::newFromName( $this->user_text ); 01588 01589 # Do the actual upload 01590 if ( $archiveName ) { 01591 $status = $file->uploadOld( $source, $archiveName, 01592 $this->getTimestamp(), $this->getComment(), $user, $flags ); 01593 } else { 01594 $status = $file->upload( $source, $this->getComment(), $this->getComment(), 01595 $flags, false, $this->getTimestamp(), $user ); 01596 } 01597 01598 if ( $status->isGood() ) { 01599 wfDebug( __METHOD__ . ": Successful\n" ); 01600 return true; 01601 } else { 01602 wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" ); 01603 return false; 01604 } 01605 } 01606 01610 function downloadSource() { 01611 global $wgEnableUploads; 01612 if ( !$wgEnableUploads ) { 01613 return false; 01614 } 01615 01616 $tempo = tempnam( wfTempDir(), 'download' ); 01617 $f = fopen( $tempo, 'wb' ); 01618 if ( !$f ) { 01619 wfDebug( "IMPORT: couldn't write to temp file $tempo\n" ); 01620 return false; 01621 } 01622 01623 // @todo FIXME! 
01624 $src = $this->getSrc(); 01625 $data = Http::get( $src ); 01626 if ( !$data ) { 01627 wfDebug( "IMPORT: couldn't fetch source $src\n" ); 01628 fclose( $f ); 01629 unlink( $tempo ); 01630 return false; 01631 } 01632 01633 fwrite( $f, $data ); 01634 fclose( $f ); 01635 01636 return $tempo; 01637 } 01638 01639 } 01640 01645 class ImportStringSource { 01646 function __construct( $string ) { 01647 $this->mString = $string; 01648 $this->mRead = false; 01649 } 01650 01654 function atEnd() { 01655 return $this->mRead; 01656 } 01657 01661 function readChunk() { 01662 if ( $this->atEnd() ) { 01663 return false; 01664 } 01665 $this->mRead = true; 01666 return $this->mString; 01667 } 01668 } 01669 01674 class ImportStreamSource { 01675 function __construct( $handle ) { 01676 $this->mHandle = $handle; 01677 } 01678 01682 function atEnd() { 01683 return feof( $this->mHandle ); 01684 } 01685 01689 function readChunk() { 01690 return fread( $this->mHandle, 32768 ); 01691 } 01692 01697 static function newFromFile( $filename ) { 01698 wfSuppressWarnings(); 01699 $file = fopen( $filename, 'rt' ); 01700 wfRestoreWarnings(); 01701 if ( !$file ) { 01702 return Status::newFatal( "importcantopen" ); 01703 } 01704 return Status::newGood( new ImportStreamSource( $file ) ); 01705 } 01706 01711 static function newFromUpload( $fieldname = "xmlimport" ) { 01712 $upload =& $_FILES[$fieldname]; 01713 01714 if ( $upload === null || !$upload['name'] ) { 01715 return Status::newFatal( 'importnofile' ); 01716 } 01717 if ( !empty( $upload['error'] ) ) { 01718 switch ( $upload['error'] ) { 01719 case 1: 01720 # The uploaded file exceeds the upload_max_filesize directive in php.ini. 01721 return Status::newFatal( 'importuploaderrorsize' ); 01722 case 2: 01723 # The uploaded file exceeds the MAX_FILE_SIZE directive that 01724 # was specified in the HTML form. 
01725 return Status::newFatal( 'importuploaderrorsize' ); 01726 case 3: 01727 # The uploaded file was only partially uploaded 01728 return Status::newFatal( 'importuploaderrorpartial' ); 01729 case 6: 01730 # Missing a temporary folder. 01731 return Status::newFatal( 'importuploaderrortemp' ); 01732 # case else: # Currently impossible 01733 } 01734 01735 } 01736 $fname = $upload['tmp_name']; 01737 if ( is_uploaded_file( $fname ) ) { 01738 return ImportStreamSource::newFromFile( $fname ); 01739 } else { 01740 return Status::newFatal( 'importnofile' ); 01741 } 01742 } 01743 01749 static function newFromURL( $url, $method = 'GET' ) { 01750 wfDebug( __METHOD__ . ": opening $url\n" ); 01751 # Use the standard HTTP fetch function; it times out 01752 # quicker and sorts out user-agent problems which might 01753 # otherwise prevent importing from large sites, such 01754 # as the Wikimedia cluster, etc. 01755 $data = Http::request( $method, $url, array( 'followRedirects' => true ) ); 01756 if ( $data !== false ) { 01757 $file = tmpfile(); 01758 fwrite( $file, $data ); 01759 fflush( $file ); 01760 fseek( $file, 0 ); 01761 return Status::newGood( new ImportStreamSource( $file ) ); 01762 } else { 01763 return Status::newFatal( 'importcantopen' ); 01764 } 01765 } 01766 01775 public static function newFromInterwiki( $interwiki, $page, $history = false, 01776 $templates = false, $pageLinkDepth = 0 01777 ) { 01778 if ( $page == '' ) { 01779 return Status::newFatal( 'import-noarticle' ); 01780 } 01781 $link = Title::newFromText( "$interwiki:Special:Export/$page" ); 01782 if ( is_null( $link ) || !$link->isExternal() ) { 01783 return Status::newFatal( 'importbadinterwiki' ); 01784 } else { 01785 $params = array(); 01786 if ( $history ) { 01787 $params['history'] = 1; 01788 } 01789 if ( $templates ) { 01790 $params['templates'] = 1; 01791 } 01792 if ( $pageLinkDepth ) { 01793 $params['pagelink-depth'] = $pageLinkDepth; 01794 } 01795 $url = $link->getFullURL( $params ); 01796 # For 
interwikis, use POST to avoid redirects. 01797 return ImportStreamSource::newFromURL( $url, "POST" ); 01798 } 01799 } 01800 }