MediaWiki
REL1_20
|
00001 <?php 00033 class WikiImporter { 00034 private $reader = null; 00035 private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback; 00036 private $mSiteInfoCallback, $mTargetNamespace, $mTargetRootPage, $mPageOutCallback; 00037 private $mNoticeCallback, $mDebug; 00038 private $mImportUploads, $mImageBasePath; 00039 private $mNoUpdates = false; 00040 00045 function __construct( $source ) { 00046 $this->reader = new XMLReader(); 00047 00048 stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' ); 00049 $id = UploadSourceAdapter::registerSource( $source ); 00050 if (defined( 'LIBXML_PARSEHUGE' ) ) { 00051 $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE ); 00052 } else { 00053 $this->reader->open( "uploadsource://$id" ); 00054 } 00055 00056 // Default callbacks 00057 $this->setRevisionCallback( array( $this, "importRevision" ) ); 00058 $this->setUploadCallback( array( $this, 'importUpload' ) ); 00059 $this->setLogItemCallback( array( $this, 'importLogItem' ) ); 00060 $this->setPageOutCallback( array( $this, 'finishImportPage' ) ); 00061 } 00062 00063 private function throwXmlError( $err ) { 00064 $this->debug( "FAILURE: $err" ); 00065 wfDebug( "WikiImporter XML error: $err\n" ); 00066 } 00067 00068 private function debug( $data ) { 00069 if( $this->mDebug ) { 00070 wfDebug( "IMPORT: $data\n" ); 00071 } 00072 } 00073 00074 private function warn( $data ) { 00075 wfDebug( "IMPORT: $data\n" ); 00076 } 00077 00078 private function notice( $msg /*, $param, ...*/ ) { 00079 $params = func_get_args(); 00080 array_shift( $params ); 00081 00082 if ( is_callable( $this->mNoticeCallback ) ) { 00083 call_user_func( $this->mNoticeCallback, $msg, $params ); 00084 } else { # No ImportReporter -> CLI 00085 echo wfMessage( $msg, $params )->text() . 
"\n"; 00086 } 00087 } 00088 00093 function setDebug( $debug ) { 00094 $this->mDebug = $debug; 00095 } 00096 00101 function setNoUpdates( $noupdates ) { 00102 $this->mNoUpdates = $noupdates; 00103 } 00104 00111 public function setNoticeCallback( $callback ) { 00112 return wfSetVar( $this->mNoticeCallback, $callback ); 00113 } 00114 00120 public function setPageCallback( $callback ) { 00121 $previous = $this->mPageCallback; 00122 $this->mPageCallback = $callback; 00123 return $previous; 00124 } 00125 00135 public function setPageOutCallback( $callback ) { 00136 $previous = $this->mPageOutCallback; 00137 $this->mPageOutCallback = $callback; 00138 return $previous; 00139 } 00140 00146 public function setRevisionCallback( $callback ) { 00147 $previous = $this->mRevisionCallback; 00148 $this->mRevisionCallback = $callback; 00149 return $previous; 00150 } 00151 00157 public function setUploadCallback( $callback ) { 00158 $previous = $this->mUploadCallback; 00159 $this->mUploadCallback = $callback; 00160 return $previous; 00161 } 00162 00168 public function setLogItemCallback( $callback ) { 00169 $previous = $this->mLogItemCallback; 00170 $this->mLogItemCallback = $callback; 00171 return $previous; 00172 } 00173 00179 public function setSiteInfoCallback( $callback ) { 00180 $previous = $this->mSiteInfoCallback; 00181 $this->mSiteInfoCallback = $callback; 00182 return $previous; 00183 } 00184 00190 public function setTargetNamespace( $namespace ) { 00191 if( is_null( $namespace ) ) { 00192 // Don't override namespaces 00193 $this->mTargetNamespace = null; 00194 } elseif( $namespace >= 0 ) { 00195 // @todo FIXME: Check for validity 00196 $this->mTargetNamespace = intval( $namespace ); 00197 } else { 00198 return false; 00199 } 00200 } 00201 00207 public function setTargetRootPage( $rootpage ) { 00208 $status = Status::newGood(); 00209 if( is_null( $rootpage ) ) { 00210 // No rootpage 00211 $this->mTargetRootPage = null; 00212 } elseif( $rootpage !== '' ) { 00213 $rootpage = 
rtrim( $rootpage, '/' ); //avoid double slashes 00214 $title = Title::newFromText( $rootpage, !is_null( $this->mTargetNamespace ) ? $this->mTargetNamespace : NS_MAIN ); 00215 if( !$title || $title->isExternal() ) { 00216 $status->fatal( 'import-rootpage-invalid' ); 00217 } else { 00218 if( !MWNamespace::hasSubpages( $title->getNamespace() ) ) { 00219 global $wgContLang; 00220 00221 $displayNSText = $title->getNamespace() == NS_MAIN 00222 ? wfMessage( 'blanknamespace' )->text() 00223 : $wgContLang->getNsText( $title->getNamespace() ); 00224 $status->fatal( 'import-rootpage-nosubpage', $displayNSText ); 00225 } else { 00226 // set namespace to 'all', so the namespace check in processTitle() can passed 00227 $this->setTargetNamespace( null ); 00228 $this->mTargetRootPage = $title->getPrefixedDBKey(); 00229 } 00230 } 00231 } 00232 return $status; 00233 } 00234 00238 public function setImageBasePath( $dir ) { 00239 $this->mImageBasePath = $dir; 00240 } 00241 00245 public function setImportUploads( $import ) { 00246 $this->mImportUploads = $import; 00247 } 00248 00254 public function importRevision( $revision ) { 00255 $dbw = wfGetDB( DB_MASTER ); 00256 return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) ); 00257 } 00258 00264 public function importLogItem( $rev ) { 00265 $dbw = wfGetDB( DB_MASTER ); 00266 return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) ); 00267 } 00268 00274 public function importUpload( $revision ) { 00275 $dbw = wfGetDB( DB_MASTER ); 00276 return $dbw->deadlockLoop( array( $revision, 'importUpload' ) ); 00277 } 00278 00288 public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) { 00289 $args = func_get_args(); 00290 return wfRunHooks( 'AfterImportPage', $args ); 00291 } 00292 00297 public function debugRevisionHandler( &$revision ) { 00298 $this->debug( "Got revision:" ); 00299 if( is_object( $revision->title ) ) { 00300 $this->debug( "-- Title: " . 
$revision->title->getPrefixedText() ); 00301 } else { 00302 $this->debug( "-- Title: <invalid>" ); 00303 } 00304 $this->debug( "-- User: " . $revision->user_text ); 00305 $this->debug( "-- Timestamp: " . $revision->timestamp ); 00306 $this->debug( "-- Comment: " . $revision->comment ); 00307 $this->debug( "-- Text: " . $revision->text ); 00308 } 00309 00314 function pageCallback( $title ) { 00315 if( isset( $this->mPageCallback ) ) { 00316 call_user_func( $this->mPageCallback, $title ); 00317 } 00318 } 00319 00328 private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) { 00329 if( isset( $this->mPageOutCallback ) ) { 00330 $args = func_get_args(); 00331 call_user_func_array( $this->mPageOutCallback, $args ); 00332 } 00333 } 00334 00340 private function revisionCallback( $revision ) { 00341 if ( isset( $this->mRevisionCallback ) ) { 00342 return call_user_func_array( $this->mRevisionCallback, 00343 array( $revision, $this ) ); 00344 } else { 00345 return false; 00346 } 00347 } 00348 00354 private function logItemCallback( $revision ) { 00355 if ( isset( $this->mLogItemCallback ) ) { 00356 return call_user_func_array( $this->mLogItemCallback, 00357 array( $revision, $this ) ); 00358 } else { 00359 return false; 00360 } 00361 } 00362 00370 private function nodeContents() { 00371 if( $this->reader->isEmptyElement ) { 00372 return ""; 00373 } 00374 $buffer = ""; 00375 while( $this->reader->read() ) { 00376 switch( $this->reader->nodeType ) { 00377 case XmlReader::TEXT: 00378 case XmlReader::SIGNIFICANT_WHITESPACE: 00379 $buffer .= $this->reader->value; 00380 break; 00381 case XmlReader::END_ELEMENT: 00382 return $buffer; 00383 } 00384 } 00385 00386 $this->reader->close(); 00387 return ''; 00388 } 00389 00390 # -------------- 00391 00393 private function dumpElement() { 00394 static $lookup = null; 00395 if (!$lookup) { 00396 $xmlReaderConstants = array( 00397 "NONE", 00398 "ELEMENT", 00399 "ATTRIBUTE", 00400 "TEXT", 00401 "CDATA", 00402 
"ENTITY_REF", 00403 "ENTITY", 00404 "PI", 00405 "COMMENT", 00406 "DOC", 00407 "DOC_TYPE", 00408 "DOC_FRAGMENT", 00409 "NOTATION", 00410 "WHITESPACE", 00411 "SIGNIFICANT_WHITESPACE", 00412 "END_ELEMENT", 00413 "END_ENTITY", 00414 "XML_DECLARATION", 00415 ); 00416 $lookup = array(); 00417 00418 foreach( $xmlReaderConstants as $name ) { 00419 $lookup[constant("XmlReader::$name")] = $name; 00420 } 00421 } 00422 00423 print( var_dump( 00424 $lookup[$this->reader->nodeType], 00425 $this->reader->name, 00426 $this->reader->value 00427 )."\n\n" ); 00428 } 00429 00434 public function doImport() { 00435 00436 // Calls to reader->read need to be wrapped in calls to 00437 // libxml_disable_entity_loader() to avoid local file 00438 // inclusion attacks (bug 46932). 00439 $oldDisable = libxml_disable_entity_loader( true ); 00440 $this->reader->read(); 00441 00442 if ( $this->reader->name != 'mediawiki' ) { 00443 libxml_disable_entity_loader( $oldDisable ); 00444 throw new MWException( "Expected <mediawiki> tag, got " . 00445 $this->reader->name ); 00446 } 00447 $this->debug( "<mediawiki> tag is correct." ); 00448 00449 $this->debug( "Starting primary dump processing loop." 
); 00450 00451 $keepReading = $this->reader->read(); 00452 $skip = false; 00453 while ( $keepReading ) { 00454 $tag = $this->reader->name; 00455 $type = $this->reader->nodeType; 00456 00457 if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', $this ) ) { 00458 // Do nothing 00459 } elseif ( $tag == 'mediawiki' && $type == XmlReader::END_ELEMENT ) { 00460 break; 00461 } elseif ( $tag == 'siteinfo' ) { 00462 $this->handleSiteInfo(); 00463 } elseif ( $tag == 'page' ) { 00464 $this->handlePage(); 00465 } elseif ( $tag == 'logitem' ) { 00466 $this->handleLogItem(); 00467 } elseif ( $tag != '#text' ) { 00468 $this->warn( "Unhandled top-level XML tag $tag" ); 00469 00470 $skip = true; 00471 } 00472 00473 if ($skip) { 00474 $keepReading = $this->reader->next(); 00475 $skip = false; 00476 $this->debug( "Skip" ); 00477 } else { 00478 $keepReading = $this->reader->read(); 00479 } 00480 } 00481 00482 libxml_disable_entity_loader( $oldDisable ); 00483 return true; 00484 } 00485 00490 private function handleSiteInfo() { 00491 // Site info is useful, but not actually used for dump imports. 00492 // Includes a quick short-circuit to save performance. 00493 if ( ! $this->mSiteInfoCallback ) { 00494 $this->reader->next(); 00495 return true; 00496 } 00497 throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" ); 00498 } 00499 00500 private function handleLogItem() { 00501 $this->debug( "Enter log item handler." 
); 00502 $logInfo = array(); 00503 00504 // Fields that can just be stuffed in the pageInfo object 00505 $normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp', 00506 'logtitle', 'params' ); 00507 00508 while ( $this->reader->read() ) { 00509 if ( $this->reader->nodeType == XmlReader::END_ELEMENT && 00510 $this->reader->name == 'logitem') { 00511 break; 00512 } 00513 00514 $tag = $this->reader->name; 00515 00516 if ( !wfRunHooks( 'ImportHandleLogItemXMLTag', 00517 $this, $logInfo ) ) { 00518 // Do nothing 00519 } elseif ( in_array( $tag, $normalFields ) ) { 00520 $logInfo[$tag] = $this->nodeContents(); 00521 } elseif ( $tag == 'contributor' ) { 00522 $logInfo['contributor'] = $this->handleContributor(); 00523 } elseif ( $tag != '#text' ) { 00524 $this->warn( "Unhandled log-item XML tag $tag" ); 00525 } 00526 } 00527 00528 $this->processLogItem( $logInfo ); 00529 } 00530 00535 private function processLogItem( $logInfo ) { 00536 $revision = new WikiRevision; 00537 00538 $revision->setID( $logInfo['id'] ); 00539 $revision->setType( $logInfo['type'] ); 00540 $revision->setAction( $logInfo['action'] ); 00541 $revision->setTimestamp( $logInfo['timestamp'] ); 00542 $revision->setParams( $logInfo['params'] ); 00543 $revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) ); 00544 $revision->setNoUpdates( $this->mNoUpdates ); 00545 00546 if ( isset( $logInfo['comment'] ) ) { 00547 $revision->setComment( $logInfo['comment'] ); 00548 } 00549 00550 if ( isset( $logInfo['contributor']['ip'] ) ) { 00551 $revision->setUserIP( $logInfo['contributor']['ip'] ); 00552 } 00553 if ( isset( $logInfo['contributor']['username'] ) ) { 00554 $revision->setUserName( $logInfo['contributor']['username'] ); 00555 } 00556 00557 return $this->logItemCallback( $revision ); 00558 } 00559 00560 private function handlePage() { 00561 // Handle page data. 00562 $this->debug( "Enter page handler." 
); 00563 $pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 ); 00564 00565 // Fields that can just be stuffed in the pageInfo object 00566 $normalFields = array( 'title', 'id', 'redirect', 'restrictions' ); 00567 00568 $skip = false; 00569 $badTitle = false; 00570 00571 while ( $skip ? $this->reader->next() : $this->reader->read() ) { 00572 if ( $this->reader->nodeType == XmlReader::END_ELEMENT && 00573 $this->reader->name == 'page') { 00574 break; 00575 } 00576 00577 $tag = $this->reader->name; 00578 00579 if ( $badTitle ) { 00580 // The title is invalid, bail out of this page 00581 $skip = true; 00582 } elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this, 00583 &$pageInfo ) ) ) { 00584 // Do nothing 00585 } elseif ( in_array( $tag, $normalFields ) ) { 00586 $pageInfo[$tag] = $this->nodeContents(); 00587 if ( $tag == 'title' ) { 00588 $title = $this->processTitle( $pageInfo['title'] ); 00589 00590 if ( !$title ) { 00591 $badTitle = true; 00592 $skip = true; 00593 } 00594 00595 $this->pageCallback( $title ); 00596 list( $pageInfo['_title'], $origTitle ) = $title; 00597 } 00598 } elseif ( $tag == 'revision' ) { 00599 $this->handleRevision( $pageInfo ); 00600 } elseif ( $tag == 'upload' ) { 00601 $this->handleUpload( $pageInfo ); 00602 } elseif ( $tag != '#text' ) { 00603 $this->warn( "Unhandled page XML tag $tag" ); 00604 $skip = true; 00605 } 00606 } 00607 00608 $this->pageOutCallback( $pageInfo['_title'], $origTitle, 00609 $pageInfo['revisionCount'], 00610 $pageInfo['successfulRevisionCount'], 00611 $pageInfo ); 00612 } 00613 00617 private function handleRevision( &$pageInfo ) { 00618 $this->debug( "Enter revision handler" ); 00619 $revisionInfo = array(); 00620 00621 $normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'text' ); 00622 00623 $skip = false; 00624 00625 while ( $skip ? 
$this->reader->next() : $this->reader->read() ) { 00626 if ( $this->reader->nodeType == XmlReader::END_ELEMENT && 00627 $this->reader->name == 'revision') { 00628 break; 00629 } 00630 00631 $tag = $this->reader->name; 00632 00633 if ( !wfRunHooks( 'ImportHandleRevisionXMLTag', $this, 00634 $pageInfo, $revisionInfo ) ) { 00635 // Do nothing 00636 } elseif ( in_array( $tag, $normalFields ) ) { 00637 $revisionInfo[$tag] = $this->nodeContents(); 00638 } elseif ( $tag == 'contributor' ) { 00639 $revisionInfo['contributor'] = $this->handleContributor(); 00640 } elseif ( $tag != '#text' ) { 00641 $this->warn( "Unhandled revision XML tag $tag" ); 00642 $skip = true; 00643 } 00644 } 00645 00646 $pageInfo['revisionCount']++; 00647 if ( $this->processRevision( $pageInfo, $revisionInfo ) ) { 00648 $pageInfo['successfulRevisionCount']++; 00649 } 00650 } 00651 00657 private function processRevision( $pageInfo, $revisionInfo ) { 00658 $revision = new WikiRevision; 00659 00660 if( isset( $revisionInfo['id'] ) ) { 00661 $revision->setID( $revisionInfo['id'] ); 00662 } 00663 if ( isset( $revisionInfo['text'] ) ) { 00664 $revision->setText( $revisionInfo['text'] ); 00665 } 00666 $revision->setTitle( $pageInfo['_title'] ); 00667 00668 if ( isset( $revisionInfo['timestamp'] ) ) { 00669 $revision->setTimestamp( $revisionInfo['timestamp'] ); 00670 } else { 00671 $revision->setTimestamp( wfTimestampNow() ); 00672 } 00673 00674 if ( isset( $revisionInfo['comment'] ) ) { 00675 $revision->setComment( $revisionInfo['comment'] ); 00676 } 00677 00678 if ( isset( $revisionInfo['minor'] ) ) { 00679 $revision->setMinor( true ); 00680 } 00681 if ( isset( $revisionInfo['contributor']['ip'] ) ) { 00682 $revision->setUserIP( $revisionInfo['contributor']['ip'] ); 00683 } 00684 if ( isset( $revisionInfo['contributor']['username'] ) ) { 00685 $revision->setUserName( $revisionInfo['contributor']['username'] ); 00686 } 00687 $revision->setNoUpdates( $this->mNoUpdates ); 00688 00689 return 
$this->revisionCallback( $revision ); 00690 } 00691 00696 private function handleUpload( &$pageInfo ) { 00697 $this->debug( "Enter upload handler" ); 00698 $uploadInfo = array(); 00699 00700 $normalFields = array( 'timestamp', 'comment', 'filename', 'text', 00701 'src', 'size', 'sha1base36', 'archivename', 'rel' ); 00702 00703 $skip = false; 00704 00705 while ( $skip ? $this->reader->next() : $this->reader->read() ) { 00706 if ( $this->reader->nodeType == XmlReader::END_ELEMENT && 00707 $this->reader->name == 'upload') { 00708 break; 00709 } 00710 00711 $tag = $this->reader->name; 00712 00713 if ( !wfRunHooks( 'ImportHandleUploadXMLTag', $this, 00714 $pageInfo ) ) { 00715 // Do nothing 00716 } elseif ( in_array( $tag, $normalFields ) ) { 00717 $uploadInfo[$tag] = $this->nodeContents(); 00718 } elseif ( $tag == 'contributor' ) { 00719 $uploadInfo['contributor'] = $this->handleContributor(); 00720 } elseif ( $tag == 'contents' ) { 00721 $contents = $this->nodeContents(); 00722 $encoding = $this->reader->getAttribute( 'encoding' ); 00723 if ( $encoding === 'base64' ) { 00724 $uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) ); 00725 $uploadInfo['isTempSrc'] = true; 00726 } 00727 } elseif ( $tag != '#text' ) { 00728 $this->warn( "Unhandled upload XML tag $tag" ); 00729 $skip = true; 00730 } 00731 } 00732 00733 if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) { 00734 $path = "{$this->mImageBasePath}/{$uploadInfo['rel']}"; 00735 if ( file_exists( $path ) ) { 00736 $uploadInfo['fileSrc'] = $path; 00737 $uploadInfo['isTempSrc'] = false; 00738 } 00739 } 00740 00741 if ( $this->mImportUploads ) { 00742 return $this->processUpload( $pageInfo, $uploadInfo ); 00743 } 00744 } 00745 00750 private function dumpTemp( $contents ) { 00751 $filename = tempnam( wfTempDir(), 'importupload' ); 00752 file_put_contents( $filename, $contents ); 00753 return $filename; 00754 } 00755 00761 private function processUpload( $pageInfo, $uploadInfo ) { 00762 $revision 
= new WikiRevision; 00763 $text = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : ''; 00764 00765 $revision->setTitle( $pageInfo['_title'] ); 00766 $revision->setID( $pageInfo['id'] ); 00767 $revision->setTimestamp( $uploadInfo['timestamp'] ); 00768 $revision->setText( $text ); 00769 $revision->setFilename( $uploadInfo['filename'] ); 00770 if ( isset( $uploadInfo['archivename'] ) ) { 00771 $revision->setArchiveName( $uploadInfo['archivename'] ); 00772 } 00773 $revision->setSrc( $uploadInfo['src'] ); 00774 if ( isset( $uploadInfo['fileSrc'] ) ) { 00775 $revision->setFileSrc( $uploadInfo['fileSrc'], 00776 !empty( $uploadInfo['isTempSrc'] ) ); 00777 } 00778 if ( isset( $uploadInfo['sha1base36'] ) ) { 00779 $revision->setSha1Base36( $uploadInfo['sha1base36'] ); 00780 } 00781 $revision->setSize( intval( $uploadInfo['size'] ) ); 00782 $revision->setComment( $uploadInfo['comment'] ); 00783 00784 if ( isset( $uploadInfo['contributor']['ip'] ) ) { 00785 $revision->setUserIP( $uploadInfo['contributor']['ip'] ); 00786 } 00787 if ( isset( $uploadInfo['contributor']['username'] ) ) { 00788 $revision->setUserName( $uploadInfo['contributor']['username'] ); 00789 } 00790 $revision->setNoUpdates( $this->mNoUpdates ); 00791 00792 return call_user_func( $this->mUploadCallback, $revision ); 00793 } 00794 00798 private function handleContributor() { 00799 $fields = array( 'id', 'ip', 'username' ); 00800 $info = array(); 00801 00802 while ( $this->reader->read() ) { 00803 if ( $this->reader->nodeType == XmlReader::END_ELEMENT && 00804 $this->reader->name == 'contributor') { 00805 break; 00806 } 00807 00808 $tag = $this->reader->name; 00809 00810 if ( in_array( $tag, $fields ) ) { 00811 $info[$tag] = $this->nodeContents(); 00812 } 00813 } 00814 00815 return $info; 00816 } 00817 00822 private function processTitle( $text ) { 00823 global $wgCommandLineMode; 00824 00825 $workTitle = $text; 00826 $origTitle = Title::newFromText( $workTitle ); 00827 00828 if( !is_null( 
$this->mTargetNamespace ) && !is_null( $origTitle ) ) { 00829 # makeTitleSafe, because $origTitle can have a interwiki (different setting of interwiki map) 00830 # and than dbKey can begin with a lowercase char 00831 $title = Title::makeTitleSafe( $this->mTargetNamespace, 00832 $origTitle->getDBkey() ); 00833 } else { 00834 if( !is_null( $this->mTargetRootPage ) ) { 00835 $workTitle = $this->mTargetRootPage . '/' . $workTitle; 00836 } 00837 $title = Title::newFromText( $workTitle ); 00838 } 00839 00840 if( is_null( $title ) ) { 00841 # Invalid page title? Ignore the page 00842 $this->notice( 'import-error-invalid', $workTitle ); 00843 return false; 00844 } elseif( $title->isExternal() ) { 00845 $this->notice( 'import-error-interwiki', $title->getPrefixedText() ); 00846 return false; 00847 } elseif( !$title->canExist() ) { 00848 $this->notice( 'import-error-special', $title->getPrefixedText() ); 00849 return false; 00850 } elseif( !$title->userCan( 'edit' ) && !$wgCommandLineMode ) { 00851 # Do not import if the importing wiki user cannot edit this page 00852 $this->notice( 'import-error-edit', $title->getPrefixedText() ); 00853 return false; 00854 } elseif( !$title->exists() && !$title->userCan( 'create' ) && !$wgCommandLineMode ) { 00855 # Do not import if the importing wiki user cannot create this page 00856 $this->notice( 'import-error-create', $title->getPrefixedText() ); 00857 return false; 00858 } 00859 00860 return array( $title, $origTitle ); 00861 } 00862 } 00863 00865 class UploadSourceAdapter { 00866 static $sourceRegistrations = array(); 00867 00868 private $mSource; 00869 private $mBuffer; 00870 private $mPosition; 00871 00876 static function registerSource( $source ) { 00877 $id = wfRandomString(); 00878 00879 self::$sourceRegistrations[$id] = $source; 00880 00881 return $id; 00882 } 00883 00891 function stream_open( $path, $mode, $options, &$opened_path ) { 00892 $url = parse_url($path); 00893 $id = $url['host']; 00894 00895 if ( !isset( 
self::$sourceRegistrations[$id] ) ) { 00896 return false; 00897 } 00898 00899 $this->mSource = self::$sourceRegistrations[$id]; 00900 00901 return true; 00902 } 00903 00908 function stream_read( $count ) { 00909 $return = ''; 00910 $leave = false; 00911 00912 while ( !$leave && !$this->mSource->atEnd() && 00913 strlen($this->mBuffer) < $count ) { 00914 $read = $this->mSource->readChunk(); 00915 00916 if ( !strlen($read) ) { 00917 $leave = true; 00918 } 00919 00920 $this->mBuffer .= $read; 00921 } 00922 00923 if ( strlen($this->mBuffer) ) { 00924 $return = substr( $this->mBuffer, 0, $count ); 00925 $this->mBuffer = substr( $this->mBuffer, $count ); 00926 } 00927 00928 $this->mPosition += strlen($return); 00929 00930 return $return; 00931 } 00932 00937 function stream_write( $data ) { 00938 return false; 00939 } 00940 00944 function stream_tell() { 00945 return $this->mPosition; 00946 } 00947 00951 function stream_eof() { 00952 return $this->mSource->atEnd(); 00953 } 00954 00958 function url_stat() { 00959 $result = array(); 00960 00961 $result['dev'] = $result[0] = 0; 00962 $result['ino'] = $result[1] = 0; 00963 $result['mode'] = $result[2] = 0; 00964 $result['nlink'] = $result[3] = 0; 00965 $result['uid'] = $result[4] = 0; 00966 $result['gid'] = $result[5] = 0; 00967 $result['rdev'] = $result[6] = 0; 00968 $result['size'] = $result[7] = 0; 00969 $result['atime'] = $result[8] = 0; 00970 $result['mtime'] = $result[9] = 0; 00971 $result['ctime'] = $result[10] = 0; 00972 $result['blksize'] = $result[11] = 0; 00973 $result['blocks'] = $result[12] = 0; 00974 00975 return $result; 00976 } 00977 } 00978 00979 class XMLReader2 extends XMLReader { 00980 00984 function nodeContents() { 00985 if( $this->isEmptyElement ) { 00986 return ""; 00987 } 00988 $buffer = ""; 00989 while( $this->read() ) { 00990 switch( $this->nodeType ) { 00991 case XmlReader::TEXT: 00992 case XmlReader::SIGNIFICANT_WHITESPACE: 00993 $buffer .= $this->value; 00994 break; 00995 case 
XmlReader::END_ELEMENT: 00996 return $buffer; 00997 } 00998 } 00999 return $this->close(); 01000 } 01001 } 01002 01007 class WikiRevision { 01008 var $importer = null; 01009 01013 var $title = null; 01014 var $id = 0; 01015 var $timestamp = "20010115000000"; 01016 var $user = 0; 01017 var $user_text = ""; 01018 var $text = ""; 01019 var $comment = ""; 01020 var $minor = false; 01021 var $type = ""; 01022 var $action = ""; 01023 var $params = ""; 01024 var $fileSrc = ''; 01025 var $sha1base36 = false; 01026 var $isTemp = false; 01027 var $archiveName = ''; 01028 var $fileIsTemp; 01029 private $mNoUpdates = false; 01030 01035 function setTitle( $title ) { 01036 if( is_object( $title ) ) { 01037 $this->title = $title; 01038 } elseif( is_null( $title ) ) { 01039 throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." ); 01040 } else { 01041 throw new MWException( "WikiRevision given non-object title in import." ); 01042 } 01043 } 01044 01048 function setID( $id ) { 01049 $this->id = $id; 01050 } 01051 01055 function setTimestamp( $ts ) { 01056 # 2003-08-05T18:30:02Z 01057 $this->timestamp = wfTimestamp( TS_MW, $ts ); 01058 } 01059 01063 function setUsername( $user ) { 01064 $this->user_text = $user; 01065 } 01066 01070 function setUserIP( $ip ) { 01071 $this->user_text = $ip; 01072 } 01073 01077 function setText( $text ) { 01078 $this->text = $text; 01079 } 01080 01084 function setComment( $text ) { 01085 $this->comment = $text; 01086 } 01087 01091 function setMinor( $minor ) { 01092 $this->minor = (bool)$minor; 01093 } 01094 01098 function setSrc( $src ) { 01099 $this->src = $src; 01100 } 01101 01106 function setFileSrc( $src, $isTemp ) { 01107 $this->fileSrc = $src; 01108 $this->fileIsTemp = $isTemp; 01109 } 01110 01114 function setSha1Base36( $sha1base36 ) { 01115 $this->sha1base36 = $sha1base36; 01116 } 01117 01121 function setFilename( $filename ) { 01122 $this->filename = $filename; 01123 } 01124 01128 
function setArchiveName( $archiveName ) {
		// Archive name used by importUpload() to import an old file version
		$this->archiveName = $archiveName;
	}

	/**
	 * @param $size Mixed: size value, stored after intval()
	 */
	function setSize( $size ) {
		$this->size = intval( $size );
	}

	/**
	 * @param $type String: log type
	 */
	function setType( $type ) {
		$this->type = $type;
	}

	/**
	 * @param $action String: log action
	 */
	function setAction( $action ) {
		$this->action = $action;
	}

	/**
	 * @param $params Mixed: log parameters, written to log_params as given
	 */
	function setParams( $params ) {
		$this->params = $params;
	}

	/**
	 * @param $noupdates Boolean: when true, importOldRevision() skips
	 *   doEditUpdates()
	 */
	public function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * @return Object|null The title object, or null if none was set
	 */
	function getTitle() {
		return $this->title;
	}

	/**
	 * @return mixed The stored identifier
	 */
	function getID() {
		return $this->id;
	}

	/**
	 * @return string The stored timestamp
	 */
	function getTimestamp() {
		return $this->timestamp;
	}

	/**
	 * @return string The user text (name or IP), not a user object or ID
	 */
	function getUser() {
		return $this->user_text;
	}

	/**
	 * @return string
	 */
	function getText() {
		return $this->text;
	}

	/**
	 * @return string
	 */
	function getComment() {
		return $this->comment;
	}

	/**
	 * @return bool
	 */
	function getMinor() {
		return $this->minor;
	}

	/**
	 * @return mixed The value stored by setSrc()
	 */
	function getSrc() {
		return $this->src;
	}

	/**
	 * @return string|bool The stored base-36 SHA-1 converted to base 16 by
	 *   wfBaseConvert(), or false if no hash was set
	 */
	function getSha1() {
		if ( $this->sha1base36 ) {
			return wfBaseConvert( $this->sha1base36, 36, 16 );
		}
		return false;
	}

	/**
	 * @return string Local source path set by setFileSrc()
	 */
	function getFileSrc() {
		return $this->fileSrc;
	}

	/**
	 * Whether the local source file was flagged as temporary.
	 *
	 * setFileSrc() records the flag in $fileIsTemp, so both properties are
	 * consulted; reading only $isTemp lost the flag.
	 *
	 * @return bool
	 */
	function isTempSrc() {
		return $this->isTemp || $this->fileIsTemp;
	}

	/**
	 * @return mixed The value stored by setFilename()
	 */
	function getFilename() {
		return $this->filename;
	}

	/**
	 * @return string
	 */
	function getArchiveName() {
		return $this->archiveName;
	}

	/**
	 * @return mixed The value stored by setSize()
	 */
	function getSize() {
		return $this->size;
	}

	/**
	 * @return string
	 */
	function getType() {
		return $this->type;
	}

	/**
	 * @return string
	 */
	function getAction() {
		return $this->action;
	}

	/**
	 * @return mixed
	 */
	function getParams() {
		return $this->params;
	}

	/**
	 * Insert this revision into the database, creating the page row first
	 * if the page does not exist yet.
	 *
	 * @return bool False if a revision with the same page, timestamp, user
	 *   text and comment is already present (nothing is written); true once
	 *   the revision has been inserted
	 */
	function importOldRevision() {
		$dbw = wfGetDB( DB_MASTER );

		# Sneak a single revision into place
		$user = User::newFromName( $this->getUser() );
		if( $user ) {
			$userId = intval( $user->getId() );
			$userText = $user->getName();
			$userObj = $user;
		} else {
			// No user object could be built from the name: attribute the
			// revision to user ID 0 with the raw user text
			$userId = 0;
			$userText = $this->getUser();
			$userObj = new User;
		}

		// avoid memory leak...?
		$linkCache = LinkCache::singleton();
		$linkCache->clear();

		$page = WikiPage::factory( $this->title );
		if( !$page->exists() ) {
			# must create the page...
			$pageId = $page->insertOn( $dbw );
			$created = true;
			$oldcountable = null;
		} else {
			$pageId = $page->getId();
			$created = false;

			// Look for an already-imported copy of this revision
			$prior = $dbw->selectField( 'revision', '1',
				array( 'rev_page' => $pageId,
					'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
					'rev_user_text' => $userText,
					'rev_comment'   => $this->getComment() ),
				__METHOD__
			);
			if( $prior ) {
				// @todo FIXME: This could fail slightly for multiple matches :P
				wfDebug( __METHOD__ . ": skipping existing revision for [[" .
					$this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
				return false;
			}
			$oldcountable = $page->isCountable();
		}

		# @todo FIXME: Use original rev_id optionally (better for backups)
		# Insert the row
		$revision = new Revision( array(
			'page'       => $pageId,
			'text'       => $this->getText(),
			'comment'    => $this->getComment(),
			'user'       => $userId,
			'user_text'  => $userText,
			'timestamp'  => $this->timestamp,
			'minor_edit' => $this->minor,
			) );
		$revision->insertOn( $dbw );
		$changed = $page->updateIfNewerOn( $dbw, $revision );

		// Edit updates run only if updateIfNewerOn() did not return false
		// and updates were not disabled via setNoUpdates()
		if ( $changed !== false && !$this->mNoUpdates ) {
			wfDebug( __METHOD__ . ": running updates\n" );
			$page->doEditUpdates( $revision, $userObj, array( 'created' => $created, 'oldcountable' => $oldcountable ) );
		}

		return true;
	}

	/**
	 * Insert this item into the logging table unless it has no title or a
	 * row with the same type, action, timestamp, namespace, title, comment
	 * and params already exists.
	 */
	function importLogItem() {
		$dbw = wfGetDB( DB_MASTER );
		# @todo FIXME: This will not record autoblocks
		if( !$this->getTitle() ) {
			wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
				$this->timestamp . "\n" );
			return;
		}
		# Check if it exists already
		// @todo FIXME: Use original log ID (better for backups)
		$prior = $dbw->selectField( 'logging', '1',
			array( 'log_type' => $this->getType(),
				'log_action'    => $this->getAction(),
				'log_timestamp' => $dbw->timestamp( $this->timestamp ),
				'log_namespace' => $this->getTitle()->getNamespace(),
				'log_title'     => $this->getTitle()->getDBkey(),
				'log_comment'   => $this->getComment(),
				#'log_user_text' => $this->user_text,
				'log_params'    => $this->params ),
			__METHOD__
		);
		// @todo FIXME: This could fail slightly for multiple matches :P
		if( $prior ) {
			wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
				$this->timestamp . "\n" );
			return;
		}
		$log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
		$data = array(
			'log_id' => $log_id,
			'log_type' => $this->type,
			'log_action' => $this->action,
			'log_timestamp' => $dbw->timestamp( $this->timestamp ),
			'log_user' => User::idFromName( $this->user_text ),
			#'log_user_text' => $this->user_text,
			'log_namespace' => $this->getTitle()->getNamespace(),
			'log_title' => $this->getTitle()->getDBkey(),
			'log_comment' => $this->getComment(),
			'log_params' => $this->params
		);
		$dbw->insert( 'logging', $data, __METHOD__ );
	}

	/**
	 * Import this item as a file upload, either as the current version or,
	 * when an archive name is set or the existing file is newer, as an old
	 * version.
	 *
	 * @return bool True if the upload status is good; false if no file
	 *   object or source could be obtained, the SHA-1 check failed, or the
	 *   upload itself failed
	 */
	function importUpload() {
		# Construct a file
		$archiveName = $this->getArchiveName();
		if ( $archiveName ) {
			wfDebug( __METHOD__ . "Importing archived file as $archiveName\n" );
			$file = OldLocalFile::newFromArchiveName( $this->getTitle(),
				RepoGroup::singleton()->getLocalRepo(), $archiveName );
		} else {
			$file = wfLocalFile( $this->getTitle() );
			wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" );
			if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) {
				// The existing file is newer than the imported one, so the
				// import goes in as an archived version instead
				$archiveName = $file->getTimestamp() . '!' . $file->getName();
				$file = OldLocalFile::newFromArchiveName( $this->getTitle(),
					RepoGroup::singleton()->getLocalRepo(), $archiveName );
				wfDebug( __METHOD__ . "File already exists; importing as $archiveName\n" );
			}
		}
		if( !$file ) {
			wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" );
			return false;
		}

		# Get the file source or download if necessary
		$source = $this->getFileSrc();
		$flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0;
		if ( !$source ) {
			$source = $this->downloadSource();
			$flags |= File::DELETE_SOURCE;
		}
		if( !$source ) {
			wfDebug( __METHOD__ . ": Could not fetch remote file.\n" );
			return false;
		}
		$sha1 = $this->getSha1();
		// getSha1() asks wfBaseConvert() for no pad width, so its result is
		// taken to carry no leading zeros, whereas sha1_file() always yields
		// 40 hex digits. Strip the leading zeros before comparing so hashes
		// starting with "0" are not reported as corrupt.
		if ( $sha1 && ( $sha1 !== ltrim( sha1_file( $source ), '0' ) ) ) {
			if ( $flags & File::DELETE_SOURCE ) {
				# Broken file; delete it if it is a temporary file
				unlink( $source );
			}
			wfDebug( __METHOD__ . ": Corrupt file $source.\n" );
			return false;
		}

		$user = User::newFromName( $this->user_text );

		# Do the actual upload
		if ( $archiveName ) {
			$status = $file->uploadOld( $source, $archiveName,
				$this->getTimestamp(), $this->getComment(), $user, $flags );
		} else {
			$status = $file->upload( $source, $this->getComment(), $this->getComment(),
				$flags, false, $this->getTimestamp(), $user );
		}

		if ( $status->isGood() ) {
			wfDebug( __METHOD__ . ": Succesful\n" );
			return true;
		} else {
			wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" );
			return false;
		}
	}

	/**
	 * Fetch the location returned by getSrc() over HTTP into a new
	 * temporary file.
	 *
	 * @return string|bool Path of the temporary file, or false if uploads
	 *   are disabled, the temporary file cannot be opened, or the fetch
	 *   returns nothing
	 */
	function downloadSource() {
		global $wgEnableUploads;
		if( !$wgEnableUploads ) {
			return false;
		}

		$tempo = tempnam( wfTempDir(), 'download' );
		$f = fopen( $tempo, 'wb' );
		if( !$f ) {
			wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
			return false;
		}

		// @todo FIXME!
		$src = $this->getSrc();
		$data = Http::get( $src );
		if( !$data ) {
			wfDebug( "IMPORT: couldn't fetch source $src\n" );
			// Do not leave the empty temporary file behind
			fclose( $f );
			unlink( $tempo );
			return false;
		}

		fwrite( $f, $data );
		fclose( $f );

		return $tempo;
	}

}

/**
 * Import source backed by a string held in memory; the whole string is
 * handed out as a single chunk.
 */
class ImportStringSource {
	/**
	 * @param $string String: the data to serve
	 */
	function __construct( $string ) {
		$this->mString = $string;
		$this->mRead = false;
	}

	/**
	 * @return bool True once the string has been handed out by readChunk()
	 */
	function atEnd() {
		return $this->mRead;
	}

	/**
	 * @return string|bool The whole string on the first call, false on any
	 *   later call
	 */
	function readChunk() {
		if( $this->atEnd() ) {
			return false;
		}
		$this->mRead = true;
		return $this->mString;
	}
}

/**
 * Import source backed by an open stream handle, read in 32768-byte chunks.
 */
class ImportStreamSource {
	/**
	 * @param $handle Resource: open handle to read from
	 */
	function __construct( $handle ) {
		$this->mHandle = $handle;
	}

	/**
	 * @return bool Result of feof() on the handle
	 */
	function atEnd() {
		return feof( $this->mHandle );
	}

	/**
	 * @return string|bool Up to 32768 bytes from the handle, as returned by
	 *   fread()
	 */
	function readChunk() {
		return fread( $this->mHandle, 32768 );
	}

	/**
	 * Open a local file as an import source; warnings raised by fopen()
	 * are suppressed.
	 *
	 * @param $filename String: path of the file to open
	 * @return Status Good status wrapping an ImportStreamSource, or fatal
	 *   'importcantopen' if the file cannot be opened
	 */
	static function newFromFile( $filename ) {
		wfSuppressWarnings();
		$file = fopen( $filename, 'rt' );
		wfRestoreWarnings();
		if( !$file ) {
			return Status::newFatal( "importcantopen" );
		}
		return Status::newGood( new ImportStreamSource( $file ) );
	}

	/**
	 * Build an import source from an uploaded file in $_FILES.
	 *
	 * @param $fieldname String: name of the upload form field
	 * @return Status Result of newFromFile() for the uploaded temp file, or
	 *   a fatal status describing why the upload is unusable
	 */
	static function newFromUpload( $fieldname = "xmlimport" ) {
		$upload =& $_FILES[$fieldname];

		if( !isset( $upload ) || !$upload['name'] ) {
			return Status::newFatal( 'importnofile' );
		}
		if( !empty( $upload['error'] ) ) {
			switch($upload['error']){
				case 1: # The uploaded file exceeds the upload_max_filesize directive in php.ini.
					return Status::newFatal( 'importuploaderrorsize' );
				case 2: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form.
					return Status::newFatal( 'importuploaderrorsize' );
				case 3: # The uploaded file was only partially uploaded
					return Status::newFatal( 'importuploaderrorpartial' );
				case 6: #Missing a temporary folder.
					return Status::newFatal( 'importuploaderrortemp' );
				# case else: # Currently impossible
			}

		}
		// Any other error code falls through to the is_uploaded_file() check
		$fname = $upload['tmp_name'];
		if( is_uploaded_file( $fname ) ) {
			return ImportStreamSource::newFromFile( $fname );
		} else {
			return Status::newFatal( 'importnofile' );
		}
	}

	/**
	 * Fetch a URL and expose the response body as an import source, using
	 * a temporary file to hold the data.
	 *
	 * @param $url String: URL to fetch
	 * @param $method String: HTTP method passed to Http::request()
	 * @return Status Good status wrapping an ImportStreamSource, or fatal
	 *   'importcantopen' if the request fails or no temporary file can be
	 *   created
	 */
	static function newFromURL( $url, $method = 'GET' ) {
		wfDebug( __METHOD__ . ": opening $url\n" );
		# Use the standard HTTP fetch function; it times out
		# quicker and sorts out user-agent problems which might
		# otherwise prevent importing from large sites, such
		# as the Wikimedia cluster, etc.
		$data = Http::request( $method, $url, array( 'followRedirects' => true ) );
		if( $data !== false ) {
			$file = tmpfile();
			if( !$file ) {
				// tmpfile() failed; do not wrap an invalid handle in a source
				return Status::newFatal( 'importcantopen' );
			}
			fwrite( $file, $data );
			fflush( $file );
			// Rewind so the source starts reading at the beginning
			fseek( $file, 0 );
			return Status::newGood( new ImportStreamSource( $file ) );
		} else {
			return Status::newFatal( 'importcantopen' );
		}
	}

	/**
	 * Build an import source from Special:Export on another wiki, addressed
	 * through an interwiki prefix.
	 *
	 * @param $interwiki String: interwiki prefix
	 * @param $page String: page name to export
	 * @param $history Boolean: if true, adds history=1 to the request
	 * @param $templates Boolean: if true, adds templates=1 to the request
	 * @param $pageLinkDepth Integer: if non-zero, sent as pagelink-depth
	 * @return Status Result of newFromURL(), or fatal 'import-noarticle' /
	 *   'importbadinterwiki'
	 */
	public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) {
		if( $page == '' ) {
			return Status::newFatal( 'import-noarticle' );
		}
		$link = Title::newFromText( "$interwiki:Special:Export/$page" );
		if( is_null( $link ) || $link->getInterwiki() == '' ) {
			return Status::newFatal( 'importbadinterwiki' );
		} else {
			$params = array();
			if ( $history ) $params['history'] = 1;
			if ( $templates ) $params['templates'] = 1;
			if ( $pageLinkDepth ) $params['pagelink-depth'] = $pageLinkDepth;
			$url = $link->getFullUrl( $params );
			# For interwikis, use POST to avoid redirects.
			return ImportStreamSource::newFromURL( $url, "POST" );
		}
	}
}