MediaWiki  master
WikiImporter.php
Go to the documentation of this file.
1 <?php
33 class WikiImporter {
34  private $reader = null;
35  private $foreignNamespaces = null;
40  private $mNoUpdates = false;
42  private $config;
46  private $countableCache = [];
47 
55  if ( !class_exists( 'XMLReader' ) ) {
56  throw new Exception( 'Import requires PHP to have been compiled with libxml support' );
57  }
58 
59  $this->reader = new XMLReader();
60  if ( !$config ) {
61  wfDeprecated( __METHOD__ . ' without a Config instance', '1.25' );
62  $config = ConfigFactory::getDefaultInstance()->makeConfig( 'main' );
63  }
64  $this->config = $config;
65 
66  if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
67  stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
68  }
69  $id = UploadSourceAdapter::registerSource( $source );
70 
71  // Enable the entity loader, as it is needed for loading external URLs via
72  // XMLReader::open (T86036)
73  $oldDisable = libxml_disable_entity_loader( false );
74  if ( defined( 'LIBXML_PARSEHUGE' ) ) {
75  $status = $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
76  } else {
77  $status = $this->reader->open( "uploadsource://$id" );
78  }
79  if ( !$status ) {
80  $error = libxml_get_last_error();
81  libxml_disable_entity_loader( $oldDisable );
82  throw new MWException( 'Encountered an internal error while initializing WikiImporter object: ' .
83  $error->message );
84  }
85  libxml_disable_entity_loader( $oldDisable );
86 
87  // Default callbacks
88  $this->setPageCallback( [ $this, 'beforeImportPage' ] );
89  $this->setRevisionCallback( [ $this, "importRevision" ] );
90  $this->setUploadCallback( [ $this, 'importUpload' ] );
91  $this->setLogItemCallback( [ $this, 'importLogItem' ] );
92  $this->setPageOutCallback( [ $this, 'finishImportPage' ] );
93 
94  $this->importTitleFactory = new NaiveImportTitleFactory();
95  }
96 
/**
 * Expose the XMLReader instance this importer pulls nodes from.
 *
 * @return XMLReader|null The value currently held in $this->reader
 */
public function getReader() {
	$xmlReader = $this->reader;

	return $xmlReader;
}
103 
/**
 * Record an XML problem in the logs.
 *
 * Despite the name, nothing is thrown here: the message goes to the
 * importer's own debug channel and then to wfDebug().
 *
 * @param string $err Description of the problem
 */
public function throwXmlError( $err ) {
	$failure = "FAILURE: $err";
	$logLine = "WikiImporter XML error: $err\n";

	$this->debug( $failure );
	wfDebug( $logLine );
}
108 
/**
 * Emit a debug line, but only while debug mode ($this->mDebug) is on.
 *
 * @param string $data Text to log
 */
public function debug( $data ) {
	if ( !$this->mDebug ) {
		return;
	}

	wfDebug( "IMPORT: $data\n" );
}
114 
/**
 * Log a warning line unconditionally (it does not consult the debug flag).
 *
 * @param string $data Text to log
 */
public function warn( $data ) {
	$line = "IMPORT: $data\n";

	wfDebug( $line );
}
118 
/**
 * Report a notice to whoever is driving the import.
 *
 * When a callable notice callback is registered it receives the message key
 * and the list of extra arguments; otherwise the rendered message text is
 * echoed followed by a newline.
 *
 * @param string $msg Message key; any further arguments are message parameters
 */
public function notice( $msg /*, $param, ...*/ ) {
	// Everything after the message key is a message parameter.
	$params = array_slice( func_get_args(), 1 );

	if ( !is_callable( $this->mNoticeCallback ) ) {
		# No ImportReporter -> CLI
		echo wfMessage( $msg, $params )->text() . "\n";
		return;
	}

	call_user_func( $this->mNoticeCallback, $msg, $params );
}
129 
/**
 * Store the debug flag that debug() checks before logging.
 *
 * @param mixed $debug Truthy to enable debug output
 */
function setDebug( $debug ) {
	$this->mDebug = $debug;
}
137 
/**
 * Store the 'no updates' flag; it is handed to every WikiRevision object
 * this importer builds via setNoUpdates().
 *
 * @param mixed $noupdates
 */
function setNoUpdates( $noupdates ) {
	$this->mNoUpdates = $noupdates;
}
145 
/**
 * Register the callback used by notice() and hand back what wfSetVar()
 * returns for the swap.
 *
 * @param callable $callback
 * @return mixed Result of wfSetVar() on the stored callback
 */
public function setNoticeCallback( $callback ) {
	$result = wfSetVar( $this->mNoticeCallback, $callback );

	return $result;
}
155 
/**
 * Replace the callback invoked by pageCallback().
 *
 * @param callable $callback
 * @return callable The callback that was registered before this call
 */
public function setPageCallback( $callback ) {
	$old = $this->mPageCallback;

	$this->mPageCallback = $callback;

	return $old;
}
166 
/**
 * Replace the callback invoked by pageOutCallback().
 *
 * @param callable $callback
 * @return callable The callback that was registered before this call
 */
public function setPageOutCallback( $callback ) {
	$old = $this->mPageOutCallback;

	$this->mPageOutCallback = $callback;

	return $old;
}
181 
/**
 * Replace the callback invoked by revisionCallback().
 *
 * @param callable $callback
 * @return callable The callback that was registered before this call
 */
public function setRevisionCallback( $callback ) {
	$old = $this->mRevisionCallback;

	$this->mRevisionCallback = $callback;

	return $old;
}
192 
/**
 * Replace the callback that processUpload() calls for each upload.
 *
 * @param callable $callback
 * @return callable The callback that was registered before this call
 */
public function setUploadCallback( $callback ) {
	$old = $this->mUploadCallback;

	$this->mUploadCallback = $callback;

	return $old;
}
203 
/**
 * Replace the callback invoked by logItemCallback().
 *
 * @param callable $callback
 * @return callable The callback that was registered before this call
 */
public function setLogItemCallback( $callback ) {
	$old = $this->mLogItemCallback;

	$this->mLogItemCallback = $callback;

	return $old;
}
214 
/**
 * Replace the callback invoked by siteInfoCallback().
 *
 * @param callable $callback
 * @return callable The callback that was registered before this call
 */
public function setSiteInfoCallback( $callback ) {
	$old = $this->mSiteInfoCallback;

	$this->mSiteInfoCallback = $callback;

	return $old;
}
225 
/**
 * Install the factory that processTitle() uses to turn a foreign title
 * into a local one (createTitleFromForeignTitle()).
 *
 * @param object $factory
 */
public function setImportTitleFactory( $factory ) {
	$this->importTitleFactory = $factory;
}
234 
/**
 * Choose a target namespace for imported pages.
 *
 * Returns true when $namespace is null or names an existing, non-negative
 * namespace (in which case a NamespaceImportTitleFactory is installed), and
 * false otherwise.
 *
 * @param null|int $namespace
 * @return bool
 */
240  public function setTargetNamespace( $namespace ) {
241  if ( is_null( $namespace ) ) {
242  // Don't override namespaces
// NOTE(review): listing line 243 is absent from this extract, so any
// statement that belongs between the comment above and the return below
// is not visible here - restore it from the real file before editing.
244  return true;
245  } elseif (
246  $namespace >= 0 &&
247  MWNamespace::exists( intval( $namespace ) )
248  ) {
249  $namespace = intval( $namespace );
250  $this->setImportTitleFactory( new NamespaceImportTitleFactory( $namespace ) );
251  return true;
252  } else {
253  return false;
254  }
255  }
256 
/**
 * Validate a root page under which imported pages should be placed.
 *
 * A fatal is recorded on $status when the text does not yield a local title
 * ('import-rootpage-invalid') or when its namespace has no subpages
 * ('import-rootpage-nosubpage').
 *
 * NOTE(review): listing lines 263, 266, 275 and 284 are absent from this
 * extract. As shown, $status and $wgContLang are used without any visible
 * assignment or declaration, and the success branch shows only the
 * setTargetNamespace( null ) call - consult the real file for the rest.
 *
 * @param null|string $rootpage
 * @return mixed $status (its construction is not visible here)
 */
262  public function setTargetRootPage( $rootpage ) {
264  if ( is_null( $rootpage ) ) {
265  // No rootpage
267  } elseif ( $rootpage !== '' ) {
268  $rootpage = rtrim( $rootpage, '/' ); // avoid double slashes
269  $title = Title::newFromText( $rootpage );
270 
271  if ( !$title || $title->isExternal() ) {
272  $status->fatal( 'import-rootpage-invalid' );
273  } else {
274  if ( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
276 
// The main namespace has no name of its own, so the 'blanknamespace'
// message stands in for it in the error text.
277  $displayNSText = $title->getNamespace() == NS_MAIN
278  ? wfMessage( 'blanknamespace' )->text()
279  : $wgContLang->getNsText( $title->getNamespace() );
280  $status->fatal( 'import-rootpage-nosubpage', $displayNSText );
281  } else {
282  // set namespace to 'all', so the namespace check in processTitle() can pass
283  $this->setTargetNamespace( null );
285  }
286  }
287  }
288  return $status;
289  }
290 
/**
 * Set the directory that handleUpload() prefixes to an upload's 'rel'
 * value when looking for the file on disk.
 *
 * @param string $dir
 */
public function setImageBasePath( $dir ) {
	$this->mImageBasePath = $dir;
}
297 
/**
 * Set the flag that handleUpload() checks before passing an upload on to
 * processUpload().
 *
 * @param mixed $import Truthy to import uploads
 */
public function setImportUploads( $import ) {
	$this->mImportUploads = $import;
}
304 
/**
 * Default page callback: remember whether the page was countable before the
 * import touches it, keyed by 'title_' plus the prefixed title text.
 *
 * @param array $titleAndForeignTitle Element 0 is the title object used here
 * @return bool Always true
 */
311  public function beforeImportPage( $titleAndForeignTitle ) {
312  $title = $titleAndForeignTitle[0];
// NOTE(review): listing line 313 is absent from this extract; $page is used
// below without a visible assignment - restore it from the real file.
314  $this->countableCache['title_' . $title->getPrefixedText()] = $page->isCountable();
315  return true;
316  }
317 
/**
 * Default per-revision callback: import the revision unless its content
 * model cannot live on the target title.
 *
 * A notice is raised and false returned when the content handler rejects the
 * title ('import-error-bad-location') or when importing throws
 * MWContentSerializationException ('import-error-unserialize').
 *
 * @param WikiRevision $revision
 * @return mixed Result of importOldRevision(), or false on either failure
 */
public function importRevision( $revision ) {
	$handler = $revision->getContentHandler();

	if ( $handler->canBeUsedOn( $revision->getTitle() ) ) {
		try {
			return $revision->importOldRevision();
		} catch ( MWContentSerializationException $ex ) {
			$errorKey = 'import-error-unserialize';
		}
	} else {
		$errorKey = 'import-error-bad-location';
	}

	// Both failure modes report the same four details about the revision.
	$this->notice( $errorKey,
		$revision->getTitle()->getPrefixedText(),
		$revision->getID(),
		$revision->getModel(),
		$revision->getFormat() );

	return false;
}
346 
/**
 * Default log-item callback: delegate to the revision object's own
 * importLogItem().
 *
 * @param WikiRevision $revision
 * @return mixed Whatever $revision->importLogItem() returns
 */
public function importLogItem( $revision ) {
	$outcome = $revision->importLogItem();

	return $outcome;
}
355 
/**
 * Default upload callback: delegate to the revision object's own
 * importUpload().
 *
 * @param WikiRevision $revision
 * @return mixed Whatever $revision->importUpload() returns
 */
public function importUpload( $revision ) {
	$outcome = $revision->importUpload();

	return $outcome;
}
364 
/**
 * Default page-out callback: adjust the article count if the page's
 * countable state changed during the import, then run the
 * 'AfterImportPage' hook with this method's own arguments.
 *
 * NOTE(review): listing lines 383 and 395 are absent from this extract.
 * $page is used without a visible assignment, and the statement that the
 * "'articles' => ..." array belongs to has lost its opening - restore both
 * from the real file before editing.
 *
 * @param Title $title
 * @param mixed $foreignTitle
 * @param int $revCount
 * @param int $sRevCount
 * @param array $pageInfo
 * @return mixed Result of Hooks::run( 'AfterImportPage', ... )
 */
374  public function finishImportPage( $title, $foreignTitle, $revCount,
375  $sRevCount, $pageInfo ) {
376 
377  // Update article count statistics (T42009)
378  // The normal counting logic in WikiPage->doEditUpdates() is designed for
379  // one-revision-at-a-time editing, not bulk imports. In this situation it
380  // suffers from issues of slave lag. We let WikiPage handle the total page
381  // and revision count, and we implement our own custom logic for the
382  // article (content page) count.
384  $page->loadPageData( 'fromdbmaster' );
385  $content = $page->getContent();
386  if ( $content === null ) {
387  wfDebug( __METHOD__ . ': Skipping article count adjustment for ' . $title .
388  ' because WikiPage::getContent() returned null' );
389  } else {
390  $editInfo = $page->prepareContentForEdit( $content );
391  $countKey = 'title_' . $title->getPrefixedText();
392  $countable = $page->isCountable( $editInfo );
// Only act when beforeImportPage() cached a state for this title and the
// state differs now; the delta below is then +1 or -1.
393  if ( array_key_exists( $countKey, $this->countableCache ) &&
394  $countable != $this->countableCache[$countKey] ) {
396  'articles' => ( (int)$countable - (int)$this->countableCache[$countKey] )
397  ] ) );
398  }
399  }
400 
401  $args = func_get_args();
402  return Hooks::run( 'AfterImportPage', $args );
403  }
404 
/**
 * Alternate per-revision callback for debugging: dump the revision's title,
 * user, timestamp, comment and text through debug().
 *
 * @param object &$revision Object exposing title, user_text, timestamp,
 *   comment and text properties
 */
public function debugRevisionHandler( &$revision ) {
	$this->debug( "Got revision:" );

	$titleText = is_object( $revision->title )
		? $revision->title->getPrefixedText()
		: "<invalid>";
	$this->debug( "-- Title: " . $titleText );

	$labelledFields = [
		'User' => 'user_text',
		'Timestamp' => 'timestamp',
		'Comment' => 'comment',
		'Text' => 'text',
	];
	foreach ( $labelledFields as $label => $field ) {
		$this->debug( "-- $label: " . $revision->$field );
	}
}
421 
/**
 * Pass parsed site info to the registered site-info callback, if any.
 *
 * @param array $siteInfo
 * @return mixed The callback's result, or false when none is set
 */
private function siteInfoCallback( $siteInfo ) {
	if ( !isset( $this->mSiteInfoCallback ) ) {
		return false;
	}

	$arguments = [ $siteInfo, $this ];

	return call_user_func_array( $this->mSiteInfoCallback, $arguments );
}
435 
/**
 * Tell the registered page callback, if any, that a new page has started.
 *
 * @param array $title Value forwarded unchanged to the callback
 */
function pageCallback( $title ) {
	if ( !isset( $this->mPageCallback ) ) {
		return;
	}

	call_user_func( $this->mPageCallback, $title );
}
445 
/**
 * Tell the registered page-out callback, if any, that a page is complete.
 * All arguments are forwarded to the callback unchanged.
 *
 * @param Title $title
 * @param mixed $foreignTitle
 * @param int $revCount
 * @param int $sucCount
 * @param array $pageInfo
 */
private function pageOutCallback( $title, $foreignTitle, $revCount,
	$sucCount, $pageInfo
) {
	if ( !isset( $this->mPageOutCallback ) ) {
		return;
	}

	$forwarded = func_get_args();
	call_user_func_array( $this->mPageOutCallback, $forwarded );
}
461 
/**
 * Pass a revision to the registered revision callback, if any.
 *
 * @param WikiRevision $revision
 * @return mixed The callback's result, or false when none is set
 */
private function revisionCallback( $revision ) {
	if ( !isset( $this->mRevisionCallback ) ) {
		return false;
	}

	$arguments = [ $revision, $this ];

	return call_user_func_array( $this->mRevisionCallback, $arguments );
}
475 
/**
 * Pass a log item to the registered log-item callback, if any.
 *
 * @param WikiRevision $revision
 * @return mixed The callback's result, or false when none is set
 */
private function logItemCallback( $revision ) {
	if ( !isset( $this->mLogItemCallback ) ) {
		return false;
	}

	$arguments = [ $revision, $this ];

	return call_user_func_array( $this->mLogItemCallback, $arguments );
}
489 
/**
 * Read one attribute of the node the reader is currently positioned on.
 *
 * @param string $attr Attribute name
 * @return mixed Whatever XMLReader::getAttribute() yields for that name
 */
public function nodeAttribute( $attr ) {
	$value = $this->reader->getAttribute( $attr );

	return $value;
}
499 
/**
 * Collect the text of the current element.
 *
 * Reads forward, concatenating TEXT, CDATA and SIGNIFICANT_WHITESPACE node
 * values, and returns as soon as an END_ELEMENT node is reached. An empty
 * element yields "" without advancing the reader. If the stream ends before
 * any END_ELEMENT, the reader is closed and '' is returned.
 *
 * @return string
 */
public function nodeContents() {
	if ( $this->reader->isEmptyElement ) {
		return "";
	}

	$textTypes = [
		XMLReader::TEXT,
		XMLReader::CDATA,
		XMLReader::SIGNIFICANT_WHITESPACE,
	];
	$collected = "";

	while ( $this->reader->read() ) {
		$kind = $this->reader->nodeType;

		if ( $kind === XMLReader::END_ELEMENT ) {
			return $collected;
		}
		if ( in_array( $kind, $textTypes, true ) ) {
			$collected .= $this->reader->value;
		}
	}

	// Ran out of input without seeing a closing tag.
	$this->reader->close();
	return '';
}
527 
/**
 * Primary entry point: walk the dump and dispatch each top-level element.
 *
 * The root element must be <mediawiki>. Its <siteinfo>, <page> and <logitem>
 * children go to their handlers; any other element is logged and skipped
 * together with its subtree. The 'ImportHandleToplevelXMLTag' hook runs
 * first for every node and suppresses the built-in handling when it returns
 * false.
 *
 * The previous libxml entity-loader state is restored and the reader closed
 * in a finally block, so cleanup happens however the method exits -
 * including for throwables that are not Exception subclasses, and on the
 * wrong-root-tag path.
 *
 * @throws MWException If the document's first node is not <mediawiki>
 * @return bool Always true when the import runs to completion
 */
public function doImport() {
	// Calls to reader->read need to be wrapped in calls to
	// libxml_disable_entity_loader() to avoid local file
	// inclusion attacks (bug 46932).
	$oldDisable = libxml_disable_entity_loader( true );

	try {
		$this->reader->read();

		if ( $this->reader->localName != 'mediawiki' ) {
			throw new MWException( "Expected <mediawiki> tag, got " .
				$this->reader->localName );
		}
		$this->debug( "<mediawiki> tag is correct." );

		$this->debug( "Starting primary dump processing loop." );

		$keepReading = $this->reader->read();
		$skip = false;
		while ( $keepReading ) {
			$tag = $this->reader->localName;
			$type = $this->reader->nodeType;

			if ( !Hooks::run( 'ImportHandleToplevelXMLTag', [ $this ] ) ) {
				// Do nothing
			} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
				break;
			} elseif ( $tag == 'siteinfo' ) {
				$this->handleSiteInfo();
			} elseif ( $tag == 'page' ) {
				$this->handlePage();
			} elseif ( $tag == 'logitem' ) {
				$this->handleLogItem();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled top-level XML tag $tag" );

				$skip = true;
			}

			if ( $skip ) {
				// next() jumps over the unhandled element's whole subtree.
				$keepReading = $this->reader->next();
				$skip = false;
				$this->debug( "Skip" );
			} else {
				$keepReading = $this->reader->read();
			}
		}
	} finally {
		libxml_disable_entity_loader( $oldDisable );
		$this->reader->close();
	}

	return true;
}
595 
/**
 * Consume a <siteinfo> element.
 *
 * Each <namespace> child is stored in $this->foreignNamespaces under its
 * 'key' attribute; 'sitename', 'base', 'generator' and 'case' are copied
 * into the info array. The array, with the namespace map added under
 * '_namespaces', is then handed to siteInfoCallback().
 */
private function handleSiteInfo() {
	$this->debug( "Enter site info handler." );

	// Tags whose text is stored verbatim under the tag name.
	$simpleTags = [ 'sitename', 'base', 'generator', 'case' ];
	$collected = [];

	while ( $this->reader->read() ) {
		$name = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $name == 'siteinfo' ) {
			break;
		}

		if ( $name == 'namespace' ) {
			// Read the attribute before nodeContents() moves the reader on.
			$key = $this->nodeAttribute( 'key' );
			$this->foreignNamespaces[$key] = $this->nodeContents();
		} elseif ( in_array( $name, $simpleTags ) ) {
			$collected[$name] = $this->nodeContents();
		}
	}

	$collected['_namespaces'] = $this->foreignNamespaces;
	$this->siteInfoCallback( $collected );
}
622 
/**
 * Consume a <logitem> element and pass the gathered fields to
 * processLogItem().
 *
 * The 'ImportHandleLogItemXMLTag' hook runs for every node; when it returns
 * false the built-in handling of that node is skipped.
 */
private function handleLogItem() {
	$this->debug( "Enter log item handler." );

	// Tags whose text is stored verbatim under the tag name.
	$simpleTags = [ 'id', 'comment', 'type', 'action', 'timestamp',
		'logtitle', 'params' ];
	$logInfo = [];

	while ( $this->reader->read() ) {
		$name = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $name == 'logitem' ) {
			break;
		}

		if ( !Hooks::run( 'ImportHandleLogItemXMLTag', [ $this, $logInfo ] ) ) {
			// A hook dealt with this node.
			continue;
		}

		if ( in_array( $name, $simpleTags ) ) {
			$logInfo[$name] = $this->nodeContents();
		} elseif ( $name == 'contributor' ) {
			$logInfo['contributor'] = $this->handleContributor();
		} elseif ( $name != '#text' ) {
			$this->warn( "Unhandled log-item XML tag $name" );
		}
	}

	$this->processLogItem( $logInfo );
}
654 
/**
 * Build a WikiRevision from parsed log-item fields and hand it to
 * logItemCallback().
 *
 * 'type' and 'action' are read unconditionally; every other field is applied
 * only when present. A missing contributor username becomes 'Unknown user'.
 *
 * @param array $logInfo Fields gathered by handleLogItem()
 * @return mixed Result of logItemCallback()
 */
private function processLogItem( $logInfo ) {
	$entry = new WikiRevision( $this->config );

	if ( isset( $logInfo['id'] ) ) {
		$entry->setID( $logInfo['id'] );
	}

	$entry->setType( $logInfo['type'] );
	$entry->setAction( $logInfo['action'] );

	if ( isset( $logInfo['timestamp'] ) ) {
		$entry->setTimestamp( $logInfo['timestamp'] );
	}
	if ( isset( $logInfo['params'] ) ) {
		$entry->setParams( $logInfo['params'] );
	}
	if ( isset( $logInfo['logtitle'] ) ) {
		// @todo Using Title for non-local titles is a recipe for disaster.
		// We should use ForeignTitle here instead.
		$logTitle = Title::newFromText( $logInfo['logtitle'] );
		$entry->setTitle( $logTitle );
	}

	$entry->setNoUpdates( $this->mNoUpdates );

	if ( isset( $logInfo['comment'] ) ) {
		$entry->setComment( $logInfo['comment'] );
	}

	if ( isset( $logInfo['contributor']['ip'] ) ) {
		$entry->setUserIP( $logInfo['contributor']['ip'] );
	}

	$username = isset( $logInfo['contributor']['username'] )
		? $logInfo['contributor']['username']
		: 'Unknown user';
	$entry->setUsername( $username );

	return $this->logItemCallback( $entry );
}
698 
/**
 * Consume a <page> element.
 *
 * Simple child tags are stored in $pageInfo. The first <revision> or
 * <upload> child triggers title resolution via processTitle(); if that
 * fails, the remaining children of the page are skipped. Each revision or
 * upload of a page with a valid title goes to handleRevision() or
 * handleUpload(). When a title was resolved, pageOutCallback() is invoked
 * once the page is finished.
 */
private function handlePage() {
	// Handle page data.
	$this->debug( "Enter page handler." );
	$pageInfo = [ 'revisionCount' => 0, 'successfulRevisionCount' => 0 ];

	// Tags that can just be stuffed in the pageInfo array
	$plainTags = [ 'title', 'ns', 'id', 'redirect', 'restrictions' ];

	$skipSubtree = false;
	$titleRejected = false;

	while ( $skipSubtree ? $this->reader->next() : $this->reader->read() ) {
		$name = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $name == 'page' ) {
			break;
		}

		$skipSubtree = false;

		if ( $titleRejected ) {
			// The title is invalid, bail out of this page
			$skipSubtree = true;
			continue;
		}

		if ( !Hooks::run( 'ImportHandlePageXMLTag', [ $this, &$pageInfo ] ) ) {
			// A hook dealt with this node.
			continue;
		}

		if ( in_array( $name, $plainTags ) ) {
			// <redirect title="NewTitle"/> keeps its value in an attribute,
			// unlike the other simple tags, which carry it as text.
			$pageInfo[$name] = ( $name == 'redirect' )
				? $this->nodeAttribute( 'title' )
				: $this->nodeContents();
			continue;
		}

		if ( $name == 'revision' || $name == 'upload' ) {
			if ( !isset( $titlePair ) ) {
				$namespaceId = isset( $pageInfo['ns'] ) ? $pageInfo['ns'] : null;
				$titlePair = $this->processTitle( $pageInfo['title'], $namespaceId );

				// $titlePair is either an array of two titles or false.
				if ( is_array( $titlePair ) ) {
					$this->pageCallback( $titlePair );
					$pageInfo['_title'] = $titlePair[0];
					$foreignTitle = $titlePair[1];
				} else {
					$titleRejected = true;
					$skipSubtree = true;
				}
			}

			if ( $titlePair ) {
				if ( $name == 'revision' ) {
					$this->handleRevision( $pageInfo );
				} else {
					$this->handleUpload( $pageInfo );
				}
			}
			continue;
		}

		if ( $name != '#text' ) {
			$this->warn( "Unhandled page XML tag $name" );
			$skipSubtree = true;
		}
	}

	// @note $pageInfo['_title'] is only set if a valid title was processed
	// above; in that case pageCallback has already been called and
	// $foreignTitle is set too, since both come from the same
	// processTitle() result. Without '_title' there is nothing to report.
	if ( array_key_exists( '_title', $pageInfo ) ) {
		$this->pageOutCallback( $pageInfo['_title'], $foreignTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'],
			$pageInfo );
	}
}
779 
/**
 * Consume a <revision> element and pass the gathered fields to
 * processRevision().
 *
 * The page's 'revisionCount' is always incremented;
 * 'successfulRevisionCount' only when processRevision() returns a truthy
 * value. The 'ImportHandleRevisionXMLTag' hook runs for every node and
 * suppresses the built-in handling of that node when it returns false.
 *
 * @param array &$pageInfo Page state from handlePage(); counters are updated
 */
private function handleRevision( &$pageInfo ) {
	$this->debug( "Enter revision handler" );
	$revisionInfo = [];

	$normalFields = [ 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text' ];

	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'revision' ) {
			break;
		}

		// The skip request applies to one unhandled element only. Reset it
		// here (as handlePage() does) so later nodes are advanced with
		// read() again instead of next().
		$skip = false;

		$tag = $this->reader->localName;

		if ( !Hooks::run( 'ImportHandleRevisionXMLTag', [
			$this, $pageInfo, $revisionInfo
		] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$revisionInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$revisionInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled revision XML tag $tag" );
			$skip = true;
		}
	}

	$pageInfo['revisionCount']++;
	if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
		$pageInfo['successfulRevisionCount']++;
	}
}
818 
/**
 * Build a WikiRevision from parsed revision fields and hand it to
 * revisionCallback().
 *
 * NOTE(review): listing line 825 is absent from this extract;
 * $wgMaxArticleSize is used below without a visible declaration - restore
 * that line from the real file before editing.
 *
 * @param array $pageInfo Page state; '_title' is read
 * @param array $revisionInfo Fields gathered by handleRevision()
 * @throws MWException If the text is larger than $wgMaxArticleSize KB for
 *   one of the listed content models
 * @return mixed Result of revisionCallback()
 */
824  private function processRevision( $pageInfo, $revisionInfo ) {
826 
827  // Make sure revisions won't violate $wgMaxArticleSize, which could lead to
828  // database errors and instability. Testing for revisions with only listed
829  // content models, as other content models might use serialization formats
830  // which aren't checked against $wgMaxArticleSize.
// NOTE(review): $revisionInfo['text'] is read here without an isset() check,
// although the code further down treats 'text' as optional.
831  if ( ( !isset( $revisionInfo['model'] ) ||
832  in_array( $revisionInfo['model'], [
833  'wikitext',
834  'css',
835  'json',
836  'javascript',
837  'text',
838  ''
839  ] ) ) &&
840  strlen( $revisionInfo['text'] ) > $wgMaxArticleSize * 1024
841  ) {
842  throw new MWException( 'The text of ' .
843  ( isset( $revisionInfo['id'] ) ?
844  "the revision with ID $revisionInfo[id]" :
845  'a revision'
846  ) . " exceeds the maximum allowable size ($wgMaxArticleSize KB)" );
847  }
848 
849  $revision = new WikiRevision( $this->config );
850 
851  if ( isset( $revisionInfo['id'] ) ) {
852  $revision->setID( $revisionInfo['id'] );
853  }
854  if ( isset( $revisionInfo['model'] ) ) {
855  $revision->setModel( $revisionInfo['model'] );
856  }
857  if ( isset( $revisionInfo['format'] ) ) {
858  $revision->setFormat( $revisionInfo['format'] );
859  }
860  $revision->setTitle( $pageInfo['_title'] );
861 
862  if ( isset( $revisionInfo['text'] ) ) {
// The text goes through the content handler's importTransform() before it
// is stored on the revision.
863  $handler = $revision->getContentHandler();
864  $text = $handler->importTransform(
865  $revisionInfo['text'],
866  $revision->getFormat() );
867 
868  $revision->setText( $text );
869  }
870  if ( isset( $revisionInfo['timestamp'] ) ) {
871  $revision->setTimestamp( $revisionInfo['timestamp'] );
872  } else {
// No timestamp in the dump: fall back to the current time.
873  $revision->setTimestamp( wfTimestampNow() );
874  }
875 
876  if ( isset( $revisionInfo['comment'] ) ) {
877  $revision->setComment( $revisionInfo['comment'] );
878  }
879 
// The mere presence of a 'minor' entry marks the revision as minor.
880  if ( isset( $revisionInfo['minor'] ) ) {
881  $revision->setMinor( true );
882  }
// Contributor: an IP takes precedence over a username; with neither, the
// placeholder 'Unknown user' is used.
883  if ( isset( $revisionInfo['contributor']['ip'] ) ) {
884  $revision->setUserIP( $revisionInfo['contributor']['ip'] );
885  } elseif ( isset( $revisionInfo['contributor']['username'] ) ) {
886  $revision->setUsername( $revisionInfo['contributor']['username'] );
887  } else {
888  $revision->setUsername( 'Unknown user' );
889  }
890  $revision->setNoUpdates( $this->mNoUpdates );
891 
892  return $this->revisionCallback( $revision );
893  }
894 
/**
 * Consume an <upload> element.
 *
 * Simple child tags are stored by name. A <contents> child whose 'encoding'
 * attribute is 'base64' is decoded into a temporary file. If an image base
 * path is configured and the file named by 'rel' exists beneath it, that
 * file replaces any temporary one as the source. The upload is passed to
 * processUpload() only when upload importing is enabled.
 *
 * @param array &$pageInfo Page state from handlePage()
 * @return mixed Result of processUpload(), or null when uploads are disabled
 */
private function handleUpload( &$pageInfo ) {
	$this->debug( "Enter upload handler" );
	$uploadInfo = [];

	$normalFields = [ 'timestamp', 'comment', 'filename', 'text',
		'src', 'size', 'sha1base36', 'archivename', 'rel' ];

	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'upload' ) {
			break;
		}

		// The skip request applies to one unhandled element only. Reset it
		// here (as handlePage() does) so later nodes are advanced with
		// read() again instead of next().
		$skip = false;

		$tag = $this->reader->localName;

		if ( !Hooks::run( 'ImportHandleUploadXMLTag', [
			$this, $pageInfo
		] ) ) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$uploadInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$uploadInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag == 'contents' ) {
			$contents = $this->nodeContents();
			$encoding = $this->reader->getAttribute( 'encoding' );
			if ( $encoding === 'base64' ) {
				$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
				$uploadInfo['isTempSrc'] = true;
			}
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled upload XML tag $tag" );
			$skip = true;
		}
	}

	if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
		// NOTE(review): 'rel' comes straight from the dump and is joined to
		// the base path unchecked - confirm whether values containing '..'
		// need to be rejected here.
		$path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
		if ( file_exists( $path ) ) {
			$uploadInfo['fileSrc'] = $path;
			$uploadInfo['isTempSrc'] = false;
		}
	}

	if ( $this->mImportUploads ) {
		return $this->processUpload( $pageInfo, $uploadInfo );
	}
}
949 
/**
 * Write data to a fresh temporary file (prefix 'importupload') created
 * under wfTempDir().
 *
 * @param string $contents Bytes to write
 * @return string Path returned by tempnam()
 */
private function dumpTemp( $contents ) {
	$tmpPath = tempnam( wfTempDir(), 'importupload' );

	file_put_contents( $tmpPath, $contents );

	return $tmpPath;
}
959 
/**
 * Build a WikiRevision describing one uploaded file version and pass it to
 * the registered upload callback.
 *
 * The page 'id' and the upload's 'timestamp', 'filename', 'src', 'size' and
 * 'comment' are read unconditionally; the remaining fields are applied only
 * when present.
 *
 * @param array $pageInfo Page state; '_title' and 'id' are read
 * @param array $uploadInfo Fields gathered by handleUpload()
 * @return mixed Result of the upload callback
 */
private function processUpload( $pageInfo, $uploadInfo ) {
	$upload = new WikiRevision( $this->config );
	$description = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';

	$upload->setTitle( $pageInfo['_title'] );
	$upload->setID( $pageInfo['id'] );
	$upload->setTimestamp( $uploadInfo['timestamp'] );
	$upload->setText( $description );
	$upload->setFilename( $uploadInfo['filename'] );

	if ( isset( $uploadInfo['archivename'] ) ) {
		$upload->setArchiveName( $uploadInfo['archivename'] );
	}

	$upload->setSrc( $uploadInfo['src'] );

	if ( isset( $uploadInfo['fileSrc'] ) ) {
		$isTemporary = !empty( $uploadInfo['isTempSrc'] );
		$upload->setFileSrc( $uploadInfo['fileSrc'], $isTemporary );
	}
	if ( isset( $uploadInfo['sha1base36'] ) ) {
		$upload->setSha1Base36( $uploadInfo['sha1base36'] );
	}

	$upload->setSize( intval( $uploadInfo['size'] ) );
	$upload->setComment( $uploadInfo['comment'] );

	// Unlike revisions, both contributor fields are applied when present.
	if ( isset( $uploadInfo['contributor']['ip'] ) ) {
		$upload->setUserIP( $uploadInfo['contributor']['ip'] );
	}
	if ( isset( $uploadInfo['contributor']['username'] ) ) {
		$upload->setUsername( $uploadInfo['contributor']['username'] );
	}

	$upload->setNoUpdates( $this->mNoUpdates );

	return call_user_func( $this->mUploadCallback, $upload );
}
998 
/**
 * Consume a <contributor> element and return its 'id', 'ip' and 'username'
 * children, keyed by tag name. An empty element yields an empty array.
 *
 * @return array
 */
private function handleContributor() {
	$details = [];

	if ( $this->reader->isEmptyElement ) {
		return $details;
	}

	$wanted = [ 'id', 'ip', 'username' ];

	while ( $this->reader->read() ) {
		$name = $this->reader->localName;

		if ( $this->reader->nodeType == XMLReader::END_ELEMENT && $name == 'contributor' ) {
			break;
		}

		if ( in_array( $name, $wanted ) ) {
			$details[$name] = $this->nodeContents();
		}
	}

	return $details;
}
1024 
/**
 * Turn a title string from the dump into a local title.
 *
 * A namespace-aware foreign-title factory is used when site info supplied a
 * namespace map, a naive one otherwise. The result is rejected, with a
 * notice, when no local title can be made, when it is external, when it
 * cannot exist, or - outside command-line mode - when the user may not edit
 * it or may not create it while it does not exist.
 *
 * @param string $text Title text from the dump
 * @param string|null $ns Namespace ID from the dump, if present
 * @return array|bool [ local title, foreign title ], or false if rejected
 */
private function processTitle( $text, $ns = null ) {
	$foreignTitleFactory = is_null( $this->foreignNamespaces )
		? new NaiveForeignTitleFactory()
		: new NamespaceAwareForeignTitleFactory( $this->foreignNamespaces );

	$foreignTitle = $foreignTitleFactory->createForeignTitle( $text, intval( $ns ) );
	$title = $this->importTitleFactory->createTitleFromForeignTitle( $foreignTitle );

	$commandLineMode = $this->config->get( 'CommandLineMode' );

	if ( is_null( $title ) ) {
		# Invalid page title? Ignore the page
		$this->notice( 'import-error-invalid', $foreignTitle->getFullText() );
		return false;
	}

	$errorKey = null;
	if ( $title->isExternal() ) {
		$errorKey = 'import-error-interwiki';
	} elseif ( !$title->canExist() ) {
		$errorKey = 'import-error-special';
	} elseif ( !( $title->userCan( 'edit' ) || $commandLineMode ) ) {
		# Do not import if the importing wiki user cannot edit this page
		$errorKey = 'import-error-edit';
	} elseif ( !( $title->exists() || $title->userCan( 'create' ) || $commandLineMode ) ) {
		# Do not import if the importing wiki user cannot create this page
		$errorKey = 'import-error-create';
	}

	if ( $errorKey !== null ) {
		$this->notice( $errorKey, $title->getPrefixedText() );
		return false;
	}

	return [ $title, $foreignTitle ];
}
1067 }
setTargetRootPage($rootpage)
Set a target root page under which all pages are imported.
static factory(Title $title)
Create a WikiPage object of the appropriate class for the given title.
Definition: WikiPage.php:101
A parser that translates page titles on a foreign wiki into ForeignTitle objects, using information a...
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
$wgMaxArticleSize
Maximum article size in kilobytes.
processRevision($pageInfo, $revisionInfo)
setImageBasePath($dir)
if(count($args)==0) $dir
const NS_MAIN
Definition: Defines.php:69
pageOutCallback($title, $foreignTitle, $revCount, $sucCount, $pageInfo)
Notify the callback function when a "</page>" is closed.
XML file reader for the page data importer.
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
setSiteInfoCallback($callback)
Sets the action to perform when site info is encountered.
$source
finishImportPage($title, $foreignTitle, $revCount, $sRevCount, $pageInfo)
Mostly for hook use.
static exists($index)
Returns whether the specified namespace exists.
static newFromText($text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:256
setNoticeCallback($callback)
Set a callback that displays notice messages.
importLogItem($revision)
Default per-revision callback, performs the import.
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
array $countableCache
setRevisionCallback($callback)
Sets the action to perform as each page revision is reached.
setTargetNamespace($namespace)
Set a target namespace to override the defaults.
setPageOutCallback($callback)
Sets the action to perform as each page in the stream is completed.
wfDebug($text, $dest= 'all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
setImportTitleFactory($factory)
Sets the factory object to use to convert ForeignTitle objects into local Title objects.
if($line===false) $args
Definition: cdb.php:64
$factory
dumpTemp($contents)
handleRevision(&$pageInfo)
wfTempDir()
Tries to get the system directory for temporary files.
nodeContents()
Shouldn't something like this be built-in to XMLReader? Fetches text contents of the current element...
Interface for configuration instances.
Definition: Config.php:28
throwXmlError($err)
logItemCallback($revision)
Notify the callback function of a new log item.
doImport()
Primary entry point.
static factory(array $deltas)
MediaWiki exception.
Definition: MWException.php:26
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
siteInfoCallback($siteInfo)
Notify the callback function of site info.
nodeAttribute($attr)
Retrieves the contents of the named attribute of the current element.
$params
Represents a revision, log entry or upload during the import process.
wfDeprecated($function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned after processing after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock()-offset Set to overwrite offset parameter in $wgRequest set to ''to unsetoffset-wrap String Wrap the message in html(usually something like"&lt
$buffer
Allows to change the fields on the form that will be generated are created Can be used to omit specific feeds from being outputted You must not use this hook to add use OutputPage::addFeedLink() instead.&$feedLinks conditions will AND in the final query as a Content object as a Content object $title
Definition: hooks.txt:312
static hasSubpages($index)
Does the namespace allow subpages?
static addUpdate(DeferrableUpdate $update, $type=self::POSTSEND)
Add an update to the deferred list.
Config $config
static run($event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:131
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
Definition: hooks.txt:981
debugRevisionHandler(&$revision)
Alternate per-revision callback, for debugging.
revisionCallback($revision)
Notify the callback function of a revision.
setDebug($debug)
Set debug mode...
setImportUploads($import)
setPageCallback($callback)
Sets the action to perform as each new page in the stream is reached.
__construct(ImportSource $source, Config $config=null)
Creates a WikiImporter drawing from the source provided.
setNoUpdates($noupdates)
Set 'no updates' mode.
processTitle($text, $ns=null)
static getDefaultInstance()
static registerSource(ImportSource $source)
setUploadCallback($callback)
Sets the action to perform as each file upload version is reached.
setLogItemCallback($callback)
Sets the action to perform as each log item is reached.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
A parser that translates page titles on a foreign wiki into ForeignTitle objects, with no knowledge o...
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
processUpload($pageInfo, $uploadInfo)
Source interface for XML import.
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content $content
Definition: hooks.txt:1020
ImportTitleFactory $importTitleFactory
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition: design.txt:56
A class to convert page titles on a foreign wiki (ForeignTitle objects) into page titles on the local...
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set $status
Definition: hooks.txt:1020
Reporting callback.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition: hooks.txt:776
processLogItem($logInfo)
$debug
Definition: mcc.php:31
beforeImportPage($titleAndForeignTitle)
Default per-page callback.
pageCallback($title)
Notify the callback function when a new "<page>" is reached.
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2376
importUpload($revision)
Dummy for now...
static newGood($value=null)
Factory function for good results.
Definition: Status.php:101
Exception representing a failure to serialize or unserialize a content object.
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached $page
Definition: hooks.txt:2376
importRevision($revision)
Default per-revision callback, performs the import.
handleUpload(&$pageInfo)