MediaWiki  REL1_20
SpecialExport.php
Go to the documentation of this file.
00001 <?php
00031 class SpecialExport extends SpecialPage {
00032 
00033         private $curonly, $doExport, $pageLinkDepth, $templates;
00034         private $images;
00035 
00036         public function __construct() {
00037                 parent::__construct( 'Export' );
00038         }
00039 
00040         public function execute( $par ) {
00041                 global $wgSitename, $wgExportAllowListContributors, $wgExportFromNamespaces;
00042                 global $wgExportAllowHistory, $wgExportMaxHistory, $wgExportMaxLinkDepth;
00043                 global $wgExportAllowAll;
00044 
00045                 $this->setHeaders();
00046                 $this->outputHeader();
00047 
00048                 // Set some variables
00049                 $this->curonly = true;
00050                 $this->doExport = false;
00051                 $request = $this->getRequest();
00052                 $this->templates = $request->getCheck( 'templates' );
00053                 $this->images = $request->getCheck( 'images' ); // Doesn't do anything yet
00054                 $this->pageLinkDepth = $this->validateLinkDepth(
00055                         $request->getIntOrNull( 'pagelink-depth' )
00056                 );
00057                 $nsindex = '';
00058                 $exportall = false;
00059 
00060                 if ( $request->getCheck( 'addcat' ) ) {
00061                         $page = $request->getText( 'pages' );
00062                         $catname = $request->getText( 'catname' );
00063 
00064                         if ( $catname !== '' && $catname !== null && $catname !== false ) {
00065                                 $t = Title::makeTitleSafe( NS_MAIN, $catname );
00066                                 if ( $t ) {
00072                                         $catpages = $this->getPagesFromCategory( $t );
00073                                         if ( $catpages ) {
00074                                                 $page .= "\n" . implode( "\n", $catpages );
00075                                         }
00076                                 }
00077                         }
00078                 }
00079                 elseif( $request->getCheck( 'addns' ) && $wgExportFromNamespaces ) {
00080                         $page = $request->getText( 'pages' );
00081                         $nsindex = $request->getText( 'nsindex', '' );
00082 
00083                         if ( strval( $nsindex ) !== ''  ) {
00087                                 $nspages = $this->getPagesFromNamespace( $nsindex );
00088                                 if ( $nspages ) {
00089                                         $page .= "\n" . implode( "\n", $nspages );
00090                                 }
00091                         }
00092                 }
00093                 elseif( $request->getCheck( 'exportall' ) && $wgExportAllowAll ) {
00094                         $this->doExport = true;
00095                         $exportall = true;
00096 
00097                         /* Although $page and $history are not used later on, we
00098                         nevertheless set them to avoid that PHP notices about using
00099                         undefined variables foul up our XML output (see call to
00100                         doExport(...) further down) */
00101                         $page = '';
00102                         $history = '';
00103                 }
00104                 elseif( $request->wasPosted() && $par == '' ) {
00105                         $page = $request->getText( 'pages' );
00106                         $this->curonly = $request->getCheck( 'curonly' );
00107                         $rawOffset = $request->getVal( 'offset' );
00108 
00109                         if( $rawOffset ) {
00110                                 $offset = wfTimestamp( TS_MW, $rawOffset );
00111                         } else {
00112                                 $offset = null;
00113                         }
00114 
00115                         $limit = $request->getInt( 'limit' );
00116                         $dir = $request->getVal( 'dir' );
00117                         $history = array(
00118                                 'dir' => 'asc',
00119                                 'offset' => false,
00120                                 'limit' => $wgExportMaxHistory,
00121                         );
00122                         $historyCheck = $request->getCheck( 'history' );
00123 
00124                         if ( $this->curonly ) {
00125                                 $history = WikiExporter::CURRENT;
00126                         } elseif ( !$historyCheck ) {
00127                                 if ( $limit > 0 && ($wgExportMaxHistory == 0 || $limit < $wgExportMaxHistory ) ) {
00128                                         $history['limit'] = $limit;
00129                                 }
00130                                 if ( !is_null( $offset ) ) {
00131                                         $history['offset'] = $offset;
00132                                 }
00133                                 if ( strtolower( $dir ) == 'desc' ) {
00134                                         $history['dir'] = 'desc';
00135                                 }
00136                         }
00137 
00138                         if( $page != '' ) {
00139                                 $this->doExport = true;
00140                         }
00141                 } else {
00142                         // Default to current-only for GET requests.
00143                         $page = $request->getText( 'pages', $par );
00144                         $historyCheck = $request->getCheck( 'history' );
00145 
00146                         if( $historyCheck ) {
00147                                 $history = WikiExporter::FULL;
00148                         } else {
00149                                 $history = WikiExporter::CURRENT;
00150                         }
00151 
00152                         if( $page != '' ) {
00153                                 $this->doExport = true;
00154                         }
00155                 }
00156 
00157                 if( !$wgExportAllowHistory ) {
00158                         // Override
00159                         $history = WikiExporter::CURRENT;
00160                 }
00161 
00162                 $list_authors = $request->getCheck( 'listauthors' );
00163                 if ( !$this->curonly || !$wgExportAllowListContributors ) {
00164                         $list_authors = false ;
00165                 }
00166 
00167                 if ( $this->doExport ) {
00168                         $this->getOutput()->disable();
00169 
00170                         // Cancel output buffering and gzipping if set
00171                         // This should provide safer streaming for pages with history
00172                         wfResetOutputBuffers();
00173                         $request->response()->header( "Content-type: application/xml; charset=utf-8" );
00174 
00175                         if( $request->getCheck( 'wpDownload' ) ) {
00176                                 // Provide a sane filename suggestion
00177                                 $filename = urlencode( $wgSitename . '-' . wfTimestampNow() . '.xml' );
00178                                 $request->response()->header( "Content-disposition: attachment;filename={$filename}" );
00179                         }
00180 
00181                         $this->doExport( $page, $history, $list_authors, $exportall );
00182 
00183                         return;
00184                 }
00185 
00186                 $out = $this->getOutput();
00187                 $out->addWikiMsg( 'exporttext' );
00188 
00189                 $form = Xml::openElement( 'form', array( 'method' => 'post',
00190                         'action' => $this->getTitle()->getLocalUrl( 'action=submit' ) ) );
00191                 $form .= Xml::inputLabel( $this->msg( 'export-addcattext' )->text(), 'catname', 'catname', 40 ) . '&#160;';
00192                 $form .= Xml::submitButton( $this->msg( 'export-addcat' )->text(), array( 'name' => 'addcat' ) ) . '<br />';
00193 
00194                 if ( $wgExportFromNamespaces ) {
00195                         $form .= Html::namespaceSelector(
00196                                 array(
00197                                         'selected' => $nsindex,
00198                                         'label' => $this->msg( 'export-addnstext' )->text()
00199                                 ), array(
00200                                         'name'  => 'nsindex',
00201                                         'id'    => 'namespace',
00202                                         'class' => 'namespaceselector',
00203                                 )
00204                         ) . '&#160;';
00205                         $form .= Xml::submitButton( $this->msg( 'export-addns' )->text(), array( 'name' => 'addns' ) ) . '<br />';
00206                 }
00207 
00208                 if ( $wgExportAllowAll ) {
00209                         $form .= Xml::checkLabel(
00210                                 $this->msg( 'exportall' )->text(),
00211                                 'exportall',
00212                                 'exportall',
00213                                 $request->wasPosted() ? $request->getCheck( 'exportall' ) : false
00214                         ) . '<br />';
00215                 }
00216 
00217                 $form .= Xml::element( 'textarea', array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ), $page, false );
00218                 $form .= '<br />';
00219 
00220                 if( $wgExportAllowHistory ) {
00221                         $form .= Xml::checkLabel(
00222                                 $this->msg( 'exportcuronly' )->text(),
00223                                 'curonly',
00224                                 'curonly',
00225                                 $request->wasPosted() ? $request->getCheck( 'curonly' ) : true
00226                         ) . '<br />';
00227                 } else {
00228                         $out->addWikiMsg( 'exportnohistory' );
00229                 }
00230 
00231                 $form .= Xml::checkLabel(
00232                         $this->msg( 'export-templates' )->text(),
00233                         'templates',
00234                         'wpExportTemplates',
00235                         $request->wasPosted() ? $request->getCheck( 'templates' ) : false
00236                 ) . '<br />';
00237 
00238                 if( $wgExportMaxLinkDepth || $this->userCanOverrideExportDepth() ) {
00239                         $form .= Xml::inputLabel( $this->msg( 'export-pagelinks' )->text(), 'pagelink-depth', 'pagelink-depth', 20, 0 ) . '<br />';
00240                 }
00241                 // Enable this when we can do something useful exporting/importing image information. :)
00242                 //$form .= Xml::checkLabel( $this->msg( 'export-images' )->text(), 'images', 'wpExportImages', false ) . '<br />';
00243                 $form .= Xml::checkLabel(
00244                         $this->msg( 'export-download' )->text(),
00245                         'wpDownload',
00246                         'wpDownload',
00247                         $request->wasPosted() ? $request->getCheck( 'wpDownload' ) : true
00248                 ) . '<br />';
00249 
00250                 if ( $wgExportAllowListContributors ) {
00251                         $form .= Xml::checkLabel(
00252                                 $this->msg( 'exportlistauthors' )->text(),
00253                                 'listauthors',
00254                                 'listauthors',
00255                                 $request->wasPosted() ? $request->getCheck( 'listauthors' ) : false
00256                         ) . '<br />';
00257                 }
00258 
00259                 $form .= Xml::submitButton( $this->msg( 'export-submit' )->text(), Linker::tooltipAndAccesskeyAttribs( 'export' ) );
00260                 $form .= Xml::closeElement( 'form' );
00261 
00262                 $out->addHTML( $form );
00263         }
00264 
00268         private function userCanOverrideExportDepth() {
00269                 return $this->getUser()->isAllowed( 'override-export-depth' );
00270         }
00271 
00281         private function doExport( $page, $history, $list_authors, $exportall ) {
00282 
00283                 // If we are grabbing everything, enable full history and ignore the rest
00284                 if ( $exportall ) {
00285                         $history = WikiExporter::FULL;
00286                 } else {
00287 
00288                         $pageSet = array(); // Inverted index of all pages to look up
00289                 
00290                         // Split up and normalize input
00291                         foreach( explode( "\n", $page ) as $pageName ) {
00292                                 $pageName = trim( $pageName );
00293                                 $title = Title::newFromText( $pageName );
00294                                 if( $title && $title->getInterwiki() == '' && $title->getText() !== '' ) {
00295                                         // Only record each page once!
00296                                         $pageSet[$title->getPrefixedText()] = true;
00297                                 }
00298                         }
00299 
00300                         // Set of original pages to pass on to further manipulation...
00301                         $inputPages = array_keys( $pageSet );
00302 
00303                         // Look up any linked pages if asked...
00304                         if( $this->templates ) {
00305                                 $pageSet = $this->getTemplates( $inputPages, $pageSet );
00306                         }
00307                         $linkDepth = $this->pageLinkDepth;
00308                         if( $linkDepth ) {
00309                                 $pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
00310                         }
00311 
00312                         /*
00313                          // Enable this when we can do something useful exporting/importing image information. :)
00314                          if( $this->images ) ) {
00315                          $pageSet = $this->getImages( $inputPages, $pageSet );
00316                          }
00317                         */
00318 
00319                         $pages = array_keys( $pageSet );
00320 
00321                         // Normalize titles to the same format and remove dupes, see bug 17374
00322                         foreach( $pages as $k => $v ) {
00323                                 $pages[$k] = str_replace( " ", "_", $v );
00324                         }
00325 
00326                         $pages = array_unique( $pages );
00327                 }
00328 
00329                 /* Ok, let's get to it... */
00330                 if( $history == WikiExporter::CURRENT ) {
00331                         $lb = false;
00332                         $db = wfGetDB( DB_SLAVE );
00333                         $buffer = WikiExporter::BUFFER;
00334                 } else {
00335                         // Use an unbuffered query; histories may be very long!
00336                         $lb = wfGetLBFactory()->newMainLB();
00337                         $db = $lb->getConnection( DB_SLAVE );
00338                         $buffer = WikiExporter::STREAM;
00339 
00340                         // This might take a while... :D
00341                         wfSuppressWarnings();
00342                         set_time_limit(0);
00343                         wfRestoreWarnings();
00344                 }
00345 
00346                 $exporter = new WikiExporter( $db, $history, $buffer );
00347                 $exporter->list_authors = $list_authors;
00348                 $exporter->openStream();
00349 
00350                 if ( $exportall ) {
00351                         $exporter->allPages();
00352                 } else {
00353                         foreach( $pages as $page ) {
00354                         /*
00355                          if( $wgExportMaxHistory && !$this->curonly ) {
00356                          $title = Title::newFromText( $page );
00357                          if( $title ) {
00358                          $count = Revision::countByTitle( $db, $title );
00359                          if( $count > $wgExportMaxHistory ) {
00360                          wfDebug( __FUNCTION__ .
00361                          ": Skipped $page, $count revisions too big\n" );
00362                          continue;
00363                          }
00364                          }
00365                          }*/
00366                         #Bug 8824: Only export pages the user can read
00367                                 $title = Title::newFromText( $page );
00368                                 if( is_null( $title ) ) {
00369                                         continue; #TODO: perhaps output an <error> tag or something.
00370                                 }
00371                                 if( !$title->userCan( 'read', $this->getUser() ) ) {
00372                                         continue; #TODO: perhaps output an <error> tag or something.
00373                                 }
00374 
00375                                 $exporter->pageByTitle( $title );
00376                         }
00377                 }
00378 
00379                 $exporter->closeStream();
00380 
00381                 if( $lb ) {
00382                         $lb->closeAll();
00383                 }
00384         }
00385 
00390         private function getPagesFromCategory( $title ) {
00391                 global $wgContLang;
00392 
00393                 $name = $title->getDBkey();
00394 
00395                 $dbr = wfGetDB( DB_SLAVE );
00396                 $res = $dbr->select(
00397                         array( 'page', 'categorylinks' ),
00398                         array( 'page_namespace', 'page_title' ),
00399                         array( 'cl_from=page_id', 'cl_to' => $name ),
00400                         __METHOD__,
00401                         array( 'LIMIT' => '5000' )
00402                 );
00403 
00404                 $pages = array();
00405 
00406                 foreach ( $res as $row ) {
00407                         $n = $row->page_title;
00408                         if ($row->page_namespace) {
00409                                 $ns = $wgContLang->getNsText( $row->page_namespace );
00410                                 $n = $ns . ':' . $n;
00411                         }
00412 
00413                         $pages[] = $n;
00414                 }
00415                 return $pages;
00416         }
00417 
00422         private function getPagesFromNamespace( $nsindex ) {
00423                 global $wgContLang;
00424 
00425                 $dbr = wfGetDB( DB_SLAVE );
00426                 $res = $dbr->select(
00427                         'page',
00428                         array( 'page_namespace', 'page_title' ),
00429                         array( 'page_namespace' => $nsindex ),
00430                         __METHOD__,
00431                         array( 'LIMIT' => '5000' )
00432                 );
00433 
00434                 $pages = array();
00435 
00436                 foreach ( $res as $row ) {
00437                         $n = $row->page_title;
00438 
00439                         if ( $row->page_namespace ) {
00440                                 $ns = $wgContLang->getNsText( $row->page_namespace );
00441                                 $n = $ns . ':' . $n;
00442                         }
00443 
00444                         $pages[] = $n;
00445                 }
00446                 return $pages;
00447         }
00448 
00455         private function getTemplates( $inputPages, $pageSet ) {
00456                 return $this->getLinks( $inputPages, $pageSet,
00457                         'templatelinks',
00458                         array( 'namespace' => 'tl_namespace', 'title' => 'tl_title' ),
00459                         array( 'page_id=tl_from' )
00460                 );
00461         }
00462 
00468         private function validateLinkDepth( $depth ) {
00469                 global $wgExportMaxLinkDepth;
00470 
00471                 if( $depth < 0 ) {
00472                         return 0;
00473                 }
00474 
00475                 if ( !$this->userCanOverrideExportDepth() ) {
00476                         if( $depth > $wgExportMaxLinkDepth ) {
00477                                 return $wgExportMaxLinkDepth;
00478                         }
00479                 }
00480 
00481                 /*
00482                  * There's a HARD CODED limit of 5 levels of recursion here to prevent a
00483                  * crazy-big export from being done by someone setting the depth
00484                  * number too high. In other words, last resort safety net.
00485                  */
00486                 return intval( min( $depth, 5 ) );
00487         }
00488 
00496         private function getPageLinks( $inputPages, $pageSet, $depth ) {
00497                 for( ; $depth > 0; --$depth ) {
00498                         $pageSet = $this->getLinks(
00499                                 $inputPages, $pageSet, 'pagelinks',
00500                                 array( 'namespace' => 'pl_namespace', 'title' => 'pl_title' ),
00501                                 array( 'page_id=pl_from' )
00502                         );
00503                         $inputPages = array_keys( $pageSet );
00504                 }
00505 
00506                 return $pageSet;
00507         }
00508 
00517         private function getImages( $inputPages, $pageSet ) {
00518                 return $this->getLinks(
00519                         $inputPages,
00520                         $pageSet,
00521                         'imagelinks',
00522                         array( 'namespace' => NS_FILE, 'title' => 'il_to' ),
00523                         array( 'page_id=il_from' )
00524                 );
00525         }
00526 
00531         private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
00532                 $dbr = wfGetDB( DB_SLAVE );
00533 
00534                 foreach( $inputPages as $page ) {
00535                         $title = Title::newFromText( $page );
00536 
00537                         if( $title ) {
00538                                 $pageSet[$title->getPrefixedText()] = true;
00541                                 $result = $dbr->select(
00542                                         array( 'page', $table ),
00543                                         $fields,
00544                                         array_merge(
00545                                                 $join,
00546                                                 array(
00547                                                         'page_namespace' => $title->getNamespace(),
00548                                                         'page_title' => $title->getDBkey()
00549                                                 )
00550                                         ),
00551                                         __METHOD__
00552                                 );
00553 
00554                                 foreach( $result as $row ) {
00555                                         $template = Title::makeTitle( $row->namespace, $row->title );
00556                                         $pageSet[$template->getPrefixedText()] = true;
00557                                 }
00558                         }
00559                 }
00560 
00561                 return $pageSet;
00562         }
00563 
00564 }