MediaWiki  REL1_24
SpecialExport.php
Go to the documentation of this file.
00001 <?php
/**
 * Special page "Export".
 *
 * Without an export request it renders a form for choosing pages (by name,
 * by category, or by namespace); with one it streams the chosen pages as XML
 * through WikiExporter.
 */
class SpecialExport extends SpecialPage {
    /** @var bool Export current revisions only; taken from the 'curonly' checkbox on POST */
    private $curonly;

    /** @var bool Whether this request should stream XML instead of showing the form */
    private $doExport;

    /** @var int Validated number of page-link levels to follow (see validateLinkDepth()) */
    private $pageLinkDepth;

    /** @var bool Whether templates used by the requested pages are added to the export */
    private $templates;

    /** @var bool Value of the 'images' request flag; not acted upon yet */
    private $images;

    /**
     * Registers the page under the special-page name 'Export'.
     */
    public function __construct() {
        parent::__construct( 'Export' );
    }

    /**
     * Entry point: either streams the XML export or renders the export form.
     *
     * The request is classified into one of five modes, checked in this order:
     *  - 'addcat':    append the members of a category to the page list, show the form
     *  - 'addns':     append the pages of a namespace to the page list, show the form
     *  - 'exportall': export every page (only if ExportAllowAll is enabled)
     *  - POST without a subpage: export the listed pages with the posted history options
     *  - anything else (GET, or a subpage given): export the listed pages, or $par
     *
     * @param string|null $par Subpage part of the URL, used as the default page list
     */
    public function execute( $par ) {
        $this->setHeaders();
        $this->outputHeader();
        $config = $this->getConfig();
        $request = $this->getRequest();

        // Set some variables
        $this->curonly = true;
        $this->doExport = false;
        $this->templates = $request->getCheck( 'templates' );
        $this->images = $request->getCheck( 'images' ); // Doesn't do anything yet
        $this->pageLinkDepth = $this->validateLinkDepth(
            $request->getIntOrNull( 'pagelink-depth' )
        );
        $nsindex = '';
        $exportall = false;
        // Defined up front so that no branch below can leave it unset.
        $history = WikiExporter::CURRENT;

        if ( $request->getCheck( 'addcat' ) ) {
            $page = $request->getText( 'pages' );
            $catname = $request->getText( 'catname' );

            if ( strval( $catname ) !== '' ) {
                $t = Title::makeTitleSafe( NS_MAIN, $catname );
                if ( $t ) {
                    $catpages = $this->getPagesFromCategory( $t );
                    if ( $catpages ) {
                        $page .= "\n" . implode( "\n", $catpages );
                    }
                }
            }
        } elseif ( $request->getCheck( 'addns' ) && $config->get( 'ExportFromNamespaces' ) ) {
            $page = $request->getText( 'pages' );
            $nsindex = $request->getText( 'nsindex', '' );

            if ( strval( $nsindex ) !== '' ) {
                $nspages = $this->getPagesFromNamespace( $nsindex );
                if ( $nspages ) {
                    $page .= "\n" . implode( "\n", $nspages );
                }
            }
        } elseif ( $request->getCheck( 'exportall' ) && $config->get( 'ExportAllowAll' ) ) {
            $this->doExport = true;
            $exportall = true;

            // doExport() ignores the page list and forces full history when
            // $exportall is set; $page only needs to be defined.
            $page = '';
        } elseif ( $request->wasPosted() && $par == '' ) {
            $page = $request->getText( 'pages' );
            $this->curonly = $request->getCheck( 'curonly' );
            $history = $this->getPostedHistoryOptions();

            if ( $page != '' ) {
                $this->doExport = true;
            }
        } else {
            // Default to current-only for GET requests.
            $page = $request->getText( 'pages', $par );
            $history = $request->getCheck( 'history' )
                ? WikiExporter::FULL
                : WikiExporter::CURRENT;

            if ( $page != '' ) {
                $this->doExport = true;
            }
        }

        if ( !$config->get( 'ExportAllowHistory' ) ) {
            // Override
            $history = WikiExporter::CURRENT;
        }

        // Contributor lists are only offered for current-revision exports
        // and only when the wiki allows them.
        $listAuthors = $this->curonly
            && $config->get( 'ExportAllowListContributors' )
            && $request->getCheck( 'listauthors' );

        if ( $this->doExport ) {
            $this->sendExportHeaders();
            $this->doExport( $page, $history, $listAuthors, $exportall );

            return;
        }

        $out = $this->getOutput();
        $out->addWikiMsg( 'exporttext' );
        if ( !$config->get( 'ExportAllowHistory' ) ) {
            $out->addWikiMsg( 'exportnohistory' );
        }
        $out->addHTML( $this->buildExportForm( $page, $nsindex ) );
    }

    /**
     * Builds the history selector for a posted export form.
     *
     * Relies on $this->curonly having been set from the request already.
     *
     * @return int|array WikiExporter::CURRENT when only current revisions are
     *  wanted, otherwise an array with the keys 'dir', 'offset' and 'limit'
     */
    private function getPostedHistoryOptions() {
        if ( $this->curonly ) {
            return WikiExporter::CURRENT;
        }

        $request = $this->getRequest();
        $maxHistory = $this->getConfig()->get( 'ExportMaxHistory' );
        $history = array(
            'dir' => 'asc',
            'offset' => false,
            'limit' => $maxHistory,
        );

        if ( $request->getCheck( 'history' ) ) {
            // 'history' checked: keep the defaults, ignoring limit/offset/dir.
            return $history;
        }

        // A posted limit is honoured only if it is positive and below the
        // configured maximum (a maximum of 0 accepts any positive limit).
        $limit = $request->getInt( 'limit' );
        if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
            $history['limit'] = $limit;
        }

        $rawOffset = $request->getVal( 'offset' );
        if ( $rawOffset ) {
            $history['offset'] = wfTimestamp( TS_MW, $rawOffset );
        }

        // getVal() yields null when the parameter is absent; do not feed
        // that to strtolower().
        $dir = $request->getVal( 'dir' );
        if ( is_string( $dir ) && strtolower( $dir ) === 'desc' ) {
            $history['dir'] = 'desc';
        }

        return $history;
    }

    /**
     * Switches the response from the normal page output to a raw XML stream
     * and, if 'wpDownload' is set, marks it as a file download.
     */
    private function sendExportHeaders() {
        $request = $this->getRequest();

        $this->getOutput()->disable();

        // Cancel output buffering and gzipping if set
        // This should provide safer streaming for pages with history
        wfResetOutputBuffers();
        $request->response()->header( "Content-type: application/xml; charset=utf-8" );

        if ( $request->getCheck( 'wpDownload' ) ) {
            // Provide a sane filename suggestion
            $filename = urlencode(
                $this->getConfig()->get( 'Sitename' ) . '-' . wfTimestampNow() . '.xml'
            );
            $request->response()->header( "Content-disposition: attachment;filename={$filename}" );
        }
    }

    /**
     * Assembles the HTML of the export form.
     *
     * Checkboxes reflect the posted state on POST requests and fall back to
     * their defaults otherwise.
     *
     * @param string $page Newline-separated page list shown in the textarea
     * @param string $nsindex Namespace preselected in the namespace selector
     * @return string HTML
     */
    private function buildExportForm( $page, $nsindex ) {
        $config = $this->getConfig();
        $request = $this->getRequest();
        $posted = $request->wasPosted();

        $form = Xml::openElement( 'form', array( 'method' => 'post',
            'action' => $this->getPageTitle()->getLocalURL( 'action=submit' ) ) );
        $form .= Xml::inputLabel(
            $this->msg( 'export-addcattext' )->text(),
            'catname',
            'catname',
            40
        ) . '&#160;';
        $form .= Xml::submitButton(
            $this->msg( 'export-addcat' )->text(),
            array( 'name' => 'addcat' )
        ) . '<br />';

        if ( $config->get( 'ExportFromNamespaces' ) ) {
            $form .= Html::namespaceSelector(
                array(
                    'selected' => $nsindex,
                    'label' => $this->msg( 'export-addnstext' )->text()
                ), array(
                    'name' => 'nsindex',
                    'id' => 'namespace',
                    'class' => 'namespaceselector',
                )
            ) . '&#160;';
            $form .= Xml::submitButton(
                $this->msg( 'export-addns' )->text(),
                array( 'name' => 'addns' )
            ) . '<br />';
        }

        if ( $config->get( 'ExportAllowAll' ) ) {
            $form .= Xml::checkLabel(
                $this->msg( 'exportall' )->text(),
                'exportall',
                'exportall',
                $posted ? $request->getCheck( 'exportall' ) : false
            ) . '<br />';
        }

        $form .= Xml::element(
            'textarea',
            array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ),
            $page,
            false
        );
        $form .= '<br />';

        if ( $config->get( 'ExportAllowHistory' ) ) {
            $form .= Xml::checkLabel(
                $this->msg( 'exportcuronly' )->text(),
                'curonly',
                'curonly',
                $posted ? $request->getCheck( 'curonly' ) : true
            ) . '<br />';
        }

        $form .= Xml::checkLabel(
            $this->msg( 'export-templates' )->text(),
            'templates',
            'wpExportTemplates',
            $posted ? $request->getCheck( 'templates' ) : false
        ) . '<br />';

        if ( $config->get( 'ExportMaxLinkDepth' ) || $this->userCanOverrideExportDepth() ) {
            $form .= Xml::inputLabel(
                $this->msg( 'export-pagelinks' )->text(),
                'pagelink-depth',
                'pagelink-depth',
                20,
                0
            ) . '<br />';
        }

        /* Enable this when we can do something useful exporting/importing image information.
        $form .= Xml::checkLabel(
                $this->msg( 'export-images' )->text(),
                'images',
                'wpExportImages',
                false
            ) . '<br />';
        */
        $form .= Xml::checkLabel(
            $this->msg( 'export-download' )->text(),
            'wpDownload',
            'wpDownload',
            $posted ? $request->getCheck( 'wpDownload' ) : true
        ) . '<br />';

        if ( $config->get( 'ExportAllowListContributors' ) ) {
            $form .= Xml::checkLabel(
                $this->msg( 'exportlistauthors' )->text(),
                'listauthors',
                'listauthors',
                $posted ? $request->getCheck( 'listauthors' ) : false
            ) . '<br />';
        }

        $form .= Xml::submitButton(
            $this->msg( 'export-submit' )->text(),
            Linker::tooltipAndAccesskeyAttribs( 'export' )
        );
        $form .= Xml::closeElement( 'form' );

        return $form;
    }

    /**
     * @return bool Whether the current user holds the 'override-export-depth' right
     */
    private function userCanOverrideExportDepth() {
        return $this->getUser()->isAllowed( 'override-export-depth' );
    }

    /**
     * Streams the XML export.
     *
     * @param string $page Newline-separated list of page names; ignored when $exportall is set
     * @param int|array $history WikiExporter::CURRENT, WikiExporter::FULL, or an
     *  array of history options; forced to WikiExporter::FULL when $exportall is set
     * @param bool $list_authors Passed on to the exporter's list_authors property
     * @param bool $exportall Export every page instead of the ones named in $page
     */
    private function doExport( $page, $history, $list_authors, $exportall ) {
        $pages = array();

        // If we are grabbing everything, enable full history and ignore the rest
        if ( $exportall ) {
            $history = WikiExporter::FULL;
        } else {
            $pages = $this->getExportPageList( $page );
        }

        /* Ok, let's get to it... */
        if ( $history == WikiExporter::CURRENT ) {
            $lb = false;
            $db = wfGetDB( DB_SLAVE );
            $buffer = WikiExporter::BUFFER;
        } else {
            // Use an unbuffered query; histories may be very long!
            $lb = wfGetLBFactory()->newMainLB();
            $db = $lb->getConnection( DB_SLAVE );
            $buffer = WikiExporter::STREAM;

            // This might take a while... :D
            wfSuppressWarnings();
            set_time_limit( 0 );
            wfRestoreWarnings();
        }

        $exporter = new WikiExporter( $db, $history, $buffer );
        $exporter->list_authors = $list_authors;
        $exporter->openStream();

        if ( $exportall ) {
            $exporter->allPages();
        } else {
            foreach ( $pages as $pageName ) {
                # Bug 8824: Only export pages the user can read
                $title = Title::newFromText( $pageName );

                // @todo Perhaps output an <error> tag or something for
                // titles that are invalid or unreadable.
                if ( !is_null( $title ) && $title->userCan( 'read', $this->getUser() ) ) {
                    $exporter->pageByTitle( $title );
                }
            }
        }

        $exporter->closeStream();

        if ( $lb ) {
            // Only the dedicated load balancer created above is closed here.
            $lb->closeAll();
        }
    }

    /**
     * Turns the raw page-list text into the final list of page names to export.
     *
     * Invalid, external (interwiki) and empty titles are dropped. Templates and
     * linked pages are added when requested by $this->templates and
     * $this->pageLinkDepth.
     *
     * @param string $page Newline-separated list of page names
     * @return array Unique prefixed page names with spaces replaced by underscores
     */
    private function getExportPageList( $page ) {
        $pageSet = array(); // Inverted index of all pages to look up

        // Split up and normalize input
        foreach ( explode( "\n", $page ) as $pageName ) {
            $title = Title::newFromText( trim( $pageName ) );
            if ( $title && !$title->isExternal() && $title->getText() !== '' ) {
                // Only record each page once!
                $pageSet[$title->getPrefixedText()] = true;
            }
        }

        // Set of original pages to pass on to further manipulation...
        $inputPages = array_keys( $pageSet );

        // Look up any linked pages if asked...
        if ( $this->templates ) {
            $pageSet = $this->getTemplates( $inputPages, $pageSet );
        }
        if ( $this->pageLinkDepth ) {
            $pageSet = $this->getPageLinks( $inputPages, $pageSet, $this->pageLinkDepth );
        }

        // Image export ($this->images / getImages()) is not wired in yet;
        // enable it when image information can be exported/imported usefully.

        // Normalize titles to the same format and remove dupes, see bug 17374
        return array_unique( str_replace( ' ', '_', array_keys( $pageSet ) ) );
    }

    /**
     * Lists up to 5000 pages that are members of a category.
     *
     * @param Title $title Title whose DB key is matched against cl_to
     * @return array Page names, namespace-prefixed for non-main namespaces
     */
    private function getPagesFromCategory( $title ) {
        $dbr = wfGetDB( DB_SLAVE );
        $res = $dbr->select(
            array( 'page', 'categorylinks' ),
            array( 'page_namespace', 'page_title' ),
            array( 'cl_from=page_id', 'cl_to' => $title->getDBkey() ),
            __METHOD__,
            array( 'LIMIT' => '5000' )
        );

        return $this->getPrefixedNamesFromRows( $res );
    }

    /**
     * Lists up to 5000 pages of a namespace.
     *
     * @param int|string $nsindex Namespace index matched against page_namespace
     * @return array Page names, namespace-prefixed for non-main namespaces
     */
    private function getPagesFromNamespace( $nsindex ) {
        $dbr = wfGetDB( DB_SLAVE );
        $res = $dbr->select(
            'page',
            array( 'page_namespace', 'page_title' ),
            array( 'page_namespace' => $nsindex ),
            __METHOD__,
            array( 'LIMIT' => '5000' )
        );

        return $this->getPrefixedNamesFromRows( $res );
    }

    /**
     * Converts page rows into page names.
     *
     * Rows with a falsy page_namespace yield the bare page_title; all others
     * get the content-language namespace name and a colon prepended.
     *
     * @param Traversable|array $res Rows exposing page_namespace and page_title
     * @return array
     */
    private function getPrefixedNamesFromRows( $res ) {
        global $wgContLang;

        $pages = array();

        foreach ( $res as $row ) {
            $name = $row->page_title;
            if ( $row->page_namespace ) {
                $name = $wgContLang->getNsText( $row->page_namespace ) . ':' . $name;
            }

            $pages[] = $name;
        }

        return $pages;
    }

    /**
     * Expands the page set by the templates used on the given pages.
     *
     * @param array $inputPages List of page names to look up
     * @param array $pageSet Associative array indexed by page name
     * @return array The extended page set
     */
    private function getTemplates( $inputPages, $pageSet ) {
        return $this->getLinks( $inputPages, $pageSet,
            'templatelinks',
            array( 'namespace' => 'tl_namespace', 'title' => 'tl_title' ),
            array( 'page_id=tl_from' )
        );
    }

    /**
     * Clamps a requested link depth to what this user may use.
     *
     * Users without the 'override-export-depth' right are limited to
     * ExportMaxLinkDepth. On top of that, everyone is limited to a hard-coded
     * maximum of 5, including the case where the configured maximum itself
     * is larger than 5.
     *
     * @param int|null $depth Requested depth; null is treated as 0
     * @return int Depth between 0 and 5
     */
    private function validateLinkDepth( $depth ) {
        $depth = intval( $depth );
        if ( $depth <= 0 ) {
            return 0;
        }

        if ( !$this->userCanOverrideExportDepth() ) {
            $maxLinkDepth = intval( $this->getConfig()->get( 'ExportMaxLinkDepth' ) );
            $depth = min( $depth, $maxLinkDepth );
        }

        /*
         * There's a HARD CODED limit of 5 levels of recursion here to prevent a
         * crazy-big export from being done by someone setting the depth
         * number too high. In other words, last resort safety net.
         */
        $hardLimit = 5;

        return max( 0, min( $depth, $hardLimit ) );
    }

    /**
     * Expands the page set by following page links up to $depth levels.
     *
     * Each level looks up only pages that have not been looked up at an
     * earlier level; re-querying those could not add anything new.
     *
     * @param array $inputPages List of page names to start from
     * @param array $pageSet Associative array indexed by page name
     * @param int $depth Number of link levels to follow
     * @return array The extended page set
     */
    private function getPageLinks( $inputPages, $pageSet, $depth ) {
        // Pages whose links have already been looked up, keyed by page name.
        $expanded = array();

        for ( $level = 0; $level < $depth; $level++ ) {
            $pageSet = $this->getLinks(
                $inputPages, $pageSet, 'pagelinks',
                array( 'namespace' => 'pl_namespace', 'title' => 'pl_title' ),
                array( 'page_id=pl_from' )
            );

            $expanded += array_fill_keys( $inputPages, true );
            $inputPages = array_keys( array_diff_key( $pageSet, $expanded ) );

            if ( !$inputPages ) {
                // Nothing new was discovered; deeper levels would be no-ops.
                break;
            }
        }

        return $pageSet;
    }

    /**
     * Expands the page set by the files used on the given pages.
     *
     * Not called anywhere in this class at present; the only reference is
     * in a comment.
     *
     * @param array $inputPages List of page names to look up
     * @param array $pageSet Associative array indexed by page name
     * @return array The extended page set
     */
    private function getImages( $inputPages, $pageSet ) {
        return $this->getLinks(
            $inputPages,
            $pageSet,
            'imagelinks',
            array( 'namespace' => NS_FILE, 'title' => 'il_to' ),
            array( 'page_id=il_from' )
        );
    }

    /**
     * Adds every input page, plus the targets found for it in a link table,
     * to the page set. Runs one query per valid input page.
     *
     * @param array $inputPages List of page names; unparsable names are skipped
     * @param array $pageSet Associative array indexed by page name
     * @param string $table Link table joined against 'page'
     * @param array $fields Select fields; must provide the aliases
     *  'namespace' and 'title'
     * @param array $join Conditions joining 'page' to $table
     * @return array The extended page set
     */
    private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
        $dbr = wfGetDB( DB_SLAVE );

        foreach ( $inputPages as $page ) {
            $title = Title::newFromText( $page );
            if ( !$title ) {
                continue;
            }

            $pageSet[$title->getPrefixedText()] = true;

            $conds = array_merge(
                $join,
                array(
                    'page_namespace' => $title->getNamespace(),
                    'page_title' => $title->getDBkey()
                )
            );
            $result = $dbr->select(
                array( 'page', $table ),
                $fields,
                $conds,
                __METHOD__
            );

            foreach ( $result as $row ) {
                $target = Title::makeTitle( $row->namespace, $row->title );
                $pageSet[$target->getPrefixedText()] = true;
            }
        }

        return $pageSet;
    }

    /**
     * @return string Always 'pagetools'
     */
    protected function getGroupName() {
        return 'pagetools';
    }
}