<?php
// MediaWiki REL1_22 - SpecialExport.php
// (Header recovered from the generated source listing of this file.)

/**
 * Special page that lets a user pick a set of pages and stream them out
 * as an XML export produced by WikiExporter.
 *
 * When no export is requested (or nothing was selected) the page renders
 * the selection form instead.
 */
class SpecialExport extends SpecialPage {
    private $curonly, $doExport, $pageLinkDepth, $templates;
    private $images;

    public function __construct() {
        parent::__construct( 'Export' );
    }

    /**
     * Entry point: interpret the request, then either stream the XML
     * export or render the export form.
     *
     * @param string|null $par Subpage text; used as the default value of
     *   the 'pages' request parameter on non-POST requests.
     */
    public function execute( $par ) {
        global $wgSitename, $wgExportAllowListContributors, $wgExportFromNamespaces;
        global $wgExportAllowHistory, $wgExportMaxHistory, $wgExportMaxLinkDepth;
        global $wgExportAllowAll;

        $this->setHeaders();
        $this->outputHeader();

        // Set some variables
        $this->curonly = true;
        $this->doExport = false;
        $request = $this->getRequest();
        $this->templates = $request->getCheck( 'templates' );
        $this->images = $request->getCheck( 'images' ); // Doesn't do anything yet
        $this->pageLinkDepth = $this->validateLinkDepth(
            $request->getIntOrNull( 'pagelink-depth' )
        );
        $nsindex = '';
        $exportall = false;
        // Defensive default: the 'addcat' and 'addns' branches below never
        // assign $history. They also never export, but this guarantees the
        // variable is defined on every path.
        $history = WikiExporter::CURRENT;

        if ( $request->getCheck( 'addcat' ) ) {
            // "Add pages from category": extend the page list and redisplay the form.
            $page = $request->getText( 'pages' );
            $catname = $request->getText( 'catname' );

            if ( $catname !== '' && $catname !== null && $catname !== false ) {
                $t = Title::makeTitleSafe( NS_MAIN, $catname );
                if ( $t ) {
                    $catpages = $this->getPagesFromCategory( $t );
                    if ( $catpages ) {
                        $page .= "\n" . implode( "\n", $catpages );
                    }
                }
            }
        } elseif ( $request->getCheck( 'addns' ) && $wgExportFromNamespaces ) {
            // "Add pages from namespace": extend the page list and redisplay the form.
            $page = $request->getText( 'pages' );
            $nsindex = $request->getText( 'nsindex', '' );

            if ( strval( $nsindex ) !== '' ) {
                $nspages = $this->getPagesFromNamespace( $nsindex );
                if ( $nspages ) {
                    $page .= "\n" . implode( "\n", $nspages );
                }
            }
        } elseif ( $request->getCheck( 'exportall' ) && $wgExportAllowAll ) {
            $this->doExport = true;
            $exportall = true;

            /* Although $page and $history are not used later on, we
            nevertheless set them to avoid that PHP notices about using
            undefined variables foul up our XML output (see call to
            doExport(...) further down) */
            $page = '';
            $history = '';
        } elseif ( $request->wasPosted() && $par == '' ) {
            // Form submission: honour the history options from the request.
            $page = $request->getText( 'pages' );
            $this->curonly = $request->getCheck( 'curonly' );
            $rawOffset = $request->getVal( 'offset' );

            if ( $rawOffset ) {
                $offset = wfTimestamp( TS_MW, $rawOffset );
            } else {
                $offset = null;
            }

            $limit = $request->getInt( 'limit' );
            $dir = $request->getVal( 'dir' );
            $history = array(
                'dir' => 'asc',
                'offset' => false,
                'limit' => $wgExportMaxHistory,
            );
            $historyCheck = $request->getCheck( 'history' );

            if ( $this->curonly ) {
                $history = WikiExporter::CURRENT;
            } elseif ( !$historyCheck ) {
                // A requested limit is only honoured when it is positive and
                // below the configured maximum (a maximum of 0 accepts any limit).
                if ( $limit > 0 && ( $wgExportMaxHistory == 0 || $limit < $wgExportMaxHistory ) ) {
                    $history['limit'] = $limit;
                }

                if ( $offset !== null ) {
                    $history['offset'] = $offset;
                }

                if ( strtolower( $dir ) == 'desc' ) {
                    $history['dir'] = 'desc';
                }
            }

            if ( $page != '' ) {
                $this->doExport = true;
            }
        } else {
            // Default to current-only for GET requests.
            $page = $request->getText( 'pages', $par );
            $historyCheck = $request->getCheck( 'history' );

            if ( $historyCheck ) {
                $history = WikiExporter::FULL;
            } else {
                $history = WikiExporter::CURRENT;
            }

            if ( $page != '' ) {
                $this->doExport = true;
            }
        }

        if ( !$wgExportAllowHistory ) {
            // Override
            $history = WikiExporter::CURRENT;
        }

        // The author list is only offered for current-revision exports and
        // only when the site configuration allows it.
        $list_authors = $request->getCheck( 'listauthors' );
        if ( !$this->curonly || !$wgExportAllowListContributors ) {
            $list_authors = false;
        }

        if ( $this->doExport ) {
            $this->getOutput()->disable();

            // Cancel output buffering and gzipping if set
            // This should provide safer streaming for pages with history
            wfResetOutputBuffers();
            $request->response()->header( "Content-type: application/xml; charset=utf-8" );

            if ( $request->getCheck( 'wpDownload' ) ) {
                // Provide a sane filename suggestion
                $filename = urlencode( $wgSitename . '-' . wfTimestampNow() . '.xml' );
                $request->response()->header( "Content-disposition: attachment;filename={$filename}" );
            }

            $this->doExport( $page, $history, $list_authors, $exportall );

            return;
        }

        // No export requested (or nothing selected): build the form.
        $out = $this->getOutput();
        $out->addWikiMsg( 'exporttext' );

        $form = Xml::openElement( 'form', array( 'method' => 'post',
            'action' => $this->getTitle()->getLocalURL( 'action=submit' ) ) );
        $form .= Xml::inputLabel(
            $this->msg( 'export-addcattext' )->text(),
            'catname',
            'catname',
            40
        ) . '&#160;';
        $form .= Xml::submitButton(
            $this->msg( 'export-addcat' )->text(),
            array( 'name' => 'addcat' )
        ) . '<br />';

        if ( $wgExportFromNamespaces ) {
            $form .= Html::namespaceSelector(
                array(
                    'selected' => $nsindex,
                    'label' => $this->msg( 'export-addnstext' )->text()
                ), array(
                    'name' => 'nsindex',
                    'id' => 'namespace',
                    'class' => 'namespaceselector',
                )
            ) . '&#160;';
            $form .= Xml::submitButton(
                $this->msg( 'export-addns' )->text(),
                array( 'name' => 'addns' )
            ) . '<br />';
        }

        if ( $wgExportAllowAll ) {
            $form .= Xml::checkLabel(
                $this->msg( 'exportall' )->text(),
                'exportall',
                'exportall',
                $request->wasPosted() ? $request->getCheck( 'exportall' ) : false
            ) . '<br />';
        }

        $form .= Xml::element(
            'textarea',
            array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ),
            $page,
            false
        );
        $form .= '<br />';

        if ( $wgExportAllowHistory ) {
            $form .= Xml::checkLabel(
                $this->msg( 'exportcuronly' )->text(),
                'curonly',
                'curonly',
                $request->wasPosted() ? $request->getCheck( 'curonly' ) : true
            ) . '<br />';
        } else {
            $out->addWikiMsg( 'exportnohistory' );
        }

        $form .= Xml::checkLabel(
            $this->msg( 'export-templates' )->text(),
            'templates',
            'wpExportTemplates',
            $request->wasPosted() ? $request->getCheck( 'templates' ) : false
        ) . '<br />';

        if ( $wgExportMaxLinkDepth || $this->userCanOverrideExportDepth() ) {
            $form .= Xml::inputLabel(
                $this->msg( 'export-pagelinks' )->text(),
                'pagelink-depth',
                'pagelink-depth',
                20,
                0
            ) . '<br />';
        }

        // Enable this when we can do something useful exporting/importing image information. :)
        //$form .= Xml::checkLabel( $this->msg( 'export-images' )->text(), 'images', 'wpExportImages', false ) . '<br />';
        $form .= Xml::checkLabel(
            $this->msg( 'export-download' )->text(),
            'wpDownload',
            'wpDownload',
            $request->wasPosted() ? $request->getCheck( 'wpDownload' ) : true
        ) . '<br />';

        if ( $wgExportAllowListContributors ) {
            $form .= Xml::checkLabel(
                $this->msg( 'exportlistauthors' )->text(),
                'listauthors',
                'listauthors',
                $request->wasPosted() ? $request->getCheck( 'listauthors' ) : false
            ) . '<br />';
        }

        $form .= Xml::submitButton(
            $this->msg( 'export-submit' )->text(),
            Linker::tooltipAndAccesskeyAttribs( 'export' )
        );
        $form .= Xml::closeElement( 'form' );

        $out->addHTML( $form );
    }

    /**
     * Whether the current user holds the 'override-export-depth' right.
     *
     * @return bool
     */
    private function userCanOverrideExportDepth() {
        return $this->getUser()->isAllowed( 'override-export-depth' );
    }

    /**
     * Do the actual page exporting: resolve the page list and stream it
     * through a WikiExporter.
     *
     * @param string $page Newline-separated page names (ignored when $exportall is set)
     * @param mixed $history A WikiExporter history constant, or the
     *   dir/offset/limit array built by execute()
     * @param bool $list_authors Passed through to the exporter's list_authors property
     * @param bool $exportall Export every page with full history instead of $page
     */
    private function doExport( $page, $history, $list_authors, $exportall ) {
        // Always defined, even though it is only consulted when !$exportall.
        $pages = array();

        // If we are grabbing everything, enable full history and ignore the rest
        if ( $exportall ) {
            $history = WikiExporter::FULL;
        } else {
            $pageSet = array(); // Inverted index of all pages to look up

            // Split up and normalize input
            foreach ( explode( "\n", $page ) as $pageName ) {
                $pageName = trim( $pageName );
                $title = Title::newFromText( $pageName );
                if ( $title && $title->getInterwiki() == '' && $title->getText() !== '' ) {
                    // Only record each page once!
                    $pageSet[$title->getPrefixedText()] = true;
                }
            }

            // Set of original pages to pass on to further manipulation...
            $inputPages = array_keys( $pageSet );

            // Look up any linked pages if asked...
            if ( $this->templates ) {
                $pageSet = $this->getTemplates( $inputPages, $pageSet );
            }
            $linkDepth = $this->pageLinkDepth;
            if ( $linkDepth ) {
                $pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
            }

            // Enable this when we can do something useful exporting/importing image information.
            // if ( $this->images ) {
            // $pageSet = $this->getImages( $inputPages, $pageSet );
            // }

            // Normalize titles to the same format and remove dupes, see bug 17374.
            // str_replace() on an array subject rewrites every element and keeps the keys.
            $pages = array_unique( str_replace( ' ', '_', array_keys( $pageSet ) ) );
        }

        /* Ok, let's get to it... */
        if ( $history == WikiExporter::CURRENT ) {
            $lb = false;
            $db = wfGetDB( DB_SLAVE );
            $buffer = WikiExporter::BUFFER;
        } else {
            // Use an unbuffered query; histories may be very long!
            $lb = wfGetLBFactory()->newMainLB();
            $db = $lb->getConnection( DB_SLAVE );
            $buffer = WikiExporter::STREAM;

            // This might take a while... :D
            wfSuppressWarnings();
            set_time_limit( 0 );
            wfRestoreWarnings();
        }

        $exporter = new WikiExporter( $db, $history, $buffer );
        $exporter->list_authors = $list_authors;
        $exporter->openStream();

        if ( $exportall ) {
            $exporter->allPages();
        } else {
            foreach ( $pages as $pageName ) {
                #Bug 8824: Only export pages the user can read
                $title = Title::newFromText( $pageName );
                if ( $title === null ) {
                    // @todo Perhaps output an <error> tag or something.
                    continue;
                }

                if ( !$title->userCan( 'read', $this->getUser() ) ) {
                    // @todo Perhaps output an <error> tag or something.
                    continue;
                }

                $exporter->pageByTitle( $title );
            }
        }

        $exporter->closeStream();

        // Only the dedicated load balancer created for streaming needs closing.
        if ( $lb ) {
            $lb->closeAll();
        }
    }

    /**
     * Convert page rows into page names, prefixing the namespace text
     * for every row whose page_namespace is non-zero.
     *
     * @param Traversable|array $res Rows exposing page_namespace and page_title
     * @return array List of page name strings
     */
    private function getPageNamesFromRows( $res ) {
        global $wgContLang;

        $pages = array();

        foreach ( $res as $row ) {
            $n = $row->page_title;

            if ( $row->page_namespace ) {
                $ns = $wgContLang->getNsText( $row->page_namespace );
                $n = $ns . ':' . $n;
            }

            $pages[] = $n;
        }

        return $pages;
    }

    /**
     * List the pages linked to a category (at most 5000 rows).
     *
     * @param Title $title Title whose DB key is matched against cl_to
     * @return array List of page name strings
     */
    private function getPagesFromCategory( $title ) {
        $name = $title->getDBkey();

        $dbr = wfGetDB( DB_SLAVE );
        $res = $dbr->select(
            array( 'page', 'categorylinks' ),
            array( 'page_namespace', 'page_title' ),
            array( 'cl_from=page_id', 'cl_to' => $name ),
            __METHOD__,
            array( 'LIMIT' => '5000' )
        );

        return $this->getPageNamesFromRows( $res );
    }

    /**
     * List the pages of one namespace (at most 5000 rows).
     *
     * @param int|string $nsindex Value matched against page_namespace
     * @return array List of page name strings
     */
    private function getPagesFromNamespace( $nsindex ) {
        $dbr = wfGetDB( DB_SLAVE );
        $res = $dbr->select(
            'page',
            array( 'page_namespace', 'page_title' ),
            array( 'page_namespace' => $nsindex ),
            __METHOD__,
            array( 'LIMIT' => '5000' )
        );

        return $this->getPageNamesFromRows( $res );
    }

    /**
     * Expand a page set with the targets recorded in templatelinks for
     * the given pages.
     *
     * @param array $inputPages List of page names to look up
     * @param array $pageSet Associative array keyed by prefixed page name
     * @return array The expanded $pageSet
     */
    private function getTemplates( $inputPages, $pageSet ) {
        return $this->getLinks( $inputPages, $pageSet,
            'templatelinks',
            array( 'namespace' => 'tl_namespace', 'title' => 'tl_title' ),
            array( 'page_id=tl_from' )
        );
    }

    /**
     * Clamp the requested link depth to what the user may use.
     *
     * @param int|null $depth Requested depth (execute() passes the result of getIntOrNull())
     * @return int Depth between 0 and 5
     */
    private function validateLinkDepth( $depth ) {
        global $wgExportMaxLinkDepth;

        if ( $depth < 0 ) {
            return 0;
        }

        if ( !$this->userCanOverrideExportDepth() ) {
            if ( $depth > $wgExportMaxLinkDepth ) {
                return $wgExportMaxLinkDepth;
            }
        }

        /*
         * There's a HARD CODED limit of 5 levels of recursion here to prevent a
         * crazy-big export from being done by someone setting the depth
         * number too high. In other words, last resort safety net.
         */

        return intval( min( $depth, 5 ) );
    }

    /**
     * Expand a page set with the pagelinks targets of its pages, repeated
     * $depth times. After each pass every page collected so far becomes
     * input for the next pass.
     *
     * @param array $inputPages List of page names to start from
     * @param array $pageSet Associative array keyed by prefixed page name
     * @param int $depth Number of passes to run
     * @return array The expanded $pageSet
     */
    private function getPageLinks( $inputPages, $pageSet, $depth ) {
        for ( ; $depth > 0; --$depth ) {
            $pageSet = $this->getLinks(
                $inputPages, $pageSet, 'pagelinks',
                array( 'namespace' => 'pl_namespace', 'title' => 'pl_title' ),
                array( 'page_id=pl_from' )
            );
            $inputPages = array_keys( $pageSet );
        }

        return $pageSet;
    }

    /**
     * Expand a page set with the imagelinks targets of the given pages.
     * Not called from this class at present; see the disabled image
     * handling in doExport().
     *
     * @param array $inputPages List of page names to look up
     * @param array $pageSet Associative array keyed by prefixed page name
     * @return array The expanded $pageSet
     */
    private function getImages( $inputPages, $pageSet ) {
        return $this->getLinks(
            $inputPages,
            $pageSet,
            'imagelinks',
            array( 'namespace' => NS_FILE, 'title' => 'il_to' ),
            array( 'page_id=il_from' )
        );
    }

    /**
     * Expand a page set with the rows a link table holds for the given
     * pages. Runs one query per input page.
     *
     * @param array $inputPages List of page names to look up
     * @param array $pageSet Associative array keyed by prefixed page name
     * @param string $table Link table joined against 'page'
     * @param array $fields Select list; must alias the target columns to
     *   'namespace' and 'title'
     * @param array $join Join condition(s) between 'page' and $table
     * @return array The expanded $pageSet
     */
    private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
        $dbr = wfGetDB( DB_SLAVE );

        foreach ( $inputPages as $page ) {
            $title = Title::newFromText( $page );

            // Names that do not parse into a Title are skipped silently.
            if ( $title ) {
                $pageSet[$title->getPrefixedText()] = true;
                $result = $dbr->select(
                    array( 'page', $table ),
                    $fields,
                    array_merge(
                        $join,
                        array(
                            'page_namespace' => $title->getNamespace(),
                            'page_title' => $title->getDBkey()
                        )
                    ),
                    __METHOD__
                );

                foreach ( $result as $row ) {
                    $linked = Title::makeTitle( $row->namespace, $row->title );
                    $pageSet[$linked->getPrefixedText()] = true;
                }
            }
        }

        return $pageSet;
    }

    protected function getGroupName() {
        return 'pagetools';
    }
}