MediaWiki  REL1_19
SpecialExport.php
Go to the documentation of this file.
00001 <?php
00031 class SpecialExport extends SpecialPage {
00032 
00033         private $curonly, $doExport, $pageLinkDepth, $templates;
00034         private $images;
00035 
00036         public function __construct() {
00037                 parent::__construct( 'Export' );
00038         }
00039 
00040         public function execute( $par ) {
00041                 global $wgSitename, $wgExportAllowListContributors, $wgExportFromNamespaces;
00042                 global $wgExportAllowHistory, $wgExportMaxHistory, $wgExportMaxLinkDepth;
00043                 global $wgExportAllowAll;
00044 
00045                 $this->setHeaders();
00046                 $this->outputHeader();
00047 
00048                 // Set some variables
00049                 $this->curonly = true;
00050                 $this->doExport = false;
00051                 $request = $this->getRequest();
00052                 $this->templates = $request->getCheck( 'templates' );
00053                 $this->images = $request->getCheck( 'images' ); // Doesn't do anything yet
00054                 $this->pageLinkDepth = $this->validateLinkDepth(
00055                         $request->getIntOrNull( 'pagelink-depth' )
00056                 );
00057                 $nsindex = '';
00058                 $exportall = false;
00059 
00060                 if ( $request->getCheck( 'addcat' ) ) {
00061                         $page = $request->getText( 'pages' );
00062                         $catname = $request->getText( 'catname' );
00063 
00064                         if ( $catname !== '' && $catname !== null && $catname !== false ) {
00065                                 $t = Title::makeTitleSafe( NS_MAIN, $catname );
00066                                 if ( $t ) {
00072                                         $catpages = $this->getPagesFromCategory( $t );
00073                                         if ( $catpages ) {
00074                                                 $page .= "\n" . implode( "\n", $catpages );
00075                                         }
00076                                 }
00077                         }
00078                 }
00079                 elseif( $request->getCheck( 'addns' ) && $wgExportFromNamespaces ) {
00080                         $page = $request->getText( 'pages' );
00081                         $nsindex = $request->getText( 'nsindex', '' );
00082 
00083                         if ( strval( $nsindex ) !== ''  ) {
00087                                 $nspages = $this->getPagesFromNamespace( $nsindex );
00088                                 if ( $nspages ) {
00089                                         $page .= "\n" . implode( "\n", $nspages );
00090                                 }
00091                         }
00092                 }
00093                 elseif( $request->getCheck( 'exportall' ) && $wgExportAllowAll ) {
00094                         $this->doExport = true;
00095                         $exportall = true;
00096                 }
00097                 elseif( $request->wasPosted() && $par == '' ) {
00098                         $page = $request->getText( 'pages' );
00099                         $this->curonly = $request->getCheck( 'curonly' );
00100                         $rawOffset = $request->getVal( 'offset' );
00101 
00102                         if( $rawOffset ) {
00103                                 $offset = wfTimestamp( TS_MW, $rawOffset );
00104                         } else {
00105                                 $offset = null;
00106                         }
00107 
00108                         $limit = $request->getInt( 'limit' );
00109                         $dir = $request->getVal( 'dir' );
00110                         $history = array(
00111                                 'dir' => 'asc',
00112                                 'offset' => false,
00113                                 'limit' => $wgExportMaxHistory,
00114                         );
00115                         $historyCheck = $request->getCheck( 'history' );
00116 
00117                         if ( $this->curonly ) {
00118                                 $history = WikiExporter::CURRENT;
00119                         } elseif ( !$historyCheck ) {
00120                                 if ( $limit > 0 && ($wgExportMaxHistory == 0 || $limit < $wgExportMaxHistory ) ) {
00121                                         $history['limit'] = $limit;
00122                                 }
00123                                 if ( !is_null( $offset ) ) {
00124                                         $history['offset'] = $offset;
00125                                 }
00126                                 if ( strtolower( $dir ) == 'desc' ) {
00127                                         $history['dir'] = 'desc';
00128                                 }
00129                         }
00130 
00131                         if( $page != '' ) {
00132                                 $this->doExport = true;
00133                         }
00134                 } else {
00135                         // Default to current-only for GET requests.
00136                         $page = $request->getText( 'pages', $par );
00137                         $historyCheck = $request->getCheck( 'history' );
00138 
00139                         if( $historyCheck ) {
00140                                 $history = WikiExporter::FULL;
00141                         } else {
00142                                 $history = WikiExporter::CURRENT;
00143                         }
00144 
00145                         if( $page != '' ) {
00146                                 $this->doExport = true;
00147                         }
00148                 }
00149 
00150                 if( !$wgExportAllowHistory ) {
00151                         // Override
00152                         $history = WikiExporter::CURRENT;
00153                 }
00154 
00155                 $list_authors = $request->getCheck( 'listauthors' );
00156                 if ( !$this->curonly || !$wgExportAllowListContributors ) {
00157                         $list_authors = false ;
00158                 }
00159 
00160                 if ( $this->doExport ) {
00161                         $this->getOutput()->disable();
00162 
00163                         // Cancel output buffering and gzipping if set
00164                         // This should provide safer streaming for pages with history
00165                         wfResetOutputBuffers();
00166                         $request->response()->header( "Content-type: application/xml; charset=utf-8" );
00167 
00168                         if( $request->getCheck( 'wpDownload' ) ) {
00169                                 // Provide a sane filename suggestion
00170                                 $filename = urlencode( $wgSitename . '-' . wfTimestampNow() . '.xml' );
00171                                 $request->response()->header( "Content-disposition: attachment;filename={$filename}" );
00172                         }
00173 
00174                         $this->doExport( $page, $history, $list_authors, $exportall );
00175 
00176                         return;
00177                 }
00178 
00179                 $out = $this->getOutput();
00180                 $out->addWikiMsg( 'exporttext' );
00181 
00182                 $form = Xml::openElement( 'form', array( 'method' => 'post',
00183                         'action' => $this->getTitle()->getLocalUrl( 'action=submit' ) ) );
00184                 $form .= Xml::inputLabel( wfMsg( 'export-addcattext' )    , 'catname', 'catname', 40 ) . '&#160;';
00185                 $form .= Xml::submitButton( wfMsg( 'export-addcat' ), array( 'name' => 'addcat' ) ) . '<br />';
00186 
00187                 if ( $wgExportFromNamespaces ) {
00188                         $form .= Xml::namespaceSelector( $nsindex, null, 'nsindex', wfMsg( 'export-addnstext' ) ) . '&#160;';
00189                         $form .= Xml::submitButton( wfMsg( 'export-addns' ), array( 'name' => 'addns' ) ) . '<br />';
00190                 }
00191 
00192                 if ( $wgExportAllowAll ) {
00193                         $form .= Xml::checkLabel(
00194                                 wfMsg( 'exportall' ),
00195                                 'exportall',
00196                                 'exportall',
00197                                 $request->wasPosted() ? $request->getCheck( 'exportall' ) : false
00198                         ) . '<br />';
00199                 }
00200 
00201                 $form .= Xml::element( 'textarea', array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ), $page, false );
00202                 $form .= '<br />';
00203 
00204                 if( $wgExportAllowHistory ) {
00205                         $form .= Xml::checkLabel(
00206                                 wfMsg( 'exportcuronly' ),
00207                                 'curonly',
00208                                 'curonly',
00209                                 $request->wasPosted() ? $request->getCheck( 'curonly' ) : true
00210                         ) . '<br />';
00211                 } else {
00212                         $out->addHTML( wfMsgExt( 'exportnohistory', 'parse' ) );
00213                 }
00214 
00215                 $form .= Xml::checkLabel(
00216                         wfMsg( 'export-templates' ),
00217                         'templates',
00218                         'wpExportTemplates',
00219                         $request->wasPosted() ? $request->getCheck( 'templates' ) : false
00220                 ) . '<br />';
00221 
00222                 if( $wgExportMaxLinkDepth || $this->userCanOverrideExportDepth() ) {
00223                         $form .= Xml::inputLabel( wfMsg( 'export-pagelinks' ), 'pagelink-depth', 'pagelink-depth', 20, 0 ) . '<br />';
00224                 }
00225                 // Enable this when we can do something useful exporting/importing image information. :)
00226                 //$form .= Xml::checkLabel( wfMsg( 'export-images' ), 'images', 'wpExportImages', false ) . '<br />';
00227                 $form .= Xml::checkLabel(
00228                         wfMsg( 'export-download' ),
00229                         'wpDownload',
00230                         'wpDownload',
00231                         $request->wasPosted() ? $request->getCheck( 'wpDownload' ) : true
00232                 ) . '<br />';
00233 
00234                 if ( $wgExportAllowListContributors ) {
00235                         $form .= Xml::checkLabel(
00236                                 wfMsg( 'exportlistauthors' ),
00237                                 'listauthors',
00238                                 'listauthors',
00239                                 $request->wasPosted() ? $request->getCheck( 'listauthors' ) : false
00240                         ) . '<br />';
00241                 }
00242 
00243                 $form .= Xml::submitButton( wfMsg( 'export-submit' ), Linker::tooltipAndAccesskeyAttribs( 'export' ) );
00244                 $form .= Xml::closeElement( 'form' );
00245 
00246                 $out->addHTML( $form );
00247         }
00248 
00252         private function userCanOverrideExportDepth() {
00253                 return $this->getUser()->isAllowed( 'override-export-depth' );
00254         }
00255 
00265         private function doExport( $page, $history, $list_authors, $exportall ) {
00266 
00267                 // If we are grabbing everything, enable full history and ignore the rest
00268                 if ( $exportall ) {
00269                         $history = WikiExporter::FULL;
00270                 } else {
00271 
00272                         $pageSet = array(); // Inverted index of all pages to look up
00273                 
00274                         // Split up and normalize input
00275                         foreach( explode( "\n", $page ) as $pageName ) {
00276                                 $pageName = trim( $pageName );
00277                                 $title = Title::newFromText( $pageName );
00278                                 if( $title && $title->getInterwiki() == '' && $title->getText() !== '' ) {
00279                                         // Only record each page once!
00280                                         $pageSet[$title->getPrefixedText()] = true;
00281                                 }
00282                         }
00283 
00284                         // Set of original pages to pass on to further manipulation...
00285                         $inputPages = array_keys( $pageSet );
00286 
00287                         // Look up any linked pages if asked...
00288                         if( $this->templates ) {
00289                                 $pageSet = $this->getTemplates( $inputPages, $pageSet );
00290                         }
00291                         $linkDepth = $this->pageLinkDepth;
00292                         if( $linkDepth ) {
00293                                 $pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
00294                         }
00295 
00296                         /*
00297                          // Enable this when we can do something useful exporting/importing image information. :)
00298                          if( $this->images ) ) {
00299                          $pageSet = $this->getImages( $inputPages, $pageSet );
00300                          }
00301                         */
00302 
00303                         $pages = array_keys( $pageSet );
00304 
00305                         // Normalize titles to the same format and remove dupes, see bug 17374
00306                         foreach( $pages as $k => $v ) {
00307                                 $pages[$k] = str_replace( " ", "_", $v );
00308                         }
00309 
00310                         $pages = array_unique( $pages );
00311                 }
00312 
00313                 /* Ok, let's get to it... */
00314                 if( $history == WikiExporter::CURRENT ) {
00315                         $lb = false;
00316                         $db = wfGetDB( DB_SLAVE );
00317                         $buffer = WikiExporter::BUFFER;
00318                 } else {
00319                         // Use an unbuffered query; histories may be very long!
00320                         $lb = wfGetLBFactory()->newMainLB();
00321                         $db = $lb->getConnection( DB_SLAVE );
00322                         $buffer = WikiExporter::STREAM;
00323 
00324                         // This might take a while... :D
00325                         wfSuppressWarnings();
00326                         set_time_limit(0);
00327                         wfRestoreWarnings();
00328                 }
00329 
00330                 $exporter = new WikiExporter( $db, $history, $buffer );
00331                 $exporter->list_authors = $list_authors;
00332                 $exporter->openStream();
00333 
00334                 if ( $exportall ) {
00335                         $exporter->allPages();
00336                 } else {
00337                         foreach( $pages as $page ) {
00338                         /*
00339                          if( $wgExportMaxHistory && !$this->curonly ) {
00340                          $title = Title::newFromText( $page );
00341                          if( $title ) {
00342                          $count = Revision::countByTitle( $db, $title );
00343                          if( $count > $wgExportMaxHistory ) {
00344                          wfDebug( __FUNCTION__ .
00345                          ": Skipped $page, $count revisions too big\n" );
00346                          continue;
00347                          }
00348                          }
00349                          }*/
00350                         #Bug 8824: Only export pages the user can read
00351                                 $title = Title::newFromText( $page );
00352                                 if( is_null( $title ) ) {
00353                                         continue; #TODO: perhaps output an <error> tag or something.
00354                                 }
00355                                 if( !$title->userCan( 'read', $this->getUser() ) ) {
00356                                         continue; #TODO: perhaps output an <error> tag or something.
00357                                 }
00358 
00359                                 $exporter->pageByTitle( $title );
00360                         }
00361                 }
00362 
00363                 $exporter->closeStream();
00364 
00365                 if( $lb ) {
00366                         $lb->closeAll();
00367                 }
00368         }
00369 
00374         private function getPagesFromCategory( $title ) {
00375                 global $wgContLang;
00376 
00377                 $name = $title->getDBkey();
00378 
00379                 $dbr = wfGetDB( DB_SLAVE );
00380                 $res = $dbr->select(
00381                         array( 'page', 'categorylinks' ),
00382                         array( 'page_namespace', 'page_title' ),
00383                         array( 'cl_from=page_id', 'cl_to' => $name ),
00384                         __METHOD__,
00385                         array( 'LIMIT' => '5000' )
00386                 );
00387 
00388                 $pages = array();
00389 
00390                 foreach ( $res as $row ) {
00391                         $n = $row->page_title;
00392                         if ($row->page_namespace) {
00393                                 $ns = $wgContLang->getNsText( $row->page_namespace );
00394                                 $n = $ns . ':' . $n;
00395                         }
00396 
00397                         $pages[] = $n;
00398                 }
00399                 return $pages;
00400         }
00401 
00406         private function getPagesFromNamespace( $nsindex ) {
00407                 global $wgContLang;
00408 
00409                 $dbr = wfGetDB( DB_SLAVE );
00410                 $res = $dbr->select(
00411                         'page',
00412                         array( 'page_namespace', 'page_title' ),
00413                         array( 'page_namespace' => $nsindex ),
00414                         __METHOD__,
00415                         array( 'LIMIT' => '5000' )
00416                 );
00417 
00418                 $pages = array();
00419 
00420                 foreach ( $res as $row ) {
00421                         $n = $row->page_title;
00422 
00423                         if ( $row->page_namespace ) {
00424                                 $ns = $wgContLang->getNsText( $row->page_namespace );
00425                                 $n = $ns . ':' . $n;
00426                         }
00427 
00428                         $pages[] = $n;
00429                 }
00430                 return $pages;
00431         }
00432 
00439         private function getTemplates( $inputPages, $pageSet ) {
00440                 return $this->getLinks( $inputPages, $pageSet,
00441                         'templatelinks',
00442                         array( 'tl_namespace AS namespace', 'tl_title AS title' ),
00443                         array( 'page_id=tl_from' )
00444                 );
00445         }
00446 
00452         private function validateLinkDepth( $depth ) {
00453                 global $wgExportMaxLinkDepth;
00454 
00455                 if( $depth < 0 ) {
00456                         return 0;
00457                 }
00458 
00459                 if ( !$this->userCanOverrideExportDepth() ) {
00460                         if( $depth > $wgExportMaxLinkDepth ) {
00461                                 return $wgExportMaxLinkDepth;
00462                         }
00463                 }
00464 
00465                 /*
00466                  * There's a HARD CODED limit of 5 levels of recursion here to prevent a
00467                  * crazy-big export from being done by someone setting the depth
00468                  * number too high. In other words, last resort safety net.
00469                  */
00470                 return intval( min( $depth, 5 ) );
00471         }
00472 
00480         private function getPageLinks( $inputPages, $pageSet, $depth ) {
00481                 for( ; $depth > 0; --$depth ) {
00482                         $pageSet = $this->getLinks(
00483                                 $inputPages, $pageSet, 'pagelinks',
00484                                 array( 'pl_namespace AS namespace', 'pl_title AS title' ),
00485                                 array( 'page_id=pl_from' )
00486                         );
00487                         $inputPages = array_keys( $pageSet );
00488                 }
00489 
00490                 return $pageSet;
00491         }
00492 
00501         private function getImages( $inputPages, $pageSet ) {
00502                 return $this->getLinks(
00503                         $inputPages,
00504                         $pageSet,
00505                         'imagelinks',
00506                         array( NS_FILE . ' AS namespace', 'il_to AS title' ),
00507                         array( 'page_id=il_from' )
00508                 );
00509         }
00510 
00514         private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
00515                 $dbr = wfGetDB( DB_SLAVE );
00516 
00517                 foreach( $inputPages as $page ) {
00518                         $title = Title::newFromText( $page );
00519 
00520                         if( $title ) {
00521                                 $pageSet[$title->getPrefixedText()] = true;
00524                                 $result = $dbr->select(
00525                                         array( 'page', $table ),
00526                                         $fields,
00527                                         array_merge(
00528                                                 $join,
00529                                                 array(
00530                                                         'page_namespace' => $title->getNamespace(),
00531                                                         'page_title' => $title->getDBkey()
00532                                                 )
00533                                         ),
00534                                         __METHOD__
00535                                 );
00536 
00537                                 foreach( $result as $row ) {
00538                                         $template = Title::makeTitle( $row->namespace, $row->title );
00539                                         $pageSet[$template->getPrefixedText()] = true;
00540                                 }
00541                         }
00542                 }
00543 
00544                 return $pageSet;
00545         }
00546 
00547 }