MediaWiki  REL1_20
ApiPageSet.php
Go to the documentation of this file.
00001 <?php
00040 class ApiPageSet extends ApiQueryBase {
00041 
00042         private $mAllPages; // [ns][dbkey] => page_id or negative when missing
00043         private $mTitles, $mGoodTitles, $mMissingTitles, $mInvalidTitles;
00044         private $mMissingPageIDs, $mRedirectTitles, $mSpecialTitles;
00045         private $mNormalizedTitles, $mInterwikiTitles;
00046         private $mResolveRedirects, $mPendingRedirectIDs;
00047         private $mConvertTitles, $mConvertedTitles;
00048         private $mGoodRevIDs, $mMissingRevIDs;
00049         private $mFakePageId;
00050 
00051         private $mRequestedPageFields;
00052 
00059         public function __construct( $query, $resolveRedirects = false, $convertTitles = false ) {
00060                 parent::__construct( $query, 'query' );
00061 
00062                 $this->mAllPages = array();
00063                 $this->mTitles = array();
00064                 $this->mGoodTitles = array();
00065                 $this->mMissingTitles = array();
00066                 $this->mInvalidTitles = array();
00067                 $this->mMissingPageIDs = array();
00068                 $this->mRedirectTitles = array();
00069                 $this->mNormalizedTitles = array();
00070                 $this->mInterwikiTitles = array();
00071                 $this->mGoodRevIDs = array();
00072                 $this->mMissingRevIDs = array();
00073                 $this->mSpecialTitles = array();
00074 
00075                 $this->mRequestedPageFields = array();
00076                 $this->mResolveRedirects = $resolveRedirects;
00077                 if ( $resolveRedirects ) {
00078                         $this->mPendingRedirectIDs = array();
00079                 }
00080 
00081                 $this->mConvertTitles = $convertTitles;
00082                 $this->mConvertedTitles = array();
00083 
00084                 $this->mFakePageId = - 1;
00085         }
00086 
00091         public function isResolvingRedirects() {
00092                 return $this->mResolveRedirects;
00093         }
00094 
00100         public function requestField( $fieldName ) {
00101                 $this->mRequestedPageFields[$fieldName] = null;
00102         }
00103 
00110         public function getCustomField( $fieldName ) {
00111                 return $this->mRequestedPageFields[$fieldName];
00112         }
00113 
00120         public function getPageTableFields() {
00121                 // Ensure we get minimum required fields
00122                 // DON'T change this order
00123                 $pageFlds = array(
00124                         'page_namespace' => null,
00125                         'page_title' => null,
00126                         'page_id' => null,
00127                 );
00128 
00129                 if ( $this->mResolveRedirects ) {
00130                         $pageFlds['page_is_redirect'] = null;
00131                 }
00132 
00133                 // only store non-default fields
00134                 $this->mRequestedPageFields = array_diff_key( $this->mRequestedPageFields, $pageFlds );
00135 
00136                 $pageFlds = array_merge( $pageFlds, $this->mRequestedPageFields );
00137                 return array_keys( $pageFlds );
00138         }
00139 
00146         public function getAllTitlesByNamespace() {
00147                 return $this->mAllPages;
00148         }
00149 
00154         public function getTitles() {
00155                 return $this->mTitles;
00156         }
00157 
00162         public function getTitleCount() {
00163                 return count( $this->mTitles );
00164         }
00165 
00170         public function getGoodTitles() {
00171                 return $this->mGoodTitles;
00172         }
00173 
00178         public function getGoodTitleCount() {
00179                 return count( $this->mGoodTitles );
00180         }
00181 
00187         public function getMissingTitles() {
00188                 return $this->mMissingTitles;
00189         }
00190 
00196         public function getInvalidTitles() {
00197                 return $this->mInvalidTitles;
00198         }
00199 
00204         public function getMissingPageIDs() {
00205                 return $this->mMissingPageIDs;
00206         }
00207 
00213         public function getRedirectTitles() {
00214                 return $this->mRedirectTitles;
00215         }
00216 
00222         public function getNormalizedTitles() {
00223                 return $this->mNormalizedTitles;
00224         }
00225 
00231         public function getConvertedTitles() {
00232                 return $this->mConvertedTitles;
00233         }
00234 
00240         public function getInterwikiTitles() {
00241                 return $this->mInterwikiTitles;
00242         }
00243 
00248         public function getRevisionIDs() {
00249                 return $this->mGoodRevIDs;
00250         }
00251 
00256         public function getMissingRevisionIDs() {
00257                 return $this->mMissingRevIDs;
00258         }
00259 
00264         public function getSpecialTitles() {
00265                 return $this->mSpecialTitles;
00266         }
00267 
00272         public function getRevisionCount() {
00273                 return count( $this->getRevisionIDs() );
00274         }
00275 
00279         public function execute() {
00280                 $this->profileIn();
00281                 $params = $this->extractRequestParams();
00282 
00283                 // Only one of the titles/pageids/revids is allowed at the same time
00284                 $dataSource = null;
00285                 if ( isset( $params['titles'] ) ) {
00286                         $dataSource = 'titles';
00287                 }
00288                 if ( isset( $params['pageids'] ) ) {
00289                         if ( isset( $dataSource ) ) {
00290                                 $this->dieUsage( "Cannot use 'pageids' at the same time as '$dataSource'", 'multisource' );
00291                         }
00292                         $dataSource = 'pageids';
00293                 }
00294                 if ( isset( $params['revids'] ) ) {
00295                         if ( isset( $dataSource ) ) {
00296                                 $this->dieUsage( "Cannot use 'revids' at the same time as '$dataSource'", 'multisource' );
00297                         }
00298                         $dataSource = 'revids';
00299                 }
00300 
00301                 switch ( $dataSource ) {
00302                         case 'titles':
00303                                 $this->initFromTitles( $params['titles'] );
00304                                 break;
00305                         case 'pageids':
00306                                 $this->initFromPageIds( $params['pageids'] );
00307                                 break;
00308                         case 'revids':
00309                                 if ( $this->mResolveRedirects ) {
00310                                         $this->setWarning( 'Redirect resolution cannot be used together with the revids= parameter. ' .
00311                                         'Any redirects the revids= point to have not been resolved.' );
00312                                 }
00313                                 $this->mResolveRedirects = false;
00314                                 $this->initFromRevIDs( $params['revids'] );
00315                                 break;
00316                         default:
00317                                 // Do nothing - some queries do not need any of the data sources.
00318                                 break;
00319                 }
00320                 $this->profileOut();
00321         }
00322 
00327         public function populateFromTitles( $titles ) {
00328                 $this->profileIn();
00329                 $this->initFromTitles( $titles );
00330                 $this->profileOut();
00331         }
00332 
00337         public function populateFromPageIDs( $pageIDs ) {
00338                 $this->profileIn();
00339                 $this->initFromPageIds( $pageIDs );
00340                 $this->profileOut();
00341         }
00342 
00348         public function populateFromQueryResult( $db, $queryResult ) {
00349                 $this->profileIn();
00350                 $this->initFromQueryResult( $queryResult );
00351                 $this->profileOut();
00352         }
00353 
00358         public function populateFromRevisionIDs( $revIDs ) {
00359                 $this->profileIn();
00360                 $this->initFromRevIDs( $revIDs );
00361                 $this->profileOut();
00362         }
00363 
00368         public function processDbRow( $row ) {
00369                 // Store Title object in various data structures
00370                 $title = Title::newFromRow( $row );
00371 
00372                 $pageId = intval( $row->page_id );
00373                 $this->mAllPages[$row->page_namespace][$row->page_title] = $pageId;
00374                 $this->mTitles[] = $title;
00375 
00376                 if ( $this->mResolveRedirects && $row->page_is_redirect == '1' ) {
00377                         $this->mPendingRedirectIDs[$pageId] = $title;
00378                 } else {
00379                         $this->mGoodTitles[$pageId] = $title;
00380                 }
00381 
00382                 foreach ( $this->mRequestedPageFields as $fieldName => &$fieldValues ) {
00383                         $fieldValues[$pageId] = $row-> $fieldName;
00384                 }
00385         }
00386 
00390         public function finishPageSetGeneration() {
00391                 $this->profileIn();
00392                 $this->resolvePendingRedirects();
00393                 $this->profileOut();
00394         }
00395 
00412         private function initFromTitles( $titles ) {
00413                 // Get validated and normalized title objects
00414                 $linkBatch = $this->processTitlesArray( $titles );
00415                 if ( $linkBatch->isEmpty() ) {
00416                         return;
00417                 }
00418 
00419                 $db = $this->getDB();
00420                 $set = $linkBatch->constructSet( 'page', $db );
00421 
00422                 // Get pageIDs data from the `page` table
00423                 $this->profileDBIn();
00424                 $res = $db->select( 'page', $this->getPageTableFields(), $set,
00425                                         __METHOD__ );
00426                 $this->profileDBOut();
00427 
00428                 // Hack: get the ns:titles stored in array(ns => array(titles)) format
00429                 $this->initFromQueryResult( $res, $linkBatch->data, true ); // process Titles
00430 
00431                 // Resolve any found redirects
00432                 $this->resolvePendingRedirects();
00433         }
00434 
00439         private function initFromPageIds( $pageids ) {
00440                 if ( !count( $pageids ) ) {
00441                         return;
00442                 }
00443 
00444                 $pageids = array_map( 'intval', $pageids ); // paranoia
00445                 $remaining = array_flip( $pageids );
00446 
00447                 $pageids = self::getPositiveIntegers( $pageids );
00448 
00449                 $res = null;
00450                 if ( count( $pageids ) ) {
00451                         $set = array(
00452                                 'page_id' => $pageids
00453                         );
00454                         $db = $this->getDB();
00455 
00456                         // Get pageIDs data from the `page` table
00457                         $this->profileDBIn();
00458                         $res = $db->select( 'page', $this->getPageTableFields(), $set,
00459                                                 __METHOD__ );
00460                         $this->profileDBOut();
00461                 }
00462 
00463                 $this->initFromQueryResult( $res, $remaining, false );  // process PageIDs
00464 
00465                 // Resolve any found redirects
00466                 $this->resolvePendingRedirects();
00467         }
00468 
00479         private function initFromQueryResult( $res, &$remaining = null, $processTitles = null ) {
00480                 if ( !is_null( $remaining ) && is_null( $processTitles ) ) {
00481                         ApiBase::dieDebug( __METHOD__, 'Missing $processTitles parameter when $remaining is provided' );
00482                 }
00483 
00484                 $usernames = array();
00485                 if ( $res ) {
00486                         foreach ( $res as $row ) {
00487                                 $pageId = intval( $row->page_id );
00488 
00489                                 // Remove found page from the list of remaining items
00490                                 if ( isset( $remaining ) ) {
00491                                         if ( $processTitles ) {
00492                                                 unset( $remaining[$row->page_namespace][$row->page_title] );
00493                                         } else {
00494                                                 unset( $remaining[$pageId] );
00495                                         }
00496                                 }
00497 
00498                                 // Store any extra fields requested by modules
00499                                 $this->processDbRow( $row );
00500 
00501                                 // Need gender information
00502                                 if( MWNamespace::hasGenderDistinction( $row->page_namespace ) ) {
00503                                         $usernames[] = $row->page_title;
00504                                 }
00505                         }
00506                 }
00507 
00508                 if ( isset( $remaining ) ) {
00509                         // Any items left in the $remaining list are added as missing
00510                         if ( $processTitles ) {
00511                                 // The remaining titles in $remaining are non-existent pages
00512                                 foreach ( $remaining as $ns => $dbkeys ) {
00513                                         foreach ( array_keys( $dbkeys ) as $dbkey ) {
00514                                                 $title = Title::makeTitle( $ns, $dbkey );
00515                                                 $this->mAllPages[$ns][$dbkey] = $this->mFakePageId;
00516                                                 $this->mMissingTitles[$this->mFakePageId] = $title;
00517                                                 $this->mFakePageId--;
00518                                                 $this->mTitles[] = $title;
00519 
00520                                                 // need gender information
00521                                                 if( MWNamespace::hasGenderDistinction( $ns ) ) {
00522                                                         $usernames[] = $dbkey;
00523                                                 }
00524                                         }
00525                                 }
00526                         } else {
00527                                 // The remaining pageids do not exist
00528                                 if ( !$this->mMissingPageIDs ) {
00529                                         $this->mMissingPageIDs = array_keys( $remaining );
00530                                 } else {
00531                                         $this->mMissingPageIDs = array_merge( $this->mMissingPageIDs, array_keys( $remaining ) );
00532                                 }
00533                         }
00534                 }
00535 
00536                 // Get gender information
00537                 $genderCache = GenderCache::singleton();
00538                 $genderCache->doQuery( $usernames, __METHOD__ );
00539         }
00540 
00546         private function initFromRevIDs( $revids ) {
00547                 if ( !count( $revids ) ) {
00548                         return;
00549                 }
00550 
00551                 $revids = array_map( 'intval', $revids ); // paranoia
00552                 $db = $this->getDB();
00553                 $pageids = array();
00554                 $remaining = array_flip( $revids );
00555 
00556                 $revids = self::getPositiveIntegers( $revids );
00557 
00558                 if ( count( $revids ) ) {
00559                         $tables = array( 'revision', 'page' );
00560                         $fields = array( 'rev_id', 'rev_page' );
00561                         $where = array( 'rev_id' => $revids, 'rev_page = page_id' );
00562 
00563                         // Get pageIDs data from the `page` table
00564                         $this->profileDBIn();
00565                         $res = $db->select( $tables, $fields, $where,  __METHOD__ );
00566                         foreach ( $res as $row ) {
00567                                 $revid = intval( $row->rev_id );
00568                                 $pageid = intval( $row->rev_page );
00569                                 $this->mGoodRevIDs[$revid] = $pageid;
00570                                 $pageids[$pageid] = '';
00571                                 unset( $remaining[$revid] );
00572                         }
00573                         $this->profileDBOut();
00574                 }
00575 
00576                 $this->mMissingRevIDs = array_keys( $remaining );
00577 
00578                 // Populate all the page information
00579                 $this->initFromPageIds( array_keys( $pageids ) );
00580         }
00581 
00587         private function resolvePendingRedirects() {
00588                 if ( $this->mResolveRedirects ) {
00589                         $db = $this->getDB();
00590                         $pageFlds = $this->getPageTableFields();
00591 
00592                         // Repeat until all redirects have been resolved
00593                         // The infinite loop is prevented by keeping all known pages in $this->mAllPages
00594                         while ( $this->mPendingRedirectIDs ) {
00595                                 // Resolve redirects by querying the pagelinks table, and repeat the process
00596                                 // Create a new linkBatch object for the next pass
00597                                 $linkBatch = $this->getRedirectTargets();
00598 
00599                                 if ( $linkBatch->isEmpty() ) {
00600                                         break;
00601                                 }
00602 
00603                                 $set = $linkBatch->constructSet( 'page', $db );
00604                                 if ( $set === false ) {
00605                                         break;
00606                                 }
00607 
00608                                 // Get pageIDs data from the `page` table
00609                                 $this->profileDBIn();
00610                                 $res = $db->select( 'page', $pageFlds, $set, __METHOD__ );
00611                                 $this->profileDBOut();
00612 
00613                                 // Hack: get the ns:titles stored in array(ns => array(titles)) format
00614                                 $this->initFromQueryResult( $res, $linkBatch->data, true );
00615                         }
00616                 }
00617         }
00618 
00626         private function getRedirectTargets() {
00627                 $lb = new LinkBatch();
00628                 $db = $this->getDB();
00629 
00630                 $this->profileDBIn();
00631                 $res = $db->select(
00632                         'redirect',
00633                         array(
00634                                 'rd_from',
00635                                 'rd_namespace',
00636                                 'rd_fragment',
00637                                 'rd_interwiki',
00638                                 'rd_title'
00639                         ), array( 'rd_from' => array_keys( $this->mPendingRedirectIDs ) ),
00640                         __METHOD__
00641                 );
00642                 $this->profileDBOut();
00643                 foreach ( $res as $row ) {
00644                         $rdfrom = intval( $row->rd_from );
00645                         $from = $this->mPendingRedirectIDs[$rdfrom]->getPrefixedText();
00646                         $to = Title::makeTitle( $row->rd_namespace, $row->rd_title, $row->rd_fragment, $row->rd_interwiki );
00647                         unset( $this->mPendingRedirectIDs[$rdfrom] );
00648                         if ( !isset( $this->mAllPages[$row->rd_namespace][$row->rd_title] ) ) {
00649                                 $lb->add( $row->rd_namespace, $row->rd_title );
00650                         }
00651                         $this->mRedirectTitles[$from] = $to;
00652                 }
00653 
00654                 if ( $this->mPendingRedirectIDs ) {
00655                         // We found pages that aren't in the redirect table
00656                         // Add them
00657                         foreach ( $this->mPendingRedirectIDs as $id => $title ) {
00658                                 $page = WikiPage::factory( $title );
00659                                 $rt = $page->insertRedirect();
00660                                 if ( !$rt ) {
00661                                         // What the hell. Let's just ignore this
00662                                         continue;
00663                                 }
00664                                 $lb->addObj( $rt );
00665                                 $this->mRedirectTitles[$title->getPrefixedText()] = $rt;
00666                                 unset( $this->mPendingRedirectIDs[$id] );
00667                         }
00668                 }
00669                 return $lb;
00670         }
00671 
00681         private function processTitlesArray( $titles ) {
00682                 $genderCache = GenderCache::singleton();
00683                 $genderCache->doTitlesArray( $titles, __METHOD__ );
00684 
00685                 $linkBatch = new LinkBatch();
00686 
00687                 foreach ( $titles as $title ) {
00688                         $titleObj = is_string( $title ) ? Title::newFromText( $title ) : $title;
00689                         if ( !$titleObj ) {
00690                                 // Handle invalid titles gracefully
00691                                 $this->mAllpages[0][$title] = $this->mFakePageId;
00692                                 $this->mInvalidTitles[$this->mFakePageId] = $title;
00693                                 $this->mFakePageId--;
00694                                 continue; // There's nothing else we can do
00695                         }
00696                         $unconvertedTitle = $titleObj->getPrefixedText();
00697                         $titleWasConverted = false;
00698                         $iw = $titleObj->getInterwiki();
00699                         if ( strval( $iw ) !== '' ) {
00700                                 // This title is an interwiki link.
00701                                 $this->mInterwikiTitles[$titleObj->getPrefixedText()] = $iw;
00702                         } else {
00703                                 // Variants checking
00704                                 global $wgContLang;
00705                                 if ( $this->mConvertTitles &&
00706                                                 count( $wgContLang->getVariants() ) > 1  &&
00707                                                 !$titleObj->exists() ) {
00708                                         // Language::findVariantLink will modify titleObj into
00709                                         // the canonical variant if possible
00710                                         $wgContLang->findVariantLink( $title, $titleObj );
00711                                         $titleWasConverted = $unconvertedTitle !== $titleObj->getPrefixedText();
00712                                 }
00713 
00714                                 if ( $titleObj->getNamespace() < 0 ) {
00715                                         // Handle Special and Media pages
00716                                         $titleObj = $titleObj->fixSpecialName();
00717                                         $this->mSpecialTitles[$this->mFakePageId] = $titleObj;
00718                                         $this->mFakePageId--;
00719                                 } else {
00720                                         // Regular page
00721                                         $linkBatch->addObj( $titleObj );
00722                                 }
00723                         }
00724 
00725                         // Make sure we remember the original title that was
00726                         // given to us. This way the caller can correlate new
00727                         // titles with the originally requested when e.g. the
00728                         // namespace is localized or the capitalization is
00729                         // different
00730                         if ( $titleWasConverted ) {
00731                                 $this->mConvertedTitles[$title] = $titleObj->getPrefixedText();
00732                         } elseif ( is_string( $title ) && $title !== $titleObj->getPrefixedText() ) {
00733                                 $this->mNormalizedTitles[$title] = $titleObj->getPrefixedText();
00734                         }
00735                 }
00736 
00737                 return $linkBatch;
00738         }
00739 
00746         private static function getPositiveIntegers( $array ) {
00747                 // bug 25734 API: possible issue with revids validation
00748                 // It seems with a load of revision rows, MySQL gets upset
00749                 // Remove any < 0 integers, as they can't be valid
00750                 foreach( $array as $i => $int ) {
00751                         if ( $int < 0 ) {
00752                                 unset( $array[$i] );
00753                         }
00754                 }
00755 
00756                 return $array;
00757         }
00758 
00759         public function getAllowedParams() {
00760                 return array(
00761                         'titles' => array(
00762                                 ApiBase::PARAM_ISMULTI => true
00763                         ),
00764                         'pageids' => array(
00765                                 ApiBase::PARAM_TYPE => 'integer',
00766                                 ApiBase::PARAM_ISMULTI => true
00767                         ),
00768                         'revids' => array(
00769                                 ApiBase::PARAM_TYPE => 'integer',
00770                                 ApiBase::PARAM_ISMULTI => true
00771                         )
00772                 );
00773         }
00774 
00775         public function getParamDescription() {
00776                 return array(
00777                         'titles' => 'A list of titles to work on',
00778                         'pageids' => 'A list of page IDs to work on',
00779                         'revids' => 'A list of revision IDs to work on'
00780                 );
00781         }
00782 
00783         public function getPossibleErrors() {
00784                 return array_merge( parent::getPossibleErrors(), array(
00785                         array( 'code' => 'multisource', 'info' => "Cannot use 'pageids' at the same time as 'dataSource'" ),
00786                         array( 'code' => 'multisource', 'info' => "Cannot use 'revids' at the same time as 'dataSource'" ),
00787                 ) );
00788         }
00789 
00790         public function getVersion() {
00791                 return __CLASS__ . ': $Id$';
00792         }
00793 }