MediaWiki  REL1_24
MediaWikiSite.php
Go to the documentation of this file.
00001 <?php
/**
 * Site subclass describing a wiki that runs MediaWiki.
 *
 * Adds MediaWiki-specific helpers on top of Site: conversion of titles to
 * DB-key form, normalization of page names through the remote wiki's
 * api.php, and accessors for the "page" and "file" path patterns.
 * Both path patterns use the placeholder "$1" for the variable part.
 */
class MediaWikiSite extends Site {
    /** Path type under which the pattern for script files (e.g. api.php) is stored. */
    const PATH_FILE = 'file_path';

    /** Path type under which the pattern for wiki page URLs is stored. */
    const PATH_PAGE = 'page_path';

    /**
     * Creates a new instance and assigns it the given global site id.
     *
     * Uses late static binding, so calling this on a subclass yields an
     * instance of that subclass.
     *
     * @param string $globalId
     *
     * @return MediaWikiSite
     */
    public static function newFromGlobalId( $globalId ) {
        $site = new static();
        $site->setGlobalId( $globalId );
        return $site;
    }

    /**
     * @param string $type Site type handed to the parent constructor;
     *        defaults to self::TYPE_MEDIAWIKI.
     */
    public function __construct( $type = self::TYPE_MEDIAWIKI ) {
        parent::__construct( $type );
    }

    /**
     * Converts a page title to DB-key form by replacing every space with
     * an underscore. No other normalization is applied.
     *
     * @param string $title
     *
     * @return string
     */
    public function toDBKey( $title ) {
        return str_replace( ' ', '_', $title );
    }

    /**
     * Returns the normalized form of the given page name.
     *
     * Outside of unit tests this issues an HTTP request against the site's
     * api.php (action=query with "redirects" and "converttitles" enabled)
     * and returns the title of the page the name finally resolves to.
     *
     * When MW_PHPUNIT_TEST is defined no request is made; the name is
     * normalized with the local Title class instead. Note that this may give
     * results that differ from what the remote site would return.
     *
     * @param string $pageName
     *
     * @return string|bool The normalized title, or false if the name could
     *         not be resolved (request failure, undecodable response, or a
     *         missing / invalid / untitled page record).
     * @throws MWException If $pageName is not a string.
     */
    public function normalizePageName( $pageName ) {
        // Check if we have strings as arguments.
        if ( !is_string( $pageName ) ) {
            throw new MWException( '$pageName must be a string' );
        }

        if ( defined( 'MW_PHPUNIT_TEST' ) ) {
            // If the code is under test, don't call out to other sites, just
            // normalize locally.
            // Note: this may cause results to be inconsistent with the actual
            // normalization used by the respective remote site!
            $t = Title::newFromText( $pageName );

            if ( !$t ) {
                // An unparsable name yields no Title object. Report it the same
                // way an invalid title is reported on the remote path instead
                // of calling a method on a non-object.
                wfDebugLog( "MediaWikiSite", "could not normalize invalid page name locally: $pageName" );
                return false;
            }

            return $t->getPrefixedText();
        }

        // Make sure the string is normalized into NFC (due to the bug 40017)
        // but do nothing to the whitespaces, that should work appropriately.
        // @see https://bugzilla.wikimedia.org/show_bug.cgi?id=40017
        $pageName = UtfNormal::cleanUp( $pageName );

        // Build the args for the specific call
        $args = array(
            'action' => 'query',
            'prop' => 'info',
            'redirects' => true,
            'converttitles' => true,
            'format' => 'json',
            'titles' => $pageName,
            // @todo options for maxlag and maxage
            // Note that maxlag will lead to a long delay before a reply is made,
            // but that maxage can avoid the extreme delay. On the other hand
            // maxage could be nice to use anyhow as it stops unnecessary requests.
            // Also consider smaxage if maxage is used.
        );

        $url = wfAppendQuery( $this->getFileUrl( 'api.php' ), $args );

        // Go on call the external site
        // @todo we need a good way to specify a timeout here.
        $ret = Http::get( $url );

        if ( $ret === false ) {
            wfDebugLog( "MediaWikiSite", "call to external site failed: $url" );
            return false;
        }

        $data = FormatJson::decode( $ret, true );

        if ( !is_array( $data ) ) {
            wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret );
            return false;
        }

        // May be false when no matching record is found; the isset() checks
        // below are all false in that case, so it ends in the "no page title"
        // branch.
        $page = static::extractPageRecord( $data, $pageName );

        if ( isset( $page['missing'] ) ) {
            wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for a missing page title! "
                . $ret );
            return false;
        }

        if ( isset( $page['invalid'] ) ) {
            wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for an invalid page title! "
                . $ret );
            return false;
        }

        if ( !isset( $page['title'] ) ) {
            wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret );
            return false;
        }

        return $page['title'];
    }

    /**
     * Picks the page record belonging to $pageTitle out of a decoded
     * action=query response.
     *
     * If the response contains exactly one page, that page is returned
     * directly. Otherwise the title is followed through the "normalized",
     * "converted" and "redirects" lists (each mapping "from" to "to") and the
     * entry of "pages" whose "title" equals the resulting title is returned.
     *
     * @param array $externalData Decoded API response.
     * @param string $pageTitle Title that was sent to the API.
     *
     * @return array|bool The page record, or false if the data is malformed
     *         or the title cannot be resolved to exactly one record.
     */
    private static function extractPageRecord( $externalData, $pageTitle ) {
        // Make initial checks and return if prerequisites are not met. This
        // is done before anything is read from the structure.
        if ( !is_array( $externalData ) || !isset( $externalData['query'] ) ) {
            return false;
        }

        $query = $externalData['query'];

        // If there is a special case with only one returned page
        // we can cheat, and only return
        // the single page in the "pages" substructure.
        if ( isset( $query['pages'] ) && is_array( $query['pages'] ) ) {
            $pages = array_values( $query['pages'] );
            if ( count( $pages ) === 1 ) {
                return $pages[0];
            }
        }

        // Loop over the differently named structures, that otherwise are similar.
        // Order matters: the title is rewritten step by step before the
        // final lookup in "pages".
        $structs = array(
            'normalized' => 'from',
            'converted' => 'from',
            'redirects' => 'from',
            'pages' => 'title'
        );

        foreach ( $structs as $listId => $fieldId ) {
            // Check if the substructure exists at all (and is usable).
            if ( !isset( $query[$listId] ) || !is_array( $query[$listId] ) ) {
                continue;
            }

            // Filter the substructure down to what we actually are using.
            // array_filter() preserves the original keys, so the result is
            // re-indexed to make the single hit reliably available as [0].
            $hits = array_values( array_filter(
                $query[$listId],
                function ( $entry ) use ( $fieldId, $pageTitle ) {
                    return isset( $entry[$fieldId] ) && $entry[$fieldId] === $pageTitle;
                }
            ) );
            $hitCount = count( $hits );

            if ( $fieldId === 'from' ) {
                // Still looping over normalization, conversion or redirects:
                // keep the new page title for later rounds.
                if ( $hitCount > 1 ) {
                    // Ambiguous mapping.
                    return false;
                }

                if ( $hitCount === 1 ) {
                    if ( !isset( $hits[0]['to'] ) ) {
                        // Mapping without a target cannot be followed.
                        return false;
                    }
                    $pageTitle = $hits[0]['to'];
                }
            } else {
                // On the pages structure: exactly one match is a result,
                // anything else is a failure.
                return $hitCount === 1 ? $hits[0] : false;
            }
        }

        // Reached when the response has no "pages" list to look the title up in.
        return false;
    }

    /**
     * Returns the path type used for links to this site, which is always
     * self::PATH_PAGE for this class.
     *
     * @return string
     */
    public function getLinkPathType() {
        return self::PATH_PAGE;
    }

    /**
     * Returns the path component (as extracted by parse_url()) of the
     * stored page path pattern.
     *
     * @return string|null|bool Result of parse_url() with PHP_URL_PATH.
     */
    public function getRelativePagePath() {
        return parse_url( $this->getPath( self::PATH_PAGE ), PHP_URL_PATH );
    }

    /**
     * Returns the path component (as extracted by parse_url()) of the
     * stored file path pattern.
     *
     * @return string|null|bool Result of parse_url() with PHP_URL_PATH.
     */
    public function getRelativeFilePath() {
        return parse_url( $this->getPath( self::PATH_FILE ), PHP_URL_PATH );
    }

    /**
     * Stores the given pattern as this site's page path.
     *
     * @param string $path Pattern in which "$1" stands for the page name.
     */
    public function setPagePath( $path ) {
        $this->setPath( self::PATH_PAGE, $path );
    }

    /**
     * Stores the given pattern as this site's file path.
     *
     * @param string $path Pattern in which "$1" stands for the file name.
     */
    public function setFilePath( $path ) {
        $this->setPath( self::PATH_FILE, $path );
    }

    /**
     * Returns the URL of the given page on this site.
     *
     * The page name is trimmed, converted to DB-key form and URL-encoded
     * with wfUrlencode() before it replaces "$1" in the link path. No
     * normalization beyond that is performed. If $pageName is false, the
     * link path is returned with the "$1" placeholder left in place.
     *
     * @param string|bool $pageName Page name, or false to get the raw pattern.
     *
     * @return string|bool The URL, or false if getLinkPath() returned false.
     *         NOTE(review): getLinkPath() is defined outside this file; any
     *         other "not set" value it may return is passed through as is.
     */
    public function getPageUrl( $pageName = false ) {
        $url = $this->getLinkPath();

        if ( $url === false ) {
            return false;
        }

        if ( $pageName !== false ) {
            $pageName = $this->toDBKey( trim( $pageName ) );
            $url = str_replace( '$1', wfUrlencode( $pageName ), $url );
        }

        return $url;
    }

    /**
     * Returns the URL of the given script file (e.g. "api.php") on this site
     * by substituting it for "$1" in the stored file path pattern.
     *
     * The file name is inserted verbatim, without any encoding. With the
     * default $path of false the placeholder is replaced by an empty string.
     *
     * @param string|bool $path File name to insert.
     *
     * @return string|bool The URL; if getPath() returns false, that value is
     *         returned unchanged.
     */
    public function getFileUrl( $path = false ) {
        $filePath = $this->getPath( self::PATH_FILE );

        if ( $filePath !== false ) {
            $filePath = str_replace( '$1', $path, $filePath );
        }

        return $filePath;
    }
}