MediaWiki  REL1_21
MediaWikiSite.php
Go to the documentation of this file.
00001 <?php
/**
 * Site implementation for wikis that run MediaWiki.
 *
 * Adds MediaWiki specific path handling (page path and file path patterns
 * containing a "$1" placeholder) and page name normalization, which is done
 * by querying the remote wiki's api.php.
 */
class MediaWikiSite extends Site {

	/** Path type key of the pattern used to build file URLs such as api.php. */
	const PATH_FILE = 'file_path';

	/** Path type key of the pattern used to build page URLs. */
	const PATH_PAGE = 'page_path';

	/**
	 * Creates a new site object and assigns it the given global id.
	 *
	 * Uses late static binding, so calling this on a subclass yields an
	 * instance of that subclass.
	 *
	 * @param string $globalId
	 *
	 * @return MediaWikiSite
	 */
	public static function newFromGlobalId( $globalId ) {
		$site = new static();
		$site->setGlobalId( $globalId );
		return $site;
	}

	/**
	 * Constructor.
	 *
	 * @param string $type Site type handed on to the parent constructor,
	 *        defaults to self::TYPE_MEDIAWIKI.
	 */
	public function __construct( $type = self::TYPE_MEDIAWIKI ) {
		parent::__construct( $type );
	}

	/**
	 * Converts a page title to its database key form by replacing every
	 * space with an underscore. No other normalization is applied.
	 *
	 * @param string $title
	 *
	 * @return string
	 */
	public function toDBKey( $title ) {
		return str_replace( ' ', '_', $title );
	}

	/**
	 * Returns the normalized form of the given page title, as reported by the
	 * remote wiki (following normalization, title conversion and redirects).
	 *
	 * Outside of tests this performs an HTTP request against the api.php of
	 * the site. When MW_PHPUNIT_TEST is defined no request is made and the
	 * title is normalized locally instead, which may differ from what the
	 * remote site would answer.
	 *
	 * @param string $pageName
	 *
	 * @return string|bool The normalized title, or false if the request
	 *         failed, the response was unusable, or the page is missing or
	 *         has an invalid title.
	 *
	 * @throws MWException If $pageName is not a string.
	 */
	public function normalizePageName( $pageName ) {
		// Check if we have strings as arguments.
		if ( !is_string( $pageName ) ) {
			throw new MWException( '$pageName must be a string' );
		}

		if ( defined( 'MW_PHPUNIT_TEST' ) ) {
			// If the code is under test, don't call out to other sites, just normalize locally.
			// Note: this may cause results to be inconsistent with the actual normalization
			// used by the respective remote site!
			$t = Title::newFromText( $pageName );

			// An unparsable title yields no Title object; report that as a failure
			// like every other error path here instead of calling a method on null.
			if ( !( $t instanceof Title ) ) {
				wfDebugLog( "MediaWikiSite", "could not normalize invalid page title locally: $pageName" );
				return false;
			}

			return $t->getPrefixedText();
		}

		// Make sure the string is normalized into NFC (due to the bug 40017)
		// but do nothing to the whitespaces, that should work appropriately.
		// @see https://bugzilla.wikimedia.org/show_bug.cgi?id=40017
		$pageName = UtfNormal::cleanUp( $pageName );

		// Build the args for the specific call
		$args = array(
			'action' => 'query',
			'prop' => 'info',
			'redirects' => true,
			'converttitles' => true,
			'format' => 'json',
			'titles' => $pageName,
			//@todo: options for maxlag and maxage
			// Note that maxlag will lead to a long delay before a reply is made,
			// but that maxage can avoid the extreme delay. On the other hand
			// maxage could be nice to use anyhow as it stops unnecessary requests.
			// Also consider smaxage if maxage is used.
		);

		$apiUrl = $this->getFileUrl( 'api.php' );

		// Without a configured file path there is no endpoint to talk to; bail out
		// rather than firing a request at a URL that consists of a query string only.
		if ( !is_string( $apiUrl ) || $apiUrl === '' ) {
			wfDebugLog( "MediaWikiSite", "no file path known for the site, cannot call api.php" );
			return false;
		}

		$url = $apiUrl . '?' . wfArrayToCgi( $args );

		// Go on call the external site
		//@todo: we need a good way to specify a timeout here.
		$ret = Http::get( $url );

		if ( $ret === false ) {
			wfDebugLog( "MediaWikiSite", "call to external site failed: $url" );
			return false;
		}

		$data = FormatJson::decode( $ret, true );

		if ( !is_array( $data ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret );
			return false;
		}

		// The helper is private, so it has to be resolved against this class
		// (self) and not against whatever subclass the call was made on.
		$page = self::extractPageRecord( $data, $pageName );

		if ( isset( $page['missing'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for a missing page title! " . $ret );
			return false;
		}

		if ( isset( $page['invalid'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for an invalid page title! " . $ret );
			return false;
		}

		// Also covers the case where no page record could be extracted at all.
		if ( !isset( $page['title'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret );
			return false;
		}

		return $page['title'];
	}

	/**
	 * Gets the page record for the given title out of a decoded API query result.
	 *
	 * If the result holds exactly one page, that page is returned directly.
	 * Otherwise the title is followed through the "normalized", "converted"
	 * and "redirects" lists (in that order) and the resulting title is looked
	 * up in the "pages" list.
	 *
	 * @param array $externalData Decoded response of the API query.
	 * @param string $pageTitle Title that was asked for.
	 *
	 * @return array|bool The page record, or false if the data is malformed
	 *         or the title could not be resolved to exactly one page.
	 */
	private static function extractPageRecord( $externalData, $pageTitle ) {
		// Make initial checks and return if prerequisites are not met.
		if ( !is_array( $externalData )
			|| !isset( $externalData['query'] )
			|| !is_array( $externalData['query'] )
		) {
			return false;
		}

		$query = $externalData['query'];

		// If there is a special case with only one returned page
		// we can cheat, and only return
		// the single page in the "pages" substructure.
		if ( isset( $query['pages'] ) && is_array( $query['pages'] ) ) {
			$pages = array_values( $query['pages'] );
			if ( count( $pages ) === 1 ) {
				return $pages[0];
			}
		}

		// This is only used during internal testing, as it is assumed
		// a more optimal (and lossfree) storage.
		// Loop over the different named structures, that otherwise are similar.
		$structs = array(
			'normalized' => 'from',
			'converted' => 'from',
			'redirects' => 'from',
			'pages' => 'title'
		);

		foreach ( $structs as $listId => $fieldId ) {
			// Check if the substructure exists at all.
			if ( !isset( $query[$listId] ) || !is_array( $query[$listId] ) ) {
				continue;
			}

			// Filter the substructure down to what we actually are using.
			// array_filter() keeps the original keys, so the result is re-indexed
			// to make the single hit reliably available at offset 0.
			$collectedHits = array_values( array_filter(
				$query[$listId],
				function( $a ) use ( $fieldId, $pageTitle ) {
					// Entries lacking the field can never match.
					return isset( $a[$fieldId] ) && $a[$fieldId] === $pageTitle;
				}
			) );

			$hitCount = count( $collectedHits );

			if ( $fieldId === 'from' ) {
				// If still looping over normalization, conversion or redirects,
				// then we need to keep the new page title for later rounds.
				if ( $hitCount > 1 ) {
					// Ambiguous mapping, give up.
					return false;
				}

				if ( $hitCount === 1 ) {
					if ( !isset( $collectedHits[0]['to'] ) ) {
						// A mapping without a target cannot be followed.
						return false;
					}

					$pageTitle = $collectedHits[0]['to'];
				}
			} else {
				// On the pages structure: exactly one match is the answer,
				// anything else is a failure.
				return $hitCount === 1 ? $collectedHits[0] : false;
			}
		}

		// Only reached when the result has no "pages" list.
		return false;
	}

	/**
	 * Returns the path type used for links, which is the page path.
	 *
	 * @return string Always self::PATH_PAGE.
	 */
	public function getLinkPathType() {
		return self::PATH_PAGE;
	}

	/**
	 * Returns the path component (as extracted by parse_url) of the page path.
	 *
	 * @return string|bool|null Whatever parse_url() yields for PHP_URL_PATH.
	 */
	public function getRelativePagePath() {
		return parse_url( $this->getPath( self::PATH_PAGE ), PHP_URL_PATH );
	}

	/**
	 * Returns the path component (as extracted by parse_url) of the file path.
	 *
	 * @return string|bool|null Whatever parse_url() yields for PHP_URL_PATH.
	 */
	public function getRelativeFilePath() {
		return parse_url( $this->getPath( self::PATH_FILE ), PHP_URL_PATH );
	}

	/**
	 * Sets the page path of the site.
	 *
	 * @param string $path Path pattern; getPageUrl() replaces "$1" in it
	 *        with the page name.
	 */
	public function setPagePath( $path ) {
		$this->setPath( self::PATH_PAGE, $path );
	}

	/**
	 * Sets the file path of the site.
	 *
	 * @param string $path Path pattern; getFileUrl() replaces "$1" in it
	 *        with the file path.
	 */
	public function setFilePath( $path ) {
		$this->setPath( self::PATH_FILE, $path );
	}

	/**
	 * Returns the URL of the given page on this site.
	 *
	 * The page name is trimmed, converted to its database key form and URL
	 * encoded with wfUrlencode() before it replaces the "$1" placeholder of
	 * the link path. It is not normalized any further; use
	 * normalizePageName() beforehand if that is needed.
	 *
	 * @param bool|string $pageName Page name, or false to get the link path
	 *        with the "$1" placeholder left in place.
	 *
	 * @return string|bool The URL, or false if getLinkPath() returned false.
	 */
	public function getPageUrl( $pageName = false ) {
		$url = $this->getLinkPath();

		if ( $url === false ) {
			return false;
		}

		if ( $pageName !== false ) {
			$pageName = $this->toDBKey( trim( $pageName ) );
			$url = str_replace( '$1', wfUrlencode( $pageName ), $url );
		}

		return $url;
	}

	/**
	 * Returns the URL of the given file (for example "api.php") on this site,
	 * built by replacing the "$1" placeholder of the file path.
	 *
	 * The path is inserted as given, without any encoding.
	 *
	 * @param bool|string $path File path to insert; with the default of false
	 *        the placeholder is replaced by an empty string.
	 *
	 * @return string|bool The URL, or false if getPath() returned false for
	 *         the file path.
	 */
	public function getFileUrl( $path = false ) {
		$filePath = $this->getPath( self::PATH_FILE );

		if ( $filePath !== false ) {
			$filePath = str_replace( '$1', $path, $filePath );
		}

		return $filePath;
	}

}