MediaWiki
REL1_22
|
<?php

/**
 * Site implementation describing a MediaWiki installation.
 *
 * Adds two named paths on top of the generic Site class (the page path and
 * the file path) and knows how to normalize page titles by asking the remote
 * wiki's API.
 */
class MediaWikiSite extends Site {

	/** Path type key for the wiki's script/file path (contains a "$1" placeholder). */
	const PATH_FILE = 'file_path';

	/** Path type key for the wiki's page (article) path (contains a "$1" placeholder). */
	const PATH_PAGE = 'page_path';

	/**
	 * Creates a new instance and assigns it the given global site identifier.
	 *
	 * Uses late static binding, so calling this on a subclass returns an
	 * instance of that subclass.
	 *
	 * @param string $globalId
	 *
	 * @return MediaWikiSite
	 */
	public static function newFromGlobalId( $globalId ) {
		$site = new static();
		$site->setGlobalId( $globalId );
		return $site;
	}

	/**
	 * Constructor.
	 *
	 * @param string $type Site type passed on to the parent constructor;
	 *  defaults to self::TYPE_MEDIAWIKI.
	 */
	public function __construct( $type = self::TYPE_MEDIAWIKI ) {
		parent::__construct( $type );
	}

	/**
	 * Converts a page title to database key form by replacing every space
	 * with an underscore. No other normalization is applied.
	 *
	 * @param string $title
	 *
	 * @return string
	 */
	public function toDBKey( $title ) {
		return str_replace( ' ', '_', $title );
	}

	/**
	 * Returns the normalized form of the given page title as reported by the
	 * remote wiki, following title normalization, language-variant conversion
	 * and redirects.
	 *
	 * Outside of unit tests this performs an HTTP request against the remote
	 * site's api.php. When MW_PHPUNIT_TEST is defined no request is made and
	 * the title is normalized locally through Title instead, which may differ
	 * from what the remote site would answer.
	 *
	 * @param string $pageName
	 *
	 * @return string|bool The normalized title, or false if the title is
	 *  invalid, the page is missing, or the remote call failed.
	 *
	 * @throws MWException If $pageName is not a string.
	 */
	public function normalizePageName( $pageName ) {
		if ( !is_string( $pageName ) ) {
			throw new MWException( '$pageName must be a string' );
		}

		if ( defined( 'MW_PHPUNIT_TEST' ) ) {
			// If the code is under test, don't call out to other sites, just normalize locally.
			// Note: this may cause results to be inconsistent with the actual
			// normalization used by the respective remote site!
			$t = Title::newFromText( $pageName );

			// Title::newFromText() yields null for an invalid title; report
			// that as a failure instead of calling a method on null.
			if ( !$t ) {
				return false;
			}

			return $t->getPrefixedText();
		}

		// Make sure the string is normalized into NFC (due to the bug 40017)
		// but do nothing to the whitespaces, that should work appropriately.
		// @see https://bugzilla.wikimedia.org/show_bug.cgi?id=40017
		$pageName = UtfNormal::cleanUp( $pageName );

		// Without a usable file path there is no API endpoint to talk to;
		// bail out before issuing a request to a bogus URL.
		$apiUrl = $this->getFileUrl( 'api.php' );
		if ( !is_string( $apiUrl ) || $apiUrl === '' ) {
			wfDebugLog( "MediaWikiSite", "no file path known, cannot build the API URL" );
			return false;
		}

		// Build the args for the specific call
		$args = array(
			'action' => 'query',
			'prop' => 'info',
			'redirects' => true,
			'converttitles' => true,
			'format' => 'json',
			'titles' => $pageName,
			// @todo options for maxlag and maxage
			// Note that maxlag will lead to a long delay before a reply is made,
			// but that maxage can avoid the extreme delay. On the other hand
			// maxage could be nice to use anyhow as it stops unnecessary requests.
			// Also consider smaxage if maxage is used.
		);

		$url = wfAppendQuery( $apiUrl, $args );

		// Go on call the external site
		// @todo we need a good way to specify a timeout here.
		$ret = Http::get( $url );

		if ( $ret === false ) {
			wfDebugLog( "MediaWikiSite", "call to external site failed: $url" );
			return false;
		}

		$data = FormatJson::decode( $ret, true );

		if ( !is_array( $data ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret );
			return false;
		}

		// $page is false when no single matching record could be determined;
		// in that case the final "no title" check below reports the failure.
		$page = static::extractPageRecord( $data, $pageName );

		if ( isset( $page['missing'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for a missing page title! " . $ret );
			return false;
		}

		if ( isset( $page['invalid'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for an invalid page title! " . $ret );
			return false;
		}

		if ( !isset( $page['title'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret );
			return false;
		}

		return $page['title'];
	}

	/**
	 * Finds the page record belonging to $pageTitle in a decoded API query
	 * result.
	 *
	 * If the result holds exactly one page, that page is returned directly.
	 * Otherwise the title is traced through the "normalized", "converted" and
	 * "redirects" lists (each mapping "from" to "to") and the resulting title
	 * is looked up in the "pages" list.
	 *
	 * @param array $externalData Decoded API response.
	 * @param string $pageTitle Title that was sent to the API.
	 *
	 * @return array|bool The page record, or false if the data is malformed
	 *  or the title does not resolve to exactly one record.
	 */
	private static function extractPageRecord( $externalData, $pageTitle ) {
		// Make initial checks and return if prerequisites are not met.
		if ( !is_array( $externalData ) || !isset( $externalData['query'] ) ) {
			return false;
		}

		$query = $externalData['query'];

		// If there is a special case with only one returned page
		// we can cheat, and only return
		// the single page in the "pages" substructure.
		if ( isset( $query['pages'] ) && is_array( $query['pages'] ) ) {
			$pages = array_values( $query['pages'] );
			if ( count( $pages ) === 1 ) {
				return $pages[0];
			}
		}

		// The remainder is only used during internal testing, as it is assumed
		// a more optimal (and lossfree) storage.

		// Loop over the four differently named structures, that otherwise are similar.
		// The order matters: each "from" list may rewrite the title used by the next.
		$structs = array(
			'normalized' => 'from',
			'converted' => 'from',
			'redirects' => 'from',
			'pages' => 'title'
		);

		foreach ( $structs as $listId => $fieldId ) {
			// Check if the substructure exist at all.
			if ( !isset( $query[$listId] ) || !is_array( $query[$listId] ) ) {
				continue;
			}

			// Filter the substructure down to what we actually are using.
			// array_filter() preserves keys, so reindex to make the first
			// hit reliably available at offset 0.
			$collectedHits = array_values( array_filter(
				$query[$listId],
				function ( $a ) use ( $fieldId, $pageTitle ) {
					// Entries lacking the field (e.g. invalid pages without a
					// title) simply do not match.
					return is_array( $a )
						&& isset( $a[$fieldId] )
						&& $a[$fieldId] === $pageTitle;
				}
			) );

			$hitCount = count( $collectedHits );

			if ( $fieldId === 'title' ) {
				// On the pages structure: exactly one match is the answer,
				// anything else is a failure.
				return $hitCount === 1 ? $collectedHits[0] : false;
			}

			// Still looping over normalization, conversion or redirects:
			// keep the new page title for later rounds.
			if ( $hitCount > 1 ) {
				// Ambiguous mapping.
				return false;
			}

			if ( $hitCount === 1 ) {
				if ( !isset( $collectedHits[0]['to'] ) ) {
					// Mapping entry without a target; cannot follow it.
					return false;
				}
				$pageTitle = $collectedHits[0]['to'];
			}
		}

		// Reached when the "pages" structure is absent.
		return false;
	}

	/**
	 * Returns the path type used for links to this site, which for MediaWiki
	 * sites is the page path.
	 *
	 * @return string
	 */
	public function getLinkPathType() {
		return self::PATH_PAGE;
	}

	/**
	 * Returns the path component (as extracted by parse_url) of the page path.
	 *
	 * @return string|bool|null Result of parse_url() with PHP_URL_PATH.
	 */
	public function getRelativePagePath() {
		return parse_url( $this->getPath( self::PATH_PAGE ), PHP_URL_PATH );
	}

	/**
	 * Returns the path component (as extracted by parse_url) of the file path.
	 *
	 * @return string|bool|null Result of parse_url() with PHP_URL_PATH.
	 */
	public function getRelativeFilePath() {
		return parse_url( $this->getPath( self::PATH_FILE ), PHP_URL_PATH );
	}

	/**
	 * Sets the page path of this site.
	 *
	 * @param string $path Expected to contain a "$1" placeholder for the
	 *  page name, since getPageUrl() substitutes it.
	 */
	public function setPagePath( $path ) {
		$this->setPath( self::PATH_PAGE, $path );
	}

	/**
	 * Sets the file path of this site.
	 *
	 * @param string $path Expected to contain a "$1" placeholder for the
	 *  file name, since getFileUrl() substitutes it.
	 */
	public function setFilePath( $path ) {
		$this->setPath( self::PATH_FILE, $path );
	}

	/**
	 * Returns the URL of the given page on this site.
	 *
	 * The page name is trimmed, converted to database key form and URL
	 * encoded before it replaces the "$1" placeholder in the link path.
	 * When no page name is given the link path is returned with the
	 * placeholder left in place.
	 *
	 * @param string|bool $pageName Page name, or false to get the raw path.
	 *
	 * @return string|bool The URL, or false if getLinkPath() reports false.
	 */
	public function getPageUrl( $pageName = false ) {
		$url = $this->getLinkPath();

		if ( $url === false ) {
			return false;
		}

		if ( $pageName !== false ) {
			$pageName = $this->toDBKey( trim( $pageName ) );
			$url = str_replace( '$1', wfUrlencode( $pageName ), $url );
		}

		return $url;
	}

	/**
	 * Returns the URL of the given file (such as "api.php") on this site by
	 * substituting it for the "$1" placeholder in the file path.
	 *
	 * With the default $path of false the placeholder is replaced by an
	 * empty string.
	 *
	 * @param string|bool $path File name relative to the file path.
	 *
	 * @return string|bool The URL, or false if getPath() reports false for
	 *  the file path.
	 */
	public function getFileUrl( $path = false ) {
		$filePath = $this->getPath( self::PATH_FILE );

		if ( $filePath !== false ) {
			$filePath = str_replace( '$1', $path, $filePath );
		}

		return $filePath;
	}

}