MediaWiki  REL1_22
MediaWikiSite.php
Go to the documentation of this file.
00001 <?php
/**
 * Site subclass for sites that run MediaWiki.
 *
 * Adds page-name normalization (by asking the remote site's web API) and
 * helpers for building page and file URLs from the paths stored under
 * PATH_PAGE and PATH_FILE.
 */
class MediaWikiSite extends Site {

    /** Path type key under which the file (script) path pattern is stored. */
    const PATH_FILE = 'file_path';

    /** Path type key under which the page (article) path pattern is stored. */
    const PATH_PAGE = 'page_path';

    /**
     * Creates a new site object and assigns it the given global ID.
     *
     * Uses late static binding, so calling this on a subclass yields an
     * instance of that subclass.
     *
     * @param string $globalId
     *
     * @return MediaWikiSite
     */
    public static function newFromGlobalId( $globalId ) {
        $site = new static();
        $site->setGlobalId( $globalId );
        return $site;
    }

    /**
     * Constructor.
     *
     * @param string $type Site type handed to the parent constructor;
     *        defaults to self::TYPE_MEDIAWIKI.
     */
    public function __construct( $type = self::TYPE_MEDIAWIKI ) {
        parent::__construct( $type );
    }

    /**
     * Converts a page title to its database-key form by replacing every
     * space with an underscore. No other normalization is applied.
     *
     * @param string $title
     *
     * @return string
     */
    public function toDBKey( $title ) {
        return str_replace( ' ', '_', $title );
    }

    /**
     * Returns the normalized form of the given page title.
     *
     * Outside of tests this issues an action=query request (with redirect
     * resolution and title conversion enabled) against the site's api.php
     * and returns the title of the resulting page record. When the
     * MW_PHPUNIT_TEST constant is defined, no remote call is made and the
     * title is normalized locally through Title instead.
     *
     * @param string $pageName
     *
     * @return string|bool The normalized title, or false when the title
     *         cannot be normalized (request failed, bad JSON, page missing
     *         or invalid, no title in the reply, or - in test mode - the
     *         name could not be parsed as a title).
     * @throws MWException if $pageName is not a string
     */
    public function normalizePageName( $pageName ) {
        if ( !is_string( $pageName ) ) {
            throw new MWException( '$pageName must be a string' );
        }

        if ( defined( 'MW_PHPUNIT_TEST' ) ) {
            // If the code is under test, don't call out to other sites, just normalize locally.
            // Note: this may cause results to be inconsistent with the actual normalization
            // used by the respective remote site!
            $t = Title::newFromText( $pageName );

            // Title::newFromText() gives no Title object for unparsable input;
            // report that like every other failure instead of calling a method on null.
            if ( !$t ) {
                return false;
            }

            return $t->getPrefixedText();
        }

        // Make sure the string is normalized into NFC (due to the bug 40017)
        // but do nothing to the whitespaces, that should work appropriately.
        // @see https://bugzilla.wikimedia.org/show_bug.cgi?id=40017
        $pageName = UtfNormal::cleanUp( $pageName );

        // Build the args for the specific call
        $args = array(
            'action' => 'query',
            'prop' => 'info',
            'redirects' => true,
            'converttitles' => true,
            'format' => 'json',
            'titles' => $pageName,
            // @todo options for maxlag and maxage
            // Note that maxlag will lead to a long delay before a reply is made,
            // but that maxage can avoid the extreme delay. On the other hand
            // maxage could be nice to use anyhow as it stops unnecessary requests.
            // Also consider smaxage if maxage is used.
        );

        $url = wfAppendQuery( $this->getFileUrl( 'api.php' ), $args );

        // Call the external site.
        // @todo we need a good way to specify a timeout here.
        $ret = Http::get( $url );

        if ( $ret === false ) {
            wfDebugLog( "MediaWikiSite", "call to external site failed: $url" );
            return false;
        }

        $data = FormatJson::decode( $ret, true );

        if ( !is_array( $data ) ) {
            wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret );
            return false;
        }

        // extractPageRecord() is private and therefore cannot be overridden;
        // self:: avoids a late-static-binding lookup on a subclass.
        $page = self::extractPageRecord( $data, $pageName );

        if ( isset( $page['missing'] ) ) {
            wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for a missing page title! " . $ret );
            return false;
        }

        if ( isset( $page['invalid'] ) ) {
            wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for an invalid page title! " . $ret );
            return false;
        }

        // Also covers the case where no page record could be extracted at all.
        if ( !isset( $page['title'] ) ) {
            wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret );
            return false;
        }

        return $page['title'];
    }

    /**
     * Picks the page record that belongs to $pageTitle out of a decoded
     * action=query reply.
     *
     * If the reply holds exactly one page, that record is returned as is.
     * Otherwise the title is followed through the 'normalized', 'converted'
     * and 'redirects' lists (each mapping 'from' to 'to'), and the entry of
     * 'pages' whose 'title' equals the resulting title is returned.
     *
     * @param array $externalData Decoded API reply
     * @param string $pageTitle Title as it was sent to the API
     *
     * @return array|bool The page record, or false if the reply is not
     *         usable or does not identify exactly one matching record.
     */
    private static function extractPageRecord( $externalData, $pageTitle ) {
        // Check the prerequisites before indexing into the data.
        if ( !is_array( $externalData ) || !isset( $externalData['query'] ) ) {
            return false;
        }

        // If there is a special case with only one returned page
        // we can cheat, and only return
        // the single page in the "pages" substructure.
        if ( isset( $externalData['query']['pages'] ) ) {
            $pages = array_values( $externalData['query']['pages'] );
            if ( count( $pages ) === 1 ) {
                return $pages[0];
            }
        }

        // Loop over the differently named structures, that otherwise are similar.
        // Order matters: the title is rewritten step by step before 'pages' is searched.
        $structs = array(
            'normalized' => 'from',
            'converted' => 'from',
            'redirects' => 'from',
            'pages' => 'title'
        );

        foreach ( $structs as $listId => $fieldId ) {
            // Check if the substructure exists at all.
            if ( !isset( $externalData['query'][$listId] ) ) {
                continue;
            }

            // Filter the substructure down to the entries about the current title.
            // array_filter() preserves keys, so re-index the result: otherwise
            // offset 0 only exists when the match happens to be the first entry.
            $collectedHits = array_values( array_filter(
                array_values( $externalData['query'][$listId] ),
                function( $a ) use ( $fieldId, $pageTitle ) {
                    // Entries lacking the field can never match.
                    return isset( $a[$fieldId] ) && $a[$fieldId] === $pageTitle;
                }
            ) );
            $hitCount = count( $collectedHits );

            if ( $fieldId === 'from' ) {
                // Still looping over normalization, conversion or redirects:
                // keep the new page title for later rounds.
                if ( $hitCount > 1 ) {
                    // Ambiguous mapping, give up.
                    return false;
                }
                if ( $hitCount === 1 ) {
                    $pageTitle = $collectedHits[0]['to'];
                }
            } else {
                // On the pages structure: only a unique match is a usable result.
                return $hitCount === 1 ? $collectedHits[0] : false;
            }
        }

        // Reached only when the reply has no 'pages' list.
        return false;
    }

    /**
     * Returns the path type used for links to this site: self::PATH_PAGE.
     *
     * @return string
     */
    public function getLinkPathType() {
        return self::PATH_PAGE;
    }

    /**
     * Returns the path component (as extracted by parse_url()) of the path
     * stored under self::PATH_PAGE.
     *
     * @return string
     */
    public function getRelativePagePath() {
        return parse_url( $this->getPath( self::PATH_PAGE ), PHP_URL_PATH );
    }

    /**
     * Returns the path component (as extracted by parse_url()) of the path
     * stored under self::PATH_FILE.
     *
     * @return string
     */
    public function getRelativeFilePath() {
        return parse_url( $this->getPath( self::PATH_FILE ), PHP_URL_PATH );
    }

    /**
     * Stores the given path under self::PATH_PAGE.
     *
     * @param string $path
     */
    public function setPagePath( $path ) {
        $this->setPath( self::PATH_PAGE, $path );
    }

    /**
     * Stores the given path under self::PATH_FILE.
     *
     * @param string $path
     */
    public function setFilePath( $path ) {
        $this->setPath( self::PATH_FILE, $path );
    }

    /**
     * Returns the URL of the given page on this site.
     *
     * The page name is trimmed, converted with toDBKey(), encoded with
     * wfUrlencode() and substituted for the '$1' placeholder of the link
     * path. Without a page name the link path is returned unmodified,
     * placeholder included.
     *
     * @param string|bool $pageName Page name, or false for the raw link path
     *
     * @return string|bool
     */
    public function getPageUrl( $pageName = false ) {
        $url = $this->getLinkPath();

        // NOTE(review): this assumes getLinkPath() signals "no path" with
        // false - confirm against the parent Site class.
        if ( $url === false ) {
            return false;
        }

        if ( $pageName !== false ) {
            $pageName = $this->toDBKey( trim( $pageName ) );
            $url = str_replace( '$1', wfUrlencode( $pageName ), $url );
        }

        return $url;
    }

    /**
     * Returns the URL of the given file (script) on this site.
     *
     * The '$1' placeholder of the path stored under self::PATH_FILE is
     * replaced by $path as given, without any encoding. With the default
     * of false the placeholder is replaced by an empty string.
     *
     * @param string|bool $path File name relative to the file path, e.g. 'api.php'
     *
     * @return string|bool
     */
    public function getFileUrl( $path = false ) {
        $filePath = $this->getPath( self::PATH_FILE );

        // NOTE(review): this assumes getPath() signals "no path" with
        // false - confirm against the parent Site class.
        if ( $filePath !== false ) {
            $filePath = str_replace( '$1', $path, $filePath );
        }

        return $filePath;
    }

}