MediaWiki  REL1_24
ForeignAPIRepo.php
Go to the documentation of this file.
00001 <?php
00039 class ForeignAPIRepo extends FileRepo {
    /* This version string is used in the user agent for requests and will help
     * server maintainers identify ForeignAPI usage.
     * Update the version every time you make breaking or significant changes. */
00043     const VERSION = "2.1";
00044 
00049     protected static $imageInfoProps = array(
00050         'url',
00051         'thumbnail',
00052         'timestamp',
00053     );
00054 
00055     protected $fileFactory = array( 'ForeignAPIFile', 'newFromTitle' );
00057     protected $apiThumbCacheExpiry = 86400;
00058 
00060     protected $fileCacheExpiry = 2592000;
00061 
00063     protected $mFileExists = array();
00064 
00066     private $mQueryCache = array();
00067 
00071     function __construct( $info ) {
00072         global $wgLocalFileRepo;
00073         parent::__construct( $info );
00074 
00075         // http://commons.wikimedia.org/w/api.php
00076         $this->mApiBase = isset( $info['apibase'] ) ? $info['apibase'] : null;
00077 
00078         if ( isset( $info['apiThumbCacheExpiry'] ) ) {
00079             $this->apiThumbCacheExpiry = $info['apiThumbCacheExpiry'];
00080         }
00081         if ( isset( $info['fileCacheExpiry'] ) ) {
00082             $this->fileCacheExpiry = $info['fileCacheExpiry'];
00083         }
00084         if ( !$this->scriptDirUrl ) {
00085             // hack for description fetches
00086             $this->scriptDirUrl = dirname( $this->mApiBase );
00087         }
00088         // If we can cache thumbs we can guess sane defaults for these
00089         if ( $this->canCacheThumbs() && !$this->url ) {
00090             $this->url = $wgLocalFileRepo['url'];
00091         }
00092         if ( $this->canCacheThumbs() && !$this->thumbUrl ) {
00093             $this->thumbUrl = $this->url . '/thumb';
00094         }
00095     }
00096 
00101     function getApiUrl() {
00102         return $this->mApiBase;
00103     }
00104 
00113     function newFile( $title, $time = false ) {
00114         if ( $time ) {
00115             return false;
00116         }
00117 
00118         return parent::newFile( $title, $time );
00119     }
00120 
00125     function fileExistsBatch( array $files ) {
00126         $results = array();
00127         foreach ( $files as $k => $f ) {
00128             if ( isset( $this->mFileExists[$f] ) ) {
00129                 $results[$k] = $this->mFileExists[$f];
00130                 unset( $files[$k] );
00131             } elseif ( self::isVirtualUrl( $f ) ) {
00132                 # @todo FIXME: We need to be able to handle virtual
00133                 # URLs better, at least when we know they refer to the
00134                 # same repo.
00135                 $results[$k] = false;
00136                 unset( $files[$k] );
00137             } elseif ( FileBackend::isStoragePath( $f ) ) {
00138                 $results[$k] = false;
00139                 unset( $files[$k] );
00140                 wfWarn( "Got mwstore:// path '$f'." );
00141             }
00142         }
00143 
00144         $data = $this->fetchImageQuery( array(
00145             'titles' => implode( $files, '|' ),
00146             'prop' => 'imageinfo' )
00147         );
00148 
00149         if ( isset( $data['query']['pages'] ) ) {
00150             # First, get results from the query. Note we only care whether the image exists,
00151             # not whether it has a description page.
00152             foreach ( $data['query']['pages'] as $p ) {
00153                 $this->mFileExists[$p['title']] = ( $p['imagerepository'] !== '' );
00154             }
00155             # Second, copy the results to any redirects that were queried
00156             if ( isset( $data['query']['redirects'] ) ) {
00157                 foreach ( $data['query']['redirects'] as $r ) {
00158                     $this->mFileExists[$r['from']] = $this->mFileExists[$r['to']];
00159                 }
00160             }
00161             # Third, copy the results to any non-normalized titles that were queried
00162             if ( isset( $data['query']['normalized'] ) ) {
00163                 foreach ( $data['query']['normalized'] as $n ) {
00164                     $this->mFileExists[$n['from']] = $this->mFileExists[$n['to']];
00165                 }
00166             }
00167             # Finally, copy the results to the output
00168             foreach ( $files as $key => $file ) {
00169                 $results[$key] = $this->mFileExists[$file];
00170             }
00171         }
00172 
00173         return $results;
00174     }
00175 
00180     function getFileProps( $virtualUrl ) {
00181         return false;
00182     }
00183 
00188     function fetchImageQuery( $query ) {
00189         global $wgLanguageCode;
00190 
00191         $query = array_merge( $query,
00192             array(
00193                 'format' => 'json',
00194                 'action' => 'query',
00195                 'redirects' => 'true'
00196             ) );
00197 
00198         if ( !isset( $query['uselang'] ) ) { // uselang is unset or null
00199             $query['uselang'] = $wgLanguageCode;
00200         }
00201 
00202         $data = $this->httpGetCached( 'Metadata', $query );
00203 
00204         if ( $data ) {
00205             return FormatJson::decode( $data, true );
00206         } else {
00207             return null;
00208         }
00209     }
00210 
00215     function getImageInfo( $data ) {
00216         if ( $data && isset( $data['query']['pages'] ) ) {
00217             foreach ( $data['query']['pages'] as $info ) {
00218                 if ( isset( $info['imageinfo'][0] ) ) {
00219                     return $info['imageinfo'][0];
00220                 }
00221             }
00222         }
00223 
00224         return false;
00225     }
00226 
00231     function findBySha1( $hash ) {
00232         $results = $this->fetchImageQuery( array(
00233             'aisha1base36' => $hash,
00234             'aiprop' => ForeignAPIFile::getProps(),
00235             'list' => 'allimages',
00236         ) );
00237         $ret = array();
00238         if ( isset( $results['query']['allimages'] ) ) {
00239             foreach ( $results['query']['allimages'] as $img ) {
00240                 // 1.14 was broken, doesn't return name attribute
00241                 if ( !isset( $img['name'] ) ) {
00242                     continue;
00243                 }
00244                 $ret[] = new ForeignAPIFile( Title::makeTitle( NS_FILE, $img['name'] ), $this, $img );
00245             }
00246         }
00247 
00248         return $ret;
00249     }
00250 
00260     function getThumbUrl( $name, $width = -1, $height = -1, &$result = null, $otherParams = '' ) {
00261         $data = $this->fetchImageQuery( array(
00262             'titles' => 'File:' . $name,
00263             'iiprop' => self::getIIProps(),
00264             'iiurlwidth' => $width,
00265             'iiurlheight' => $height,
00266             'iiurlparam' => $otherParams,
00267             'prop' => 'imageinfo' ) );
00268         $info = $this->getImageInfo( $data );
00269 
00270         if ( $data && $info && isset( $info['thumburl'] ) ) {
00271             wfDebug( __METHOD__ . " got remote thumb " . $info['thumburl'] . "\n" );
00272             $result = $info;
00273 
00274             return $info['thumburl'];
00275         } else {
00276             return false;
00277         }
00278     }
00279 
00289     function getThumbError( $name, $width = -1, $height = -1, $otherParams = '', $lang = null ) {
00290         $data = $this->fetchImageQuery( array(
00291             'titles' => 'File:' . $name,
00292             'iiprop' => self::getIIProps(),
00293             'iiurlwidth' => $width,
00294             'iiurlheight' => $height,
00295             'iiurlparam' => $otherParams,
00296             'prop' => 'imageinfo',
00297             'uselang' => $lang,
00298         ) );
00299         $info = $this->getImageInfo( $data );
00300 
00301         if ( $data && $info && isset( $info['thumberror'] ) ) {
00302             wfDebug( __METHOD__ . " got remote thumb error " . $info['thumberror'] . "\n" );
00303 
00304             return new MediaTransformError(
00305                 'thumbnail_error_remote',
00306                 $width,
00307                 $height,
00308                 $this->getDisplayName(),
00309                 $info['thumberror'] // already parsed message from foreign repo
00310             );
00311         } else {
00312             return false;
00313         }
00314     }
00315 
00329     function getThumbUrlFromCache( $name, $width, $height, $params = "" ) {
00330         global $wgMemc;
00331         // We can't check the local cache using FileRepo functions because
00332         // we override fileExistsBatch(). We have to use the FileBackend directly.
00333         $backend = $this->getBackend(); // convenience
00334 
00335         if ( !$this->canCacheThumbs() ) {
00336             $result = null; // can't pass "null" by reference, but it's ok as default value
00337             return $this->getThumbUrl( $name, $width, $height, $result, $params );
00338         }
00339         $key = $this->getLocalCacheKey( 'ForeignAPIRepo', 'ThumbUrl', $name );
00340         $sizekey = "$width:$height:$params";
00341 
00342         /* Get the array of urls that we already know */
00343         $knownThumbUrls = $wgMemc->get( $key );
00344         if ( !$knownThumbUrls ) {
00345             /* No knownThumbUrls for this file */
00346             $knownThumbUrls = array();
00347         } else {
00348             if ( isset( $knownThumbUrls[$sizekey] ) ) {
00349                 wfDebug( __METHOD__ . ': Got thumburl from local cache: ' .
00350                     "{$knownThumbUrls[$sizekey]} \n" );
00351 
00352                 return $knownThumbUrls[$sizekey];
00353             }
00354             /* This size is not yet known */
00355         }
00356 
00357         $metadata = null;
00358         $foreignUrl = $this->getThumbUrl( $name, $width, $height, $metadata, $params );
00359 
00360         if ( !$foreignUrl ) {
00361             wfDebug( __METHOD__ . " Could not find thumburl\n" );
00362 
00363             return false;
00364         }
00365 
00366         // We need the same filename as the remote one :)
00367         $fileName = rawurldecode( pathinfo( $foreignUrl, PATHINFO_BASENAME ) );
00368         if ( !$this->validateFilename( $fileName ) ) {
00369             wfDebug( __METHOD__ . " The deduced filename $fileName is not safe\n" );
00370 
00371             return false;
00372         }
00373         $localPath = $this->getZonePath( 'thumb' ) . "/" . $this->getHashPath( $name ) . $name;
00374         $localFilename = $localPath . "/" . $fileName;
00375         $localUrl = $this->getZoneUrl( 'thumb' ) . "/" . $this->getHashPath( $name ) .
00376             rawurlencode( $name ) . "/" . rawurlencode( $fileName );
00377 
00378         if ( $backend->fileExists( array( 'src' => $localFilename ) )
00379             && isset( $metadata['timestamp'] )
00380         ) {
00381             wfDebug( __METHOD__ . " Thumbnail was already downloaded before\n" );
00382             $modified = $backend->getFileTimestamp( array( 'src' => $localFilename ) );
00383             $remoteModified = strtotime( $metadata['timestamp'] );
00384             $current = time();
00385             $diff = abs( $modified - $current );
00386             if ( $remoteModified < $modified && $diff < $this->fileCacheExpiry ) {
00387                 /* Use our current and already downloaded thumbnail */
00388                 $knownThumbUrls[$sizekey] = $localUrl;
00389                 $wgMemc->set( $key, $knownThumbUrls, $this->apiThumbCacheExpiry );
00390 
00391                 return $localUrl;
00392             }
00393             /* There is a new Commons file, or existing thumbnail older than a month */
00394         }
00395         $thumb = self::httpGet( $foreignUrl );
00396         if ( !$thumb ) {
00397             wfDebug( __METHOD__ . " Could not download thumb\n" );
00398 
00399             return false;
00400         }
00401 
00402         # @todo FIXME: Delete old thumbs that aren't being used. Maintenance script?
00403         $backend->prepare( array( 'dir' => dirname( $localFilename ) ) );
00404         $params = array( 'dst' => $localFilename, 'content' => $thumb );
00405         if ( !$backend->quickCreate( $params )->isOK() ) {
00406             wfDebug( __METHOD__ . " could not write to thumb path '$localFilename'\n" );
00407 
00408             return $foreignUrl;
00409         }
00410         $knownThumbUrls[$sizekey] = $localUrl;
00411         $wgMemc->set( $key, $knownThumbUrls, $this->apiThumbCacheExpiry );
00412         wfDebug( __METHOD__ . " got local thumb $localUrl, saving to cache \n" );
00413 
00414         return $localUrl;
00415     }
00416 
00423     function getZoneUrl( $zone, $ext = null ) {
00424         switch ( $zone ) {
00425             case 'public':
00426                 return $this->url;
00427             case 'thumb':
00428                 return $this->thumbUrl;
00429             default:
00430                 return parent::getZoneUrl( $zone, $ext );
00431         }
00432     }
00433 
00439     function getZonePath( $zone ) {
00440         $supported = array( 'public', 'thumb' );
00441         if ( in_array( $zone, $supported ) ) {
00442             return parent::getZonePath( $zone );
00443         }
00444 
00445         return false;
00446     }
00447 
00452     public function canCacheThumbs() {
00453         return ( $this->apiThumbCacheExpiry > 0 );
00454     }
00455 
00460     public static function getUserAgent() {
00461         return Http::userAgent() . " ForeignAPIRepo/" . self::VERSION;
00462     }
00463 
00470     function getInfo() {
00471         $info = parent::getInfo();
00472         $info['apiurl'] = $this->getApiUrl();
00473 
00474         $query = array(
00475             'format' => 'json',
00476             'action' => 'query',
00477             'meta' => 'siteinfo',
00478             'siprop' => 'general',
00479         );
00480 
00481         $data = $this->httpGetCached( 'SiteInfo', $query, 7200 );
00482 
00483         if ( $data ) {
00484             $siteInfo = FormatJson::decode( $data, true );
00485             $general = $siteInfo['query']['general'];
00486 
00487             $info['articlepath'] = $general['articlepath'];
00488             $info['server'] = $general['server'];
00489 
00490             if ( isset( $general['favicon'] ) ) {
00491                 $info['favicon'] = $general['favicon'];
00492             }
00493         }
00494 
00495         return $info;
00496     }
00497 
00506     public static function httpGet( $url, $timeout = 'default', $options = array() ) {
00507         $options['timeout'] = $timeout;
00508         /* Http::get */
00509         $url = wfExpandUrl( $url, PROTO_HTTP );
00510         wfDebug( "ForeignAPIRepo: HTTP GET: $url\n" );
00511         $options['method'] = "GET";
00512 
00513         if ( !isset( $options['timeout'] ) ) {
00514             $options['timeout'] = 'default';
00515         }
00516 
00517         $req = MWHttpRequest::factory( $url, $options );
00518         $req->setUserAgent( ForeignAPIRepo::getUserAgent() );
00519         $status = $req->execute();
00520 
00521         if ( $status->isOK() ) {
00522             return $req->getContent();
00523         } else {
00524             return false;
00525         }
00526     }
00527 
00532     protected static function getIIProps() {
00533         return join( '|', self::$imageInfoProps );
00534     }
00535 
00543     public function httpGetCached( $target, $query, $cacheTTL = 3600 ) {
00544         if ( $this->mApiBase ) {
00545             $url = wfAppendQuery( $this->mApiBase, $query );
00546         } else {
00547             $url = $this->makeUrl( $query, 'api' );
00548         }
00549 
00550         if ( !isset( $this->mQueryCache[$url] ) ) {
00551             global $wgMemc;
00552 
00553             $key = $this->getLocalCacheKey( get_class( $this ), $target, md5( $url ) );
00554             $data = $wgMemc->get( $key );
00555 
00556             if ( !$data ) {
00557                 $data = self::httpGet( $url );
00558 
00559                 if ( !$data ) {
00560                     return null;
00561                 }
00562 
00563                 $wgMemc->set( $key, $data, $cacheTTL );
00564             }
00565 
00566             if ( count( $this->mQueryCache ) > 100 ) {
00567                 // Keep the cache from growing infinitely
00568                 $this->mQueryCache = array();
00569             }
00570 
00571             $this->mQueryCache[$url] = $data;
00572         }
00573 
00574         return $this->mQueryCache[$url];
00575     }
00576 
00581     function enumFiles( $callback ) {
00582         throw new MWException( 'enumFiles is not supported by ' . get_class( $this ) );
00583     }
00584 
00588     protected function assertWritableRepo() {
00589         throw new MWException( get_class( $this ) . ': write operations are not supported.' );
00590     }
00591 }