MediaWiki  REL1_24
ApiQueryDuplicateFiles.php
Go to the documentation of this file.
00001 <?php
/**
 * A query module that lists all files which are duplicates of the given
 * file(s), based on their hash values.
 *
 * Can be used as a property module (results are attached to each input page)
 * or as a generator (the duplicates become the result page set).
 */
class ApiQueryDuplicateFiles extends ApiQueryGeneratorBase {

    /**
     * @param ApiQuery $query
     * @param string $moduleName
     */
    public function __construct( ApiQuery $query, $moduleName ) {
        // 'df' is handed to the parent as third argument
        // (presumably the parameter prefix, cf. "duplicatefiles / df" in the help URL).
        parent::__construct( $query, $moduleName, 'df' );
    }

    /**
     * Run the module in property mode: duplicates are added as sub-items
     * of the pages they belong to.
     */
    public function execute() {
        $this->run();
    }

    /**
     * @param array $params Unused by this implementation
     * @return string Always 'public'
     */
    public function getCacheMode( $params ) {
        return 'public';
    }

    /**
     * Run the module in generator mode.
     *
     * @param object $resultPageSet Page set that receives the duplicates' titles
     */
    public function executeGenerator( $resultPageSet ) {
        $this->run( $resultPageSet );
    }

    /**
     * Tell whether $name comes before $reference in the requested direction.
     *
     * The comparison is byte-wise (strcmp) so that numeric-looking names
     * ("10", "1e3") are never compared as numbers.
     *
     * @param string|int $name Name to test (array keys may have been cast to int by PHP)
     * @param string $reference Name to compare against
     * @param bool $descending True when listing in descending order
     * @return bool
     */
    private static function sortsBefore( $name, $reference, $descending ) {
        $cmp = strcmp( (string)$name, (string)$reference );

        return $descending ? $cmp > 0 : $cmp < 0;
    }

    /**
     * Shared implementation of execute() and executeGenerator().
     *
     * Looks up the file behind every NS_FILE title of the page set, fetches all
     * files sharing a hash with one of them, and then either adds each duplicate
     * as a sub-item of its page (property mode) or collects the duplicates'
     * titles into $resultPageSet (generator mode).
     *
     * NOTE(review): continuation relies on the page set yielding the titles in
     * ascending name order and on findBySha1s() returning each hash's files
     * sorted by name - confirm against ApiPageSet / RepoGroup.
     *
     * @param object|null $resultPageSet Page set to fill via populateFromTitles()
     *   in generator mode, or null in property mode
     */
    private function run( $resultPageSet = null ) {
        $params = $this->extractRequestParams();
        $namespaces = $this->getPageSet()->getAllTitlesByNamespace();
        if ( empty( $namespaces[NS_FILE] ) ) {
            return;
        }
        $images = $namespaces[NS_FILE];

        $descending = ( $params['dir'] === 'descending' );
        if ( $descending ) {
            // Keep the keys: without preserve_keys, integer-like names
            // (e.g. "2014") would be renumbered to 0, 1, 2, ...
            $images = array_reverse( $images, true );
        }

        $fromImage = null;
        $skipUntilThisDup = false;
        if ( isset( $params['continue'] ) ) {
            $cont = explode( '|', $params['continue'] );
            $this->dieContinueUsageIf( count( $cont ) != 2 );
            $fromImage = $cont[0];
            $skipUntilThisDup = $cont[1];
            // Filter out any images before $fromImage, honouring the direction
            foreach ( $images as $image => $pageId ) {
                if ( self::sortsBefore( $image, $fromImage, $descending ) ) {
                    unset( $images[$image] );
                } else {
                    break;
                }
            }
        }

        $filesToFind = array_keys( $images );
        if ( $params['localonly'] ) {
            $files = RepoGroup::singleton()->getLocalRepo()->findFiles( $filesToFind );
        } else {
            $files = RepoGroup::singleton()->findFiles( $filesToFind );
        }

        $fit = true;
        $count = 0;
        $titles = array();

        // Map of file name => hash for every input file that exists
        $sha1s = array();
        foreach ( $files as $file ) {
            $sha1s[$file->getName()] = $file->getSha1();
        }

        // find all files with the hashes, result format is:
        // array( hash => array( dup1, dup2 ), hash1 => ... )
        $filesToFindBySha1s = array_unique( array_values( $sha1s ) );
        if ( $params['localonly'] ) {
            $filesBySha1s = RepoGroup::singleton()->getLocalRepo()->findBySha1s( $filesToFindBySha1s );
        } else {
            $filesBySha1s = RepoGroup::singleton()->findBySha1s( $filesToFindBySha1s );
        }

        // iterate over $images to handle the continue param correctly
        foreach ( $images as $image => $pageId ) {
            if ( !isset( $sha1s[$image] ) ) {
                continue; // file does not exist
            }
            $sha1 = $sha1s[$image];
            if ( !isset( $filesBySha1s[$sha1] ) ) {
                continue; // no repository reported any file for this hash
            }
            $dupFiles = $filesBySha1s[$sha1];
            if ( $descending ) {
                $dupFiles = array_reverse( $dupFiles );
            }

            // The duplicate offset of the continue param is only meaningful
            // for the image it was issued for, not for any later image.
            $skipUntil = false;
            if ( $skipUntilThisDup !== false && (string)$image === $fromImage ) {
                $skipUntil = $skipUntilThisDup;
            }

            foreach ( $dupFiles as $dupFile ) {
                $dupName = $dupFile->getName();
                if ( (string)$image === $dupName && $dupFile->isLocal() ) {
                    continue; // ignore the local file itself
                }
                if ( $skipUntil !== false && self::sortsBefore( $dupName, $skipUntil, $descending ) ) {
                    continue; // skip to pos after the image from continue param
                }
                if ( ++$count > $params['limit'] ) {
                    $fit = false; // break outer loop
                    // We're one over limit which shows that
                    // there are additional images to be had. Stop here...
                    $this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
                    break;
                }
                if ( !is_null( $resultPageSet ) ) {
                    $titles[] = $dupFile->getTitle();
                } else {
                    $r = array(
                        'name' => $dupName,
                        'user' => $dupFile->getUser( 'text' ),
                        'timestamp' => wfTimestamp( TS_ISO_8601, $dupFile->getTimestamp() )
                    );
                    if ( !$dupFile->isLocal() ) {
                        $r['shared'] = '';
                    }
                    $fit = $this->addPageSubItem( $pageId, $r );
                    if ( !$fit ) {
                        // Result did not fit; resume from this duplicate next time
                        $this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
                        break;
                    }
                }
            }
            if ( !$fit ) {
                break;
            }
        }
        if ( !is_null( $resultPageSet ) ) {
            $resultPageSet->populateFromTitles( $titles );
        }
    }

    /**
     * @return array Parameter definitions: limit, continue, dir, localonly
     */
    public function getAllowedParams() {
        return array(
            'limit' => array(
                ApiBase::PARAM_DFLT => 10,
                ApiBase::PARAM_TYPE => 'limit',
                ApiBase::PARAM_MIN => 1,
                ApiBase::PARAM_MAX => ApiBase::LIMIT_BIG1,
                ApiBase::PARAM_MAX2 => ApiBase::LIMIT_BIG2
            ),
            'continue' => null,
            'dir' => array(
                ApiBase::PARAM_DFLT => 'ascending',
                ApiBase::PARAM_TYPE => array(
                    'ascending',
                    'descending'
                )
            ),
            'localonly' => false,
        );
    }

    /**
     * @return array Human-readable description for each parameter
     */
    public function getParamDescription() {
        return array(
            'limit' => 'How many duplicate files to return',
            'continue' => 'When more results are available, use this to continue',
            'dir' => 'The direction in which to list',
            'localonly' => 'Look only for files in the local repository',
        );
    }

    /**
     * @return string Description of the module
     */
    public function getDescription() {
        return 'List all files that are duplicates of the given file(s) based on hash values.';
    }

    /**
     * @return array Example request URLs
     */
    public function getExamples() {
        return array(
            'api.php?action=query&titles=File:Albert_Einstein_Head.jpg&prop=duplicatefiles',
            'api.php?action=query&generator=allimages&prop=duplicatefiles',
        );
    }

    /**
     * @return string URL of the on-wiki documentation for this module
     */
    public function getHelpUrls() {
        return 'https://www.mediawiki.org/wiki/API:Properties#duplicatefiles_.2F_df';
    }
}