MediaWiki  REL1_22
ApiQueryDuplicateFiles.php
Go to the documentation of this file.
00001 <?php
/**
 * API query module (parameter prefix "df") that lists, for every File-namespace
 * title in the request's page set, the other files sharing the same SHA-1 hash.
 *
 * It can run as a plain property module (results are attached to each page)
 * or as a generator (the duplicates become the resulting page set).
 */
class ApiQueryDuplicateFiles extends ApiQueryGeneratorBase {

    /**
     * @param mixed $query Passed straight through to the parent constructor
     * @param string $moduleName Name under which this module is registered
     */
    public function __construct( $query, $moduleName ) {
        parent::__construct( $query, $moduleName, 'df' );
    }

    /**
     * Run as a property module: duplicates are added as sub-items of each page.
     */
    public function execute() {
        $this->run();
    }

    /**
     * @param array $params Request parameters (unused)
     * @return string Always 'public'
     */
    public function getCacheMode( $params ) {
        return 'public';
    }

    /**
     * Run as a generator: duplicates are collected into $resultPageSet.
     *
     * @param mixed $resultPageSet Page set that receives the duplicate titles
     */
    public function executeGenerator( $resultPageSet ) {
        $this->run( $resultPageSet );
    }

    /**
     * Shared implementation of execute() and executeGenerator().
     *
     * Looks up the hash of every requested file, fetches all files carrying
     * one of those hashes and reports them per requested image, honouring the
     * limit, direction and continue parameters.
     *
     * The continue value has the form "image|duplicate": processing resumes at
     * that image, and within it at that duplicate.
     *
     * @param mixed $resultPageSet Page set to populate in generator mode,
     *   or null to add the results to the API result directly
     */
    private function run( $resultPageSet = null ) {
        $params = $this->extractRequestParams();
        $namespaces = $this->getPageSet()->getAllTitlesByNamespace();
        if ( empty( $namespaces[NS_FILE] ) ) {
            return;
        }
        $images = $namespaces[NS_FILE];

        $descending = ( $params['dir'] == 'descending' );
        if ( $descending ) {
            // Preserve keys: they are the image names, and array_reverse()
            // would otherwise renumber any integer-like key.
            $images = array_reverse( $images, true );
        }

        $fromImage = null;
        $skipUntilThisDup = null;
        if ( isset( $params['continue'] ) ) {
            $cont = explode( '|', $params['continue'] );
            $this->dieContinueUsageIf( count( $cont ) != 2 );
            $fromImage = $cont[0];
            $skipUntilThisDup = $cont[1];
            // Drop every image that comes before $fromImage in the requested
            // direction; those were fully handled by earlier requests.
            foreach ( $images as $image => $pageId ) {
                if ( !$this->precedesInDirection( $image, $fromImage, $descending ) ) {
                    break;
                }
                unset( $images[$image] );
            }
        }

        // Both lookups go to the same place: the local repo only, or all repos.
        $repo = RepoGroup::singleton();
        if ( $params['localonly'] ) {
            $repo = $repo->getLocalRepo();
        }
        $files = $repo->findFiles( array_keys( $images ) );

        // Map image name => hash for every requested file that exists
        $sha1s = array();
        foreach ( $files as $file ) {
            $sha1s[$file->getName()] = $file->getSha1();
        }

        // Find all files with the hashes; the result is indexed by hash, each
        // entry holding the files that carry it:
        // array( hash => array( dup1, dup2 ), hash1 => ... )
        $filesBySha1s = $repo->findBySha1s( array_unique( array_values( $sha1s ) ) );

        $fit = true;
        $count = 0;
        $titles = array();

        // Iterate over $images (not over the lookup results) so that the
        // continue parameter is handled in a stable order
        foreach ( $images as $image => $pageId ) {
            if ( !isset( $sha1s[$image] ) ) {
                continue; // file does not exist
            }
            $sha1 = $sha1s[$image];
            if ( !isset( $filesBySha1s[$sha1] ) ) {
                continue; // the hash lookup returned no entry for this file
            }
            $dupFiles = $filesBySha1s[$sha1];
            if ( $descending ) {
                $dupFiles = array_reverse( $dupFiles );
            }

            $imageName = (string)$image;
            // The duplicate named in the continue value only applies to the
            // image it was issued for, never to later images.
            $resuming = ( $skipUntilThisDup !== null && $imageName === $fromImage );

            foreach ( $dupFiles as $dupFile ) {
                $dupName = $dupFile->getName();
                if ( $imageName === $dupName && $dupFile->isLocal() ) {
                    continue; // ignore the local file itself
                }
                if ( $resuming
                    && $this->precedesInDirection( $dupName, $skipUntilThisDup, $descending )
                ) {
                    continue; // already returned by an earlier request
                }
                if ( ++$count > $params['limit'] ) {
                    $fit = false; // break outer loop
                    // We're one over limit which shows that
                    // there are additional images to be had. Stop here...
                    $this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
                    break;
                }
                if ( !is_null( $resultPageSet ) ) {
                    $titles[] = $dupFile->getTitle();
                } else {
                    $r = array(
                        'name' => $dupName,
                        'user' => $dupFile->getUser( 'text' ),
                        'timestamp' => wfTimestamp( TS_ISO_8601, $dupFile->getTimestamp() )
                    );
                    if ( !$dupFile->isLocal() ) {
                        $r['shared'] = '';
                    }
                    $fit = $this->addPageSubItem( $pageId, $r );
                    if ( !$fit ) {
                        $this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
                        break;
                    }
                }
            }
            if ( !$fit ) {
                break;
            }
        }
        if ( !is_null( $resultPageSet ) ) {
            $resultPageSet->populateFromTitles( $titles );
        }
    }

    /**
     * Tell whether $name comes strictly before $reference when iterating in
     * the requested direction, using byte-wise string comparison.
     *
     * @param string|int $name Name to test (array keys may arrive as integers)
     * @param string $reference Name to compare against
     * @param bool $descending True when iterating from high to low
     * @return bool
     */
    private function precedesInDirection( $name, $reference, $descending ) {
        $cmp = strcmp( (string)$name, (string)$reference );
        return $descending ? $cmp > 0 : $cmp < 0;
    }

    /**
     * @return array Definitions of the parameters this module accepts
     */
    public function getAllowedParams() {
        return array(
            'limit' => array(
                ApiBase::PARAM_DFLT => 10,
                ApiBase::PARAM_TYPE => 'limit',
                ApiBase::PARAM_MIN => 1,
                ApiBase::PARAM_MAX => ApiBase::LIMIT_BIG1,
                ApiBase::PARAM_MAX2 => ApiBase::LIMIT_BIG2
            ),
            'continue' => null,
            'dir' => array(
                ApiBase::PARAM_DFLT => 'ascending',
                ApiBase::PARAM_TYPE => array(
                    'ascending',
                    'descending'
                )
            ),
            'localonly' => false,
        );
    }

    /**
     * @return array Help text for each parameter, keyed by parameter name
     */
    public function getParamDescription() {
        return array(
            'limit' => 'How many duplicate files to return',
            'continue' => 'When more results are available, use this to continue',
            'dir' => 'The direction in which to list',
            'localonly' => 'Look only for files in the local repository',
        );
    }

    /**
     * @return array Types of the properties emitted for each duplicate
     */
    public function getResultProperties() {
        return array(
            '' => array(
                'name' => 'string',
                'user' => 'string',
                'timestamp' => 'timestamp',
                'shared' => 'boolean',
            )
        );
    }

    /**
     * @return string One-line description of the module
     */
    public function getDescription() {
        return 'List all files that are duplicates of the given file(s) based on hash values';
    }

    /**
     * @return array Example request URLs
     */
    public function getExamples() {
        return array(
            'api.php?action=query&titles=File:Albert_Einstein_Head.jpg&prop=duplicatefiles',
            'api.php?action=query&generator=allimages&prop=duplicatefiles',
        );
    }

    /**
     * @return string URL of the on-wiki documentation for this module
     */
    public function getHelpUrls() {
        return 'https://www.mediawiki.org/wiki/API:Properties#duplicatefiles_.2F_df';
    }
}