MediaWiki  REL1_21
ApiQueryDuplicateFiles.php
Go to the documentation of this file.
00001 <?php
/**
 * Query module listing the files that share a content hash (SHA-1) with the
 * files in the current page set. Registered with the parameter prefix 'df'
 * and usable both as a property module and as a generator.
 */
class ApiQueryDuplicateFiles extends ApiQueryGeneratorBase {

	/**
	 * @param $query mixed Passed through unchanged to the parent constructor
	 * @param $moduleName string Name this module is registered under
	 */
	public function __construct( $query, $moduleName ) {
		parent::__construct( $query, $moduleName, 'df' );
	}

	/**
	 * Property-module entry point: attach the duplicates to the result pages.
	 */
	public function execute() {
		$this->run();
	}

	/**
	 * @param $params array Request parameters (not consulted)
	 * @return string Always 'public'
	 */
	public function getCacheMode( $params ) {
		return 'public';
	}

	/**
	 * Generator entry point: feed the duplicates' titles into $resultPageSet.
	 *
	 * @param $resultPageSet object Page set to populate
	 */
	public function executeGenerator( $resultPageSet ) {
		$this->run( $resultPageSet );
	}

	/**
	 * Tell whether $name lies before $offset in the requested listing
	 * direction. Names are compared byte-wise with strcmp() so that
	 * numeric-looking names are not compared as numbers.
	 *
	 * @param $name string|int Name to test (array keys may arrive as int)
	 * @param $offset string Continuation offset to compare against
	 * @param $descending bool True when listing in descending order
	 * @return bool
	 */
	private static function isBeforeOffset( $name, $offset, $descending ) {
		$cmp = strcmp( (string)$name, (string)$offset );
		return $descending ? $cmp > 0 : $cmp < 0;
	}

	/**
	 * Shared implementation of execute() and executeGenerator().
	 *
	 * Looks up every NS_FILE title of the page set, collects the files with
	 * the same SHA-1 and either adds them as sub-items of the originating
	 * page or, in generator mode, gathers their titles. Stops once 'limit'
	 * is exceeded or the result no longer fits, and then stores a continue
	 * value of the form "<image>|<duplicate>".
	 *
	 * @param $resultPageSet object|null Page set to populate in generator
	 *   mode (must provide populateFromTitles()), or null to add the
	 *   duplicates to the regular result
	 */
	private function run( $resultPageSet = null ) {
		$params = $this->extractRequestParams();
		$namespaces = $this->getPageSet()->getAllTitlesByNamespace();
		if ( empty( $namespaces[NS_FILE] ) ) {
			return;
		}
		$images = $namespaces[NS_FILE];

		// NOTE(review): continuation relies on $images being in ascending
		// name order at this point; confirm against the page set.
		$descending = ( $params['dir'] === 'descending' );
		if ( $descending ) {
			// Keep the keys: integer-like names would otherwise be renumbered
			$images = array_reverse( $images, true );
		}

		$fromImage = null;
		$skipUntilThisDup = false;
		if ( isset( $params['continue'] ) ) {
			$cont = explode( '|', $params['continue'] );
			$this->dieContinueUsageIf( count( $cont ) != 2 );
			$fromImage = $cont[0];
			$skipUntilThisDup = $cont[1];
			// Drop the images that precede $fromImage in the listing direction
			foreach ( array_keys( $images ) as $image ) {
				if ( !self::isBeforeOffset( $image, $fromImage, $descending ) ) {
					break;
				}
				unset( $images[$image] );
			}
		}

		// Both lookups go to the same place: local repo only, or all repos
		if ( $params['localonly'] ) {
			$repo = RepoGroup::singleton()->getLocalRepo();
		} else {
			$repo = RepoGroup::singleton();
		}

		$sha1s = array();
		foreach ( $repo->findFiles( array_keys( $images ) ) as $file ) {
			$sha1s[$file->getName()] = $file->getSha1();
		}

		// find all files with the hashes, result format is:
		// array( hash => array( dup1, dup2 ), hash1 => ... )
		$filesBySha1s = $repo->findBySha1s( array_unique( array_values( $sha1s ) ) );

		$fit = true;
		$count = 0;
		$titles = array();

		// iterate over $images (not the lookup results) so that the continue
		// parameter is handled correctly
		foreach ( $images as $image => $pageId ) {
			// The duplicate offset belongs to the continued image only; never
			// let it hide duplicates of a later image.
			if ( $skipUntilThisDup !== false && (string)$image !== $fromImage ) {
				$skipUntilThisDup = false;
			}
			if ( !isset( $sha1s[$image] ) ) {
				continue; // file does not exist
			}
			$sha1 = $sha1s[$image];
			if ( !isset( $filesBySha1s[$sha1] ) ) {
				continue; // hash lookup returned nothing for this file
			}
			$dupFiles = $filesBySha1s[$sha1];
			if ( $descending ) {
				$dupFiles = array_reverse( $dupFiles );
			}

			foreach ( $dupFiles as $dupFile ) {
				$dupName = $dupFile->getName();
				if ( (string)$image === (string)$dupName && $dupFile->isLocal() ) {
					continue; // ignore the local file itself
				}
				if ( $skipUntilThisDup !== false
					&& self::isBeforeOffset( $dupName, $skipUntilThisDup, $descending )
				) {
					continue; // skip to pos after the image from continue param
				}
				$skipUntilThisDup = false;

				if ( ++$count > $params['limit'] ) {
					$fit = false; // break outer loop
					// We're one over limit which shows that
					// there are additional images to be had. Stop here...
					$this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
					break;
				}

				if ( !is_null( $resultPageSet ) ) {
					$titles[] = $dupFile->getTitle();
					continue;
				}

				$r = array(
					'name' => $dupName,
					'user' => $dupFile->getUser( 'text' ),
					'timestamp' => wfTimestamp( TS_ISO_8601, $dupFile->getTimestamp() )
				);
				if ( !$dupFile->isLocal() ) {
					$r['shared'] = '';
				}
				$fit = $this->addPageSubItem( $pageId, $r );
				if ( !$fit ) {
					$this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
					break;
				}
			}

			if ( !$fit ) {
				break;
			}
		}

		if ( !is_null( $resultPageSet ) ) {
			$resultPageSet->populateFromTitles( $titles );
		}
	}

	/**
	 * @return array Parameter definitions: limit, continue, dir, localonly
	 */
	public function getAllowedParams() {
		return array(
			'limit' => array(
				ApiBase::PARAM_DFLT => 10,
				ApiBase::PARAM_TYPE => 'limit',
				ApiBase::PARAM_MIN => 1,
				ApiBase::PARAM_MAX => ApiBase::LIMIT_BIG1,
				ApiBase::PARAM_MAX2 => ApiBase::LIMIT_BIG2
			),
			'continue' => null,
			'dir' => array(
				ApiBase::PARAM_DFLT => 'ascending',
				ApiBase::PARAM_TYPE => array(
					'ascending',
					'descending'
				)
			),
			'localonly' => false,
		);
	}

	/**
	 * @return array Help text for each parameter, keyed by parameter name
	 */
	public function getParamDescription() {
		return array(
			'limit' => 'How many duplicate files to return',
			'continue' => 'When more results are available, use this to continue',
			'dir' => 'The direction in which to list',
			'localonly' => 'Look only for files in the local repository',
		);
	}

	/**
	 * @return array Types of the properties emitted for each duplicate
	 */
	public function getResultProperties() {
		return array(
			'' => array(
				'name' => 'string',
				'user' => 'string',
				'timestamp' => 'timestamp',
				'shared' => 'boolean',
			)
		);
	}

	/**
	 * @return string One-line description of the module
	 */
	public function getDescription() {
		return 'List all files that are duplicates of the given file(s) based on hash values';
	}

	/**
	 * @return array Example request URLs
	 */
	public function getExamples() {
		return array(
			'api.php?action=query&titles=File:Albert_Einstein_Head.jpg&prop=duplicatefiles',
			'api.php?action=query&generator=allimages&prop=duplicatefiles',
		);
	}

	/**
	 * @return string URL of the on-wiki documentation for this module
	 */
	public function getHelpUrls() {
		return 'https://www.mediawiki.org/wiki/API:Properties#duplicatefiles_.2F_df';
	}
}