MediaWiki  REL1_20
ApiQueryDuplicateFiles.php
Go to the documentation of this file.
00001 <?php
/**
 * A query module to list duplicates of the given file(s), found by comparing
 * SHA-1 hashes across the local repo (and, optionally, foreign repos).
 * Can also run as a generator, feeding the duplicate titles to another module.
 *
 * @ingroup API
 */
class ApiQueryDuplicateFiles extends ApiQueryGeneratorBase {

	public function __construct( $query, $moduleName ) {
		// 'df' is the parameter prefix: dflimit, dfcontinue, dfdir, dflocalonly
		parent::__construct( $query, $moduleName, 'df' );
	}

	public function execute() {
		$this->run();
	}

	public function getCacheMode( $params ) {
		// Output depends only on public file data, never on the requesting user
		return 'public';
	}

	public function executeGenerator( $resultPageSet ) {
		$this->run( $resultPageSet );
	}

	/**
	 * Shared worker for both prop and generator modes.
	 *
	 * @param $resultPageSet ApiPageSet|null When non-null, duplicate titles are
	 *   collected into the page set instead of being added to the result.
	 */
	private function run( $resultPageSet = null ) {
		$params = $this->extractRequestParams();
		$namespaces = $this->getPageSet()->getAllTitlesByNamespace();
		// Only File-namespace input titles can have duplicates; bail out early
		if ( empty( $namespaces[NS_FILE] ) ) {
			return;
		}
		$images = $namespaces[NS_FILE];

		if( $params['dir'] == 'descending' ) {
			$images = array_reverse( $images );
		}

		$skipUntilThisDup = false;
		if ( isset( $params['continue'] ) ) {
			// Continue value is "<source image>|<duplicate name>"
			$cont = explode( '|', $params['continue'] );
			if ( count( $cont ) != 2 ) {
				$this->dieUsage( 'Invalid continue param. You should pass the ' .
					'original value returned by the previous query', '_badcontinue' );
			}
			$fromImage = $cont[0];
			$skipUntilThisDup = $cont[1];
			// Filter out any images before $fromImage
			foreach ( $images as $image => $pageId ) {
				if ( $image < $fromImage ) {
					unset( $images[$image] );
				} else {
					break;
				}
			}
		}

		$filesToFind = array_keys( $images );
		if( $params['localonly'] ) {
			$files = RepoGroup::singleton()->getLocalRepo()->findFiles( $filesToFind );
		} else {
			$files = RepoGroup::singleton()->findFiles( $filesToFind );
		}

		$fit = true;
		$count = 0;
		$titles = array();

		// Map each existing input file name to its SHA-1 hash
		$sha1s = array();
		foreach ( $files as $file ) {
			$sha1s[$file->getName()] = $file->getSha1();
		}

		// find all files with the hashes, result format is: array( hash => array( dup1, dup2 ), hash1 => ... )
		$filesToFindBySha1s = array_unique( array_values( $sha1s ) );
		if( $params['localonly'] ) {
			$filesBySha1s = RepoGroup::singleton()->getLocalRepo()->findBySha1s( $filesToFindBySha1s );
		} else {
			$filesBySha1s = RepoGroup::singleton()->findBySha1s( $filesToFindBySha1s );
		}

		// iterate over $images to handle continue param correct
		foreach( $images as $image => $pageId ) {
			if( !isset( $sha1s[$image] ) ) {
				continue; //file does not exist
			}
			$sha1 = $sha1s[$image];
			$dupFiles = $filesBySha1s[$sha1];
			if( $params['dir'] == 'descending' ) {
				$dupFiles = array_reverse( $dupFiles );
			}
			foreach ( $dupFiles as $dupFile ) {
				$dupName = $dupFile->getName();
				if( $image == $dupName && $dupFile->isLocal() ) {
					continue; //ignore the local file itself
				}
				if( $skipUntilThisDup !== false && $dupName < $skipUntilThisDup ) {
					continue; //skip to pos after the image from continue param
				}
				$skipUntilThisDup = false;
				if ( ++$count > $params['limit'] ) {
					$fit = false; //break outer loop
					// We're one over limit which shows that
					// there are additional images to be had. Stop here...
					$this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
					break;
				}
				if ( !is_null( $resultPageSet ) ) {
					// Bug fix: must use the duplicate being emitted ($dupFile),
					// not $file, which is a stale leftover from the SHA-1
					// collection loop above and would always yield the same title.
					$titles[] = $dupFile->getTitle();
				} else {
					$r = array(
						'name' => $dupName,
						'user' => $dupFile->getUser( 'text' ),
						'timestamp' => wfTimestamp( TS_ISO_8601, $dupFile->getTimestamp() )
					);
					if( !$dupFile->isLocal() ) {
						// Empty-string flag marks duplicates living in a foreign repo
						$r['shared'] = '';
					}
					$fit = $this->addPageSubItem( $pageId, $r );
					if ( !$fit ) {
						$this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
						break;
					}
				}
			}
			if( !$fit ) {
				break;
			}
		}
		if ( !is_null( $resultPageSet ) ) {
			$resultPageSet->populateFromTitles( $titles );
		}
	}

	public function getAllowedParams() {
		return array(
			'limit' => array(
				ApiBase::PARAM_DFLT => 10,
				ApiBase::PARAM_TYPE => 'limit',
				ApiBase::PARAM_MIN => 1,
				ApiBase::PARAM_MAX => ApiBase::LIMIT_BIG1,
				ApiBase::PARAM_MAX2 => ApiBase::LIMIT_BIG2
			),
			'continue' => null,
			'dir' => array(
				ApiBase::PARAM_DFLT => 'ascending',
				ApiBase::PARAM_TYPE => array(
					'ascending',
					'descending'
				)
			),
			'localonly' => false,
		);
	}

	public function getParamDescription() {
		return array(
			'limit' => 'How many duplicate files to return',
			'continue' => 'When more results are available, use this to continue',
			'dir' => 'The direction in which to list',
			'localonly' => 'Look only for files in the local repository',
		);
	}

	public function getResultProperties() {
		return array(
			'' => array(
				'name' => 'string',
				'user' => 'string',
				'timestamp' => 'timestamp',
				'shared' => 'boolean',
			)
		);
	}

	public function getDescription() {
		return 'List all files that are duplicates of the given file(s) based on hash values';
	}

	public function getPossibleErrors() {
		return array_merge( parent::getPossibleErrors(), array(
			array( 'code' => '_badcontinue', 'info' => 'Invalid continue param. You should pass the original value returned by the previous query' ),
		) );
	}

	public function getExamples() {
		return array(
			'api.php?action=query&titles=File:Albert_Einstein_Head.jpg&prop=duplicatefiles',
			'api.php?action=query&generator=allimages&prop=duplicatefiles',
		);
	}

	public function getHelpUrls() {
		return 'https://www.mediawiki.org/wiki/API:Properties#duplicatefiles_.2F_df';
	}

	public function getVersion() {
		return __CLASS__ . ': $Id$';
	}
}