MediaWiki
REL1_20
|
<?php
/**
 * Query module that lists all files which are duplicates of the given
 * file(s), matched on SHA-1 hash values. Usable as a generator.
 */
class ApiQueryDuplicateFiles extends ApiQueryGeneratorBase {

	public function __construct( $query, $moduleName ) {
		// 'df' is the parameter prefix (dflimit, dfcontinue, ...)
		parent::__construct( $query, $moduleName, 'df' );
	}

	public function execute() {
		$this->run();
	}

	public function getCacheMode( $params ) {
		// Output depends only on public file metadata, never on the requesting user.
		return 'public';
	}

	public function executeGenerator( $resultPageSet ) {
		$this->run( $resultPageSet );
	}

	/**
	 * Shared worker for execute() and executeGenerator().
	 *
	 * Collects the SHA-1 of every NS_FILE title in the page set, looks up all
	 * files sharing those hashes, and either adds result items or (in generator
	 * mode) feeds the duplicate titles into $resultPageSet.
	 *
	 * @param $resultPageSet ApiPageSet|null Non-null when running as a generator
	 */
	private function run( $resultPageSet = null ) {
		$params = $this->extractRequestParams();
		$namespaces = $this->getPageSet()->getAllTitlesByNamespace();
		if ( empty( $namespaces[NS_FILE] ) ) {
			// Nothing in the File namespace to work on
			return;
		}
		$images = $namespaces[NS_FILE];

		if ( $params['dir'] == 'descending' ) {
			$images = array_reverse( $images );
		}

		// Continue value has the form "<image>|<dupName>": resume at $fromImage,
		// skipping duplicates that sort before $skipUntilThisDup.
		$skipUntilThisDup = false;
		if ( isset( $params['continue'] ) ) {
			$cont = explode( '|', $params['continue'] );
			if ( count( $cont ) != 2 ) {
				$this->dieUsage( 'Invalid continue param. You should pass the ' .
					'original value returned by the previous query', '_badcontinue' );
			}
			$fromImage = $cont[0];
			$skipUntilThisDup = $cont[1];
			// Filter out any images before $fromImage
			foreach ( $images as $image => $pageId ) {
				if ( $image < $fromImage ) {
					unset( $images[$image] );
				} else {
					break;
				}
			}
		}

		$filesToFind = array_keys( $images );
		if ( $params['localonly'] ) {
			$files = RepoGroup::singleton()->getLocalRepo()->findFiles( $filesToFind );
		} else {
			$files = RepoGroup::singleton()->findFiles( $filesToFind );
		}

		$fit = true;
		$count = 0;
		$titles = array();

		// Map file name => sha1 for every input file that actually exists
		$sha1s = array();
		foreach ( $files as $file ) {
			$sha1s[$file->getName()] = $file->getSha1();
		}

		// Find all files with the hashes; result format is:
		// array( hash => array( dup1, dup2 ), hash1 => ... )
		$filesToFindBySha1s = array_unique( array_values( $sha1s ) );
		if ( $params['localonly'] ) {
			$filesBySha1s = RepoGroup::singleton()->getLocalRepo()->findBySha1s( $filesToFindBySha1s );
		} else {
			$filesBySha1s = RepoGroup::singleton()->findBySha1s( $filesToFindBySha1s );
		}

		// Iterate over $images (not $files) so the continue param is handled
		// in a stable, predictable order.
		foreach ( $images as $image => $pageId ) {
			if ( !isset( $sha1s[$image] ) ) {
				continue; // file does not exist
			}
			$sha1 = $sha1s[$image];
			$dupFiles = $filesBySha1s[$sha1];
			if ( $params['dir'] == 'descending' ) {
				$dupFiles = array_reverse( $dupFiles );
			}
			foreach ( $dupFiles as $dupFile ) {
				$dupName = $dupFile->getName();
				if ( $image == $dupName && $dupFile->isLocal() ) {
					continue; // ignore the local file itself
				}
				if ( $skipUntilThisDup !== false && $dupName < $skipUntilThisDup ) {
					continue; // skip to pos after the image from continue param
				}
				$skipUntilThisDup = false;
				if ( ++$count > $params['limit'] ) {
					$fit = false; // break outer loop
					// We're one over limit which shows that
					// there are additional images to be had. Stop here...
					$this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
					break;
				}
				if ( !is_null( $resultPageSet ) ) {
					// Fix: add the title of the duplicate currently being iterated.
					// The previous code used $file, a stale leftover from the
					// sha1-collection loop above, so the generator emitted the
					// wrong (last-scanned) title for every duplicate.
					$titles[] = $dupFile->getTitle();
				} else {
					$r = array(
						'name' => $dupName,
						'user' => $dupFile->getUser( 'text' ),
						'timestamp' => wfTimestamp( TS_ISO_8601, $dupFile->getTimestamp() )
					);
					if ( !$dupFile->isLocal() ) {
						$r['shared'] = '';
					}
					$fit = $this->addPageSubItem( $pageId, $r );
					if ( !$fit ) {
						$this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
						break;
					}
				}
			}
			if ( !$fit ) {
				break;
			}
		}
		if ( !is_null( $resultPageSet ) ) {
			$resultPageSet->populateFromTitles( $titles );
		}
	}

	public function getAllowedParams() {
		return array(
			'limit' => array(
				ApiBase::PARAM_DFLT => 10,
				ApiBase::PARAM_TYPE => 'limit',
				ApiBase::PARAM_MIN => 1,
				ApiBase::PARAM_MAX => ApiBase::LIMIT_BIG1,
				ApiBase::PARAM_MAX2 => ApiBase::LIMIT_BIG2
			),
			'continue' => null,
			'dir' => array(
				ApiBase::PARAM_DFLT => 'ascending',
				ApiBase::PARAM_TYPE => array(
					'ascending',
					'descending'
				)
			),
			'localonly' => false,
		);
	}

	public function getParamDescription() {
		return array(
			'limit' => 'How many duplicate files to return',
			'continue' => 'When more results are available, use this to continue',
			'dir' => 'The direction in which to list',
			'localonly' => 'Look only for files in the local repository',
		);
	}

	public function getResultProperties() {
		return array(
			'' => array(
				'name' => 'string',
				'user' => 'string',
				'timestamp' => 'timestamp',
				'shared' => 'boolean',
			)
		);
	}

	public function getDescription() {
		return 'List all files that are duplicates of the given file(s) based on hash values';
	}

	public function getPossibleErrors() {
		return array_merge( parent::getPossibleErrors(), array(
			array( 'code' => '_badcontinue', 'info' => 'Invalid continue param. You should pass the original value returned by the previous query' ),
		) );
	}

	public function getExamples() {
		return array(
			'api.php?action=query&titles=File:Albert_Einstein_Head.jpg&prop=duplicatefiles',
			'api.php?action=query&generator=allimages&prop=duplicatefiles',
		);
	}

	public function getHelpUrls() {
		return 'https://www.mediawiki.org/wiki/API:Properties#duplicatefiles_.2F_df';
	}

	public function getVersion() {
		return __CLASS__ . ': $Id$';
	}
}