MediaWiki
REL1_19
|
<?php
/**
 * Reads a PNG file chunk by chunk and collects animation information
 * (APNG frame count, loop count, total duration), the IHDR bit depth and
 * colour type, and a whitelist of textual metadata (iTXt, tEXt, zTXt,
 * tIME and pHYs chunks).
 */
class PNGMetadataExtractor {
	static $png_sig;
	static $CRC_size;
	static $text_chunks;

	const VERSION = 1;
	const MAX_CHUNK_SIZE = 3145728; // 3 megabytes

	/**
	 * Extract metadata from a PNG file.
	 *
	 * The file handle is always closed before this method returns or
	 * throws.
	 *
	 * @param $filename String: path of the PNG file to read.
	 * @return Array with the keys:
	 *   - 'frameCount': frame count from the acTL chunk (0 if there is none)
	 *   - 'loopCount': play count from the acTL chunk (1 if there is none)
	 *   - 'duration': sum of all fcTL frame delays in seconds, multiplied
	 *     by loopCount when loopCount is greater than 1
	 *   - 'text': collected textual metadata, keyed by the names that
	 *     self::$text_chunks maps the PNG keywords to, plus 'DateTime',
	 *     'XResolution', 'YResolution' and 'ResolutionUnit' when present
	 *   - 'bitDepth': bit depth byte from IHDR
	 *   - 'colorType': colour type name from IHDR, or 'unknown'
	 * @throws Exception if the file is missing, cannot be opened, is not a
	 *   PNG, or contains a truncated, oversized or malformed chunk.
	 */
	static function getMetadata( $filename ) {
		self::$png_sig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
		self::$CRC_size = 4;
		/* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
		 * and http://www.w3.org/TR/PNG/#11keywords
		 */
		self::$text_chunks = array(
			'xml:com.adobe.xmp' => 'xmp',
			# Artist is unofficial. Author is the recommended
			# keyword in the PNG spec. However some people output
			# Artist so support both.
			'artist' => 'Artist',
			'model' => 'Model',
			'make' => 'Make',
			'author' => 'Artist',
			'comment' => 'PNGFileComment',
			'description' => 'ImageDescription',
			'title' => 'ObjectName',
			'copyright' => 'Copyright',
			# Source as in original device used to make image
			# not as in who gave you the image
			'source' => 'Model',
			'software' => 'Software',
			'disclaimer' => 'Disclaimer',
			'warning' => 'ContentWarning',
			'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
			'label' => 'Label',
			'creation time' => 'DateTimeDigitized',
			/* Other potentially useful things - Document */
		);

		$frameCount = 0;
		$loopCount = 1;
		$text = array();
		$duration = 0.0;
		$bitDepth = 0;
		$colorType = 'unknown';

		if ( !$filename ) {
			throw new Exception( __METHOD__ . ": No file name specified" );
		} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new Exception( __METHOD__ . ": File $filename does not exist" );
		}

		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new Exception( __METHOD__ . ": Unable to open file $filename" );
		}

		// Everything that reads from $fh lives inside this try block so the
		// handle can be released when a malformed file aborts the parse.
		try {
			// Check for the PNG header
			$buf = fread( $fh, 8 );
			if ( $buf != self::$png_sig ) {
				throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
			}

			// Read chunks. Each chunk is: 4 byte length, 4 byte type,
			// <length> bytes of data, 4 byte CRC (skipped, never verified).
			while ( !feof( $fh ) ) {
				$buf = fread( $fh, 4 );
				if ( !$buf || strlen( $buf ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}
				$chunk_size = unpack( "N", $buf );
				$chunk_size = $chunk_size[1];

				if ( $chunk_size < 0 ) {
					throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
				}

				$chunk_type = fread( $fh, 4 );
				if ( !$chunk_type || strlen( $chunk_type ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}

				if ( $chunk_type == "IHDR" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					$bitDepth = ord( substr( $buf, 8, 1 ) );
					// Detect the color type in British English as per the spec
					// http://www.w3.org/TR/PNG/#11IHDR
					switch ( ord( substr( $buf, 9, 1 ) ) ) {
						case 0:
							$colorType = 'greyscale';
							break;
						case 2:
							$colorType = 'truecolour';
							break;
						case 3:
							$colorType = 'index-coloured';
							break;
						case 4:
							$colorType = 'greyscale-alpha';
							break;
						case 6:
							$colorType = 'truecolour-alpha';
							break;
						default:
							$colorType = 'unknown';
							break;
					}
				} elseif ( $chunk_type == "acTL" ) {
					// Go through self::read() like every other chunk so the
					// MAX_CHUNK_SIZE cap applies here too.
					$buf = self::read( $fh, $chunk_size );
					// The unpack() below consumes two 32-bit integers, so
					// anything shorter than 8 bytes is a broken chunk.
					if ( !$buf || strlen( $buf ) < $chunk_size || $chunk_size < 8 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$actl = unpack( "Nframes/Nplays", $buf );
					$frameCount = $actl['frames'];
					$loopCount = $actl['plays'];
				} elseif ( $chunk_type == "fcTL" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// The delay numerator/denominator start at byte offset 20.
					$buf = substr( $buf, 20 );
					if ( strlen( $buf ) < 4 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
					if ( $fctldur['delay_den'] == 0 ) {
						// A zero denominator is treated as 100.
						$fctldur['delay_den'] = 100;
					}
					if ( $fctldur['delay_num'] ) {
						$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
					}
				} elseif ( $chunk_type == "iTXt" ) {
					// Extracts iTXt chunks, uncompressing if necessary.
					$buf = self::read( $fh, $chunk_size );
					$items = array();
					if ( preg_match(
						'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
						$buf, $items )
					) {
						/* $items[1] = text chunk name, $items[2] = compressed flag,
						 * $items[3] = lang code (or ""), $items[4]= compression type.
						 * $items[5] = content
						 */

						// Theoretically should be case-sensitive, but in practise...
						$items[1] = strtolower( $items[1] );
						if ( !isset( self::$text_chunks[$items[1]] ) ) {
							// Only extract textual chunks on our list.
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						$items[3] = strtolower( $items[3] );
						if ( $items[3] == '' ) {
							// if no lang specified use x-default like in xmp.
							$items[3] = 'x-default';
						}

						// if compressed
						if ( $items[2] == "\x01" ) {
							if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
								wfSuppressWarnings();
								$items[5] = gzuncompress( $items[5] );
								wfRestoreWarnings();

								if ( $items[5] === false ) {
									// decompression failed
									wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] );
									fseek( $fh, self::$CRC_size, SEEK_CUR );
									continue;
								}

							} else {
								wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
									. ' or potentially invalid compression method' );
								fseek( $fh, self::$CRC_size, SEEK_CUR );
								continue;
							}
						}
						$finalKeyword = self::$text_chunks[ $items[1] ];
						$text[ $finalKeyword ][ $items[3] ] = $items[5];
						$text[ $finalKeyword ]['_type'] = 'lang';

					} else {
						// Error reading iTXt chunk
						throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
					}

				} elseif ( $chunk_type == 'tEXt' ) {
					$buf = self::read( $fh, $chunk_size );

					// In case there is no \x00 which will make explode fail.
					if ( strpos( $buf, "\x00" ) === false ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					list( $keyword, $content ) = explode( "\x00", $buf, 2 );
					if ( $keyword === '' || $content === '' ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					// Theoretically should be case-sensitive, but in practise...
					$keyword = strtolower( $keyword );
					if ( !isset( self::$text_chunks[ $keyword ] ) ) {
						// Don't recognize chunk, so skip.
						fseek( $fh, self::$CRC_size, SEEK_CUR );
						continue;
					}
					wfSuppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					wfRestoreWarnings();

					if ( $content === false ) {
						throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
					}

					$finalKeyword = self::$text_chunks[ $keyword ];
					$text[ $finalKeyword ][ 'x-default' ] = $content;
					$text[ $finalKeyword ]['_type'] = 'lang';

				} elseif ( $chunk_type == 'zTXt' ) {
					if ( function_exists( 'gzuncompress' ) ) {
						$buf = self::read( $fh, $chunk_size );

						// In case there is no \x00 which will make explode fail.
						if ( strpos( $buf, "\x00" ) === false ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}

						list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
						if ( $keyword === '' || $postKeyword === '' ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}
						// Theoretically should be case-sensitive, but in practise...
						$keyword = strtolower( $keyword );

						if ( !isset( self::$text_chunks[ $keyword ] ) ) {
							// Don't recognize chunk, so skip.
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}
						$compression = substr( $postKeyword, 0, 1 );
						$content = substr( $postKeyword, 1 );
						if ( $compression !== "\x00" ) {
							wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping." );
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = gzuncompress( $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							// decompression failed
							wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword );
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
						}

						$finalKeyword = self::$text_chunks[ $keyword ];
						$text[ $finalKeyword ][ 'x-default' ] = $content;
						$text[ $finalKeyword ]['_type'] = 'lang';

					} else {
						wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping." );
						fseek( $fh, $chunk_size, SEEK_CUR );
					}
				} elseif ( $chunk_type == 'tIME' ) {
					// last mod timestamp.
					if ( $chunk_size !== 7 ) {
						throw new Exception( __METHOD__ . ": tIME wrong size" );
					}
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					// Note: spec says this should be UTC.
					$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
					$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
						$t['y'], $t['m'], $t['d'], $t['h'],
						$t['min'], $t['s'] );

					$exifTime = wfTimestamp( TS_EXIF, $strTime );

					if ( $exifTime ) {
						$text['DateTime'] = $exifTime;
					}

				} elseif ( $chunk_type == 'pHYs' ) {
					// how big pixels are (dots per meter).
					if ( $chunk_size !== 9 ) {
						throw new Exception( __METHOD__ . ": pHYs wrong size" );
					}

					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
					if ( $dim['unit'] == 1 ) {
						// Need to check for negative because php
						// doesn't deal with super-large unsigned 32-bit ints well
						if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
							// unit is meters
							// (as opposed to 0 = undefined )
							$text['XResolution'] = $dim['width']
								. '/100';
							$text['YResolution'] = $dim['height']
								. '/100';
							$text['ResolutionUnit'] = 3;
							// 3 = dots per cm (from Exif).
						}
					}

				} elseif ( $chunk_type == "IEND" ) {
					break;
				} else {
					// Chunk type we do not care about: skip its data.
					fseek( $fh, $chunk_size, SEEK_CUR );
				}
				// Skip the CRC that trails every chunk.
				fseek( $fh, self::$CRC_size, SEEK_CUR );
			}
		} catch ( Exception $e ) {
			// Do not leak the file handle when the file turns out to be
			// invalid; release it and let the caller see the same exception.
			fclose( $fh );
			throw $e;
		}
		fclose( $fh );

		if ( $loopCount > 1 ) {
			$duration *= $loopCount;
		}

		if ( isset( $text['DateTimeDigitized'] ) ) {
			// Convert date format from rfc2822 to exif.
			foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
				if ( $name === '_type' ) {
					continue;
				}

				// @todo FIXME: Currently timezones are ignored.
				// possibly should be wfTimestamp's
				// responsibility. (at least for numeric TZ)
				$formatted = wfTimestamp( TS_EXIF, $value );
				if ( $formatted ) {
					// Only change if we could convert the
					// date.
					// The png standard says it should be
					// in rfc2822 format, but not required.
					// In general for the exif stuff we
					// prettify the date if we can, but we
					// display as-is if we cannot or if
					// it is invalid.
					// So do the same here.

					$value = $formatted;
				}
			}
			// Break the reference so later writes to $value cannot alter $text.
			unset( $value );
		}
		return array(
			'frameCount' => $frameCount,
			'loopCount' => $loopCount,
			'duration' => $duration,
			'text' => $text,
			'bitDepth' => $bitDepth,
			'colorType' => $colorType,
		);

	}

	/**
	 * Read the data part of a chunk, refusing chunks larger than
	 * self::MAX_CHUNK_SIZE.
	 *
	 * @param $fh resource: open file handle positioned at the chunk data.
	 * @param $size Integer: number of bytes to read.
	 * @return String|false The bytes read (an empty string for an empty
	 *   chunk), or false if fread() fails.
	 * @throws Exception if $size is larger than self::MAX_CHUNK_SIZE.
	 */
	static private function read( $fh, $size ) {
		if ( $size > self::MAX_CHUNK_SIZE ) {
			throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
				' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
		}
		if ( $size === 0 ) {
			// fread() rejects a zero length (warning before PHP 8, ValueError
			// from PHP 8 on), so an empty chunk is answered directly.
			return '';
		}
		return fread( $fh, $size );
	}
}