MediaWiki
REL1_22
|
<?php

/**
 * PNG frame counter and metadata extractor.
 *
 * Walks the chunk stream of a PNG/APNG file and collects animation
 * information (acTL/fcTL), basic image properties (IHDR), textual
 * metadata (tEXt/zTXt/iTXt), the last-modified time (tIME) and the
 * physical resolution (pHYs).
 */
class PNGMetadataExtractor {
	/** @var string The 8-byte PNG file signature; (re)initialised by getMetadata() */
	public static $png_sig;

	/** @var int Number of CRC bytes that trail every chunk; (re)initialised by getMetadata() */
	public static $CRC_size;

	/** @var array Map of lower-cased PNG text keyword => metadata field name; (re)initialised by getMetadata() */
	public static $text_chunks;

	const VERSION = 1;
	const MAX_CHUNK_SIZE = 3145728; // 3 megabytes

	/**
	 * Extract metadata from a PNG file.
	 *
	 * @param string $filename Path of the PNG file to inspect
	 * @throws Exception If the file is missing, cannot be opened, is not a
	 *  PNG file, or contains a chunk that cannot be read/parsed. The file
	 *  handle is closed before the exception leaves this method.
	 * @return array Associative array with the keys 'frameCount',
	 *  'loopCount', 'duration', 'text', 'bitDepth' and 'colorType'
	 */
	public static function getMetadata( $filename ) {
		self::$png_sig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
		self::$CRC_size = 4;
		/* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
		 * and http://www.w3.org/TR/PNG/#11keywords
		 */
		self::$text_chunks = array(
			'xml:com.adobe.xmp' => 'xmp',
			# Artist is unofficial. Author is the recommended
			# keyword in the PNG spec. However some people output
			# Artist so support both.
			'artist' => 'Artist',
			'model' => 'Model',
			'make' => 'Make',
			'author' => 'Artist',
			'comment' => 'PNGFileComment',
			'description' => 'ImageDescription',
			'title' => 'ObjectName',
			'copyright' => 'Copyright',
			# Source as in original device used to make image
			# not as in who gave you the image
			'source' => 'Model',
			'software' => 'Software',
			'disclaimer' => 'Disclaimer',
			'warning' => 'ContentWarning',
			'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
			'label' => 'Label',
			'creation time' => 'DateTimeDigitized',
			/* Other potentially useful things - Document */
		);

		$frameCount = 0;
		$loopCount = 1;
		$text = array();
		$duration = 0.0;
		$bitDepth = 0;
		$colorType = 'unknown';

		if ( !$filename ) {
			throw new Exception( __METHOD__ . ": No file name specified" );
		} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new Exception( __METHOD__ . ": File $filename does not exist" );
		}

		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new Exception( __METHOD__ . ": Unable to open file $filename" );
		}

		// Everything that can throw while the handle is open lives inside
		// this try block so the handle is always released. A catch/rethrow
		// is used instead of "finally" to stay compatible with PHP 5.3.
		try {
			// Check for the PNG header
			$buf = fread( $fh, 8 );
			if ( $buf !== self::$png_sig ) {
				throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
			}

			// Read chunks. Each chunk is: 4-byte big-endian length,
			// 4-byte type, <length> bytes of data, 4-byte CRC.
			while ( !feof( $fh ) ) {
				$buf = fread( $fh, 4 );
				if ( !$buf || strlen( $buf ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}
				$chunk_size = unpack( "N", $buf );
				$chunk_size = $chunk_size[1];

				if ( $chunk_size < 0 ) {
					throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
				}

				$chunk_type = fread( $fh, 4 );
				if ( !$chunk_type || strlen( $chunk_type ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}

				if ( $chunk_type === "IHDR" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// Byte 8 is the bit depth, byte 9 the colour type.
					$bitDepth = ord( substr( $buf, 8, 1 ) );
					// Detect the color type in British English as per the spec
					// http://www.w3.org/TR/PNG/#11IHDR
					switch ( ord( substr( $buf, 9, 1 ) ) ) {
						case 0:
							$colorType = 'greyscale';
							break;
						case 2:
							$colorType = 'truecolour';
							break;
						case 3:
							$colorType = 'index-coloured';
							break;
						case 4:
							$colorType = 'greyscale-alpha';
							break;
						case 6:
							$colorType = 'truecolour-alpha';
							break;
						default:
							$colorType = 'unknown';
							break;
					}
				} elseif ( $chunk_type === "acTL" ) {
					// The unpack() below consumes two 32-bit integers, so
					// anything shorter than 8 bytes cannot be parsed.
					if ( $chunk_size < 8 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// Go through self::read() so MAX_CHUNK_SIZE is enforced
					// here like for every other chunk type.
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$actl = unpack( "Nframes/Nplays", $buf );
					$frameCount = $actl['frames'];
					$loopCount = $actl['plays'];
				} elseif ( $chunk_type === "fcTL" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// The delay fraction starts 20 bytes into the chunk.
					$buf = substr( $buf, 20 );
					if ( strlen( $buf ) < 4 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
					if ( $fctldur['delay_den'] == 0 ) {
						// A zero denominator is treated as 1/100ths of a second.
						$fctldur['delay_den'] = 100;
					}
					if ( $fctldur['delay_num'] ) {
						$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
					}
				} elseif ( $chunk_type === "iTXt" ) {
					// Extracts iTXt chunks, uncompressing if necessary.
					$buf = self::read( $fh, $chunk_size );
					$items = array();
					if ( preg_match(
						'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
						$buf, $items )
					) {
						/* $items[1] = text chunk name, $items[2] = compressed flag,
						 * $items[3] = lang code (or ""), $items[4]= compression type.
						 * $items[5] = content
						 *
						 * NOTE(review): in the pattern, group 4 is matched *after*
						 * the language tag, and the byte following the compressed
						 * flag is matched by a literal \x00. So $items[4] looks
						 * like the language tag's terminator rather than the
						 * compression method - confirm against the PNG spec
						 * before relying on it.
						 */

						// Theoretically should be case-sensitive, but in practise...
						$items[1] = strtolower( $items[1] );
						if ( !isset( self::$text_chunks[$items[1]] ) ) {
							// Only extract textual chunks on our list.
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						$items[3] = strtolower( $items[3] );
						if ( $items[3] == '' ) {
							// if no lang specified use x-default like in xmp.
							$items[3] = 'x-default';
						}

						// if compressed
						if ( $items[2] == "\x01" ) {
							if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
								wfSuppressWarnings();
								$items[5] = gzuncompress( $items[5] );
								wfRestoreWarnings();

								if ( $items[5] === false ) {
									// decompression failed
									wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] . "\n" );
									fseek( $fh, self::$CRC_size, SEEK_CUR );
									continue;
								}

							} else {
								wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
									. " or potentially invalid compression method\n" );
								fseek( $fh, self::$CRC_size, SEEK_CUR );
								continue;
							}
						}
						$finalKeyword = self::$text_chunks[$items[1]];
						$text[$finalKeyword][$items[3]] = $items[5];
						$text[$finalKeyword]['_type'] = 'lang';

					} else {
						// Error reading iTXt chunk
						throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
					}

				} elseif ( $chunk_type === 'tEXt' ) {
					$buf = self::read( $fh, $chunk_size );

					// In case there is no \x00 which will make explode fail.
					if ( strpos( $buf, "\x00" ) === false ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					list( $keyword, $content ) = explode( "\x00", $buf, 2 );
					if ( $keyword === '' || $content === '' ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					// Theoretically should be case-sensitive, but in practise...
					$keyword = strtolower( $keyword );
					if ( !isset( self::$text_chunks[ $keyword ] ) ) {
						// Don't recognize chunk, so skip.
						fseek( $fh, self::$CRC_size, SEEK_CUR );
						continue;
					}
					wfSuppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					wfRestoreWarnings();

					if ( $content === false ) {
						throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
					}

					$finalKeyword = self::$text_chunks[$keyword];
					$text[$finalKeyword]['x-default'] = $content;
					$text[$finalKeyword]['_type'] = 'lang';

				} elseif ( $chunk_type === 'zTXt' ) {
					if ( function_exists( 'gzuncompress' ) ) {
						$buf = self::read( $fh, $chunk_size );

						// In case there is no \x00 which will make explode fail.
						if ( strpos( $buf, "\x00" ) === false ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}

						list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
						if ( $keyword === '' || $postKeyword === '' ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}
						// Theoretically should be case-sensitive, but in practise...
						$keyword = strtolower( $keyword );

						if ( !isset( self::$text_chunks[ $keyword ] ) ) {
							// Don't recognize chunk, so skip.
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}
						// First byte after the keyword is the compression
						// method, the remainder is the compressed text.
						$compression = substr( $postKeyword, 0, 1 );
						$content = substr( $postKeyword, 1 );
						if ( $compression !== "\x00" ) {
							wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping.\n" );
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = gzuncompress( $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							// decompression failed
							wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword . "\n" );
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
						}

						$finalKeyword = self::$text_chunks[$keyword];
						$text[$finalKeyword]['x-default'] = $content;
						$text[$finalKeyword]['_type'] = 'lang';

					} else {
						wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping.\n" );
						fseek( $fh, $chunk_size, SEEK_CUR );
					}
				} elseif ( $chunk_type === 'tIME' ) {
					// last mod timestamp.
					if ( $chunk_size !== 7 ) {
						throw new Exception( __METHOD__ . ": tIME wrong size" );
					}
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					// Note: spec says this should be UTC.
					$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
					$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
						$t['y'], $t['m'], $t['d'], $t['h'],
						$t['min'], $t['s'] );

					$exifTime = wfTimestamp( TS_EXIF, $strTime );

					if ( $exifTime ) {
						$text['DateTime'] = $exifTime;
					}

				} elseif ( $chunk_type === 'pHYs' ) {
					// how big pixels are (dots per meter).
					if ( $chunk_size !== 9 ) {
						throw new Exception( __METHOD__ . ": pHYs wrong size" );
					}

					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
					if ( $dim['unit'] == 1 ) {
						// Need to check for negative because php
						// doesn't deal with super-large unsigned 32-bit ints well
						if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
							// unit is meters
							// (as opposed to 0 = undefined )
							$text['XResolution'] = $dim['width']
								. '/100';
							$text['YResolution'] = $dim['height']
								. '/100';
							$text['ResolutionUnit'] = 3;
							// 3 = dots per cm (from Exif).
						}
					}

				} elseif ( $chunk_type === "IEND" ) {
					break;
				} else {
					// Chunk we are not interested in: skip its data.
					fseek( $fh, $chunk_size, SEEK_CUR );
				}
				// Skip the CRC that trails the chunk data.
				fseek( $fh, self::$CRC_size, SEEK_CUR );
			}
		} catch ( Exception $e ) {
			// Do not leak the file handle when parsing fails.
			fclose( $fh );
			throw $e;
		}
		fclose( $fh );

		if ( $loopCount > 1 ) {
			$duration *= $loopCount;
		}

		if ( isset( $text['DateTimeDigitized'] ) ) {
			// Convert date format from rfc2822 to exif.
			foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
				if ( $name === '_type' ) {
					continue;
				}

				// @todo FIXME: Currently timezones are ignored.
				// possibly should be wfTimestamp's
				// responsibility. (at least for numeric TZ)
				$formatted = wfTimestamp( TS_EXIF, $value );
				if ( $formatted ) {
					// Only change if we could convert the
					// date.
					// The png standard says it should be
					// in rfc2822 format, but not required.
					// In general for the exif stuff we
					// prettify the date if we can, but we
					// display as-is if we cannot or if
					// it is invalid.
					// So do the same here.

					$value = $formatted;
				}
			}
			// Break the by-reference binding left behind by the loop.
			unset( $value );
		}
		return array(
			'frameCount' => $frameCount,
			'loopCount' => $loopCount,
			'duration' => $duration,
			'text' => $text,
			'bitDepth' => $bitDepth,
			'colorType' => $colorType,
		);

	}

	/**
	 * Read the data part of a chunk, refusing oversized chunks.
	 *
	 * @param resource $fh File handle positioned at the start of the chunk data
	 * @param int $size Number of bytes to read
	 * @throws Exception If $size is larger than self::MAX_CHUNK_SIZE
	 * @return string|bool The bytes read (an empty string for an empty
	 *  chunk), or whatever fread() returns on failure
	 */
	private static function read( $fh, $size ) {
		if ( $size > self::MAX_CHUNK_SIZE ) {
			throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
				' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
		}
		if ( $size <= 0 ) {
			// fread() rejects a non-positive length (warning before PHP 8,
			// ValueError from PHP 8 on); an empty chunk simply has no data.
			// Callers treat the empty string as a read error where data is
			// required.
			return '';
		}
		return fread( $fh, $size );
	}
}