MediaWiki  REL1_24
PNGMetadataExtractor.php
Go to the documentation of this file.
00001 <?php
/**
 * PNG frame counter and metadata extractor.
 *
 * Walks the chunk stream of a PNG (or APNG) file and collects the bit depth,
 * colour type, animation frame/loop counts, total animation duration and a
 * set of textual metadata fields taken from iTXt, tEXt, zTXt, tIME and pHYs
 * chunks.
 */
class PNGMetadataExtractor {
    /** @var string The 8-byte PNG file signature; (re)initialised by getMetadata() */
    private static $pngSig;

    /** @var int Number of CRC bytes that trail every chunk; (re)initialised by getMetadata() */
    private static $crcSize;

    /** @var array Map of lower-cased text chunk keywords to the metadata field they are stored under */
    private static $textChunks;

    const VERSION = 1;
    const MAX_CHUNK_SIZE = 3145728; // 3 megabytes

    /**
     * Extract metadata from a PNG file.
     *
     * @param string $filename Path of the file to inspect
     * @return array Associative array with the keys 'frameCount', 'loopCount',
     *  'duration', 'text', 'bitDepth' and 'colorType'
     * @throws Exception If no usable file name is given, the file cannot be
     *  opened, it does not start with the PNG signature, or a chunk is
     *  truncated or malformed
     */
    public static function getMetadata( $filename ) {
        self::$pngSig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
        self::$crcSize = 4;
        /* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
         * and http://www.w3.org/TR/PNG/#11keywords
         */
        self::$textChunks = array(
            'xml:com.adobe.xmp' => 'xmp',
            # Artist is unofficial. Author is the recommended
            # keyword in the PNG spec. However some people output
            # Artist so support both.
            'artist' => 'Artist',
            'model' => 'Model',
            'make' => 'Make',
            'author' => 'Artist',
            'comment' => 'PNGFileComment',
            'description' => 'ImageDescription',
            'title' => 'ObjectName',
            'copyright' => 'Copyright',
            # Source as in original device used to make image
            # not as in who gave you the image
            'source' => 'Model',
            'software' => 'Software',
            'disclaimer' => 'Disclaimer',
            'warning' => 'ContentWarning',
            'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
            'label' => 'Label',
            'creation time' => 'DateTimeDigitized',
            /* Other potentially useful things - Document */
        );

        $frameCount = 0;
        $loopCount = 1;
        $text = array();
        $duration = 0.0;
        $bitDepth = 0;
        $colorType = 'unknown';

        if ( !$filename ) {
            throw new Exception( __METHOD__ . ": No file name specified" );
        } elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
            throw new Exception( __METHOD__ . ": File $filename does not exist" );
        }

        $fh = fopen( $filename, 'rb' );

        if ( !$fh ) {
            throw new Exception( __METHOD__ . ": Unable to open file $filename" );
        }

        // Everything that can throw while the handle is open lives inside this
        // try block so the handle is always released (see the catch below).
        try {
            // Check for the PNG header
            $buf = fread( $fh, 8 );
            if ( $buf !== self::$pngSig ) {
                throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
            }

            // Read chunks. Each chunk is: 4-byte big-endian length, 4-byte type,
            // <length> bytes of data, then a CRC of self::$crcSize bytes.
            while ( !feof( $fh ) ) {
                $buf = fread( $fh, 4 );
                if ( !$buf || strlen( $buf ) < 4 ) {
                    throw new Exception( __METHOD__ . ": Read error" );
                }
                $chunk_size = unpack( "N", $buf );
                $chunk_size = $chunk_size[1];

                if ( $chunk_size < 0 ) {
                    throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
                }

                $chunk_type = fread( $fh, 4 );
                if ( !$chunk_type || strlen( $chunk_type ) < 4 ) {
                    throw new Exception( __METHOD__ . ": Read error" );
                }

                if ( $chunk_type == "IHDR" ) {
                    $buf = self::read( $fh, $chunk_size );
                    if ( !$buf || strlen( $buf ) < $chunk_size ) {
                        throw new Exception( __METHOD__ . ": Read error" );
                    }
                    // Byte 8 is the bit depth, byte 9 the colour type.
                    $bitDepth = ord( substr( $buf, 8, 1 ) );
                    // Detect the color type in British English as per the spec
                    // http://www.w3.org/TR/PNG/#11IHDR
                    switch ( ord( substr( $buf, 9, 1 ) ) ) {
                        case 0:
                            $colorType = 'greyscale';
                            break;
                        case 2:
                            $colorType = 'truecolour';
                            break;
                        case 3:
                            $colorType = 'index-coloured';
                            break;
                        case 4:
                            $colorType = 'greyscale-alpha';
                            break;
                        case 6:
                            $colorType = 'truecolour-alpha';
                            break;
                        default:
                            $colorType = 'unknown';
                            break;
                    }
                } elseif ( $chunk_type == "acTL" ) {
                    // Two 32-bit integers are unpacked below, so at least
                    // 8 bytes of payload are required.
                    if ( $chunk_size < 8 ) {
                        throw new Exception( __METHOD__ . ": Read error" );
                    }
                    $buf = self::read( $fh, $chunk_size );
                    if ( !$buf || strlen( $buf ) < $chunk_size ) {
                        throw new Exception( __METHOD__ . ": Read error" );
                    }

                    $actl = unpack( "Nframes/Nplays", $buf );
                    $frameCount = $actl['frames'];
                    $loopCount = $actl['plays'];
                } elseif ( $chunk_type == "fcTL" ) {
                    $buf = self::read( $fh, $chunk_size );
                    if ( !$buf || strlen( $buf ) < $chunk_size ) {
                        throw new Exception( __METHOD__ . ": Read error" );
                    }
                    // The frame delay fraction starts 20 bytes into the chunk.
                    $buf = substr( $buf, 20 );
                    if ( strlen( $buf ) < 4 ) {
                        throw new Exception( __METHOD__ . ": Read error" );
                    }

                    $fctldur = unpack( "ndelay_num/ndelay_den", $buf );
                    // A zero denominator is treated as 100 to avoid dividing by zero.
                    if ( $fctldur['delay_den'] == 0 ) {
                        $fctldur['delay_den'] = 100;
                    }
                    if ( $fctldur['delay_num'] ) {
                        $duration += $fctldur['delay_num'] / $fctldur['delay_den'];
                    }
                } elseif ( $chunk_type == "iTXt" ) {
                    // Extracts iTXt chunks, uncompressing if necessary.
                    $buf = self::read( $fh, $chunk_size );
                    $items = array();
                    if ( preg_match(
                        '/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
                        $buf, $items )
                    ) {
                        /* $items[1] = text chunk name, $items[2] = compressed flag,
                         * $items[3] = lang code (or ""), $items[4] = the single byte
                         * that follows the lang code, $items[5] = content.
                         * The byte between the compressed flag and the lang code is
                         * matched by the literal \x00 in the pattern, so only a zero
                         * value is accepted there.
                         */

                        // Theoretically should be case-sensitive, but in practise...
                        $items[1] = strtolower( $items[1] );
                        if ( !isset( self::$textChunks[$items[1]] ) ) {
                            // Only extract textual chunks on our list.
                            fseek( $fh, self::$crcSize, SEEK_CUR );
                            continue;
                        }

                        $items[3] = strtolower( $items[3] );
                        if ( $items[3] == '' ) {
                            // if no lang specified use x-default like in xmp.
                            $items[3] = 'x-default';
                        }

                        // if compressed
                        if ( $items[2] == "\x01" ) {
                            if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
                                wfSuppressWarnings();
                                $items[5] = gzuncompress( $items[5] );
                                wfRestoreWarnings();

                                if ( $items[5] === false ) {
                                    // decompression failed
                                    wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] . "\n" );
                                    fseek( $fh, self::$crcSize, SEEK_CUR );
                                    continue;
                                }
                            } else {
                                wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
                                    . " or potentially invalid compression method\n" );
                                fseek( $fh, self::$crcSize, SEEK_CUR );
                                continue;
                            }
                        }
                        $finalKeyword = self::$textChunks[$items[1]];
                        $text[$finalKeyword][$items[3]] = $items[5];
                        $text[$finalKeyword]['_type'] = 'lang';
                    } else {
                        // Error reading iTXt chunk
                        throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
                    }
                } elseif ( $chunk_type == 'tEXt' ) {
                    $buf = self::read( $fh, $chunk_size );

                    // In case there is no \x00 which will make explode fail.
                    if ( strpos( $buf, "\x00" ) === false ) {
                        throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
                    }

                    list( $keyword, $content ) = explode( "\x00", $buf, 2 );
                    if ( $keyword === '' || $content === '' ) {
                        throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
                    }

                    // Theoretically should be case-sensitive, but in practise...
                    $keyword = strtolower( $keyword );
                    if ( !isset( self::$textChunks[$keyword] ) ) {
                        // Don't recognize chunk, so skip.
                        fseek( $fh, self::$crcSize, SEEK_CUR );
                        continue;
                    }
                    wfSuppressWarnings();
                    $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
                    wfRestoreWarnings();

                    if ( $content === false ) {
                        throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
                    }

                    $finalKeyword = self::$textChunks[$keyword];
                    $text[$finalKeyword]['x-default'] = $content;
                    $text[$finalKeyword]['_type'] = 'lang';
                } elseif ( $chunk_type == 'zTXt' ) {
                    if ( function_exists( 'gzuncompress' ) ) {
                        $buf = self::read( $fh, $chunk_size );

                        // In case there is no \x00 which will make explode fail.
                        if ( strpos( $buf, "\x00" ) === false ) {
                            throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
                        }

                        list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
                        if ( $keyword === '' || $postKeyword === '' ) {
                            throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
                        }
                        // Theoretically should be case-sensitive, but in practise...
                        $keyword = strtolower( $keyword );

                        if ( !isset( self::$textChunks[$keyword] ) ) {
                            // Don't recognize chunk, so skip.
                            fseek( $fh, self::$crcSize, SEEK_CUR );
                            continue;
                        }
                        // First byte after the keyword selects the compression
                        // method; the rest is the compressed text.
                        $compression = substr( $postKeyword, 0, 1 );
                        $content = substr( $postKeyword, 1 );
                        if ( $compression !== "\x00" ) {
                            wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping.\n" );
                            fseek( $fh, self::$crcSize, SEEK_CUR );
                            continue;
                        }

                        wfSuppressWarnings();
                        $content = gzuncompress( $content );
                        wfRestoreWarnings();

                        if ( $content === false ) {
                            // decompression failed
                            wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword . "\n" );
                            fseek( $fh, self::$crcSize, SEEK_CUR );
                            continue;
                        }

                        wfSuppressWarnings();
                        $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
                        wfRestoreWarnings();

                        if ( $content === false ) {
                            throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
                        }

                        $finalKeyword = self::$textChunks[$keyword];
                        $text[$finalKeyword]['x-default'] = $content;
                        $text[$finalKeyword]['_type'] = 'lang';
                    } else {
                        wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping.\n" );
                        fseek( $fh, $chunk_size, SEEK_CUR );
                    }
                } elseif ( $chunk_type == 'tIME' ) {
                    // last mod timestamp.
                    if ( $chunk_size !== 7 ) {
                        throw new Exception( __METHOD__ . ": tIME wrong size" );
                    }
                    $buf = self::read( $fh, $chunk_size );
                    if ( !$buf || strlen( $buf ) < $chunk_size ) {
                        throw new Exception( __METHOD__ . ": Read error" );
                    }

                    // Note: spec says this should be UTC.
                    $t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
                    $strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
                        $t['y'], $t['m'], $t['d'], $t['h'],
                        $t['min'], $t['s'] );

                    $exifTime = wfTimestamp( TS_EXIF, $strTime );

                    if ( $exifTime ) {
                        $text['DateTime'] = $exifTime;
                    }
                } elseif ( $chunk_type == 'pHYs' ) {
                    // how big pixels are (dots per meter).
                    if ( $chunk_size !== 9 ) {
                        throw new Exception( __METHOD__ . ": pHYs wrong size" );
                    }

                    $buf = self::read( $fh, $chunk_size );
                    if ( !$buf || strlen( $buf ) < $chunk_size ) {
                        throw new Exception( __METHOD__ . ": Read error" );
                    }

                    $dim = unpack( "Nwidth/Nheight/Cunit", $buf );
                    if ( $dim['unit'] == 1 ) {
                        // Need to check for negative because php
                        // doesn't deal with super-large unsigned 32-bit ints well
                        if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
                            // unit is meters
                            // (as opposed to 0 = undefined )
                            $text['XResolution'] = $dim['width']
                                . '/100';
                            $text['YResolution'] = $dim['height']
                                . '/100';
                            $text['ResolutionUnit'] = 3;
                            // 3 = dots per cm (from Exif).
                        }
                    }
                } elseif ( $chunk_type == "IEND" ) {
                    break;
                } else {
                    // Chunk type we do not care about: skip its data.
                    fseek( $fh, $chunk_size, SEEK_CUR );
                }
                // Skip the CRC that trails the chunk data.
                fseek( $fh, self::$crcSize, SEEK_CUR );
            }
        } catch ( Exception $e ) {
            // Release the handle before propagating the failure. Written as
            // catch-and-rethrow rather than "finally" to stay within the
            // syntax this file otherwise uses.
            fclose( $fh );
            throw $e;
        }
        fclose( $fh );

        if ( $loopCount > 1 ) {
            $duration *= $loopCount;
        }

        if ( isset( $text['DateTimeDigitized'] ) ) {
            // Convert date format from rfc2822 to exif.
            foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
                if ( $name === '_type' ) {
                    continue;
                }

                // @todo FIXME: Currently timezones are ignored.
                // possibly should be wfTimestamp's
                // responsibility. (at least for numeric TZ)
                $formatted = wfTimestamp( TS_EXIF, $value );
                if ( $formatted ) {
                    // Only change if we could convert the
                    // date.
                    // The png standard says it should be
                    // in rfc2822 format, but not required.
                    // In general for the exif stuff we
                    // prettify the date if we can, but we
                    // display as-is if we cannot or if
                    // it is invalid.
                    // So do the same here.

                    $value = $formatted;
                }
            }
            // Break the by-reference binding left behind by the loop.
            unset( $value );
        }

        return array(
            'frameCount' => $frameCount,
            'loopCount' => $loopCount,
            'duration' => $duration,
            'text' => $text,
            'bitDepth' => $bitDepth,
            'colorType' => $colorType,
        );
    }

    /**
     * Read a chunk's data, refusing chunks larger than MAX_CHUNK_SIZE.
     *
     * @param resource $fh File handle positioned at the start of the data
     * @param int $size Number of bytes to read
     * @return string|bool The bytes read; an empty string when $size is not
     *  positive; otherwise whatever fread() returns
     * @throws Exception If $size exceeds MAX_CHUNK_SIZE
     */
    private static function read( $fh, $size ) {
        if ( $size > self::MAX_CHUNK_SIZE ) {
            throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
                ' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
        }

        // fread() rejects a non-positive length, so answer that case directly;
        // callers already treat an empty buffer as a read error.
        if ( $size <= 0 ) {
            return '';
        }

        return fread( $fh, $size );
    }
}