MediaWiki  REL1_22
GIFMetadataExtractor.php
Go to the documentation of this file.
00001 <?php
/**
 * Extracts basic metadata from a GIF file: the number of frames, the total
 * animation duration, whether the animation loops, any embedded XMP packet
 * and any comment extension blocks.
 *
 * The file is walked block by block; image data itself is never decoded.
 */
class GIFMetadataExtractor {
    /** @var string Image separator byte (","); initialised by getMetadata() */
    public static $gif_frame_sep;

    /** @var string Extension introducer byte ("!"); initialised by getMetadata() */
    public static $gif_extension_sep;

    /** @var string Trailer byte (";"); initialised by getMetadata() */
    public static $gif_term;

    // NOTE(review): presumably bumped whenever the shape of the extracted
    // metadata changes -- confirm against the callers of this constant.
    const VERSION = 1;

    // Each sub-block is less than or equal to 255 bytes.
    // Most of the time it's 255 bytes, except for in XMP
    // blocks, where it's usually between 32-127 bytes each.
    const MAX_SUBBLOCKS = 262144; // 5mb divided by 20.

    /**
     * Read the metadata of a GIF file.
     *
     * @param string $filename Path of the file to inspect
     * @return array With keys 'frameCount' (int), 'looped' (bool),
     *   'duration' (float, seconds), 'xmp' (string, empty if none) and
     *   'comment' (array of strings)
     * @throws Exception If the file is missing, unreadable or malformed
     */
    public static function getMetadata( $filename ) {
        self::$gif_frame_sep = ',';
        self::$gif_extension_sep = '!';
        self::$gif_term = ';';

        if ( !$filename ) {
            throw new Exception( "No file name specified" );
        } elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
            throw new Exception( "File $filename does not exist" );
        }

        $fh = fopen( $filename, 'rb' );

        if ( !$fh ) {
            throw new Exception( "Unable to open file $filename" );
        }

        // Close the handle deterministically on both the success and the
        // failure path. Catch-and-rethrow is used rather than "finally" so
        // the code keeps running on PHP versions older than 5.5.
        try {
            $metadata = self::parseStream( $fh );
        } catch ( Exception $e ) {
            fclose( $fh );
            throw $e;
        }
        fclose( $fh );

        return $metadata;
    }

    /**
     * Walk an opened GIF stream from its first byte and collect the metadata.
     *
     * @param resource $fh File handle positioned at the start of the file
     * @return array Same shape as the return value of getMetadata()
     * @throws Exception If the stream is not a GIF or is malformed
     */
    private static function parseStream( $fh ) {
        $frameCount = 0;
        $duration = 0.0;
        $isLooped = false;
        $xmp = "";
        $comment = array();

        // Check for the GIF header
        $buf = fread( $fh, 6 );
        if ( $buf !== 'GIF87a' && $buf !== 'GIF89a' ) {
            throw new Exception( "Not a valid GIF file; header: $buf" );
        }

        // Skip over width and height.
        fread( $fh, 4 );

        // Packed field holding the colour table flag and size
        $bpp = self::decodeBPP( fread( $fh, 1 ) );

        // Skip over background and aspect ratio
        fread( $fh, 2 );

        // Skip over the global colour table
        self::readGCT( $fh, $bpp );

        while ( !feof( $fh ) ) {
            $buf = fread( $fh, 1 );

            if ( $buf === self::$gif_frame_sep ) {
                // Found a frame
                $frameCount++;

                // Skip bounding box
                fread( $fh, 8 );

                // Packed field of the frame, then its local colour table
                $bpp = self::decodeBPP( fread( $fh, 1 ) );
                self::readGCT( $fh, $bpp );

                // One more byte precedes the image data sub-blocks
                fread( $fh, 1 );
                self::skipBlock( $fh );
            } elseif ( $buf === self::$gif_extension_sep ) {
                $extensionCode = self::readByte( $fh );

                if ( $extensionCode === 0xF9 ) {
                    // Graphics Control Extension; delay is in hundredths of seconds.
                    $duration += self::readFrameDelay( $fh ) * 0.01;
                } elseif ( $extensionCode === 0xFE ) {
                    // Comment block(s).
                    $data = self::readComment( $fh );

                    $commentCount = count( $comment );
                    if ( $commentCount === 0
                        || $comment[$commentCount - 1] !== $data
                    ) {
                        // Some applications repeat the same comment on each
                        // frame of an animated GIF image, so if this comment
                        // is identical to the last, only extract once.
                        $comment[] = $data;
                    }
                } elseif ( $extensionCode === 0xFF ) {
                    // Application extension (Netscape info about the animated gif)
                    // or XMP (or theoretically any other type of extension block)

                    // Remember where the block starts. Rewinding to an absolute
                    // offset stays correct even when the file is truncated and
                    // fewer bytes than announced could be read.
                    $blockStart = ftell( $fh );
                    $blockLength = self::readByte( $fh );

                    if ( $blockLength !== 11 ) {
                        wfDebug( __CLASS__ . "::getMetadata GIF application block with wrong length\n" );
                        fseek( $fh, $blockStart );
                        self::skipBlock( $fh );
                    } else {
                        $data = fread( $fh, 11 );

                        if ( $data === 'NETSCAPE2.0' ) {
                            // NETSCAPE2.0 (application name for animated gif)
                            $data = fread( $fh, 2 ); // Block length and introduction, should be 03 01

                            if ( $data !== "\x03\x01" ) {
                                throw new Exception( "Expected \x03\x01, got $data" );
                            }

                            // Unsigned little-endian integer, loop count or zero for "forever"
                            $loopCount = self::readUInt16( $fh );

                            if ( $loopCount != 1 ) {
                                $isLooped = true;
                            }

                            // Read out terminator byte
                            fread( $fh, 1 );
                        } elseif ( $data === 'XMP DataXMP' ) {
                            // application name for XMP data.
                            // see pg 18 of XMP spec part 3.

                            $xmp = self::readBlock( $fh, true );

                            if ( substr( $xmp, -257, 3 ) !== "\x01\xFF\xFE"
                                || substr( $xmp, -4 ) !== "\x03\x02\x01\x00"
                            ) {
                                // this is just a sanity check.
                                throw new Exception( "XMP does not have magic trailer!" );
                            }

                            // strip out trailer.
                            $xmp = substr( $xmp, 0, -257 );
                        } else {
                            // unrecognized extension block
                            fseek( $fh, $blockStart );
                            self::skipBlock( $fh );
                        }
                    }
                } else {
                    self::skipBlock( $fh );
                }
            } elseif ( $buf === self::$gif_term ) {
                break;
            } else {
                if ( strlen( $buf ) < 1 ) {
                    throw new Exception( "Ran out of input" );
                }
                throw new Exception( "At position: " . ftell( $fh ) . ", Unknown byte " . ord( $buf ) );
            }
        }

        return array(
            'frameCount' => $frameCount,
            'looped' => $isLooped,
            'duration' => $duration,
            'xmp' => $xmp,
            'comment' => $comment,
        );
    }

    /**
     * Read one byte and return it as an unsigned integer.
     *
     * @param resource $fh
     * @return int Value between 0 and 255
     * @throws Exception If no byte could be read
     */
    private static function readByte( $fh ) {
        $buf = fread( $fh, 1 );
        if ( strlen( $buf ) < 1 ) {
            throw new Exception( "Ran out of input" );
        }

        return ord( $buf );
    }

    /**
     * Read an unsigned little-endian 16 bit integer.
     *
     * @param resource $fh
     * @return int
     * @throws Exception If fewer than two bytes could be read
     */
    private static function readUInt16( $fh ) {
        $buf = fread( $fh, 2 );
        if ( strlen( $buf ) < 2 ) {
            throw new Exception( "Ran out of input" );
        }
        $value = unpack( 'v', $buf );

        return $value[1];
    }

    /**
     * Consume the body of a Graphics Control Extension and return its delay.
     *
     * @param resource $fh Handle positioned just after the extension code
     * @return int Frame delay in hundredths of a second
     * @throws Exception On premature end of input or a missing terminator
     */
    private static function readFrameDelay( $fh ) {
        fread( $fh, 1 ); // Block size
        fread( $fh, 1 ); // Transparency, disposal method, user input

        $delay = self::readUInt16( $fh );

        fread( $fh, 1 ); // Transparent colour index

        // Should be a terminator
        if ( self::readByte( $fh ) !== 0 ) {
            throw new Exception( "Malformed Graphics Control Extension block" );
        }

        return $delay;
    }

    /**
     * Read a comment extension and return its text as UTF-8.
     *
     * @param resource $fh Handle positioned just after the extension code
     * @return string
     * @throws Exception If the comment is empty or the input ends early
     */
    private static function readComment( $fh ) {
        $data = self::readBlock( $fh );
        if ( $data === "" ) {
            throw new Exception( 'Read error, zero-length comment block' );
        }

        // The standard says this should be ASCII, however it's unclear if
        // that's true in practice. Check to see if it's valid UTF-8; if so
        // assume it's that, otherwise assume it's windows-1252.
        $dataCopy = $data;
        // quickIsNFCVerify has the side effect of replacing any invalid characters
        UtfNormal::quickIsNFCVerify( $dataCopy );

        if ( $dataCopy !== $data ) {
            wfSuppressWarnings();
            $converted = iconv( 'windows-1252', 'UTF-8', $data );
            wfRestoreWarnings();

            // iconv() returns false when it cannot convert the input. Fall
            // back to the sanitised copy so the result is always a string.
            $data = $converted !== false ? $converted : $dataCopy;
        }

        return $data;
    }

    /**
     * Skip over a colour table (global or local).
     *
     * @param resource $fh
     * @param int $bpp Bits per pixel as returned by decodeBPP(); nothing is
     *   read when it is not greater than zero
     */
    public static function readGCT( $fh, $bpp ) {
        if ( $bpp > 0 ) {
            // The table holds 2^bpp entries of three bytes each
            $entries = (int)pow( 2, $bpp );
            fread( $fh, 3 * $entries );
        }
    }

    /**
     * Decode the packed byte that describes a colour table.
     *
     * @param string $data The packed byte
     * @return int Bits per pixel (1 to 8), or 0 when the top bit, which
     *   flags the presence of a colour table, is not set
     * @throws Exception If $data is empty
     */
    public static function decodeBPP( $data ) {
        if ( strlen( $data ) < 1 ) {
            throw new Exception( "Ran out of input" );
        }
        $packed = ord( $data );

        if ( ( $packed & 0x80 ) === 0 ) {
            // No colour table follows
            return 0;
        }

        return ( $packed & 0x07 ) + 1;
    }

    /**
     * Skip a chain of sub-blocks up to and including its zero-length
     * terminator. Returns quietly if end of file is reached while skipping
     * the payload of a sub-block.
     *
     * @param resource $fh
     * @throws Exception If a sub-block length byte cannot be read
     */
    public static function skipBlock( $fh ) {
        while ( !feof( $fh ) ) {
            $blockLength = self::readByte( $fh );
            if ( $blockLength === 0 ) {
                return;
            }
            fread( $fh, $blockLength );
        }
    }

    /**
     * Read a chain of sub-blocks and return their concatenated payload.
     * The zero-length terminator is consumed but never returned.
     *
     * @param resource $fh
     * @param bool $includeLengths Whether to keep each sub-block's length
     *   byte in the returned data, in front of that sub-block's payload
     * @return string
     * @throws Exception On end of file, or when more than
     *   self::MAX_SUBBLOCKS sub-blocks are encountered
     */
    public static function readBlock( $fh, $includeLengths = false ) {
        $data = '';
        $blocks = 0;

        while ( true ) {
            $subLength = fread( $fh, 1 );
            if ( $subLength === "\0" ) {
                return $data;
            }

            $blocks++;
            if ( $blocks > self::MAX_SUBBLOCKS ) {
                throw new Exception( "MAX_SUBBLOCKS exceeded (over $blocks sub-blocks)" );
            }
            // An empty read means there is no length byte left to interpret
            if ( strlen( $subLength ) < 1 || feof( $fh ) ) {
                throw new Exception( "Read error: Unexpected EOF." );
            }
            if ( $includeLengths ) {
                $data .= $subLength;
            }

            $data .= fread( $fh, ord( $subLength ) );
        }
    }

}