[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/media/ -> PNGMetadataExtractor.php (source)

   1  <?php
   2  /**
   3   * PNG frame counter and metadata extractor.
   4   *
   5   * Slightly derived from GIFMetadataExtractor.php
   6   * Deliberately not using MWExceptions to avoid external dependencies, encouraging
   7   * redistribution.
   8   *
   9   * This program is free software; you can redistribute it and/or modify
  10   * it under the terms of the GNU General Public License as published by
  11   * the Free Software Foundation; either version 2 of the License, or
  12   * (at your option) any later version.
  13   *
  14   * This program is distributed in the hope that it will be useful,
  15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17   * GNU General Public License for more details.
  18   *
  19   * You should have received a copy of the GNU General Public License along
  20   * with this program; if not, write to the Free Software Foundation, Inc.,
  21   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  22   * http://www.gnu.org/copyleft/gpl.html
  23   *
  24   * @file
  25   * @ingroup Media
  26   */
  27  
  28  /**
  29   * PNG frame counter.
  30   *
  31   * @ingroup Media
  32   */
  33  class PNGMetadataExtractor {
  34      /** @var string */
  35      private static $pngSig;
  36  
  37      /** @var int */
  38      private static $crcSize;
  39  
  40      /** @var array */
  41      private static $textChunks;
  42  
  43      const VERSION = 1;
  44      const MAX_CHUNK_SIZE = 3145728; // 3 megabytes
  45  
  46  	static function getMetadata( $filename ) {
  47          self::$pngSig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
  48          self::$crcSize = 4;
  49          /* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
  50           * and http://www.w3.org/TR/PNG/#11keywords
  51           */
  52          self::$textChunks = array(
  53              'xml:com.adobe.xmp' => 'xmp',
  54              # Artist is unofficial. Author is the recommended
  55              # keyword in the PNG spec. However some people output
  56              # Artist so support both.
  57              'artist' => 'Artist',
  58              'model' => 'Model',
  59              'make' => 'Make',
  60              'author' => 'Artist',
  61              'comment' => 'PNGFileComment',
  62              'description' => 'ImageDescription',
  63              'title' => 'ObjectName',
  64              'copyright' => 'Copyright',
  65              # Source as in original device used to make image
  66              # not as in who gave you the image
  67              'source' => 'Model',
  68              'software' => 'Software',
  69              'disclaimer' => 'Disclaimer',
  70              'warning' => 'ContentWarning',
  71              'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
  72              'label' => 'Label',
  73              'creation time' => 'DateTimeDigitized',
  74              /* Other potentially useful things - Document */
  75          );
  76  
  77          $frameCount = 0;
  78          $loopCount = 1;
  79          $text = array();
  80          $duration = 0.0;
  81          $bitDepth = 0;
  82          $colorType = 'unknown';
  83  
  84          if ( !$filename ) {
  85              throw new Exception( __METHOD__ . ": No file name specified" );
  86          } elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
  87              throw new Exception( __METHOD__ . ": File $filename does not exist" );
  88          }
  89  
  90          $fh = fopen( $filename, 'rb' );
  91  
  92          if ( !$fh ) {
  93              throw new Exception( __METHOD__ . ": Unable to open file $filename" );
  94          }
  95  
  96          // Check for the PNG header
  97          $buf = fread( $fh, 8 );
  98          if ( $buf != self::$pngSig ) {
  99              throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
 100          }
 101  
 102          // Read chunks
 103          while ( !feof( $fh ) ) {
 104              $buf = fread( $fh, 4 );
 105              if ( !$buf || strlen( $buf ) < 4 ) {
 106                  throw new Exception( __METHOD__ . ": Read error" );
 107              }
 108              $chunk_size = unpack( "N", $buf );
 109              $chunk_size = $chunk_size[1];
 110  
 111              if ( $chunk_size < 0 ) {
 112                  throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
 113              }
 114  
 115              $chunk_type = fread( $fh, 4 );
 116              if ( !$chunk_type || strlen( $chunk_type ) < 4 ) {
 117                  throw new Exception( __METHOD__ . ": Read error" );
 118              }
 119  
 120              if ( $chunk_type == "IHDR" ) {
 121                  $buf = self::read( $fh, $chunk_size );
 122                  if ( !$buf || strlen( $buf ) < $chunk_size ) {
 123                      throw new Exception( __METHOD__ . ": Read error" );
 124                  }
 125                  $bitDepth = ord( substr( $buf, 8, 1 ) );
 126                  // Detect the color type in British English as per the spec
 127                  // http://www.w3.org/TR/PNG/#11IHDR
 128                  switch ( ord( substr( $buf, 9, 1 ) ) ) {
 129                      case 0:
 130                          $colorType = 'greyscale';
 131                          break;
 132                      case 2:
 133                          $colorType = 'truecolour';
 134                          break;
 135                      case 3:
 136                          $colorType = 'index-coloured';
 137                          break;
 138                      case 4:
 139                          $colorType = 'greyscale-alpha';
 140                          break;
 141                      case 6:
 142                          $colorType = 'truecolour-alpha';
 143                          break;
 144                      default:
 145                          $colorType = 'unknown';
 146                          break;
 147                  }
 148              } elseif ( $chunk_type == "acTL" ) {
 149                  $buf = fread( $fh, $chunk_size );
 150                  if ( !$buf || strlen( $buf ) < $chunk_size || $chunk_size < 4 ) {
 151                      throw new Exception( __METHOD__ . ": Read error" );
 152                  }
 153  
 154                  $actl = unpack( "Nframes/Nplays", $buf );
 155                  $frameCount = $actl['frames'];
 156                  $loopCount = $actl['plays'];
 157              } elseif ( $chunk_type == "fcTL" ) {
 158                  $buf = self::read( $fh, $chunk_size );
 159                  if ( !$buf || strlen( $buf ) < $chunk_size ) {
 160                      throw new Exception( __METHOD__ . ": Read error" );
 161                  }
 162                  $buf = substr( $buf, 20 );
 163                  if ( strlen( $buf ) < 4 ) {
 164                      throw new Exception( __METHOD__ . ": Read error" );
 165                  }
 166  
 167                  $fctldur = unpack( "ndelay_num/ndelay_den", $buf );
 168                  if ( $fctldur['delay_den'] == 0 ) {
 169                      $fctldur['delay_den'] = 100;
 170                  }
 171                  if ( $fctldur['delay_num'] ) {
 172                      $duration += $fctldur['delay_num'] / $fctldur['delay_den'];
 173                  }
 174              } elseif ( $chunk_type == "iTXt" ) {
 175                  // Extracts iTXt chunks, uncompressing if necessary.
 176                  $buf = self::read( $fh, $chunk_size );
 177                  $items = array();
 178                  if ( preg_match(
 179                      '/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
 180                      $buf, $items )
 181                  ) {
 182                      /* $items[1] = text chunk name, $items[2] = compressed flag,
 183                       * $items[3] = lang code (or ""), $items[4]= compression type.
 184                       * $items[5] = content
 185                       */
 186  
 187                      // Theoretically should be case-sensitive, but in practise...
 188                      $items[1] = strtolower( $items[1] );
 189                      if ( !isset( self::$textChunks[$items[1]] ) ) {
 190                          // Only extract textual chunks on our list.
 191                          fseek( $fh, self::$crcSize, SEEK_CUR );
 192                          continue;
 193                      }
 194  
 195                      $items[3] = strtolower( $items[3] );
 196                      if ( $items[3] == '' ) {
 197                          // if no lang specified use x-default like in xmp.
 198                          $items[3] = 'x-default';
 199                      }
 200  
 201                      // if compressed
 202                      if ( $items[2] == "\x01" ) {
 203                          if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
 204                              wfSuppressWarnings();
 205                              $items[5] = gzuncompress( $items[5] );
 206                              wfRestoreWarnings();
 207  
 208                              if ( $items[5] === false ) {
 209                                  // decompression failed
 210                                  wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] . "\n" );
 211                                  fseek( $fh, self::$crcSize, SEEK_CUR );
 212                                  continue;
 213                              }
 214                          } else {
 215                              wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
 216                                  . " or potentially invalid compression method\n" );
 217                              fseek( $fh, self::$crcSize, SEEK_CUR );
 218                              continue;
 219                          }
 220                      }
 221                      $finalKeyword = self::$textChunks[$items[1]];
 222                      $text[$finalKeyword][$items[3]] = $items[5];
 223                      $text[$finalKeyword]['_type'] = 'lang';
 224                  } else {
 225                      // Error reading iTXt chunk
 226                      throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
 227                  }
 228              } elseif ( $chunk_type == 'tEXt' ) {
 229                  $buf = self::read( $fh, $chunk_size );
 230  
 231                  // In case there is no \x00 which will make explode fail.
 232                  if ( strpos( $buf, "\x00" ) === false ) {
 233                      throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
 234                  }
 235  
 236                  list( $keyword, $content ) = explode( "\x00", $buf, 2 );
 237                  if ( $keyword === '' || $content === '' ) {
 238                      throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
 239                  }
 240  
 241                  // Theoretically should be case-sensitive, but in practise...
 242                  $keyword = strtolower( $keyword );
 243                  if ( !isset( self::$textChunks[$keyword] ) ) {
 244                      // Don't recognize chunk, so skip.
 245                      fseek( $fh, self::$crcSize, SEEK_CUR );
 246                      continue;
 247                  }
 248                  wfSuppressWarnings();
 249                  $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
 250                  wfRestoreWarnings();
 251  
 252                  if ( $content === false ) {
 253                      throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
 254                  }
 255  
 256                  $finalKeyword = self::$textChunks[$keyword];
 257                  $text[$finalKeyword]['x-default'] = $content;
 258                  $text[$finalKeyword]['_type'] = 'lang';
 259              } elseif ( $chunk_type == 'zTXt' ) {
 260                  if ( function_exists( 'gzuncompress' ) ) {
 261                      $buf = self::read( $fh, $chunk_size );
 262  
 263                      // In case there is no \x00 which will make explode fail.
 264                      if ( strpos( $buf, "\x00" ) === false ) {
 265                          throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
 266                      }
 267  
 268                      list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
 269                      if ( $keyword === '' || $postKeyword === '' ) {
 270                          throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
 271                      }
 272                      // Theoretically should be case-sensitive, but in practise...
 273                      $keyword = strtolower( $keyword );
 274  
 275                      if ( !isset( self::$textChunks[$keyword] ) ) {
 276                          // Don't recognize chunk, so skip.
 277                          fseek( $fh, self::$crcSize, SEEK_CUR );
 278                          continue;
 279                      }
 280                      $compression = substr( $postKeyword, 0, 1 );
 281                      $content = substr( $postKeyword, 1 );
 282                      if ( $compression !== "\x00" ) {
 283                          wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping.\n" );
 284                          fseek( $fh, self::$crcSize, SEEK_CUR );
 285                          continue;
 286                      }
 287  
 288                      wfSuppressWarnings();
 289                      $content = gzuncompress( $content );
 290                      wfRestoreWarnings();
 291  
 292                      if ( $content === false ) {
 293                          // decompression failed
 294                          wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword . "\n" );
 295                          fseek( $fh, self::$crcSize, SEEK_CUR );
 296                          continue;
 297                      }
 298  
 299                      wfSuppressWarnings();
 300                      $content = iconv( 'ISO-8859-1', 'UTF-8', $content );
 301                      wfRestoreWarnings();
 302  
 303                      if ( $content === false ) {
 304                          throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
 305                      }
 306  
 307                      $finalKeyword = self::$textChunks[$keyword];
 308                      $text[$finalKeyword]['x-default'] = $content;
 309                      $text[$finalKeyword]['_type'] = 'lang';
 310                  } else {
 311                      wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping.\n" );
 312                      fseek( $fh, $chunk_size, SEEK_CUR );
 313                  }
 314              } elseif ( $chunk_type == 'tIME' ) {
 315                  // last mod timestamp.
 316                  if ( $chunk_size !== 7 ) {
 317                      throw new Exception( __METHOD__ . ": tIME wrong size" );
 318                  }
 319                  $buf = self::read( $fh, $chunk_size );
 320                  if ( !$buf || strlen( $buf ) < $chunk_size ) {
 321                      throw new Exception( __METHOD__ . ": Read error" );
 322                  }
 323  
 324                  // Note: spec says this should be UTC.
 325                  $t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
 326                  $strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
 327                      $t['y'], $t['m'], $t['d'], $t['h'],
 328                      $t['min'], $t['s'] );
 329  
 330                  $exifTime = wfTimestamp( TS_EXIF, $strTime );
 331  
 332                  if ( $exifTime ) {
 333                      $text['DateTime'] = $exifTime;
 334                  }
 335              } elseif ( $chunk_type == 'pHYs' ) {
 336                  // how big pixels are (dots per meter).
 337                  if ( $chunk_size !== 9 ) {
 338                      throw new Exception( __METHOD__ . ": pHYs wrong size" );
 339                  }
 340  
 341                  $buf = self::read( $fh, $chunk_size );
 342                  if ( !$buf || strlen( $buf ) < $chunk_size ) {
 343                      throw new Exception( __METHOD__ . ": Read error" );
 344                  }
 345  
 346                  $dim = unpack( "Nwidth/Nheight/Cunit", $buf );
 347                  if ( $dim['unit'] == 1 ) {
 348                      // Need to check for negative because php
 349                      // doesn't deal with super-large unsigned 32-bit ints well
 350                      if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
 351                          // unit is meters
 352                          // (as opposed to 0 = undefined )
 353                          $text['XResolution'] = $dim['width']
 354                              . '/100';
 355                          $text['YResolution'] = $dim['height']
 356                              . '/100';
 357                          $text['ResolutionUnit'] = 3;
 358                          // 3 = dots per cm (from Exif).
 359                      }
 360                  }
 361              } elseif ( $chunk_type == "IEND" ) {
 362                  break;
 363              } else {
 364                  fseek( $fh, $chunk_size, SEEK_CUR );
 365              }
 366              fseek( $fh, self::$crcSize, SEEK_CUR );
 367          }
 368          fclose( $fh );
 369  
 370          if ( $loopCount > 1 ) {
 371              $duration *= $loopCount;
 372          }
 373  
 374          if ( isset( $text['DateTimeDigitized'] ) ) {
 375              // Convert date format from rfc2822 to exif.
 376              foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
 377                  if ( $name === '_type' ) {
 378                      continue;
 379                  }
 380  
 381                  // @todo FIXME: Currently timezones are ignored.
 382                  // possibly should be wfTimestamp's
 383                  // responsibility. (at least for numeric TZ)
 384                  $formatted = wfTimestamp( TS_EXIF, $value );
 385                  if ( $formatted ) {
 386                      // Only change if we could convert the
 387                      // date.
 388                      // The png standard says it should be
 389                      // in rfc2822 format, but not required.
 390                      // In general for the exif stuff we
 391                      // prettify the date if we can, but we
 392                      // display as-is if we cannot or if
 393                      // it is invalid.
 394                      // So do the same here.
 395  
 396                      $value = $formatted;
 397                  }
 398              }
 399          }
 400  
 401          return array(
 402              'frameCount' => $frameCount,
 403              'loopCount' => $loopCount,
 404              'duration' => $duration,
 405              'text' => $text,
 406              'bitDepth' => $bitDepth,
 407              'colorType' => $colorType,
 408          );
 409      }
 410  
 411      /**
 412       * Read a chunk, checking to make sure its not too big.
 413       *
 414       * @param resource $fh The file handle
 415       * @param int $size Size in bytes.
 416       * @throws Exception If too big
 417       * @return string The chunk.
 418       */
 419  	private static function read( $fh, $size ) {
 420          if ( $size > self::MAX_CHUNK_SIZE ) {
 421              throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
 422                  ' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
 423          }
 424  
 425          return fread( $fh, $size );
 426      }
 427  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1