[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/media/ -> BitmapMetadataHandler.php (source)

   1  <?php
   2  /**
   3   * Extraction of metadata from different bitmap image types.
   4   *
   5   * This program is free software; you can redistribute it and/or modify
   6   * it under the terms of the GNU General Public License as published by
   7   * the Free Software Foundation; either version 2 of the License, or
   8   * (at your option) any later version.
   9   *
  10   * This program is distributed in the hope that it will be useful,
  11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13   * GNU General Public License for more details.
  14   *
  15   * You should have received a copy of the GNU General Public License along
  16   * with this program; if not, write to the Free Software Foundation, Inc.,
  17   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18   * http://www.gnu.org/copyleft/gpl.html
  19   *
  20   * @file
  21   * @ingroup Media
  22   */
  23  
  /**
   * Class to deal with reconciling and extracting metadata from bitmap images.
   * This is meant to comply with http://www.metadataworkinggroup.org/pdf/mwg_guidance.pdf
   *
   * This sort of acts as an intermediary between MediaHandler::getMetadata
   * and the various metadata extractors.
   *
   * Typical use: one of the static entry points (Jpeg(), PNG(), GIF(), Tiff())
   * creates an instance, feeds it metadata from each available source through
   * addMetadata(), and finally asks getMetadataArray() for the merged result.
   *
   * @todo Other image formats.
   * @ingroup Media
   */
  class BitmapMetadataHandler {
      /** @var array Collected metadata, keyed by source type (e.g. 'exif', 'native') */
      private $metadata = array();

      /**
       * @var array Metadata priority: numeric priority => list of source types.
       *  When the same item is present in several sources, the value from the
       *  source with the numerically highest priority wins. Source types that
       *  are not listed here are ignored by getMetadataArray().
       */
      private $metaPriority = array(
          20 => array( 'other' ),
          40 => array( 'native' ),
          60 => array( 'iptc-good-hash', 'iptc-no-hash' ),
          70 => array( 'xmp-deprecated' ),
          80 => array( 'xmp-general' ),
          90 => array( 'xmp-exif' ),
          100 => array( 'iptc-bad-hash' ),
          120 => array( 'exif' ),
      );

      /** @var string Source type under which parsed IPTC data is stored */
      private $iptcType = 'iptc-no-hash';

      /**
       * This does the photoshop image resource app13 block
       * of interest, IPTC-IIM metadata is stored here.
       *
       * Mostly just calls doPSIR and doIPTC
       *
       * @param string $app13 String containing app13 block from jpeg file
       */
      private function doApp13( $app13 ) {
          try {
              // doPSIR() reports which IPTC category ('iptc-*-hash') applies.
              $this->iptcType = JpegMetadataExtractor::doPSIR( $app13 );
          } catch ( MWException $e ) {
              // Error reading the iptc hash information.
              // This probably means the App13 segment is something other than what we expect.
              // However, still try to read it, and treat it as if the hash didn't exist.
              wfDebug( "Error parsing iptc data of file: " . $e->getMessage() . "\n" );
              $this->iptcType = 'iptc-no-hash';
          }

          $iptc = IPTC::parse( $app13 );
          $this->addMetadata( $iptc, $this->iptcType );
      }

      /**
       * Get exif info using exif class.
       * Basically what used to be in BitmapHandler::getMetadata().
       * Just calls stuff in the Exif class.
       *
       * Nothing is added when the file does not exist, when $wgShowEXIF is
       * disabled, or when the Exif class yields no data.
       *
       * Parameters are passed to the Exif class.
       *
       * @param string $filename
       * @param string $byteOrder
       */
      public function getExif( $filename, $byteOrder ) {
          global $wgShowEXIF;
          if ( file_exists( $filename ) && $wgShowEXIF ) {
              $exif = new Exif( $filename, $byteOrder );
              $data = $exif->getFilteredData();
              if ( $data ) {
                  $this->addMetadata( $data, 'exif' );
              }
          }
      }

      /**
       * Add misc metadata. Warning: atm if the metadata category
       * doesn't have a priority, it will be silently discarded.
       *
       * If data of the same type was added before, both sets are merged and
       * the newly supplied values win for items present in both.
       *
       * @param array $metaArray Array of metadata values
       * @param string $type Type. defaults to other. if two things have the same type they're merged
       */
      public function addMetadata( $metaArray, $type = 'other' ) {
          if ( isset( $this->metadata[$type] ) ) {
              /* merge with old data; array union keeps the left-hand (new) values */
              $metaArray = $metaArray + $this->metadata[$type];
          }

          $this->metadata[$type] = $metaArray;
      }

      /**
       * Merge together the various types of metadata.
       * The different types have different priorities,
       * and are merged in order.
       *
       * This function is generally called by the media handlers' getMetadata()
       *
       * Calling this method does not modify the collected metadata, so it
       * returns the same result every time until addMetadata() is called again.
       *
       * @return array Metadata array
       */
      public function getMetadataArray() {
          // Visit the sources from highest to lowest priority, based on the
          // MWG recommendation. A copy is sorted so the object is left untouched.
          $priorities = $this->metaPriority;
          krsort( $priorities );

          $merged = array();
          foreach ( $priorities as $types ) {
              foreach ( $types as $type ) {
                  if ( !isset( $this->metadata[$type] ) ) {
                      continue;
                  }

                  // Do some special casing for multilingual values.
                  // Don't discard translations if also as a simple value.
                  foreach ( $this->metadata[$type] as $itemName => $item ) {
                      $isLang = is_array( $item )
                          && isset( $item['_type'] )
                          && $item['_type'] === 'lang';

                      if ( $isLang && isset( $merged[$itemName] ) && !is_array( $merged[$itemName] ) ) {
                          // A higher priority source only had a plain string:
                          // keep it as the default and add the translations.
                          $default = $merged[$itemName];
                          $merged[$itemName] = $item;
                          $merged[$itemName]['x-default'] = $default;
                      }
                  }

                  // Array union never overwrites keys already in $merged, so
                  // higher priority values (and the combined multilingual
                  // values built above) are preserved.
                  $merged = $merged + $this->metadata[$type];
              }
          }

          return $merged;
      }

      /**
       * Main entry point for jpeg's.
       *
       * @param string $filename Filename (with full path)
       * @return array Metadata result array.
       * @throws MWException On invalid file.
       */
      public static function Jpeg( $filename ) {
          $showXMP = function_exists( 'xml_parser_create_ns' );
          $meta = new self();

          $seg = JpegMetadataExtractor::segmentSplitter( $filename );
          if ( isset( $seg['COM'][0] ) ) {
              $meta->addMetadata( array( 'JPEGFileComment' => $seg['COM'] ), 'native' );
          }
          if ( isset( $seg['PSIR'] ) && count( $seg['PSIR'] ) > 0 ) {
              foreach ( $seg['PSIR'] as $curPSIRValue ) {
                  $meta->doApp13( $curPSIRValue );
              }
          }
          if ( isset( $seg['XMP'] ) && $showXMP ) {
              $xmp = new XMPReader();
              $xmp->parse( $seg['XMP'] );
              // Only walk the extended XMP segments when there are any.
              if ( isset( $seg['XMP_ext'] ) ) {
                  foreach ( $seg['XMP_ext'] as $xmpExt ) {
                      /* Support for extended xmp in jpeg files
                       * is not well tested and a bit fragile.
                       */
                      $xmp->parseExtended( $xmpExt );
                  }
              }
              $res = $xmp->getResults();
              foreach ( $res as $type => $array ) {
                  $meta->addMetadata( $array, $type );
              }
          }
          if ( isset( $seg['byteOrder'] ) ) {
              $meta->getExif( $filename, $seg['byteOrder'] );
          }

          return $meta->getMetadataArray();
      }

      /**
       * Entry point for png.
       * At some point in the future this might merge the various
       * PNG tEXt chunks that are interesting, but for now it only
       * does XMP.
       *
       * The merged metadata is stored under the 'metadata' key of the
       * returned array, together with a '_MW_PNG_VERSION' marker.
       *
       * @param string $filename Full path to file
       * @return array Array for storage in img_metadata.
       */
      public static function PNG( $filename ) {
          $showXMP = function_exists( 'xml_parser_create_ns' );

          $meta = new self();
          // NOTE(review): assumes the extractor result always has a 'text' key - confirm.
          $array = PNGMetadataExtractor::getMetadata( $filename );
          if ( isset( $array['text']['xmp']['x-default'] )
              && $array['text']['xmp']['x-default'] !== '' && $showXMP
          ) {
              $xmp = new XMPReader();
              $xmp->parse( $array['text']['xmp']['x-default'] );
              $xmpRes = $xmp->getResults();
              foreach ( $xmpRes as $type => $xmpSection ) {
                  $meta->addMetadata( $xmpSection, $type );
              }
          }
          // The raw XMP packet is not stored; the remaining text items are
          // kept as 'native' metadata.
          unset( $array['text']['xmp'] );
          $meta->addMetadata( $array['text'], 'native' );
          unset( $array['text'] );
          $array['metadata'] = $meta->getMetadataArray();
          $array['metadata']['_MW_PNG_VERSION'] = PNGMetadataExtractor::VERSION;

          return $array;
      }

      /**
       * Function for gif images.
       *
       * They don't really have native metadata, so just merges together
       * XMP and image comment.
       *
       * The merged metadata is stored under the 'metadata' key of the
       * returned array, together with a '_MW_GIF_VERSION' marker.
       *
       * @param string $filename Full path to file
       * @return array Metadata array
       */
      public static function GIF( $filename ) {
          $meta = new self();
          $baseArray = GIFMetadataExtractor::getMetadata( $filename );

          if ( count( $baseArray['comment'] ) > 0 ) {
              $meta->addMetadata( array( 'GIFFileComment' => $baseArray['comment'] ), 'native' );
          }

          if ( $baseArray['xmp'] !== '' && function_exists( 'xml_parser_create_ns' ) ) {
              $xmp = new XMPReader();
              $xmp->parse( $baseArray['xmp'] );
              $xmpRes = $xmp->getResults();
              foreach ( $xmpRes as $type => $xmpSection ) {
                  $meta->addMetadata( $xmpSection, $type );
              }
          }

          // The raw comment and XMP data now live in the merged metadata.
          unset( $baseArray['comment'] );
          unset( $baseArray['xmp'] );

          $baseArray['metadata'] = $meta->getMetadataArray();
          $baseArray['metadata']['_MW_GIF_VERSION'] = GIFMetadataExtractor::VERSION;

          return $baseArray;
      }

      /**
       * This doesn't do much yet, but eventually I plan to add
       * XMP support for Tiff. (PHP's exif support already extracts
       * but needs some further processing because PHP's exif support
       * is stupid...)
       *
       * @todo Add XMP support, so this function actually makes sense to put here.
       *
       * The various exceptions this throws are caught later.
       * @param string $filename
       * @throws MWException If the file is missing, its byte order cannot be
       *  determined, or no data could be extracted from it.
       * @return array The metadata.
       */
      public static function Tiff( $filename ) {
          if ( !file_exists( $filename ) ) {
              throw new MWException( "File doesn't exist - $filename" );
          }

          $byteOrder = self::getTiffByteOrder( $filename );
          if ( !$byteOrder ) {
              throw new MWException( "Error determining byte order of $filename" );
          }

          $exif = new Exif( $filename, $byteOrder );
          $data = $exif->getFilteredData();
          if ( !$data ) {
              throw new MWException( "Could not extract data from tiff file $filename" );
          }

          $data['MEDIAWIKI_EXIF_VERSION'] = Exif::version();

          return $data;
      }

      /**
       * Read the first 2 bytes of a tiff file to figure out
       * Little Endian or Big Endian. Needed for exif stuff.
       *
       * @param string $filename The filename
       * @return string|bool 'BE' or 'LE', or false if the file could not be
       *  opened or does not start with a known byte order marker
       */
      public static function getTiffByteOrder( $filename ) {
          $fh = fopen( $filename, 'rb' );
          if ( !$fh ) {
              return false;
          }
          $head = fread( $fh, 2 );
          fclose( $fh );

          if ( $head === 'II' ) {
              return 'LE'; // II for Intel.
          }
          if ( $head === 'MM' ) {
              return 'BE'; // MM for Motorola.
          }

          return false; // Something went wrong.
      }
  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1