[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/media/ -> SVGMetadataExtractor.php (source)

   1  <?php
   2  /**
   3   * Extraction of SVG image metadata.
   4   *
   5   * This program is free software; you can redistribute it and/or modify
   6   * it under the terms of the GNU General Public License as published by
   7   * the Free Software Foundation; either version 2 of the License, or
   8   * (at your option) any later version.
   9   *
  10   * This program is distributed in the hope that it will be useful,
  11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13   * GNU General Public License for more details.
  14   *
  15   * You should have received a copy of the GNU General Public License along
  16   * with this program; if not, write to the Free Software Foundation, Inc.,
  17   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18   * http://www.gnu.org/copyleft/gpl.html
  19   *
  20   * @file
  21   * @ingroup Media
  22   * @author "Derk-Jan Hartman <hartman _at_ videolan d0t org>"
  23   * @author Brion Vibber
  24   * @copyright Copyright © 2010-2010 Brion Vibber, Derk-Jan Hartman
  25   * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
  26   */
  27  
  28  /**
  29   * @ingroup Media
  30   */
  31  class SVGMetadataExtractor {
  32  	static function getMetadata( $filename ) {
  33          $svg = new SVGReader( $filename );
  34  
  35          return $svg->getMetadata();
  36      }
  37  }
  38  
  39  /**
  40   * @ingroup Media
  41   */
  42  class SVGReader {
  43      const DEFAULT_WIDTH = 512;
  44      const DEFAULT_HEIGHT = 512;
  45      const NS_SVG = 'http://www.w3.org/2000/svg';
  46      const LANG_PREFIX_MATCH = 1;
  47      const LANG_FULL_MATCH = 2;
  48  
  49      /** @var null|XMLReader */
  50      private $reader = null;
  51  
  52      /** @var bool */
  53      private $mDebug = false;
  54  
  55      /** @var array */
  56      private $metadata = array();
  57      private $languages = array();
  58      private $languagePrefixes = array();
  59  
  60      /**
  61       * Constructor
  62       *
  63       * Creates an SVGReader drawing from the source provided
  64       * @param string $source URI from which to read
  65       * @throws MWException|Exception
  66       */
  67  	function __construct( $source ) {
  68          global $wgSVGMetadataCutoff;
  69          $this->reader = new XMLReader();
  70  
  71          // Don't use $file->getSize() since file object passed to SVGHandler::getMetadata is bogus.
  72          $size = filesize( $source );
  73          if ( $size === false ) {
  74              throw new MWException( "Error getting filesize of SVG." );
  75          }
  76  
  77          if ( $size > $wgSVGMetadataCutoff ) {
  78              $this->debug( "SVG is $size bytes, which is bigger than $wgSVGMetadataCutoff. Truncating." );
  79              $contents = file_get_contents( $source, false, null, -1, $wgSVGMetadataCutoff );
  80              if ( $contents === false ) {
  81                  throw new MWException( 'Error reading SVG file.' );
  82              }
  83              $this->reader->XML( $contents, null, LIBXML_NOERROR | LIBXML_NOWARNING );
  84          } else {
  85              $this->reader->open( $source, null, LIBXML_NOERROR | LIBXML_NOWARNING );
  86          }
  87  
  88          // Expand entities, since Adobe Illustrator uses them for xmlns
  89          // attributes (bug 31719). Note that libxml2 has some protection
  90          // against large recursive entity expansions so this is not as
  91          // insecure as it might appear to be. However, it is still extremely
  92          // insecure. It's necessary to wrap any read() calls with
  93          // libxml_disable_entity_loader() to avoid arbitrary local file
  94          // inclusion, or even arbitrary code execution if the expect
  95          // extension is installed (bug 46859).
  96          $oldDisable = libxml_disable_entity_loader( true );
  97          $this->reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );
  98  
  99          $this->metadata['width'] = self::DEFAULT_WIDTH;
 100          $this->metadata['height'] = self::DEFAULT_HEIGHT;
 101  
 102          // The size in the units specified by the SVG file
 103          // (for the metadata box)
 104          // Per the SVG spec, if unspecified, default to '100%'
 105          $this->metadata['originalWidth'] = '100%';
 106          $this->metadata['originalHeight'] = '100%';
 107  
 108          // Because we cut off the end of the svg making an invalid one. Complicated
 109          // try catch thing to make sure warnings get restored. Seems like there should
 110          // be a better way.
 111          wfSuppressWarnings();
 112          try {
 113              $this->read();
 114          } catch ( Exception $e ) {
 115              // Note, if this happens, the width/height will be taken to be 0x0.
 116              // Should we consider it the default 512x512 instead?
 117              wfRestoreWarnings();
 118              libxml_disable_entity_loader( $oldDisable );
 119              throw $e;
 120          }
 121          wfRestoreWarnings();
 122          libxml_disable_entity_loader( $oldDisable );
 123      }
 124  
 125      /**
 126       * @return array Array with the known metadata
 127       */
 128  	public function getMetadata() {
 129          return $this->metadata;
 130      }
 131  
 132      /**
 133       * Read the SVG
 134       * @throws MWException
 135       * @return bool
 136       */
 137  	protected function read() {
 138          $keepReading = $this->reader->read();
 139  
 140          /* Skip until first element */
 141          while ( $keepReading && $this->reader->nodeType != XmlReader::ELEMENT ) {
 142              $keepReading = $this->reader->read();
 143          }
 144  
 145          if ( $this->reader->localName != 'svg' || $this->reader->namespaceURI != self::NS_SVG ) {
 146              throw new MWException( "Expected <svg> tag, got " .
 147                  $this->reader->localName . " in NS " . $this->reader->namespaceURI );
 148          }
 149          $this->debug( "<svg> tag is correct." );
 150          $this->handleSVGAttribs();
 151  
 152          $exitDepth = $this->reader->depth;
 153          $keepReading = $this->reader->read();
 154          while ( $keepReading ) {
 155              $tag = $this->reader->localName;
 156              $type = $this->reader->nodeType;
 157              $isSVG = ( $this->reader->namespaceURI == self::NS_SVG );
 158  
 159              $this->debug( "$tag" );
 160  
 161              if ( $isSVG && $tag == 'svg' && $type == XmlReader::END_ELEMENT
 162                  && $this->reader->depth <= $exitDepth
 163              ) {
 164                  break;
 165              } elseif ( $isSVG && $tag == 'title' ) {
 166                  $this->readField( $tag, 'title' );
 167              } elseif ( $isSVG && $tag == 'desc' ) {
 168                  $this->readField( $tag, 'description' );
 169              } elseif ( $isSVG && $tag == 'metadata' && $type == XmlReader::ELEMENT ) {
 170                  $this->readXml( $tag, 'metadata' );
 171              } elseif ( $isSVG && $tag == 'script' ) {
 172                  // We normally do not allow scripted svgs.
 173                  // However its possible to configure MW to let them
 174                  // in, and such files should be considered animated.
 175                  $this->metadata['animated'] = true;
 176              } elseif ( $tag !== '#text' ) {
 177                  $this->debug( "Unhandled top-level XML tag $tag" );
 178  
 179                  // Recurse into children of current tag, looking for animation and languages.
 180                  $this->animateFilterAndLang( $tag );
 181              }
 182  
 183              // Goto next element, which is sibling of current (Skip children).
 184              $keepReading = $this->reader->next();
 185          }
 186  
 187          $this->reader->close();
 188  
 189          $this->metadata['translations'] = $this->languages + $this->languagePrefixes;
 190  
 191          return true;
 192      }
 193  
 194      /**
 195       * Read a textelement from an element
 196       *
 197       * @param string $name Name of the element that we are reading from
 198       * @param string $metafield Field that we will fill with the result
 199       */
 200  	private function readField( $name, $metafield = null ) {
 201          $this->debug( "Read field $metafield" );
 202          if ( !$metafield || $this->reader->nodeType != XmlReader::ELEMENT ) {
 203              return;
 204          }
 205          $keepReading = $this->reader->read();
 206          while ( $keepReading ) {
 207              if ( $this->reader->localName == $name
 208                  && $this->reader->namespaceURI == self::NS_SVG
 209                  && $this->reader->nodeType == XmlReader::END_ELEMENT
 210              ) {
 211                  break;
 212              } elseif ( $this->reader->nodeType == XmlReader::TEXT ) {
 213                  $this->metadata[$metafield] = trim( $this->reader->value );
 214              }
 215              $keepReading = $this->reader->read();
 216          }
 217      }
 218  
 219      /**
 220       * Read an XML snippet from an element
 221       *
 222       * @param string $metafield Field that we will fill with the result
 223       * @throws MWException
 224       */
 225  	private function readXml( $metafield = null ) {
 226          $this->debug( "Read top level metadata" );
 227          if ( !$metafield || $this->reader->nodeType != XmlReader::ELEMENT ) {
 228              return;
 229          }
 230          // @todo Find and store type of xml snippet. metadata['metadataType'] = "rdf"
 231          if ( method_exists( $this->reader, 'readInnerXML' ) ) {
 232              $this->metadata[$metafield] = trim( $this->reader->readInnerXML() );
 233          } else {
 234              throw new MWException( "The PHP XMLReader extension does not come " .
 235                  "with readInnerXML() method. Your libxml is probably out of " .
 236                  "date (need 2.6.20 or later)." );
 237          }
 238          $this->reader->next();
 239      }
 240  
 241      /**
 242       * Filter all children, looking for animated elements.
 243       * Also get a list of languages that can be targeted.
 244       *
 245       * @param string $name Name of the element that we are reading from
 246       */
 247  	private function animateFilterAndLang( $name ) {
 248          $this->debug( "animate filter for tag $name" );
 249          if ( $this->reader->nodeType != XmlReader::ELEMENT ) {
 250              return;
 251          }
 252          if ( $this->reader->isEmptyElement ) {
 253              return;
 254          }
 255          $exitDepth = $this->reader->depth;
 256          $keepReading = $this->reader->read();
 257          while ( $keepReading ) {
 258              if ( $this->reader->localName == $name && $this->reader->depth <= $exitDepth
 259                  && $this->reader->nodeType == XmlReader::END_ELEMENT
 260              ) {
 261                  break;
 262              } elseif ( $this->reader->namespaceURI == self::NS_SVG
 263                  && $this->reader->nodeType == XmlReader::ELEMENT
 264              ) {
 265  
 266                  $sysLang = $this->reader->getAttribute( 'systemLanguage' );
 267                  if ( !is_null( $sysLang ) && $sysLang !== '' ) {
 268                      // See http://www.w3.org/TR/SVG/struct.html#SystemLanguageAttribute
 269                      $langList = explode( ',', $sysLang );
 270                      foreach ( $langList as $langItem ) {
 271                          $langItem = trim( $langItem );
 272                          if ( Language::isWellFormedLanguageTag( $langItem ) ) {
 273                              $this->languages[$langItem] = self::LANG_FULL_MATCH;
 274                          }
 275                          // Note, the standard says that any prefix should work,
 276                          // here we do only the initial prefix, since that will catch
 277                          // 99% of cases, and we are going to compare against fallbacks.
 278                          // This differs mildly from how the spec says languages should be
 279                          // handled, however it matches better how the MediaWiki language
 280                          // preference is generally handled.
 281                          $dash = strpos( $langItem, '-' );
 282                          // Intentionally checking both !false and > 0 at the same time.
 283                          if ( $dash ) {
 284                              $itemPrefix = substr( $langItem, 0, $dash );
 285                              if ( Language::isWellFormedLanguageTag( $itemPrefix ) ) {
 286                                  $this->languagePrefixes[$itemPrefix] = self::LANG_PREFIX_MATCH;
 287                              }
 288                          }
 289                      }
 290                  }
 291                  switch ( $this->reader->localName ) {
 292                      case 'script':
 293                          // Normally we disallow files with
 294                          // <script>, but its possible
 295                          // to configure MW to disable
 296                          // such checks.
 297                      case 'animate':
 298                      case 'set':
 299                      case 'animateMotion':
 300                      case 'animateColor':
 301                      case 'animateTransform':
 302                          $this->debug( "HOUSTON WE HAVE ANIMATION" );
 303                          $this->metadata['animated'] = true;
 304                          break;
 305                  }
 306              }
 307              $keepReading = $this->reader->read();
 308          }
 309      }
 310  
 311      // @todo FIXME: Unused, remove?
 312  	private function throwXmlError( $err ) {
 313          $this->debug( "FAILURE: $err" );
 314          wfDebug( "SVGReader XML error: $err\n" );
 315      }
 316  
 317  	private function debug( $data ) {
 318          if ( $this->mDebug ) {
 319              wfDebug( "SVGReader: $data\n" );
 320          }
 321      }
 322  
 323      // @todo FIXME: Unused, remove?
 324  	private function warn( $data ) {
 325          wfDebug( "SVGReader: $data\n" );
 326      }
 327  
 328      // @todo FIXME: Unused, remove?
 329  	private function notice( $data ) {
 330          wfDebug( "SVGReader WARN: $data\n" );
 331      }
 332  
 333      /**
 334       * Parse the attributes of an SVG element
 335       *
 336       * The parser has to be in the start element of "<svg>"
 337       */
 338  	private function handleSVGAttribs() {
 339          $defaultWidth = self::DEFAULT_WIDTH;
 340          $defaultHeight = self::DEFAULT_HEIGHT;
 341          $aspect = 1.0;
 342          $width = null;
 343          $height = null;
 344  
 345          if ( $this->reader->getAttribute( 'viewBox' ) ) {
 346              // min-x min-y width height
 347              $viewBox = preg_split( '/\s+/', trim( $this->reader->getAttribute( 'viewBox' ) ) );
 348              if ( count( $viewBox ) == 4 ) {
 349                  $viewWidth = $this->scaleSVGUnit( $viewBox[2] );
 350                  $viewHeight = $this->scaleSVGUnit( $viewBox[3] );
 351                  if ( $viewWidth > 0 && $viewHeight > 0 ) {
 352                      $aspect = $viewWidth / $viewHeight;
 353                      $defaultHeight = $defaultWidth / $aspect;
 354                  }
 355              }
 356          }
 357          if ( $this->reader->getAttribute( 'width' ) ) {
 358              $width = $this->scaleSVGUnit( $this->reader->getAttribute( 'width' ), $defaultWidth );
 359              $this->metadata['originalWidth'] = $this->reader->getAttribute( 'width' );
 360          }
 361          if ( $this->reader->getAttribute( 'height' ) ) {
 362              $height = $this->scaleSVGUnit( $this->reader->getAttribute( 'height' ), $defaultHeight );
 363              $this->metadata['originalHeight'] = $this->reader->getAttribute( 'height' );
 364          }
 365  
 366          if ( !isset( $width ) && !isset( $height ) ) {
 367              $width = $defaultWidth;
 368              $height = $width / $aspect;
 369          } elseif ( isset( $width ) && !isset( $height ) ) {
 370              $height = $width / $aspect;
 371          } elseif ( isset( $height ) && !isset( $width ) ) {
 372              $width = $height * $aspect;
 373          }
 374  
 375          if ( $width > 0 && $height > 0 ) {
 376              $this->metadata['width'] = intval( round( $width ) );
 377              $this->metadata['height'] = intval( round( $height ) );
 378          }
 379      }
 380  
 381      /**
 382       * Return a rounded pixel equivalent for a labeled CSS/SVG length.
 383       * http://www.w3.org/TR/SVG11/coords.html#UnitIdentifiers
 384       *
 385       * @param string $length CSS/SVG length.
 386       * @param float|int $viewportSize Optional scale for percentage units...
 387       * @return float Length in pixels
 388       */
 389  	static function scaleSVGUnit( $length, $viewportSize = 512 ) {
 390          static $unitLength = array(
 391              'px' => 1.0,
 392              'pt' => 1.25,
 393              'pc' => 15.0,
 394              'mm' => 3.543307,
 395              'cm' => 35.43307,
 396              'in' => 90.0,
 397              'em' => 16.0, // fake it?
 398              'ex' => 12.0, // fake it?
 399              '' => 1.0, // "User units" pixels by default
 400          );
 401          $matches = array();
 402          if ( preg_match( '/^\s*(\d+(?:\.\d+)?)(em|ex|px|pt|pc|cm|mm|in|%|)\s*$/', $length, $matches ) ) {
 403              $length = floatval( $matches[1] );
 404              $unit = $matches[2];
 405              if ( $unit == '%' ) {
 406                  return $length * 0.01 * $viewportSize;
 407              } else {
 408                  return $length * $unitLength[$unit];
 409              }
 410          } else {
 411              // Assume pixels
 412              return floatval( $length );
 413          }
 414      }
 415  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1