MediaWiki  REL1_24
SVGMetadataExtractor.php
Go to the documentation of this file.
00001 <?php
/**
 * Convenience entry point for extracting metadata from an SVG file.
 */
class SVGMetadataExtractor {
    /**
     * Parse the given SVG file and return the metadata collected by SVGReader.
     *
     * @param string $filename Path of the SVG file to inspect
     * @return array Metadata array as produced by SVGReader::getMetadata()
     */
    public static function getMetadata( $filename ) {
        $reader = new SVGReader( $filename );
        $metadata = $reader->getMetadata();

        return $metadata;
    }
}
00038 
00042 class SVGReader {
00043     const DEFAULT_WIDTH = 512;
00044     const DEFAULT_HEIGHT = 512;
00045     const NS_SVG = 'http://www.w3.org/2000/svg';
00046     const LANG_PREFIX_MATCH = 1;
00047     const LANG_FULL_MATCH = 2;
00048 
00050     private $reader = null;
00051 
00053     private $mDebug = false;
00054 
00056     private $metadata = array();
00057     private $languages = array();
00058     private $languagePrefixes = array();
00059 
00067     function __construct( $source ) {
00068         global $wgSVGMetadataCutoff;
00069         $this->reader = new XMLReader();
00070 
00071         // Don't use $file->getSize() since file object passed to SVGHandler::getMetadata is bogus.
00072         $size = filesize( $source );
00073         if ( $size === false ) {
00074             throw new MWException( "Error getting filesize of SVG." );
00075         }
00076 
00077         if ( $size > $wgSVGMetadataCutoff ) {
00078             $this->debug( "SVG is $size bytes, which is bigger than $wgSVGMetadataCutoff. Truncating." );
00079             $contents = file_get_contents( $source, false, null, -1, $wgSVGMetadataCutoff );
00080             if ( $contents === false ) {
00081                 throw new MWException( 'Error reading SVG file.' );
00082             }
00083             $this->reader->XML( $contents, null, LIBXML_NOERROR | LIBXML_NOWARNING );
00084         } else {
00085             $this->reader->open( $source, null, LIBXML_NOERROR | LIBXML_NOWARNING );
00086         }
00087 
00088         // Expand entities, since Adobe Illustrator uses them for xmlns
00089         // attributes (bug 31719). Note that libxml2 has some protection
00090         // against large recursive entity expansions so this is not as
00091         // insecure as it might appear to be. However, it is still extremely
00092         // insecure. It's necessary to wrap any read() calls with
00093         // libxml_disable_entity_loader() to avoid arbitrary local file
00094         // inclusion, or even arbitrary code execution if the expect
00095         // extension is installed (bug 46859).
00096         $oldDisable = libxml_disable_entity_loader( true );
00097         $this->reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );
00098 
00099         $this->metadata['width'] = self::DEFAULT_WIDTH;
00100         $this->metadata['height'] = self::DEFAULT_HEIGHT;
00101 
00102         // The size in the units specified by the SVG file
00103         // (for the metadata box)
00104         // Per the SVG spec, if unspecified, default to '100%'
00105         $this->metadata['originalWidth'] = '100%';
00106         $this->metadata['originalHeight'] = '100%';
00107 
00108         // Because we cut off the end of the svg making an invalid one. Complicated
00109         // try catch thing to make sure warnings get restored. Seems like there should
00110         // be a better way.
00111         wfSuppressWarnings();
00112         try {
00113             $this->read();
00114         } catch ( Exception $e ) {
00115             // Note, if this happens, the width/height will be taken to be 0x0.
00116             // Should we consider it the default 512x512 instead?
00117             wfRestoreWarnings();
00118             libxml_disable_entity_loader( $oldDisable );
00119             throw $e;
00120         }
00121         wfRestoreWarnings();
00122         libxml_disable_entity_loader( $oldDisable );
00123     }
00124 
00128     public function getMetadata() {
00129         return $this->metadata;
00130     }
00131 
00137     protected function read() {
00138         $keepReading = $this->reader->read();
00139 
00140         /* Skip until first element */
00141         while ( $keepReading && $this->reader->nodeType != XmlReader::ELEMENT ) {
00142             $keepReading = $this->reader->read();
00143         }
00144 
00145         if ( $this->reader->localName != 'svg' || $this->reader->namespaceURI != self::NS_SVG ) {
00146             throw new MWException( "Expected <svg> tag, got " .
00147                 $this->reader->localName . " in NS " . $this->reader->namespaceURI );
00148         }
00149         $this->debug( "<svg> tag is correct." );
00150         $this->handleSVGAttribs();
00151 
00152         $exitDepth = $this->reader->depth;
00153         $keepReading = $this->reader->read();
00154         while ( $keepReading ) {
00155             $tag = $this->reader->localName;
00156             $type = $this->reader->nodeType;
00157             $isSVG = ( $this->reader->namespaceURI == self::NS_SVG );
00158 
00159             $this->debug( "$tag" );
00160 
00161             if ( $isSVG && $tag == 'svg' && $type == XmlReader::END_ELEMENT
00162                 && $this->reader->depth <= $exitDepth
00163             ) {
00164                 break;
00165             } elseif ( $isSVG && $tag == 'title' ) {
00166                 $this->readField( $tag, 'title' );
00167             } elseif ( $isSVG && $tag == 'desc' ) {
00168                 $this->readField( $tag, 'description' );
00169             } elseif ( $isSVG && $tag == 'metadata' && $type == XmlReader::ELEMENT ) {
00170                 $this->readXml( $tag, 'metadata' );
00171             } elseif ( $isSVG && $tag == 'script' ) {
00172                 // We normally do not allow scripted svgs.
00173                 // However its possible to configure MW to let them
00174                 // in, and such files should be considered animated.
00175                 $this->metadata['animated'] = true;
00176             } elseif ( $tag !== '#text' ) {
00177                 $this->debug( "Unhandled top-level XML tag $tag" );
00178 
00179                 // Recurse into children of current tag, looking for animation and languages.
00180                 $this->animateFilterAndLang( $tag );
00181             }
00182 
00183             // Goto next element, which is sibling of current (Skip children).
00184             $keepReading = $this->reader->next();
00185         }
00186 
00187         $this->reader->close();
00188 
00189         $this->metadata['translations'] = $this->languages + $this->languagePrefixes;
00190 
00191         return true;
00192     }
00193 
00200     private function readField( $name, $metafield = null ) {
00201         $this->debug( "Read field $metafield" );
00202         if ( !$metafield || $this->reader->nodeType != XmlReader::ELEMENT ) {
00203             return;
00204         }
00205         $keepReading = $this->reader->read();
00206         while ( $keepReading ) {
00207             if ( $this->reader->localName == $name
00208                 && $this->reader->namespaceURI == self::NS_SVG
00209                 && $this->reader->nodeType == XmlReader::END_ELEMENT
00210             ) {
00211                 break;
00212             } elseif ( $this->reader->nodeType == XmlReader::TEXT ) {
00213                 $this->metadata[$metafield] = trim( $this->reader->value );
00214             }
00215             $keepReading = $this->reader->read();
00216         }
00217     }
00218 
00225     private function readXml( $metafield = null ) {
00226         $this->debug( "Read top level metadata" );
00227         if ( !$metafield || $this->reader->nodeType != XmlReader::ELEMENT ) {
00228             return;
00229         }
00230         // @todo Find and store type of xml snippet. metadata['metadataType'] = "rdf"
00231         if ( method_exists( $this->reader, 'readInnerXML' ) ) {
00232             $this->metadata[$metafield] = trim( $this->reader->readInnerXML() );
00233         } else {
00234             throw new MWException( "The PHP XMLReader extension does not come " .
00235                 "with readInnerXML() method. Your libxml is probably out of " .
00236                 "date (need 2.6.20 or later)." );
00237         }
00238         $this->reader->next();
00239     }
00240 
00247     private function animateFilterAndLang( $name ) {
00248         $this->debug( "animate filter for tag $name" );
00249         if ( $this->reader->nodeType != XmlReader::ELEMENT ) {
00250             return;
00251         }
00252         if ( $this->reader->isEmptyElement ) {
00253             return;
00254         }
00255         $exitDepth = $this->reader->depth;
00256         $keepReading = $this->reader->read();
00257         while ( $keepReading ) {
00258             if ( $this->reader->localName == $name && $this->reader->depth <= $exitDepth
00259                 && $this->reader->nodeType == XmlReader::END_ELEMENT
00260             ) {
00261                 break;
00262             } elseif ( $this->reader->namespaceURI == self::NS_SVG
00263                 && $this->reader->nodeType == XmlReader::ELEMENT
00264             ) {
00265 
00266                 $sysLang = $this->reader->getAttribute( 'systemLanguage' );
00267                 if ( !is_null( $sysLang ) && $sysLang !== '' ) {
00268                     // See http://www.w3.org/TR/SVG/struct.html#SystemLanguageAttribute
00269                     $langList = explode( ',', $sysLang );
00270                     foreach ( $langList as $langItem ) {
00271                         $langItem = trim( $langItem );
00272                         if ( Language::isWellFormedLanguageTag( $langItem ) ) {
00273                             $this->languages[$langItem] = self::LANG_FULL_MATCH;
00274                         }
00275                         // Note, the standard says that any prefix should work,
00276                         // here we do only the initial prefix, since that will catch
00277                         // 99% of cases, and we are going to compare against fallbacks.
00278                         // This differs mildly from how the spec says languages should be
00279                         // handled, however it matches better how the MediaWiki language
00280                         // preference is generally handled.
00281                         $dash = strpos( $langItem, '-' );
00282                         // Intentionally checking both !false and > 0 at the same time.
00283                         if ( $dash ) {
00284                             $itemPrefix = substr( $langItem, 0, $dash );
00285                             if ( Language::isWellFormedLanguageTag( $itemPrefix ) ) {
00286                                 $this->languagePrefixes[$itemPrefix] = self::LANG_PREFIX_MATCH;
00287                             }
00288                         }
00289                     }
00290                 }
00291                 switch ( $this->reader->localName ) {
00292                     case 'script':
00293                         // Normally we disallow files with
00294                         // <script>, but its possible
00295                         // to configure MW to disable
00296                         // such checks.
00297                     case 'animate':
00298                     case 'set':
00299                     case 'animateMotion':
00300                     case 'animateColor':
00301                     case 'animateTransform':
00302                         $this->debug( "HOUSTON WE HAVE ANIMATION" );
00303                         $this->metadata['animated'] = true;
00304                         break;
00305                 }
00306             }
00307             $keepReading = $this->reader->read();
00308         }
00309     }
00310 
00311     // @todo FIXME: Unused, remove?
00312     private function throwXmlError( $err ) {
00313         $this->debug( "FAILURE: $err" );
00314         wfDebug( "SVGReader XML error: $err\n" );
00315     }
00316 
00317     private function debug( $data ) {
00318         if ( $this->mDebug ) {
00319             wfDebug( "SVGReader: $data\n" );
00320         }
00321     }
00322 
00323     // @todo FIXME: Unused, remove?
00324     private function warn( $data ) {
00325         wfDebug( "SVGReader: $data\n" );
00326     }
00327 
00328     // @todo FIXME: Unused, remove?
00329     private function notice( $data ) {
00330         wfDebug( "SVGReader WARN: $data\n" );
00331     }
00332 
00338     private function handleSVGAttribs() {
00339         $defaultWidth = self::DEFAULT_WIDTH;
00340         $defaultHeight = self::DEFAULT_HEIGHT;
00341         $aspect = 1.0;
00342         $width = null;
00343         $height = null;
00344 
00345         if ( $this->reader->getAttribute( 'viewBox' ) ) {
00346             // min-x min-y width height
00347             $viewBox = preg_split( '/\s+/', trim( $this->reader->getAttribute( 'viewBox' ) ) );
00348             if ( count( $viewBox ) == 4 ) {
00349                 $viewWidth = $this->scaleSVGUnit( $viewBox[2] );
00350                 $viewHeight = $this->scaleSVGUnit( $viewBox[3] );
00351                 if ( $viewWidth > 0 && $viewHeight > 0 ) {
00352                     $aspect = $viewWidth / $viewHeight;
00353                     $defaultHeight = $defaultWidth / $aspect;
00354                 }
00355             }
00356         }
00357         if ( $this->reader->getAttribute( 'width' ) ) {
00358             $width = $this->scaleSVGUnit( $this->reader->getAttribute( 'width' ), $defaultWidth );
00359             $this->metadata['originalWidth'] = $this->reader->getAttribute( 'width' );
00360         }
00361         if ( $this->reader->getAttribute( 'height' ) ) {
00362             $height = $this->scaleSVGUnit( $this->reader->getAttribute( 'height' ), $defaultHeight );
00363             $this->metadata['originalHeight'] = $this->reader->getAttribute( 'height' );
00364         }
00365 
00366         if ( !isset( $width ) && !isset( $height ) ) {
00367             $width = $defaultWidth;
00368             $height = $width / $aspect;
00369         } elseif ( isset( $width ) && !isset( $height ) ) {
00370             $height = $width / $aspect;
00371         } elseif ( isset( $height ) && !isset( $width ) ) {
00372             $width = $height * $aspect;
00373         }
00374 
00375         if ( $width > 0 && $height > 0 ) {
00376             $this->metadata['width'] = intval( round( $width ) );
00377             $this->metadata['height'] = intval( round( $height ) );
00378         }
00379     }
00380 
00389     static function scaleSVGUnit( $length, $viewportSize = 512 ) {
00390         static $unitLength = array(
00391             'px' => 1.0,
00392             'pt' => 1.25,
00393             'pc' => 15.0,
00394             'mm' => 3.543307,
00395             'cm' => 35.43307,
00396             'in' => 90.0,
00397             'em' => 16.0, // fake it?
00398             'ex' => 12.0, // fake it?
00399             '' => 1.0, // "User units" pixels by default
00400         );
00401         $matches = array();
00402         if ( preg_match( '/^\s*(\d+(?:\.\d+)?)(em|ex|px|pt|pc|cm|mm|in|%|)\s*$/', $length, $matches ) ) {
00403             $length = floatval( $matches[1] );
00404             $unit = $matches[2];
00405             if ( $unit == '%' ) {
00406                 return $length * 0.01 * $viewportSize;
00407             } else {
00408                 return $length * $unitLength[$unit];
00409             }
00410         } else {
00411             // Assume pixels
00412             return floatval( $length );
00413         }
00414     }
00415 }