MediaWiki
REL1_24
|
<?php
/**
 * Extraction of SVG image metadata (dimensions, title, description,
 * embedded metadata XML, animation flag and systemLanguage translations).
 */

/**
 * Thin static facade around SVGReader.
 */
class SVGMetadataExtractor {
	/**
	 * Parse an SVG file and return the metadata collected by SVGReader.
	 *
	 * @param string $filename Path of the SVG file to read
	 * @return array Metadata array as built by SVGReader
	 * @throws MWException Propagated from SVGReader when the file cannot be read or parsed
	 */
	static function getMetadata( $filename ) {
		$svg = new SVGReader( $filename );

		return $svg->getMetadata();
	}
}

/**
 * XMLReader-based SVG parser. All parsing happens in the constructor; the
 * result is retrieved afterwards with getMetadata().
 */
class SVGReader {
	/** Width used when the file gives no usable width */
	const DEFAULT_WIDTH = 512;
	/** Height used when the file gives no usable height */
	const DEFAULT_HEIGHT = 512;
	/** Namespace URI that SVG elements must carry to be recognised */
	const NS_SVG = 'http://www.w3.org/2000/svg';
	/** Value stored for a language that was only seen as the prefix of a longer tag */
	const LANG_PREFIX_MATCH = 1;
	/** Value stored for a language that appeared verbatim in a systemLanguage list */
	const LANG_FULL_MATCH = 2;

	/** @var null|XMLReader */
	private $reader = null;

	/** @var bool When true, debug() forwards its messages to wfDebug() */
	private $mDebug = false;

	/** @var array Metadata collected so far */
	private $metadata = array();
	/** @var array Language tag => self::LANG_FULL_MATCH */
	private $languages = array();
	/** @var array Language prefix => self::LANG_PREFIX_MATCH */
	private $languagePrefixes = array();

	/**
	 * Open the given file and parse it immediately.
	 *
	 * Files larger than $wgSVGMetadataCutoff bytes are truncated to that many
	 * bytes before being handed to the parser.
	 *
	 * @param string $source Path of the SVG file
	 * @throws MWException If the file size or contents cannot be obtained, or
	 *   if read() rejects the document
	 */
	function __construct( $source ) {
		global $wgSVGMetadataCutoff;
		$this->reader = new XMLReader();

		// Don't use $file->getSize() since file object passed to SVGHandler::getMetadata is bogus.
		$size = filesize( $source );
		if ( $size === false ) {
			throw new MWException( "Error getting filesize of SVG." );
		}

		if ( $size > $wgSVGMetadataCutoff ) {
			$this->debug( "SVG is $size bytes, which is bigger than $wgSVGMetadataCutoff. Truncating." );
			// Read from offset 0. A negative offset is interpreted as "relative
			// to the end of the stream" by newer PHP versions, which would
			// return only the tail of the file instead of its beginning.
			$contents = file_get_contents( $source, false, null, 0, $wgSVGMetadataCutoff );
			if ( $contents === false ) {
				throw new MWException( 'Error reading SVG file.' );
			}
			$this->reader->XML( $contents, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		} else {
			$this->reader->open( $source, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		}

		// Expand entities, since Adobe Illustrator uses them for xmlns
		// attributes (bug 31719). Note that libxml2 has some protection
		// against large recursive entity expansions so this is not as
		// insecure as it might appear to be. However, it is still extremely
		// insecure. It's necessary to wrap any read() calls with
		// libxml_disable_entity_loader() to avoid arbitrary local file
		// inclusion, or even arbitrary code execution if the expect
		// extension is installed (bug 46859).
		$oldDisable = libxml_disable_entity_loader( true );
		$this->reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );

		$this->metadata['width'] = self::DEFAULT_WIDTH;
		$this->metadata['height'] = self::DEFAULT_HEIGHT;

		// The size in the units specified by the SVG file
		// (for the metadata box)
		// Per the SVG spec, if unspecified, default to '100%'
		$this->metadata['originalWidth'] = '100%';
		$this->metadata['originalHeight'] = '100%';

		// Because we cut off the end of the svg making an invalid one. Complicated
		// try catch thing to make sure warnings get restored. Seems like there should
		// be a better way.
		wfSuppressWarnings();
		try {
			$this->read();
		} catch ( Exception $e ) {
			// Note, if this happens, the width/height will be taken to be 0x0.
			// Should we consider it the default 512x512 instead?
			wfRestoreWarnings();
			libxml_disable_entity_loader( $oldDisable );
			throw $e;
		}
		wfRestoreWarnings();
		libxml_disable_entity_loader( $oldDisable );
	}

	/**
	 * @return array Metadata gathered while constructing this object
	 */
	public function getMetadata() {
		return $this->metadata;
	}

	/**
	 * Walk the document: validate the root <svg> element, read its size
	 * attributes, then inspect each child of the root.
	 *
	 * @throws MWException If the first element is not <svg> in the SVG namespace
	 * @return bool Always true when no exception is thrown
	 */
	protected function read() {
		$keepReading = $this->reader->read();

		/* Skip until first element */
		while ( $keepReading && $this->reader->nodeType != XMLReader::ELEMENT ) {
			$keepReading = $this->reader->read();
		}

		if ( $this->reader->localName != 'svg' || $this->reader->namespaceURI != self::NS_SVG ) {
			throw new MWException( "Expected <svg> tag, got " .
				$this->reader->localName . " in NS " . $this->reader->namespaceURI );
		}
		$this->debug( "<svg> tag is correct." );
		$this->handleSVGAttribs();

		$exitDepth = $this->reader->depth;
		$keepReading = $this->reader->read();
		while ( $keepReading ) {
			$tag = $this->reader->localName;
			$type = $this->reader->nodeType;
			$isSVG = ( $this->reader->namespaceURI == self::NS_SVG );

			$this->debug( "$tag" );

			if ( $isSVG && $tag == 'svg' && $type == XMLReader::END_ELEMENT
				&& $this->reader->depth <= $exitDepth
			) {
				break;
			} elseif ( $isSVG && $tag == 'title' ) {
				$this->readField( $tag, 'title' );
			} elseif ( $isSVG && $tag == 'desc' ) {
				$this->readField( $tag, 'description' );
			} elseif ( $isSVG && $tag == 'metadata' && $type == XMLReader::ELEMENT ) {
				// readXml() takes only the metadata key; the tag name is not needed.
				$this->readXml( 'metadata' );
			} elseif ( $isSVG && $tag == 'script' ) {
				// We normally do not allow scripted svgs.
				// However its possible to configure MW to let them
				// in, and such files should be considered animated.
				$this->metadata['animated'] = true;
			} elseif ( $tag !== '#text' ) {
				$this->debug( "Unhandled top-level XML tag $tag" );

				// Recurse into children of current tag, looking for animation and languages.
				$this->animateFilterAndLang( $tag );
			}

			// Goto next element, which is sibling of current (Skip children).
			$keepReading = $this->reader->next();
		}

		$this->reader->close();

		// Array union: a tag recorded as a full match keeps that value even
		// if it was also seen as a prefix.
		$this->metadata['translations'] = $this->languages + $this->languagePrefixes;

		return true;
	}

	/**
	 * Store the text content of the current element in the metadata array.
	 * Reads forward until the matching end tag; if several text nodes are
	 * encountered, the last one wins.
	 *
	 * @param string $name Local name of the element being read
	 * @param string $metafield Metadata key to store the trimmed text under
	 */
	private function readField( $name, $metafield = null ) {
		$this->debug( "Read field $metafield" );
		if ( !$metafield || $this->reader->nodeType != XMLReader::ELEMENT ) {
			return;
		}
		$keepReading = $this->reader->read();
		while ( $keepReading ) {
			if ( $this->reader->localName == $name
				&& $this->reader->namespaceURI == self::NS_SVG
				&& $this->reader->nodeType == XMLReader::END_ELEMENT
			) {
				break;
			} elseif ( $this->reader->nodeType == XMLReader::TEXT ) {
				$this->metadata[$metafield] = trim( $this->reader->value );
			}
			$keepReading = $this->reader->read();
		}
	}

	/**
	 * Store the inner XML of the current element in the metadata array.
	 *
	 * The cursor is deliberately left on the element: the loop in read()
	 * advances with next(), which skips the element's subtree. Advancing
	 * here as well would make that loop skip the following sibling.
	 *
	 * @param string $metafield Metadata key to store the trimmed XML under
	 * @throws MWException If XMLReader lacks readInnerXML()
	 */
	private function readXml( $metafield = null ) {
		$this->debug( "Read top level metadata" );
		if ( !$metafield || $this->reader->nodeType != XMLReader::ELEMENT ) {
			return;
		}
		// @todo Find and store type of xml snippet. metadata['metadataType'] = "rdf"
		if ( method_exists( $this->reader, 'readInnerXML' ) ) {
			$this->metadata[$metafield] = trim( $this->reader->readInnerXML() );
		} else {
			throw new MWException( "The PHP XMLReader extension does not come " .
				"with readInnerXML() method. Your libxml is probably out of " .
				"date (need 2.6.20 or later)." );
		}
	}

	/**
	 * Scan the descendants of the current element for animation-related
	 * elements and systemLanguage attributes.
	 *
	 * Sets $this->metadata['animated'] and fills $this->languages /
	 * $this->languagePrefixes. Stops at the end tag that closes the element
	 * the cursor started on.
	 *
	 * @param string $name Local name of the element being scanned
	 */
	private function animateFilterAndLang( $name ) {
		$this->debug( "animate filter for tag $name" );
		if ( $this->reader->nodeType != XMLReader::ELEMENT ) {
			return;
		}
		if ( $this->reader->isEmptyElement ) {
			return;
		}
		$exitDepth = $this->reader->depth;
		$keepReading = $this->reader->read();
		while ( $keepReading ) {
			if ( $this->reader->localName == $name && $this->reader->depth <= $exitDepth
				&& $this->reader->nodeType == XMLReader::END_ELEMENT
			) {
				break;
			} elseif ( $this->reader->namespaceURI == self::NS_SVG
				&& $this->reader->nodeType == XMLReader::ELEMENT
			) {

				$sysLang = $this->reader->getAttribute( 'systemLanguage' );
				if ( !is_null( $sysLang ) && $sysLang !== '' ) {
					// See http://www.w3.org/TR/SVG/struct.html#SystemLanguageAttribute
					$langList = explode( ',', $sysLang );
					foreach ( $langList as $langItem ) {
						$langItem = trim( $langItem );
						if ( Language::isWellFormedLanguageTag( $langItem ) ) {
							$this->languages[$langItem] = self::LANG_FULL_MATCH;
						}
						// Note, the standard says that any prefix should work,
						// here we do only the initial prefix, since that will catch
						// 99% of cases, and we are going to compare against fallbacks.
						// This differs mildly from how the spec says languages should be
						// handled, however it matches better how the MediaWiki language
						// preference is generally handled.
						$dash = strpos( $langItem, '-' );
						// Intentionally checking both !false and > 0 at the same time.
						if ( $dash ) {
							$itemPrefix = substr( $langItem, 0, $dash );
							if ( Language::isWellFormedLanguageTag( $itemPrefix ) ) {
								$this->languagePrefixes[$itemPrefix] = self::LANG_PREFIX_MATCH;
							}
						}
					}
				}
				switch ( $this->reader->localName ) {
					case 'script':
						// Normally we disallow files with
						// <script>, but its possible
						// to configure MW to disable
						// such checks.
					case 'animate':
					case 'set':
					case 'animateMotion':
					case 'animateColor':
					case 'animateTransform':
						$this->debug( "HOUSTON WE HAVE ANIMATION" );
						$this->metadata['animated'] = true;
						break;
				}
			}
			$keepReading = $this->reader->read();
		}
	}

	// @todo FIXME: Unused, remove?
	private function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
		wfDebug( "SVGReader XML error: $err\n" );
	}

	/**
	 * Log a message through wfDebug() when $this->mDebug is enabled.
	 *
	 * @param string $data Message text
	 */
	private function debug( $data ) {
		if ( $this->mDebug ) {
			wfDebug( "SVGReader: $data\n" );
		}
	}

	// @todo FIXME: Unused, remove?
	private function warn( $data ) {
		wfDebug( "SVGReader: $data\n" );
	}

	// @todo FIXME: Unused, remove?
	private function notice( $data ) {
		wfDebug( "SVGReader WARN: $data\n" );
	}

	/**
	 * Derive the image dimensions from the viewBox, width and height
	 * attributes of the element the reader is positioned on, and store them
	 * in the metadata array.
	 *
	 * A missing dimension is computed from the other one and the viewBox
	 * aspect ratio (1:1 when no usable viewBox exists). The stored width and
	 * height are only overwritten when both results are positive.
	 */
	private function handleSVGAttribs() {
		$defaultWidth = self::DEFAULT_WIDTH;
		$defaultHeight = self::DEFAULT_HEIGHT;
		$aspect = 1.0;
		$width = null;
		$height = null;

		$viewBoxAttr = $this->reader->getAttribute( 'viewBox' );
		if ( $viewBoxAttr ) {
			// min-x min-y width height, separated by whitespace and/or commas
			$viewBox = preg_split( '/\s*[\s,]\s*/', trim( $viewBoxAttr ) );
			if ( count( $viewBox ) == 4 ) {
				$viewWidth = self::scaleSVGUnit( $viewBox[2] );
				$viewHeight = self::scaleSVGUnit( $viewBox[3] );
				if ( $viewWidth > 0 && $viewHeight > 0 ) {
					$aspect = $viewWidth / $viewHeight;
					$defaultHeight = $defaultWidth / $aspect;
				}
			}
		}

		$widthAttr = $this->reader->getAttribute( 'width' );
		if ( $widthAttr ) {
			$width = self::scaleSVGUnit( $widthAttr, $defaultWidth );
			$this->metadata['originalWidth'] = $widthAttr;
		}

		$heightAttr = $this->reader->getAttribute( 'height' );
		if ( $heightAttr ) {
			$height = self::scaleSVGUnit( $heightAttr, $defaultHeight );
			$this->metadata['originalHeight'] = $heightAttr;
		}

		if ( !isset( $width ) && !isset( $height ) ) {
			$width = $defaultWidth;
			$height = $width / $aspect;
		} elseif ( isset( $width ) && !isset( $height ) ) {
			$height = $width / $aspect;
		} elseif ( isset( $height ) && !isset( $width ) ) {
			$width = $height * $aspect;
		}

		if ( $width > 0 && $height > 0 ) {
			$this->metadata['width'] = intval( round( $width ) );
			$this->metadata['height'] = intval( round( $height ) );
		}
	}

	/**
	 * Convert an SVG/CSS length string to a number of user units.
	 *
	 * Accepts an optionally signed decimal number (leading-dot and exponent
	 * forms included) directly followed by one of the units in the table
	 * below, '%', or nothing. Percentages are taken relative to
	 * $viewportSize. Anything else falls back to floatval().
	 *
	 * @param string $length Length as written in the SVG attribute
	 * @param float $viewportSize Reference size used for '%' lengths
	 * @return float Length in user units
	 */
	static function scaleSVGUnit( $length, $viewportSize = 512 ) {
		// Factors per unit; the table treats one inch as 90 user units.
		static $unitLength = array(
			'px' => 1.0,
			'pt' => 1.25,
			'pc' => 15.0,
			'mm' => 3.543307,
			'cm' => 35.43307,
			'in' => 90.0,
			'em' => 16.0, // fake it?
			'ex' => 12.0, // fake it?
			'' => 1.0, // "User units" pixels by default
		);
		$matches = array();
		$pattern = '/^\s*([-+]?(?:\d+(?:\.\d+)?|\.\d+)(?:[Ee][-+]?\d+)?)' .
			'(em|ex|px|pt|pc|cm|mm|in|%|)\s*$/';
		if ( preg_match( $pattern, $length, $matches ) ) {
			$length = floatval( $matches[1] );
			$unit = $matches[2];
			if ( $unit == '%' ) {
				return $length * 0.01 * $viewportSize;
			} else {
				return $length * $unitLength[$unit];
			}
		} else {
			// Assume pixels
			return floatval( $length );
		}
	}
}