MediaWiki
REL1_22
|
<?php

/**
 * Thin static entry point around SVGReader.
 */
class SVGMetadataExtractor {
	/**
	 * Parse an SVG file and return the metadata collected by SVGReader.
	 *
	 * @param string $filename Path of the SVG file to read
	 * @return array Metadata array, see SVGReader::getMetadata()
	 * @throws MWException Propagated from SVGReader
	 */
	static function getMetadata( $filename ) {
		$svg = new SVGReader( $filename );
		return $svg->getMetadata();
	}
}

/**
 * Streaming reader that extracts size, title, description, embedded
 * <metadata> XML and an "animated" flag from an SVG file.
 */
class SVGReader {
	/** Pixel width used when the file does not yield a usable one */
	const DEFAULT_WIDTH = 512;
	/** Pixel height used when the file does not yield a usable one */
	const DEFAULT_HEIGHT = 512;
	/** XML namespace the root element and handled children must be in */
	const NS_SVG = 'http://www.w3.org/2000/svg';

	/** @var XMLReader|null Underlying pull parser */
	private $reader = null;
	/** @var bool When true, debug() forwards its messages to wfDebug() */
	private $mDebug = false;
	/** @var array Metadata collected so far, keyed by field name */
	private $metadata = array();

	/**
	 * Open the given file and parse it immediately; the result is available
	 * through getMetadata() afterwards.
	 *
	 * Files larger than $wgSVGMetadataCutoff bytes are only read up to that
	 * limit, so the parser may see a truncated (invalid) document.
	 *
	 * @param string $source Path of the SVG file
	 * @throws MWException If the file size or contents cannot be read, or
	 *   if read() rejects the document
	 */
	function __construct( $source ) {
		global $wgSVGMetadataCutoff;
		$this->reader = new XMLReader();

		// Don't use $file->getSize() since file object passed to SVGHandler::getMetadata is bogus.
		$size = filesize( $source );
		if ( $size === false ) {
			throw new MWException( "Error getting filesize of SVG." );
		}

		if ( $size > $wgSVGMetadataCutoff ) {
			$this->debug( "SVG is $size bytes, which is bigger than $wgSVGMetadataCutoff. Truncating." );
			// Offset must be 0, not -1: since PHP 7.1 a negative offset counts
			// from the end of the stream, which would return only the last byte.
			$contents = file_get_contents( $source, false, null, 0, $wgSVGMetadataCutoff );
			if ( $contents === false ) {
				throw new MWException( 'Error reading SVG file.' );
			}
			$this->reader->XML( $contents, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		} else {
			$this->reader->open( $source, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		}

		// Expand entities, since Adobe Illustrator uses them for xmlns
		// attributes (bug 31719). Note that libxml2 has some protection
		// against large recursive entity expansions so this is not as
		// insecure as it might appear to be. However, it is still extremely
		// insecure. It's necessary to wrap any read() calls with
		// libxml_disable_entity_loader() to avoid arbitrary local file
		// inclusion, or even arbitrary code execution if the expect
		// extension is installed (bug 46859).
		$oldDisable = libxml_disable_entity_loader( true );
		$this->reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );

		$this->metadata['width'] = self::DEFAULT_WIDTH;
		$this->metadata['height'] = self::DEFAULT_HEIGHT;

		// The size in the units specified by the SVG file
		// (for the metadata box)
		// Per the SVG spec, if unspecified, default to '100%'
		$this->metadata['originalWidth'] = '100%';
		$this->metadata['originalHeight'] = '100%';

		// Because we cut off the end of the svg making an invalid one. Complicated
		// try catch thing to make sure warnings get restored. Seems like there should
		// be a better way.
		wfSuppressWarnings();
		try {
			$this->read();
		} catch ( Exception $e ) {
			// Note, if this happens, the width/height will be taken to be 0x0.
			// Should we consider it the default 512x512 instead?
			wfRestoreWarnings();
			libxml_disable_entity_loader( $oldDisable );
			throw $e;
		}
		wfRestoreWarnings();
		libxml_disable_entity_loader( $oldDisable );
	}

	/**
	 * @return array The metadata gathered while parsing
	 */
	public function getMetadata() {
		return $this->metadata;
	}

	/**
	 * Walk the document: validate the root <svg> element, read its size
	 * attributes, then visit each direct child of the root and dispatch on
	 * its tag name.
	 *
	 * @return bool Always true when the walk completes
	 * @throws MWException If the first element is not <svg> in the SVG namespace
	 */
	protected function read() {
		$keepReading = $this->reader->read();

		/* Skip until first element */
		while ( $keepReading && $this->reader->nodeType != XMLReader::ELEMENT ) {
			$keepReading = $this->reader->read();
		}

		if ( $this->reader->localName != 'svg' || $this->reader->namespaceURI != self::NS_SVG ) {
			throw new MWException( "Expected <svg> tag, got " .
				$this->reader->localName . " in NS " . $this->reader->namespaceURI );
		}
		$this->debug( "<svg> tag is correct." );
		$this->handleSVGAttribs();

		$exitDepth = $this->reader->depth;
		$keepReading = $this->reader->read();
		while ( $keepReading ) {
			$tag = $this->reader->localName;
			$type = $this->reader->nodeType;
			$isSVG = ( $this->reader->namespaceURI == self::NS_SVG );

			$this->debug( "$tag" );

			if ( $isSVG && $tag == 'svg' && $type == XMLReader::END_ELEMENT
				&& $this->reader->depth <= $exitDepth
			) {
				// Closing tag of the root element: nothing more to look at.
				break;
			} elseif ( $isSVG && $tag == 'title' ) {
				$this->readField( $tag, 'title' );
			} elseif ( $isSVG && $tag == 'desc' ) {
				$this->readField( $tag, 'description' );
			} elseif ( $isSVG && $tag == 'metadata' && $type == XMLReader::ELEMENT ) {
				$this->readXml( 'metadata' );
			} elseif ( $isSVG && $tag == 'script' ) {
				// We normally do not allow scripted svgs.
				// However its possible to configure MW to let them
				// in, and such files should be considered animated.
				$this->metadata['animated'] = true;
			} elseif ( $tag !== '#text' ) {
				$this->debug( "Unhandled top-level XML tag $tag" );

				if ( !isset( $this->metadata['animated'] ) ) {
					// Recurse into children of current tag, looking for animation.
					$this->animateFilter( $tag );
				}
			}

			// Goto next element, which is sibling of current (Skip children).
			$keepReading = $this->reader->next();
		}

		$this->reader->close();

		return true;
	}

	/**
	 * Consume the element the reader is positioned on and store its text
	 * content (trimmed) in the metadata array. If the element contains
	 * several text nodes, the last one wins.
	 *
	 * Does nothing unless the reader is on an opening element and a
	 * metadata field name is given.
	 *
	 * @param string $name Local name of the element being read; its closing
	 *   tag in the SVG namespace ends the scan
	 * @param string|null $metafield Key under which to store the text
	 */
	private function readField( $name, $metafield = null ) {
		$this->debug( "Read field $metafield" );
		if ( !$metafield || $this->reader->nodeType != XMLReader::ELEMENT ) {
			return;
		}
		$keepReading = $this->reader->read();
		while ( $keepReading ) {
			if ( $this->reader->localName == $name
				&& $this->reader->namespaceURI == self::NS_SVG
				&& $this->reader->nodeType == XMLReader::END_ELEMENT
			) {
				break;
			} elseif ( $this->reader->nodeType == XMLReader::TEXT ) {
				$this->metadata[$metafield] = trim( $this->reader->value );
			}
			$keepReading = $this->reader->read();
		}
	}

	/**
	 * Store the inner XML of the element the reader is positioned on
	 * (trimmed) in the metadata array.
	 *
	 * The cursor is deliberately left on the element: read() advances past
	 * its subtree itself, and advancing here as well would make the caller
	 * skip the sibling that immediately follows.
	 *
	 * @param string|null $metafield Key under which to store the XML
	 * @throws MWException If XMLReader lacks readInnerXML()
	 */
	private function readXml( $metafield = null ) {
		$this->debug( "Read top level metadata" );
		if ( !$metafield || $this->reader->nodeType != XMLReader::ELEMENT ) {
			return;
		}
		// TODO: find and store type of xml snippet. metadata['metadataType'] = "rdf"
		if ( method_exists( $this->reader, 'readInnerXML' ) ) {
			$this->metadata[$metafield] = trim( $this->reader->readInnerXML() );
		} else {
			throw new MWException( "The PHP XMLReader extension does not come with readInnerXML() method. Your libxml is probably out of date (need 2.6.20 or later)." );
		}
	}

	/**
	 * Scan the descendants of the current element for SVG animation or
	 * script elements and set metadata['animated'] when one is found.
	 *
	 * Returns immediately for non-element nodes and for empty elements.
	 * The scan stops at the closing tag named $name at (or above) the depth
	 * where the scan started.
	 *
	 * @param string $name Local name of the element being scanned
	 */
	private function animateFilter( $name ) {
		$this->debug( "animate filter for tag $name" );
		if ( $this->reader->nodeType != XMLReader::ELEMENT ) {
			return;
		}
		if ( $this->reader->isEmptyElement ) {
			return;
		}
		$exitDepth = $this->reader->depth;
		$keepReading = $this->reader->read();
		while ( $keepReading ) {
			if ( $this->reader->localName == $name && $this->reader->depth <= $exitDepth
				&& $this->reader->nodeType == XMLReader::END_ELEMENT
			) {
				break;
			} elseif ( $this->reader->namespaceURI == self::NS_SVG
				&& $this->reader->nodeType == XMLReader::ELEMENT
			) {
				switch ( $this->reader->localName ) {
					case 'script':
						// Normally we disallow files with
						// <script>, but its possible
						// to configure MW to disable
						// such checks.
					case 'animate':
					case 'set':
					case 'animateMotion':
					case 'animateColor':
					case 'animateTransform':
						$this->debug( "HOUSTON WE HAVE ANIMATION" );
						$this->metadata['animated'] = true;
						break;
				}
			}
			$keepReading = $this->reader->read();
		}
	}

	/**
	 * Log an XML error. Despite the name this only logs; it does not throw.
	 *
	 * @param string $err Error description
	 */
	private function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
		wfDebug( "SVGReader XML error: $err\n" );
	}

	/**
	 * Log a message through wfDebug(), but only when $mDebug is enabled.
	 *
	 * @param string $data Message
	 */
	private function debug( $data ) {
		if ( $this->mDebug ) {
			wfDebug( "SVGReader: $data\n" );
		}
	}

	/**
	 * Unconditionally log a message through wfDebug().
	 *
	 * @param string $data Message
	 */
	private function warn( $data ) {
		wfDebug( "SVGReader: $data\n" );
	}

	/**
	 * Unconditionally log a message through wfDebug() with a "WARN" prefix.
	 *
	 * @param string $data Message
	 */
	private function notice( $data ) {
		wfDebug( "SVGReader WARN: $data\n" );
	}

	/**
	 * Derive pixel width/height from the viewBox, width and height
	 * attributes of the element the reader is positioned on.
	 *
	 * The viewBox (when it has four parts with positive width and height)
	 * supplies the aspect ratio. A missing width or height is computed from
	 * the other one and that ratio; if both are missing the default width is
	 * used. The raw width/height attribute strings are kept as
	 * originalWidth/originalHeight. metadata['width'] and ['height'] are
	 * only overwritten when both results are positive.
	 */
	private function handleSVGAttribs() {
		$defaultWidth = self::DEFAULT_WIDTH;
		$defaultHeight = self::DEFAULT_HEIGHT;
		$aspect = 1.0;
		$width = null;
		$height = null;

		$viewBoxAttr = $this->reader->getAttribute( 'viewBox' );
		if ( $viewBoxAttr ) {
			// min-x min-y width height
			$viewBox = preg_split( '/\s+/', trim( $viewBoxAttr ) );
			if ( count( $viewBox ) == 4 ) {
				$viewWidth = self::scaleSVGUnit( $viewBox[2] );
				$viewHeight = self::scaleSVGUnit( $viewBox[3] );
				if ( $viewWidth > 0 && $viewHeight > 0 ) {
					$aspect = $viewWidth / $viewHeight;
					$defaultHeight = $defaultWidth / $aspect;
				}
			}
		}

		$widthAttr = $this->reader->getAttribute( 'width' );
		if ( $widthAttr ) {
			$width = self::scaleSVGUnit( $widthAttr, $defaultWidth );
			$this->metadata['originalWidth'] = $widthAttr;
		}

		$heightAttr = $this->reader->getAttribute( 'height' );
		if ( $heightAttr ) {
			$height = self::scaleSVGUnit( $heightAttr, $defaultHeight );
			$this->metadata['originalHeight'] = $heightAttr;
		}

		if ( !isset( $width ) && !isset( $height ) ) {
			$width = $defaultWidth;
			$height = $width / $aspect;
		} elseif ( isset( $width ) && !isset( $height ) ) {
			$height = $width / $aspect;
		} elseif ( isset( $height ) && !isset( $width ) ) {
			$width = $height * $aspect;
		}

		if ( $width > 0 && $height > 0 ) {
			$this->metadata['width'] = intval( round( $width ) );
			$this->metadata['height'] = intval( round( $height ) );
		}
	}

	/**
	 * Convert an SVG length string to a number of pixels using the fixed
	 * conversion table below.
	 *
	 * Accepts an optional sign, a leading-dot fraction and an exponent in
	 * the numeric part (e.g. ".5in", "+2cm", "1e1px"). Percentages are
	 * taken relative to $viewportSize. Anything that does not look like
	 * "<number><unit>" is passed to floatval() and treated as pixels.
	 *
	 * @param string $length Length as written in the SVG attribute
	 * @param float|int $viewportSize Reference size for percentage lengths
	 * @return float Length in pixels
	 */
	public static function scaleSVGUnit( $length, $viewportSize = 512 ) {
		static $unitLength = array(
			'px' => 1.0,
			'pt' => 1.25,
			'pc' => 15.0,
			'mm' => 3.543307,
			'cm' => 35.43307,
			'in' => 90.0,
			'em' => 16.0, // fake it?
			'ex' => 12.0, // fake it?
			'' => 1.0, // "User units" pixels by default
		);
		$matches = array();
		$pattern = '/^\s*([-+]?\d*(?:\.\d+)?(?:[Ee][-+]?\d+)?)(em|ex|px|pt|pc|cm|mm|in|%|)\s*$/';
		if ( preg_match( $pattern, $length, $matches ) ) {
			$value = floatval( $matches[1] );
			$unit = $matches[2];
			if ( $unit == '%' ) {
				return $value * 0.01 * $viewportSize;
			}
			return $value * $unitLength[$unit];
		}

		// Assume pixels
		return floatval( $length );
	}
}