[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * Module defining helper functions for detecting and dealing with MIME types.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

/**
 * Defines a set of well known MIME types.
 * This is used as a fallback to mime.types files.
 * An extensive list of well known MIME types is provided by
 * the file mime.types in the includes directory.
 *
 * This list concatenated with mime.types is used to create a MIME <-> ext
 * map. Each line contains a MIME type followed by a space separated list of
 * extensions. If multiple extensions for a single MIME type exist or if
 * multiple MIME types exist for a single extension then in most cases
 * MediaWiki assumes that the first extension following the MIME type is the
 * canonical extension, and the first time a MIME type appears for a certain
 * extension is considered the canonical MIME type.
 *
 * The MimeMagic constructor parses this text line by line: empty lines and
 * lines starting with "#" are skipped, tabs are treated as spaces, and the
 * whole line is lower-cased before it is split.
 *
 * (Note that appending $wgMimeTypeFile to the end of MM_WELL_KNOWN_MIME_TYPES
 * is unfortunate because it makes it impossible to redefine canonical types.
 * This could be fixed by appending MM_WELL_KNOWN_MIME_TYPES behind
 * $wgMimeTypeFile, but who knows what will break?
 * In practice this probably isn't a problem anyway -- Bryan)
 */
define( 'MM_WELL_KNOWN_MIME_TYPES', <<<END_STRING
application/ogg ogx ogg ogm ogv oga spx
application/pdf pdf
application/vnd.oasis.opendocument.chart odc
application/vnd.oasis.opendocument.chart-template otc
application/vnd.oasis.opendocument.database odb
application/vnd.oasis.opendocument.formula odf
application/vnd.oasis.opendocument.formula-template otf
application/vnd.oasis.opendocument.graphics odg
application/vnd.oasis.opendocument.graphics-template otg
application/vnd.oasis.opendocument.image odi
application/vnd.oasis.opendocument.image-template oti
application/vnd.oasis.opendocument.presentation odp
application/vnd.oasis.opendocument.presentation-template otp
application/vnd.oasis.opendocument.spreadsheet ods
application/vnd.oasis.opendocument.spreadsheet-template ots
application/vnd.oasis.opendocument.text odt
application/vnd.oasis.opendocument.text-master otm
application/vnd.oasis.opendocument.text-template ott
application/vnd.oasis.opendocument.text-web oth
application/x-javascript js
application/x-shockwave-flash swf
audio/midi mid midi kar
audio/mpeg mpga mpa mp2 mp3
audio/x-aiff aif aiff aifc
audio/x-wav wav
audio/ogg oga spx ogg
image/x-bmp bmp
image/gif gif
image/jpeg jpeg jpg jpe
image/png png
image/svg+xml svg
image/svg svg
image/tiff tiff tif
image/vnd.djvu djvu
image/x.djvu djvu
image/x-djvu djvu
image/x-portable-pixmap ppm
image/x-xcf xcf
text/plain txt
text/html html htm
video/ogg ogv ogm ogg
video/mpeg mpg mpeg
END_STRING
);

/**
 * Defines a set of well known MIME info entries.
 * This is used as a fallback to mime.info files.
 * An extensive list of well known MIME types is provided by
 * the file mime.info in the includes directory.
 *
 * Each line lists one or more space separated MIME types, followed by a
 * media type in square brackets. The MimeMagic constructor treats the first
 * MIME type on a line as the main type and registers every further type on
 * that line as an alias of it; a line without a bracketed media type is
 * filed under MEDIATYPE_UNKNOWN.
 */
define( 'MM_WELL_KNOWN_MIME_INFO', <<<END_STRING
application/pdf [OFFICE]
application/vnd.oasis.opendocument.chart [OFFICE]
application/vnd.oasis.opendocument.chart-template [OFFICE]
application/vnd.oasis.opendocument.database [OFFICE]
application/vnd.oasis.opendocument.formula [OFFICE]
application/vnd.oasis.opendocument.formula-template [OFFICE]
application/vnd.oasis.opendocument.graphics [OFFICE]
application/vnd.oasis.opendocument.graphics-template [OFFICE]
application/vnd.oasis.opendocument.image [OFFICE]
application/vnd.oasis.opendocument.image-template [OFFICE]
application/vnd.oasis.opendocument.presentation [OFFICE]
application/vnd.oasis.opendocument.presentation-template [OFFICE]
application/vnd.oasis.opendocument.spreadsheet [OFFICE]
application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
application/vnd.oasis.opendocument.text [OFFICE]
application/vnd.oasis.opendocument.text-template [OFFICE]
application/vnd.oasis.opendocument.text-master [OFFICE]
application/vnd.oasis.opendocument.text-web [OFFICE]
text/javascript application/x-javascript [EXECUTABLE]
application/x-shockwave-flash [MULTIMEDIA]
audio/midi [AUDIO]
audio/x-aiff [AUDIO]
audio/x-wav [AUDIO]
audio/mp3 audio/mpeg [AUDIO]
application/ogg audio/ogg video/ogg [MULTIMEDIA]
image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
image/gif [BITMAP]
image/jpeg [BITMAP]
image/png [BITMAP]
image/svg+xml [DRAWING]
image/tiff [BITMAP]
image/vnd.djvu [BITMAP]
image/x-xcf [BITMAP]
image/x-portable-pixmap [BITMAP]
text/plain [TEXT]
text/html [TEXT]
video/ogg [VIDEO]
video/mpeg [VIDEO]
unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
END_STRING
);

/**
 * Implements functions related to MIME types such as detection and mapping to
 * file extension.
140 * 141 * Instances of this class are stateless, there only needs to be one global instance 142 * of MimeMagic. Please use MimeMagic::singleton() to get that instance. 143 */ 144 class MimeMagic { 145 /** 146 * @var array Mapping of media types to arrays of MIME types. 147 * This is used by findMediaType and getMediaType, respectively 148 */ 149 protected $mMediaTypes = null; 150 151 /** @var array Map of MIME type aliases 152 */ 153 protected $mMimeTypeAliases = null; 154 155 /** @var array Map of MIME types to file extensions (as a space separated list) 156 */ 157 protected $mMimeToExt = null; 158 159 /** @var array Map of file extensions types to MIME types (as a space separated list) 160 */ 161 public $mExtToMime = null; 162 163 /** @var IEContentAnalyzer 164 */ 165 protected $mIEAnalyzer; 166 167 /** @var string Extra MIME types, set for example by media handling extensions 168 */ 169 private $mExtraTypes = ''; 170 171 /** @var string Extra MIME info, set for example by media handling extensions 172 */ 173 private $mExtraInfo = ''; 174 175 /** @var Config */ 176 private $mConfig; 177 178 /** @var MimeMagic The singleton instance 179 */ 180 private static $instance = null; 181 182 /** Initializes the MimeMagic object. This is called by MimeMagic::singleton(). 183 * 184 * This constructor parses the mime.types and mime.info files and build internal mappings. 185 * 186 * @todo Make this constructor private once everything uses the singleton instance 187 * @param Config $config 188 */ 189 function __construct( Config $config = null ) { 190 if ( !$config ) { 191 wfDebug( __METHOD__ . 
' called with no Config instance passed to it' ); 192 $config = ConfigFactory::getDefaultInstance()->makeConfig( 'main' ); 193 } 194 $this->mConfig = $config; 195 196 /** 197 * --- load mime.types --- 198 */ 199 200 global $IP; 201 202 # Allow media handling extensions adding MIME-types and MIME-info 203 wfRunHooks( 'MimeMagicInit', array( $this ) ); 204 205 $types = MM_WELL_KNOWN_MIME_TYPES; 206 207 $mimeTypeFile = $this->mConfig->get( 'MimeTypeFile' ); 208 if ( $mimeTypeFile == 'includes/mime.types' ) { 209 $mimeTypeFile = "$IP/$mimeTypeFile"; 210 } 211 212 if ( $mimeTypeFile ) { 213 if ( is_file( $mimeTypeFile ) and is_readable( $mimeTypeFile ) ) { 214 wfDebug( __METHOD__ . ": loading mime types from $mimeTypeFile\n" ); 215 $types .= "\n"; 216 $types .= file_get_contents( $mimeTypeFile ); 217 } else { 218 wfDebug( __METHOD__ . ": can't load mime types from $mimeTypeFile\n" ); 219 } 220 } else { 221 wfDebug( __METHOD__ . ": no mime types file defined, using build-ins only.\n" ); 222 } 223 224 $types .= "\n" . $this->mExtraTypes; 225 226 $types = str_replace( array( "\r\n", "\n\r", "\n\n", "\r\r", "\r" ), "\n", $types ); 227 $types = str_replace( "\t", " ", $types ); 228 229 $this->mMimeToExt = array(); 230 $this->mExtToMime = array(); 231 232 $lines = explode( "\n", $types ); 233 foreach ( $lines as $s ) { 234 $s = trim( $s ); 235 if ( empty( $s ) ) { 236 continue; 237 } 238 if ( strpos( $s, '#' ) === 0 ) { 239 continue; 240 } 241 242 $s = strtolower( $s ); 243 $i = strpos( $s, ' ' ); 244 245 if ( $i === false ) { 246 continue; 247 } 248 249 $mime = substr( $s, 0, $i ); 250 $ext = trim( substr( $s, $i + 1 ) ); 251 252 if ( empty( $ext ) ) { 253 continue; 254 } 255 256 if ( !empty( $this->mMimeToExt[$mime] ) ) { 257 $this->mMimeToExt[$mime] .= ' ' . 
$ext; 258 } else { 259 $this->mMimeToExt[$mime] = $ext; 260 } 261 262 $extensions = explode( ' ', $ext ); 263 264 foreach ( $extensions as $e ) { 265 $e = trim( $e ); 266 if ( empty( $e ) ) { 267 continue; 268 } 269 270 if ( !empty( $this->mExtToMime[$e] ) ) { 271 $this->mExtToMime[$e] .= ' ' . $mime; 272 } else { 273 $this->mExtToMime[$e] = $mime; 274 } 275 } 276 } 277 278 /** 279 * --- load mime.info --- 280 */ 281 282 $mimeInfoFile = $this->mConfig->get( 'MimeInfoFile' ); 283 if ( $mimeInfoFile == 'includes/mime.info' ) { 284 $mimeInfoFile = "$IP/$mimeInfoFile"; 285 } 286 287 $info = MM_WELL_KNOWN_MIME_INFO; 288 289 if ( $mimeInfoFile ) { 290 if ( is_file( $mimeInfoFile ) and is_readable( $mimeInfoFile ) ) { 291 wfDebug( __METHOD__ . ": loading mime info from $mimeInfoFile\n" ); 292 $info .= "\n"; 293 $info .= file_get_contents( $mimeInfoFile ); 294 } else { 295 wfDebug( __METHOD__ . ": can't load mime info from $mimeInfoFile\n" ); 296 } 297 } else { 298 wfDebug( __METHOD__ . ": no mime info file defined, using build-ins only.\n" ); 299 } 300 301 $info .= "\n" . 
$this->mExtraInfo; 302 303 $info = str_replace( array( "\r\n", "\n\r", "\n\n", "\r\r", "\r" ), "\n", $info ); 304 $info = str_replace( "\t", " ", $info ); 305 306 $this->mMimeTypeAliases = array(); 307 $this->mMediaTypes = array(); 308 309 $lines = explode( "\n", $info ); 310 foreach ( $lines as $s ) { 311 $s = trim( $s ); 312 if ( empty( $s ) ) { 313 continue; 314 } 315 if ( strpos( $s, '#' ) === 0 ) { 316 continue; 317 } 318 319 $s = strtolower( $s ); 320 $i = strpos( $s, ' ' ); 321 322 if ( $i === false ) { 323 continue; 324 } 325 326 #print "processing MIME INFO line $s<br>"; 327 328 $match = array(); 329 if ( preg_match( '!\[\s*(\w+)\s*\]!', $s, $match ) ) { 330 $s = preg_replace( '!\[\s*(\w+)\s*\]!', '', $s ); 331 $mtype = trim( strtoupper( $match[1] ) ); 332 } else { 333 $mtype = MEDIATYPE_UNKNOWN; 334 } 335 336 $m = explode( ' ', $s ); 337 338 if ( !isset( $this->mMediaTypes[$mtype] ) ) { 339 $this->mMediaTypes[$mtype] = array(); 340 } 341 342 foreach ( $m as $mime ) { 343 $mime = trim( $mime ); 344 if ( empty( $mime ) ) { 345 continue; 346 } 347 348 $this->mMediaTypes[$mtype][] = $mime; 349 } 350 351 if ( count( $m ) > 1 ) { 352 $main = $m[0]; 353 $mCount = count( $m ); 354 for ( $i = 1; $i < $mCount; $i += 1 ) { 355 $mime = $m[$i]; 356 $this->mMimeTypeAliases[$mime] = $main; 357 } 358 } 359 } 360 } 361 362 /** 363 * Get an instance of this class 364 * @return MimeMagic 365 */ 366 public static function singleton() { 367 if ( self::$instance === null ) { 368 self::$instance = new MimeMagic( 369 ConfigFactory::getDefaultInstance()->makeConfig( 'main' ) 370 ); 371 } 372 return self::$instance; 373 } 374 375 /** 376 * Adds to the list mapping MIME to file extensions. 377 * As an extension author, you are encouraged to submit patches to 378 * MediaWiki's core to add new MIME types to mime.types. 379 * @param string $types 380 */ 381 public function addExtraTypes( $types ) { 382 $this->mExtraTypes .= "\n" . 
$types; 383 } 384 385 /** 386 * Adds to the list mapping MIME to media type. 387 * As an extension author, you are encouraged to submit patches to 388 * MediaWiki's core to add new MIME info to mime.info. 389 * @param string $info 390 */ 391 public function addExtraInfo( $info ) { 392 $this->mExtraInfo .= "\n" . $info; 393 } 394 395 /** 396 * Returns a list of file extensions for a given MIME type as a space 397 * separated string or null if the MIME type was unrecognized. Resolves 398 * MIME type aliases. 399 * 400 * @param string $mime 401 * @return string|null 402 */ 403 public function getExtensionsForType( $mime ) { 404 $mime = strtolower( $mime ); 405 406 // Check the mime-to-ext map 407 if ( isset( $this->mMimeToExt[$mime] ) ) { 408 return $this->mMimeToExt[$mime]; 409 } 410 411 // Resolve the MIME type to the canonical type 412 if ( isset( $this->mMimeTypeAliases[$mime] ) ) { 413 $mime = $this->mMimeTypeAliases[$mime]; 414 if ( isset( $this->mMimeToExt[$mime] ) ) { 415 return $this->mMimeToExt[$mime]; 416 } 417 } 418 419 return null; 420 } 421 422 /** 423 * Returns a list of MIME types for a given file extension as a space 424 * separated string or null if the extension was unrecognized. 425 * 426 * @param string $ext 427 * @return string|null 428 */ 429 public function getTypesForExtension( $ext ) { 430 $ext = strtolower( $ext ); 431 432 $r = isset( $this->mExtToMime[$ext] ) ? $this->mExtToMime[$ext] : null; 433 return $r; 434 } 435 436 /** 437 * Returns a single MIME type for a given file extension or null if unknown. 438 * This is always the first type from the list returned by getTypesForExtension($ext). 
439 * 440 * @param string $ext 441 * @return string|null 442 */ 443 public function guessTypesForExtension( $ext ) { 444 $m = $this->getTypesForExtension( $ext ); 445 if ( is_null( $m ) ) { 446 return null; 447 } 448 449 // TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient 450 $m = trim( $m ); 451 $m = preg_replace( '/\s.*$/', '', $m ); 452 453 return $m; 454 } 455 456 /** 457 * Tests if the extension matches the given MIME type. Returns true if a 458 * match was found, null if the MIME type is unknown, and false if the 459 * MIME type is known but no matches where found. 460 * 461 * @param string $extension 462 * @param string $mime 463 * @return bool|null 464 */ 465 public function isMatchingExtension( $extension, $mime ) { 466 $ext = $this->getExtensionsForType( $mime ); 467 468 if ( !$ext ) { 469 return null; // Unknown MIME type 470 } 471 472 $ext = explode( ' ', $ext ); 473 474 $extension = strtolower( $extension ); 475 return in_array( $extension, $ext ); 476 } 477 478 /** 479 * Returns true if the MIME type is known to represent an image format 480 * supported by the PHP GD library. 481 * 482 * @param string $mime 483 * 484 * @return bool 485 */ 486 public function isPHPImageType( $mime ) { 487 // As defined by imagegetsize and image_type_to_mime 488 static $types = array( 489 'image/gif', 'image/jpeg', 'image/png', 490 'image/x-bmp', 'image/xbm', 'image/tiff', 491 'image/jp2', 'image/jpeg2000', 'image/iff', 492 'image/xbm', 'image/x-xbitmap', 493 'image/vnd.wap.wbmp', 'image/vnd.xiff', 494 'image/x-photoshop', 495 'application/x-shockwave-flash', 496 ); 497 498 return in_array( $mime, $types ); 499 } 500 501 /** 502 * Returns true if the extension represents a type which can 503 * be reliably detected from its content. Use this to determine 504 * whether strict content checks should be applied to reject 505 * invalid uploads; if we can't identify the type we won't 506 * be able to say if it's invalid. 
507 * 508 * @todo Be more accurate when using fancy MIME detector plugins; 509 * right now this is the bare minimum getimagesize() list. 510 * @param string $extension 511 * @return bool 512 */ 513 function isRecognizableExtension( $extension ) { 514 static $types = array( 515 // Types recognized by getimagesize() 516 'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd', 517 'bmp', 'tiff', 'tif', 'jpc', 'jp2', 518 'jpx', 'jb2', 'swc', 'iff', 'wbmp', 519 'xbm', 520 521 // Formats we recognize magic numbers for 522 'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx', 523 'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka', 524 'webp', 525 526 // XML formats we sure hope we recognize reliably 527 'svg', 528 ); 529 return in_array( strtolower( $extension ), $types ); 530 } 531 532 /** 533 * Improves a MIME type using the file extension. Some file formats are very generic, 534 * so their MIME type is not very meaningful. A more useful MIME type can be derived 535 * by looking at the file extension. Typically, this method would be called on the 536 * result of guessMimeType(). 537 * 538 * @param string $mime The MIME type, typically guessed from a file's content. 539 * @param string $ext The file extension, as taken from the file name 540 * 541 * @return string The MIME type 542 */ 543 public function improveTypeFromExtension( $mime, $ext ) { 544 if ( $mime === 'unknown/unknown' ) { 545 if ( $this->isRecognizableExtension( $ext ) ) { 546 wfDebug( __METHOD__ . ': refusing to guess mime type for .' . 547 "$ext file, we should have recognized it\n" ); 548 } else { 549 // Not something we can detect, so simply 550 // trust the file extension 551 $mime = $this->guessTypesForExtension( $ext ); 552 } 553 } elseif ( $mime === 'application/x-opc+zip' ) { 554 if ( $this->isMatchingExtension( $ext, $mime ) ) { 555 // A known file extension for an OPC file, 556 // find the proper MIME type for that file extension 557 $mime = $this->guessTypesForExtension( $ext ); 558 } else { 559 wfDebug( __METHOD__ . 
": refusing to guess better type for $mime file, " . 560 ".$ext is not a known OPC extension.\n" ); 561 $mime = 'application/zip'; 562 } 563 } elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) { 564 // Textual types are sometimes not recognized properly. 565 // If detected as text/plain, and has an extension which is textual 566 // improve to the extension's type. For example, csv and json are often 567 // misdetected as text/plain. 568 $mime = $this->guessTypesForExtension( $ext ); 569 } 570 571 # Media handling extensions can improve the MIME detected 572 wfRunHooks( 'MimeMagicImproveFromExtension', array( $this, $ext, &$mime ) ); 573 574 if ( isset( $this->mMimeTypeAliases[$mime] ) ) { 575 $mime = $this->mMimeTypeAliases[$mime]; 576 } 577 578 wfDebug( __METHOD__ . ": improved mime type for .$ext: $mime\n" ); 579 return $mime; 580 } 581 582 /** 583 * MIME type detection. This uses detectMimeType to detect the MIME type 584 * of the file, but applies additional checks to determine some well known 585 * file formats that may be missed or misinterpreted by the default MIME 586 * detection (namely XML based formats like XHTML or SVG, as well as ZIP 587 * based formats like OPC/ODF files). 588 * 589 * @param string $file The file to check 590 * @param string|bool $ext The file extension, or true (default) to extract it from the filename. 591 * Set it to false to ignore the extension. DEPRECATED! Set to false, use 592 * improveTypeFromExtension($mime, $ext) later to improve MIME type. 593 * 594 * @return string The MIME type of $file 595 */ 596 public function guessMimeType( $file, $ext = true ) { 597 if ( $ext ) { // TODO: make $ext default to false. Or better, remove it. 598 wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. " . 599 "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" ); 600 } 601 602 $mime = $this->doGuessMimeType( $file, $ext ); 603 604 if ( !$mime ) { 605 wfDebug( __METHOD__ . 
": internal type detection failed for $file (.$ext)...\n" ); 606 $mime = $this->detectMimeType( $file, $ext ); 607 } 608 609 if ( isset( $this->mMimeTypeAliases[$mime] ) ) { 610 $mime = $this->mMimeTypeAliases[$mime]; 611 } 612 613 wfDebug( __METHOD__ . ": guessed mime type of $file: $mime\n" ); 614 return $mime; 615 } 616 617 /** 618 * Guess the MIME type from the file contents. 619 * 620 * @param string $file 621 * @param mixed $ext 622 * @return bool|string 623 * @throws MWException 624 */ 625 private function doGuessMimeType( $file, $ext ) { // TODO: remove $ext param 626 // Read a chunk of the file 627 wfSuppressWarnings(); 628 $f = fopen( $file, 'rb' ); 629 wfRestoreWarnings(); 630 631 if ( !$f ) { 632 return 'unknown/unknown'; 633 } 634 635 $fsize = filesize( $file ); 636 if ( $fsize === false ) { 637 return 'unknown/unknown'; 638 } 639 640 $head = fread( $f, 1024 ); 641 $tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR 642 if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) { 643 throw new MWException( 644 "Seeking $tailLength bytes from EOF failed in " . __METHOD__ ); 645 } 646 $tail = fread( $f, $tailLength ); 647 fclose( $f ); 648 649 wfDebug( __METHOD__ . ": analyzing head and tail of $file for magic numbers.\n" ); 650 651 // Hardcode a few magic number checks... 652 $headers = array( 653 // Multimedia... 654 'MThd' => 'audio/midi', 655 'OggS' => 'application/ogg', 656 657 // Image formats... 658 // Note that WMF may have a bare header, no magic number. 659 "\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives? 660 "\xd7\xcd\xc6\x9a" => 'application/x-msmetafile', 661 '%PDF' => 'application/pdf', 662 'gimp xcf' => 'image/x-xcf', 663 664 // Some forbidden fruit... 
665 'MZ' => 'application/octet-stream', // DOS/Windows executable 666 "\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary 667 "\x7fELF" => 'application/octet-stream', // ELF binary 668 ); 669 670 foreach ( $headers as $magic => $candidate ) { 671 if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) { 672 wfDebug( __METHOD__ . ": magic header in $file recognized as $candidate\n" ); 673 return $candidate; 674 } 675 } 676 677 /* Look for WebM and Matroska files */ 678 if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) { 679 $doctype = strpos( $head, "\x42\x82" ); 680 if ( $doctype ) { 681 // Next byte is datasize, then data (sizes larger than 1 byte are very stupid muxers) 682 $data = substr( $head, $doctype + 3, 8 ); 683 if ( strncmp( $data, "matroska", 8 ) == 0 ) { 684 wfDebug( __METHOD__ . ": recognized file as video/x-matroska\n" ); 685 return "video/x-matroska"; 686 } elseif ( strncmp( $data, "webm", 4 ) == 0 ) { 687 wfDebug( __METHOD__ . ": recognized file as video/webm\n" ); 688 return "video/webm"; 689 } 690 } 691 wfDebug( __METHOD__ . ": unknown EBML file\n" ); 692 return "unknown/unknown"; 693 } 694 695 /* Look for WebP */ 696 if ( strncmp( $head, "RIFF", 4 ) == 0 && strncmp( substr( $head, 8, 8 ), "WEBPVP8 ", 8 ) == 0 ) { 697 wfDebug( __METHOD__ . ": recognized file as image/webp\n" ); 698 return "image/webp"; 699 } 700 701 /** 702 * Look for PHP. Check for this before HTML/XML... Warning: this is a 703 * heuristic, and won't match a file with a lot of non-PHP before. It 704 * will also match text files which could be PHP. :) 705 * 706 * @todo FIXME: For this reason, the check is probably useless -- an attacker 707 * could almost certainly just pad the file with a lot of nonsense to 708 * circumvent the check in any case where it would be a security 709 * problem. On the other hand, it causes harmful false positives (bug 710 * 16583). The heuristic has been cut down to exclude three-character 711 * strings like "<? 
", but should it be axed completely? 712 */ 713 if ( ( strpos( $head, '<?php' ) !== false ) || 714 ( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) || 715 ( strpos( $head, "<\x00?\x00 " ) !== false ) || 716 ( strpos( $head, "<\x00?\x00\n" ) !== false ) || 717 ( strpos( $head, "<\x00?\x00\t" ) !== false ) || 718 ( strpos( $head, "<\x00?\x00=" ) !== false ) ) { 719 720 wfDebug( __METHOD__ . ": recognized $file as application/x-php\n" ); 721 return 'application/x-php'; 722 } 723 724 /** 725 * look for XML formats (XHTML and SVG) 726 */ 727 $xml = new XmlTypeCheck( $file ); 728 if ( $xml->wellFormed ) { 729 $xmlMimeTypes = $this->mConfig->get( 'XMLMimeTypes' ); 730 if ( isset( $xmlMimeTypes[$xml->getRootElement()] ) ) { 731 return $xmlMimeTypes[$xml->getRootElement()]; 732 } else { 733 return 'application/xml'; 734 } 735 } 736 737 /** 738 * look for shell scripts 739 */ 740 $script_type = null; 741 742 # detect by shebang 743 if ( substr( $head, 0, 2 ) == "#!" ) { 744 $script_type = "ASCII"; 745 } elseif ( substr( $head, 0, 5 ) == "\xef\xbb\xbf#!" ) { 746 $script_type = "UTF-8"; 747 } elseif ( substr( $head, 0, 7 ) == "\xfe\xff\x00#\x00!" ) { 748 $script_type = "UTF-16BE"; 749 } elseif ( substr( $head, 0, 7 ) == "\xff\xfe#\x00!" ) { 750 $script_type = "UTF-16LE"; 751 } 752 753 if ( $script_type ) { 754 if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) { 755 // Quick and dirty fold down to ASCII! 756 $pack = array( 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ); 757 $chars = unpack( $pack[$script_type], substr( $head, 2 ) ); 758 $head = ''; 759 foreach ( $chars as $codepoint ) { 760 if ( $codepoint < 128 ) { 761 $head .= chr( $codepoint ); 762 } else { 763 $head .= '?'; 764 } 765 } 766 } 767 768 $match = array(); 769 770 if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) { 771 $mime = "application/x-{$match[2]}"; 772 wfDebug( __METHOD__ . 
": shell script recognized as $mime\n" ); 773 return $mime; 774 } 775 } 776 777 // Check for ZIP variants (before getimagesize) 778 if ( strpos( $tail, "PK\x05\x06" ) !== false ) { 779 wfDebug( __METHOD__ . ": ZIP header present in $file\n" ); 780 return $this->detectZipType( $head, $tail, $ext ); 781 } 782 783 wfSuppressWarnings(); 784 $gis = getimagesize( $file ); 785 wfRestoreWarnings(); 786 787 if ( $gis && isset( $gis['mime'] ) ) { 788 $mime = $gis['mime']; 789 wfDebug( __METHOD__ . ": getimagesize detected $file as $mime\n" ); 790 return $mime; 791 } 792 793 // Also test DjVu 794 $deja = new DjVuImage( $file ); 795 if ( $deja->isValid() ) { 796 wfDebug( __METHOD__ . ": detected $file as image/vnd.djvu\n" ); 797 return 'image/vnd.djvu'; 798 } 799 800 # Media handling extensions can guess the MIME by content 801 # It's intentionally here so that if core is wrong about a type (false positive), 802 # people will hopefully nag and submit patches :) 803 $mime = false; 804 # Some strings by reference for performance - assuming well-behaved hooks 805 wfRunHooks( 806 'MimeMagicGuessFromContent', 807 array( $this, &$head, &$tail, $file, &$mime ) 808 ); 809 810 return $mime; 811 } 812 813 /** 814 * Detect application-specific file type of a given ZIP file from its 815 * header data. Currently works for OpenDocument and OpenXML types... 816 * If can't tell, returns 'application/zip'. 817 * 818 * @param string $header Some reasonably-sized chunk of file header 819 * @param string|null $tail The tail of the file 820 * @param string|bool $ext The file extension, or true to extract it from the filename. 821 * Set it to false (default) to ignore the extension. DEPRECATED! Set to false, 822 * use improveTypeFromExtension($mime, $ext) later to improve MIME type. 823 * 824 * @return string 825 */ 826 function detectZipType( $header, $tail = null, $ext = false ) { 827 if ( $ext ) { # TODO: remove $ext param 828 wfDebug( __METHOD__ . 
": WARNING: use of the \$ext parameter is deprecated. " . 829 "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" ); 830 } 831 832 $mime = 'application/zip'; 833 $opendocTypes = array( 834 'chart-template', 835 'chart', 836 'formula-template', 837 'formula', 838 'graphics-template', 839 'graphics', 840 'image-template', 841 'image', 842 'presentation-template', 843 'presentation', 844 'spreadsheet-template', 845 'spreadsheet', 846 'text-template', 847 'text-master', 848 'text-web', 849 'text' ); 850 851 // http://lists.oasis-open.org/archives/office/200505/msg00006.html 852 $types = '(?:' . implode( '|', $opendocTypes ) . ')'; 853 $opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/"; 854 855 $openxmlRegex = "/^\[Content_Types\].xml/"; 856 857 if ( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) { 858 $mime = $matches[1]; 859 wfDebug( __METHOD__ . ": detected $mime from ZIP archive\n" ); 860 } elseif ( preg_match( $openxmlRegex, substr( $header, 30 ) ) ) { 861 $mime = "application/x-opc+zip"; 862 # TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere 863 if ( $ext !== true && $ext !== false ) { 864 /** This is the mode used by getPropsFromPath 865 * These MIME's are stored in the database, where we don't really want 866 * x-opc+zip, because we use it only for internal purposes 867 */ 868 if ( $this->isMatchingExtension( $ext, $mime ) ) { 869 /* A known file extension for an OPC file, 870 * find the proper mime type for that file extension 871 */ 872 $mime = $this->guessTypesForExtension( $ext ); 873 } else { 874 $mime = "application/zip"; 875 } 876 } 877 wfDebug( __METHOD__ . 
": detected an Open Packaging Conventions archive: $mime\n" );
		} elseif ( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
			( $headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
			preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
			if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
				$mime = "application/msword";
			}
			switch ( substr( $header, 512, 6 ) ) {
				case "\xEC\xA5\xC1\x00\x0E\x00":
				case "\xEC\xA5\xC1\x00\x1C\x00":
				case "\xEC\xA5\xC1\x00\x43\x00":
					$mime = "application/vnd.ms-powerpoint";
					break;
				case "\xFD\xFF\xFF\xFF\x10\x00":
				case "\xFD\xFF\xFF\xFF\x1F\x00":
				case "\xFD\xFF\xFF\xFF\x22\x00":
				case "\xFD\xFF\xFF\xFF\x23\x00":
				case "\xFD\xFF\xFF\xFF\x28\x00":
				case "\xFD\xFF\xFF\xFF\x29\x00":
				case "\xFD\xFF\xFF\xFF\x10\x02":
				case "\xFD\xFF\xFF\xFF\x1F\x02":
				case "\xFD\xFF\xFF\xFF\x22\x02":
				case "\xFD\xFF\xFF\xFF\x23\x02":
				case "\xFD\xFF\xFF\xFF\x28\x02":
				case "\xFD\xFF\xFF\xFF\x29\x02":
					$mime = "application/vnd.msexcel";
					break;
			}

			wfDebug( __METHOD__ . ": detected a MS Office document with OPC trailer\n" );
		} else {
			wfDebug( __METHOD__ . ": unable to identify type of ZIP archive\n" );
		}
		return $mime;
	}

	/**
	 * Internal MIME type detection.
	 *
	 * Detection is attempted in this order:
	 *  1. the external program configured as 'MimeDetectorCommand', if set;
	 *  2. otherwise the fileinfo extension (finfo_open/finfo_file), if available;
	 *  3. if neither produced a usable type and $ext is not false, the type is
	 *     guessed from the file extension using guessTypesForExtension().
	 *
	 * A detector result containing the word 'unknown' is treated as a failed
	 * detection. If no MIME type can be determined, 'unknown/unknown' is returned.
	 *
	 * @param string $file The file to check
	 * @param string|bool $ext The file extension, or true (default) to extract it
	 *   from the filename. Set it to false to ignore the extension. DEPRECATED!
	 *   Set to false, use improveTypeFromExtension($mime, $ext) later to improve
	 *   MIME type.
	 *
	 * @return string The MIME type of $file
	 */
	private function detectMimeType( $file, $ext = true ) {
		/** @todo Make $ext default to false. Or better, remove it. */
		if ( $ext ) {
			wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. "
				. "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
		}

		$mimeDetectorCommand = $this->mConfig->get( 'MimeDetectorCommand' );
		$m = null;
		if ( $mimeDetectorCommand ) {
			// The file name is shell-escaped before being appended to the command.
			$args = wfEscapeShellArg( $file );
			$m = wfShellExec( "$mimeDetectorCommand $args" );
		} elseif ( function_exists( "finfo_open" ) && function_exists( "finfo_file" ) ) {
			$mime_magic_resource = finfo_open( FILEINFO_MIME );

			if ( $mime_magic_resource ) {
				$m = finfo_file( $mime_magic_resource, $file );
				finfo_close( $mime_magic_resource );
			} else {
				wfDebug( __METHOD__ . ": finfo_open failed on " . FILEINFO_MIME . "!\n" );
			}
		} else {
			wfDebug( __METHOD__ . ": no magic mime detector found!\n" );
		}

		if ( $m ) {
			# normalize
			$m = preg_replace( '![;, ].*$!', '', $m ); #strip charset, etc
			$m = trim( $m );
			$m = strtolower( $m );

			if ( strpos( $m, 'unknown' ) !== false ) {
				// Detector could not identify the file; fall through to the extension.
				$m = null;
			} else {
				wfDebug( __METHOD__ . ": magic mime type of $file: $m\n" );
				return $m;
			}
		}

		// If desired, look at extension as a fallback.
		if ( $ext === true ) {
			// A dot at position 0 (or no dot at all) yields an empty extension.
			$i = strrpos( $file, '.' );
			$ext = strtolower( $i ? substr( $file, $i + 1 ) : '' );
		}
		if ( $ext ) {
			if ( $this->isRecognizableExtension( $ext ) ) {
				wfDebug( __METHOD__ . ": refusing to guess mime type for .$ext file, "
					. "we should have recognized it\n" );
			} else {
				$m = $this->guessTypesForExtension( $ext );
				if ( $m ) {
					wfDebug( __METHOD__ . ": extension mime type of $file: $m\n" );
					return $m;
				}
			}
		}

		// Unknown type
		wfDebug( __METHOD__ . ": failed to guess mime type for $file!\n" );
		return 'unknown/unknown';
	}

	/**
	 * Determine the media type code for a file, using its MIME type, name and
	 * possibly its contents.
	 *
	 * This function relies on the findMediaType(), mapping extensions and MIME
	 * types to media types. Lookups are tried in this order: full MIME type,
	 * file extension, major MIME type (the part before the slash).
	 *
	 * @todo analyse file if need be
	 * @todo look at multiple extension, separately and together.
	 *
	 * @param string $path Full path to the image file, in case we have to look at the contents
	 *   (if null, only the MIME type is used to determine the media type code).
	 * @param string $mime MIME type. If null it will be guessed using guessMimeType.
	 *
	 * @return string A value to be used with the MEDIATYPE_xxx constants.
	 */
	function getMediaType( $path = null, $mime = null ) {
		if ( !$mime && !$path ) {
			return MEDIATYPE_UNKNOWN;
		}

		// If MIME type is unknown, guess it
		if ( !$mime ) {
			$mime = $this->guessMimeType( $path, false );
		}

		// Special code for ogg - detect if it's video (theora),
		// else label it as sound.
		if ( $mime == 'application/ogg' && file_exists( $path ) ) {

			// Read a chunk of the file. Ogg is a binary container, so open it in
			// binary mode; the 't' flag would apply Windows text-mode translation.
			$f = fopen( $path, "rb" );
			if ( !$f ) {
				return MEDIATYPE_UNKNOWN;
			}
			$head = fread( $f, 256 );
			fclose( $f );

			// Drop the encoder name so that 'ffmpeg2theora' in the header does
			// not count as a Theora stream marker.
			$head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );

			// This is an UGLY HACK, file should be parsed correctly
			if ( strpos( $head, 'theora' ) !== false ) {
				return MEDIATYPE_VIDEO;
			}
			foreach ( array( 'vorbis', 'flac', 'speex' ) as $audioCodec ) {
				if ( strpos( $head, $audioCodec ) !== false ) {
					return MEDIATYPE_AUDIO;
				}
			}

			return MEDIATYPE_MULTIMEDIA;
		}

		// Check for entry for full MIME type
		if ( $mime ) {
			$type = $this->findMediaType( $mime );
			if ( $type !== MEDIATYPE_UNKNOWN ) {
				return $type;
			}
		}

		// Check for entry for file extension
		if ( $path ) {
			$i = strrpos( $path, '.' );
			$e = strtolower( $i ? substr( $path, $i + 1 ) : '' );

			// TODO: look at multi-extension if this fails, parse from full path
			$type = $this->findMediaType( '.' . $e );
			if ( $type !== MEDIATYPE_UNKNOWN ) {
				return $type;
			}
		}

		// Check major MIME type
		if ( $mime ) {
			$i = strpos( $mime, '/' );
			if ( $i !== false ) {
				$major = substr( $mime, 0, $i );
				$type = $this->findMediaType( $major );
				if ( $type !== MEDIATYPE_UNKNOWN ) {
					return $type;
				}
			}
		}

		// Every lookup above either returned or produced MEDIATYPE_UNKNOWN, so
		// return it directly instead of reading a possibly unset $type.
		return MEDIATYPE_UNKNOWN;
	}

	/**
	 * Returns a media code matching the given MIME type or file extension.
	 * File extensions are represented by a string starting with a dot (.) to
	 * distinguish them from MIME types.
	 *
	 * For an extension, every MIME type returned by getTypesForExtension() is
	 * tried in order. For a MIME type, an alias listed in
	 * $this->mMimeTypeAliases is resolved first.
	 *
	 * This function relies on the mapping defined by $this->mMediaTypes
	 * @access private
	 * @param string $extMime
	 * @return int|string The matching media type, or MEDIATYPE_UNKNOWN if none matches
	 */
	function findMediaType( $extMime ) {
		if ( strpos( $extMime, '.' ) === 0 ) {
			// If it's an extension, look up the MIME types
			$m = $this->getTypesForExtension( substr( $extMime, 1 ) );
			if ( !$m ) {
				return MEDIATYPE_UNKNOWN;
			}

			// The types come back as one space-separated string.
			$m = explode( ' ', $m );
		} else {
			// Normalize MIME type
			if ( isset( $this->mMimeTypeAliases[$extMime] ) ) {
				$extMime = $this->mMimeTypeAliases[$extMime];
			}

			$m = array( $extMime );
		}

		foreach ( $m as $mime ) {
			foreach ( $this->mMediaTypes as $type => $codes ) {
				if ( in_array( $mime, $codes, true ) ) {
					return $type;
				}
			}
		}

		return MEDIATYPE_UNKNOWN;
	}

	/**
	 * Get the MIME types that various versions of Internet Explorer would
	 * detect from a chunk of the content.
	 *
	 * @param string $fileName The file name (unused at present)
	 * @param string $chunk The first 256 bytes of the file
	 * @param string $proposed The MIME type proposed by the server
	 * @return array
	 */
	public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
		$ca = $this->getIEContentAnalyzer();
		return $ca->getRealMimesFromData( $fileName, $chunk, $proposed );
	}

	/**
	 * Get a cached instance of IEContentAnalyzer
	 *
	 * The analyzer is created on first use and kept in $this->mIEAnalyzer.
	 *
	 * @return IEContentAnalyzer
	 */
	protected function getIEContentAnalyzer() {
		if ( is_null( $this->mIEAnalyzer ) ) {
			$this->mIEAnalyzer = new IEContentAnalyzer;
		}
		return $this->mIEAnalyzer;
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |