MediaWiki
REL1_24
|
<?php

/**
 * Reads the central directory of a ZIP file and reports each entry to a
 * caller-supplied callback.
 *
 * Only the directory structures at the end of the file are parsed (end of
 * central directory record, optional ZIP64 records, and the central directory
 * itself); file contents are never decompressed. Malformed or unsupported
 * input is reported through the returned Status object.
 */
class ZipDirectoryReader {
	/**
	 * Read a ZIP file and call a function for each file discovered in it.
	 *
	 * @param string $fileName Path of the file to open
	 * @param callable $callback Invoked once per central directory entry with
	 *   a single array argument containing:
	 *     - name:  the entry name; passed through unchanged when the entry's
	 *              UTF-8 flag is set, otherwise converted from CP437 to UTF-8
	 *     - mtime: the modification time as a 14-digit YYYYMMDDHHMMSS string
	 *     - size:  the uncompressed size
	 * @param array $options Associative array of options. Only 'zip64' is
	 *   read: when true, ZIP64 structures are honoured; when false (default),
	 *   a file whose headers indicate ZIP64 is rejected.
	 * @return Status Good on success, or fatal with one of the codes
	 *   'zip-file-open-error', 'zip-wrong-format', 'zip-bad', 'zip-unsupported'
	 */
	public static function read( $fileName, $callback, $options = array() ) {
		$zdr = new self( $fileName, $callback, $options );

		return $zdr->execute();
	}

	/** The file name */
	protected $fileName;

	/** The opened file resource */
	protected $file;

	/** The cached length of the file, or null if it has not been loaded yet */
	protected $fileLength;

	/** A segmented cache of the file contents, indexed by segment number */
	protected $buffer;

	/** The file data callback */
	protected $callback;

	/** The ZIP64 mode */
	protected $zip64 = false;

	/** Stored headers */
	protected $eocdr, $eocdr64, $eocdr64Locator;

	protected $data;

	/** The "extra field" ID for ZIP64 central directory entries */
	const ZIP64_EXTRA_HEADER = 0x0001;

	/** The segment size for the file contents cache */
	const SEGSIZE = 16384;

	/** The index of the "general field" bit for UTF-8 file names */
	const GENERAL_UTF8 = 11;

	/** The index of the "general field" bit for central directory encryption */
	const GENERAL_CD_ENCRYPTED = 13;

	/**
	 * Private constructor; use the static read() entry point instead.
	 *
	 * @param string $fileName
	 * @param callable $callback
	 * @param array $options Only the 'zip64' key is read
	 */
	protected function __construct( $fileName, $callback, $options ) {
		$this->fileName = $fileName;
		$this->callback = $callback;

		if ( isset( $options['zip64'] ) ) {
			$this->zip64 = $options['zip64'];
		}
	}

	/**
	 * Read the directory according to settings in $this.
	 *
	 * The file handle is closed on every exit path, including when an
	 * exception other than ZipDirectoryReaderError (for example one thrown by
	 * the callback) propagates out of this method.
	 *
	 * @return Status
	 */
	public function execute() {
		// ZIP is a binary format, so request binary mode explicitly to rule
		// out any text-mode translation.
		$this->file = fopen( $this->fileName, 'rb' );
		$this->data = array();
		if ( !$this->file ) {
			return Status::newFatal( 'zip-file-open-error' );
		}

		$status = Status::newGood();
		try {
			$this->readEndOfCentralDirectoryRecord();
			if ( $this->zip64 ) {
				list( $offset, $size ) = $this->findZip64CentralDirectory();
				$this->readCentralDirectory( $offset, $size );
			} else {
				if ( $this->eocdr['CD size'] == 0xffffffff
					|| $this->eocdr['CD offset'] == 0xffffffff
					|| $this->eocdr['CD entries total'] == 0xffff
				) {
					$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
						'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
						'opening vulnerabilities on clients using OpenJDK 7 or later.' );
				}

				list( $offset, $size ) = $this->findOldCentralDirectory();
				$this->readCentralDirectory( $offset, $size );
			}
		} catch ( ZipDirectoryReaderError $e ) {
			$status->fatal( $e->getErrorCode() );
		} catch ( Exception $e ) {
			// Anything else is not ours to handle, but the handle must not
			// leak: close it, then let the exception continue upwards.
			fclose( $this->file );
			throw $e;
		}

		fclose( $this->file );

		return $status;
	}

	/**
	 * Log a debug message and throw an internal error exception.
	 *
	 * @param string $code Error code, later passed to Status::fatal()
	 * @param string $debugMessage Message written to the debug log
	 * @throws ZipDirectoryReaderError Always
	 */
	public function error( $code, $debugMessage ) {
		wfDebug( __CLASS__ . ": Fatal error: $debugMessage\n" );
		throw new ZipDirectoryReaderError( $code );
	}

	/**
	 * Read the header which is at the end of the central directory,
	 * unimaginatively called the "end of central directory record" by the ZIP
	 * spec. The result is stored in $this->eocdr.
	 *
	 * The search window covers the fixed-size record plus up to 65536 bytes
	 * before the end of the file, and the last signature match in that window
	 * is used.
	 *
	 * @throws ZipDirectoryReaderError
	 */
	public function readEndOfCentralDirectoryRecord() {
		$info = array(
			'signature' => 4,
			'disk' => 2,
			'CD start disk' => 2,
			'CD entries this disk' => 2,
			'CD entries total' => 2,
			'CD size' => 4,
			'CD offset' => 4,
			'file comment length' => 2,
		);
		$structSize = $this->getStructSize( $info );
		$startPos = $this->getFileLength() - 65536 - $structSize;
		if ( $startPos < 0 ) {
			$startPos = 0;
		}

		$block = $this->getBlock( $startPos );
		$sigPos = strrpos( $block, "PK\x05\x06" );
		if ( $sigPos === false ) {
			$this->error( 'zip-wrong-format',
				"zip file lacks EOCDR signature. It probably isn't a zip file." );
		}

		$this->eocdr = $this->unpack( substr( $block, $sigPos ), $info );
		$this->eocdr['EOCDR size'] = $structSize + $this->eocdr['file comment length'];

		// The record plus its comment must extend exactly to the end of the file
		if ( $structSize + $this->eocdr['file comment length'] != strlen( $block ) - $sigPos ) {
			$this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
		}
		if ( $this->eocdr['disk'] !== 0
			|| $this->eocdr['CD start disk'] !== 0
		) {
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
		}
		$this->eocdr += $this->unpack(
			$block,
			array( 'file comment' => array( 'string', $this->eocdr['file comment length'] ) ),
			$sigPos + $structSize );
		$this->eocdr['position'] = $startPos + $sigPos;
	}

	/**
	 * Read the header called the "ZIP64 end of central directory locator",
	 * expected immediately before the end of central directory record.
	 * The result is stored in $this->eocdr64Locator.
	 *
	 * @throws ZipDirectoryReaderError
	 */
	public function readZip64EndOfCentralDirectoryLocator() {
		$info = array(
			'signature' => array( 'string', 4 ),
			'eocdr64 start disk' => 4,
			'eocdr64 offset' => 8,
			'number of disks' => 4,
		);
		$structSize = $this->getStructSize( $info );

		$start = $this->getFileLength() - $this->eocdr['EOCDR size'] - $structSize;
		$block = $this->getBlock( $start, $structSize );
		$this->eocdr64Locator = $data = $this->unpack( $block, $info );

		if ( $data['signature'] !== "PK\x06\x07" ) {
			// Note: Java will allow this and continue to read the
			// EOCDR64, so we have to reject the upload, we can't
			// just use the EOCDR header instead.
			$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
		}
	}

	/**
	 * Read the header called the "ZIP64 end of central directory record",
	 * found at the offset given by the ZIP64 locator. The result is stored in
	 * $this->eocdr64.
	 *
	 * @throws ZipDirectoryReaderError
	 */
	public function readZip64EndOfCentralDirectoryRecord() {
		if ( $this->eocdr64Locator['eocdr64 start disk'] != 0
			|| $this->eocdr64Locator['number of disks'] != 0
		) {
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
		}

		$info = array(
			'signature' => array( 'string', 4 ),
			'EOCDR64 size' => 8,
			'version made by' => 2,
			'version needed' => 2,
			'disk' => 4,
			'CD start disk' => 4,
			'CD entries this disk' => 8,
			'CD entries total' => 8,
			'CD size' => 8,
			'CD offset' => 8
		);
		$structSize = $this->getStructSize( $info );
		$block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
		$this->eocdr64 = $data = $this->unpack( $block, $info );
		if ( $data['signature'] !== "PK\x06\x06" ) {
			$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
		}
		if ( $data['disk'] !== 0
			|| $data['CD start disk'] !== 0
		) {
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
		}
	}

	/**
	 * Find the location of the central directory, as would be seen by a
	 * non-ZIP64 reader.
	 *
	 * @return array List containing offset, size
	 * @throws ZipDirectoryReaderError If the directory does not end exactly
	 *   where the end of central directory record begins
	 */
	public function findOldCentralDirectory() {
		$size = $this->eocdr['CD size'];
		$offset = $this->eocdr['CD offset'];
		$endPos = $this->eocdr['position'];

		// Some readers use the EOCDR position instead of the offset field
		// to find the directory, so to be safe, we check if they both agree.
		if ( $offset + $size != $endPos ) {
			$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
				'of central directory record' );
		}

		return array( $offset, $size );
	}

	/**
	 * Find the location of the central directory, as would be seen by a
	 * ZIP64-compliant reader.
	 *
	 * @return array List containing offset, size
	 * @throws ZipDirectoryReaderError
	 */
	public function findZip64CentralDirectory() {
		// The spec is ambiguous about the exact rules of precedence between the
		// ZIP64 headers and the original headers. Here we follow zip_util.c
		// from OpenJDK 7.
		$size = $this->eocdr['CD size'];
		$offset = $this->eocdr['CD offset'];
		$numEntries = $this->eocdr['CD entries total'];
		$endPos = $this->eocdr['position'];
		if ( $size == 0xffffffff
			|| $offset == 0xffffffff
			|| $numEntries == 0xffff
		) {
			$this->readZip64EndOfCentralDirectoryLocator();

			if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
				$this->readZip64EndOfCentralDirectoryRecord();
				if ( isset( $this->eocdr64['CD offset'] ) ) {
					$size = $this->eocdr64['CD size'];
					$offset = $this->eocdr64['CD offset'];
					$endPos = $this->eocdr64Locator['eocdr64 offset'];
				}
			}
		}
		// Some readers use the EOCDR position instead of the offset field
		// to find the directory, so to be safe, we check if they both agree.
		if ( $offset + $size != $endPos ) {
			$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
				'of central directory record' );
		}

		return array( $offset, $size );
	}

	/**
	 * Read the central directory at the given location and invoke the
	 * callback once for each entry found in it.
	 *
	 * @param int|float $offset Byte position of the directory in the file
	 * @param int|float $size Length of the directory in bytes
	 * @throws ZipDirectoryReaderError
	 */
	public function readCentralDirectory( $offset, $size ) {
		$block = $this->getBlock( $offset, $size );

		$fixedInfo = array(
			'signature' => array( 'string', 4 ),
			'version made by' => 2,
			'version needed' => 2,
			'general bits' => 2,
			'compression method' => 2,
			'mod time' => 2,
			'mod date' => 2,
			'crc-32' => 4,
			'compressed size' => 4,
			'uncompressed size' => 4,
			'name length' => 2,
			'extra field length' => 2,
			'comment length' => 2,
			'disk number start' => 2,
			'internal attrs' => 2,
			'external attrs' => 4,
			'local header offset' => 4,
		);
		$fixedSize = $this->getStructSize( $fixedInfo );

		$pos = 0;
		while ( $pos < $size ) {
			$data = $this->unpack( $block, $fixedInfo, $pos );
			$pos += $fixedSize;

			if ( $data['signature'] !== "PK\x01\x02" ) {
				$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
			}

			// The variable-length tail follows the fixed header directly
			$variableInfo = array(
				'name' => array( 'string', $data['name length'] ),
				'extra field' => array( 'string', $data['extra field length'] ),
				'comment' => array( 'string', $data['comment length'] ),
			);
			$data += $this->unpack( $block, $variableInfo, $pos );
			$pos += $this->getStructSize( $variableInfo );

			if ( $this->zip64 && (
				$data['compressed size'] == 0xffffffff
				|| $data['uncompressed size'] == 0xffffffff
				|| $data['local header offset'] == 0xffffffff )
			) {
				$zip64Data = $this->unpackZip64Extra( $data['extra field'] );
				if ( $zip64Data ) {
					// Left operand wins with "+", so ZIP64 values take precedence
					$data = $zip64Data + $data;
				}
			}

			if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
				$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
			}

			// Convert the timestamp into MediaWiki format
			// For the format, please see the MS-DOS 2.0 Programmer's Reference,
			// pages 3-5 and 3-6.
			$time = $data['mod time'];
			$date = $data['mod date'];

			$year = 1980 + ( $date >> 9 );
			$month = ( $date >> 5 ) & 15;
			$day = $date & 31;
			$hour = ( $time >> 11 ) & 31;
			$minute = ( $time >> 5 ) & 63;
			$second = ( $time & 31 ) * 2;
			$timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
				$year, $month, $day, $hour, $minute, $second );

			// Convert the character set in the file name
			if ( $this->testBit( $data['general bits'], self::GENERAL_UTF8 ) ) {
				$name = $data['name'];
			} else {
				$name = iconv( 'CP437', 'UTF-8', $data['name'] );
			}

			// Compile a data array for the user, with a sensible format
			$userData = array(
				'name' => $name,
				'mtime' => $timestamp,
				'size' => $data['uncompressed size'],
			);
			call_user_func( $this->callback, $userData );
		}
	}

	/**
	 * Interpret ZIP64 "extra field" data and return an associative array.
	 *
	 * The extra field is walked as a sequence of (id, size, data) records;
	 * the first record whose id is ZIP64_EXTRA_HEADER is unpacked using a
	 * fixed layout of all four ZIP64 fields.
	 *
	 * @param string $extraField Raw extra field bytes of a directory entry
	 * @return array|bool The unpacked ZIP64 fields, or false if no ZIP64
	 *   record is present
	 * @throws ZipDirectoryReaderError If a record runs past the supplied data
	 */
	public function unpackZip64Extra( $extraField ) {
		$extraHeaderInfo = array(
			'id' => 2,
			'size' => 2,
		);
		$extraHeaderSize = $this->getStructSize( $extraHeaderInfo );

		$zip64ExtraInfo = array(
			'uncompressed size' => 8,
			'compressed size' => 8,
			'local header offset' => 8,
			'disk number start' => 4,
		);

		$extraPos = 0;
		while ( $extraPos < strlen( $extraField ) ) {
			$extra = $this->unpack( $extraField, $extraHeaderInfo, $extraPos );
			$extraPos += $extraHeaderSize;
			$extra += $this->unpack( $extraField,
				array( 'data' => array( 'string', $extra['size'] ) ),
				$extraPos );
			$extraPos += $extra['size'];

			if ( $extra['id'] == self::ZIP64_EXTRA_HEADER ) {
				return $this->unpack( $extra['data'], $zip64ExtraInfo );
			}
		}

		return false;
	}

	/**
	 * Get the length of the file, loading it with fstat() on first use and
	 * caching it afterwards.
	 *
	 * @return int
	 */
	public function getFileLength() {
		if ( $this->fileLength === null ) {
			$stat = fstat( $this->file );
			$this->fileLength = $stat['size'];
		}

		return $this->fileLength;
	}

	/**
	 * Get the file contents from a given offset. If there are not enough
	 * bytes in the file to satisfy the request, an error is raised.
	 *
	 * @param int|float $start The byte offset of the start of the block
	 * @param int|float|null $length The number of bytes to return. If omitted,
	 *   the remainder of the file will be returned.
	 * @return string
	 * @throws ZipDirectoryReaderError
	 */
	public function getBlock( $start, $length = null ) {
		$fileLength = $this->getFileLength();
		if ( $start >= $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested position $start, " .
				"file length is $fileLength" );
		}
		if ( $length === null ) {
			$length = $fileLength - $start;
		}
		$end = $start + $length;
		if ( $end > $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested end position $end, " .
				"file length is $fileLength" );
		}
		$startSeg = floor( $start / self::SEGSIZE );
		$endSeg = ceil( $end / self::SEGSIZE );

		// The inclusive upper bound can fetch one segment more than strictly
		// needed; that is harmless because getSegment() returns '' past the
		// end of the file and substr() below trims to the requested range.
		$block = '';
		for ( $segIndex = $startSeg; $segIndex <= $endSeg; $segIndex++ ) {
			$block .= $this->getSegment( $segIndex );
		}

		$block = substr( $block,
			$start - $startSeg * self::SEGSIZE,
			$length );

		if ( strlen( $block ) < $length ) {
			$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
		}

		return $block;
	}

	/**
	 * Get a section of the file starting at position $segIndex * self::SEGSIZE,
	 * of length self::SEGSIZE. The result is cached in $this->buffer, so
	 * repeated requests for the same segment do not touch the file again.
	 *
	 * Requests that start at or beyond the end of the file yield an empty
	 * string; the final segment of the file may be shorter than SEGSIZE.
	 *
	 * @param int|float $segIndex
	 * @return string
	 * @throws ZipDirectoryReaderError If seeking or reading fails
	 */
	public function getSegment( $segIndex ) {
		if ( !isset( $this->buffer[$segIndex] ) ) {
			$bytePos = $segIndex * self::SEGSIZE;
			if ( $bytePos >= $this->getFileLength() ) {
				$this->buffer[$segIndex] = '';

				return '';
			}
			// fseek() returns 0 on success and -1 on failure
			if ( fseek( $this->file, $bytePos ) ) {
				$this->error( 'zip-bad', "seek to $bytePos failed" );
			}
			$seg = fread( $this->file, self::SEGSIZE );
			if ( $seg === false ) {
				$this->error( 'zip-bad', "read from $bytePos failed" );
			}
			$this->buffer[$segIndex] = $seg;
		}

		return $this->buffer[$segIndex];
	}

	/**
	 * Get the size in bytes of a structure described in the format accepted
	 * by unpack().
	 *
	 * @param array $struct Map of field name to either an integer byte count
	 *   or an array of ( type name, byte count )
	 * @return int
	 */
	public function getStructSize( $struct ) {
		$size = 0;
		foreach ( $struct as $type ) {
			if ( is_array( $type ) ) {
				list( , $fieldSize ) = $type;
				$size += $fieldSize;
			} else {
				$size += $type;
			}
		}

		return $size;
	}

	/**
	 * Unpack a binary structure. This is like the built-in unpack() function
	 * except nicer.
	 *
	 * @param string $string The binary data input
	 * @param array $struct An associative array giving structure members and
	 *   their types. The key is the field name; the value is either:
	 *     - an integer N: an unsigned little-endian integer of N bytes, or
	 *     - array( 'string', N ): a raw string of N bytes.
	 * @param int $offset The offset into the string at which to start
	 *   unpacking
	 * @return array Unpacked associative array. Integers that do not fit the
	 *   native integer type are returned as floats.
	 * @throws MWException If an unknown type name is given
	 * @throws ZipDirectoryReaderError If the structure would run past the end
	 *   of $string, or an integer exceeds 2^52
	 */
	public function unpack( $string, $struct, $offset = 0 ) {
		$size = $this->getStructSize( $struct );
		if ( $offset + $size > strlen( $string ) ) {
			$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
		}

		$data = array();
		$pos = $offset;
		foreach ( $struct as $key => $type ) {
			if ( is_array( $type ) ) {
				list( $typeName, $fieldSize ) = $type;
				switch ( $typeName ) {
					case 'string':
						$data[$key] = substr( $string, $pos, $fieldSize );
						$pos += $fieldSize;
						break;
					default:
						throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
				}
			} else {
				// Unsigned little-endian integer
				$length = intval( $type );

				// Calculate the value. Use an algorithm which automatically
				// upgrades the value to floating point if necessary.
				$value = 0;
				for ( $i = $length - 1; $i >= 0; $i-- ) {
					$value *= 256;
					$value += ord( $string[$pos + $i] );
				}

				// Throw an exception if there was loss of precision
				if ( $value > pow( 2, 52 ) ) {
					$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
						'This could happen if we tried to unpack a 64-bit structure ' .
						'at an invalid location.' );
				}
				$data[$key] = $value;
				$pos += $length;
			}
		}

		return $data;
	}

	/**
	 * Returns a bit from a given position in an integer value, converted to
	 * boolean.
	 *
	 * @param int $value
	 * @param int $bitIndex The index of the bit, where 0 is the LSB
	 * @return bool
	 */
	public function testBit( $value, $bitIndex ) {
		return (bool)( ( $value >> $bitIndex ) & 1 );
	}

	/**
	 * Debugging helper function which prints a hexadecimal and printable
	 * character dump of a string to standard output, 16 bytes per row.
	 *
	 * @param string $s
	 */
	public function hexDump( $s ) {
		$n = strlen( $s );
		for ( $i = 0; $i < $n; $i += 16 ) {
			printf( "%08X ", $i );
			for ( $j = 0; $j < 16; $j++ ) {
				print " ";
				if ( $j == 8 ) {
					print " ";
				}
				if ( $i + $j >= $n ) {
					// A present byte occupies two hex digits, so pad absent
					// ones with two spaces to keep the last row aligned.
					print "  ";
				} else {
					printf( "%02X", ord( $s[$i + $j] ) );
				}
			}

			print " |";
			for ( $j = 0; $j < 16; $j++ ) {
				if ( $i + $j >= $n ) {
					print " ";
				} elseif ( ctype_print( $s[$i + $j] ) ) {
					print $s[$i + $j];
				} else {
					print '.';
				}
			}
			print "|\n";
		}
	}
}

/**
 * Internal exception class. Will be caught by ZipDirectoryReader::execute()
 * and converted into a fatal Status.
 */
class ZipDirectoryReaderError extends Exception {
	/** The error code supplied at construction */
	protected $errorCode;

	/**
	 * @param string $code Error code, e.g. 'zip-bad'
	 */
	public function __construct( $code ) {
		$this->errorCode = $code;
		parent::__construct( "ZipDirectoryReader error: $code" );
	}

	/**
	 * @return string The error code supplied at construction
	 */
	public function getErrorCode() {
		return $this->errorCode;
	}
}