MediaWiki
REL1_23
|
<?php
/**
 * ZIP central directory reader.
 *
 * The code in this file is deliberately strict: anything that different ZIP
 * readers could interpret in different ways is rejected rather than guessed at.
 */

/**
 * Reads the central directory of a ZIP file and reports every entry found
 * there to a user-supplied callback.
 *
 * Only the directory structures at the end of the file are read (end of
 * central directory record, optional ZIP64 records, central directory); the
 * compressed file data itself is never touched.
 */
class ZipDirectoryReader {
	/**
	 * Read a ZIP file and call a function for each entry in its central directory.
	 *
	 * @param string $fileName Path of the file to open
	 * @param callable $callback Called once per directory entry with a single
	 *   associative array argument holding the keys:
	 *     - name:  the file name, converted from CP437 to UTF-8 unless the
	 *              entry is flagged as UTF-8 or iconv is unavailable
	 *     - mtime: the modification time as a 14-digit YYYYMMDDHHMMSS string
	 *     - size:  the uncompressed size
	 * @param array $options Associative array of options:
	 *     - zip64: if true, ZIP64 headers are honoured; if false (the default),
	 *              a file whose EOCDR indicates ZIP64 is rejected
	 * @return Status Good on success; fatal with one of the codes
	 *   zip-file-open-error, zip-wrong-format, zip-bad or zip-unsupported
	 */
	public static function read( $fileName, $callback, $options = array() ) {
		$zdr = new self( $fileName, $callback, $options );

		return $zdr->execute();
	}

	/** The file name */
	protected $fileName;

	/** The opened file resource */
	protected $file;

	/** The cached length of the file, or null if it has not been loaded yet */
	protected $fileLength;

	/** A segmented cache of the file contents, indexed by segment number */
	protected $buffer;

	/** The file data callback */
	protected $callback;

	/** The ZIP64 mode */
	protected $zip64 = false;

	/** Stored headers */
	protected $eocdr, $eocdr64, $eocdr64Locator;

	protected $data;

	/** The "extra field" header ID which identifies the ZIP64 extended information */
	const ZIP64_EXTRA_HEADER = 0x0001;

	/** The number of bytes read from the file per cached segment */
	const SEGSIZE = 16384;

	/** The index of the "general bits" flag which marks file names as UTF-8 */
	const GENERAL_UTF8 = 11;

	/** The index of the "general bits" flag which marks an encrypted central directory */
	const GENERAL_CD_ENCRYPTED = 13;

	/**
	 * Private constructor; use ZipDirectoryReader::read().
	 *
	 * @param string $fileName
	 * @param callable $callback
	 * @param array $options
	 */
	protected function __construct( $fileName, $callback, $options ) {
		$this->fileName = $fileName;
		$this->callback = $callback;

		if ( isset( $options['zip64'] ) ) {
			$this->zip64 = $options['zip64'];
		}
	}

	/**
	 * Read the directory according to settings in $this.
	 *
	 * The file handle is closed on every exit path, including when an
	 * exception other than ZipDirectoryReaderError (for instance one thrown
	 * by the callback) propagates to the caller.
	 *
	 * @throws Exception Any non-ZipDirectoryReaderError exception is rethrown
	 * @return Status
	 */
	public function execute() {
		// 'b' makes the binary read explicit on every platform.
		$this->file = fopen( $this->fileName, 'rb' );
		$this->data = array();
		if ( !$this->file ) {
			return Status::newFatal( 'zip-file-open-error' );
		}

		$status = Status::newGood();
		try {
			$this->readEndOfCentralDirectoryRecord();
			if ( $this->zip64 ) {
				list( $offset, $size ) = $this->findZip64CentralDirectory();
				$this->readCentralDirectory( $offset, $size );
			} else {
				if ( $this->eocdr['CD size'] == 0xffffffff
					|| $this->eocdr['CD offset'] == 0xffffffff
					|| $this->eocdr['CD entries total'] == 0xffff
				) {
					$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
						'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
						'opening vulnerabilities on clients using OpenJDK 7 or later.' );
				}

				list( $offset, $size ) = $this->findOldCentralDirectory();
				$this->readCentralDirectory( $offset, $size );
			}
		} catch ( ZipDirectoryReaderError $e ) {
			$status->fatal( $e->getErrorCode() );
		} catch ( Exception $e ) {
			// Not one of our own errors: release the handle, then let it propagate.
			fclose( $this->file );
			throw $e;
		}

		fclose( $this->file );

		return $status;
	}

	/**
	 * Log a debug message and throw an error.
	 *
	 * @param string $code Error code, later reported through the Status object
	 * @param string $debugMessage Message written to the debug log only
	 * @throws ZipDirectoryReaderError Always
	 */
	public function error( $code, $debugMessage ) {
		wfDebug( __CLASS__ . ": Fatal error: $debugMessage\n" );
		throw new ZipDirectoryReaderError( $code );
	}

	/**
	 * Read the header which is at the end of the central directory,
	 * unimaginatively called the "end of central directory record" by the
	 * ZIP spec. The result is stored in $this->eocdr.
	 *
	 * The last occurrence of the signature within the final 65536 bytes plus
	 * the fixed record size is used.
	 *
	 * @throws ZipDirectoryReaderError
	 */
	public function readEndOfCentralDirectoryRecord() {
		$info = array(
			'signature' => 4,
			'disk' => 2,
			'CD start disk' => 2,
			'CD entries this disk' => 2,
			'CD entries total' => 2,
			'CD size' => 4,
			'CD offset' => 4,
			'file comment length' => 2,
		);
		$structSize = $this->getStructSize( $info );
		$startPos = $this->getFileLength() - 65536 - $structSize;
		if ( $startPos < 0 ) {
			$startPos = 0;
		}

		$block = $this->getBlock( $startPos );
		$sigPos = strrpos( $block, "PK\x05\x06" );
		if ( $sigPos === false ) {
			$this->error( 'zip-wrong-format',
				"zip file lacks EOCDR signature. It probably isn't a zip file." );
		}

		$this->eocdr = $this->unpack( substr( $block, $sigPos ), $info );
		$eocdrSize = $structSize + $this->eocdr['file comment length'];
		$this->eocdr['EOCDR size'] = $eocdrSize;

		// The record, including its comment, must be the very last thing in the file.
		if ( $eocdrSize != strlen( $block ) - $sigPos ) {
			$this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
		}
		if ( $this->eocdr['disk'] !== 0
			|| $this->eocdr['CD start disk'] !== 0
		) {
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
		}
		$this->eocdr += $this->unpack(
			$block,
			array( 'file comment' => array( 'string', $this->eocdr['file comment length'] ) ),
			$sigPos + $structSize );
		$this->eocdr['position'] = $startPos + $sigPos;
	}

	/**
	 * Read the header called the "ZIP64 end of central directory locator",
	 * which is expected immediately before the EOCDR. The result is stored
	 * in $this->eocdr64Locator.
	 *
	 * @throws ZipDirectoryReaderError
	 */
	public function readZip64EndOfCentralDirectoryLocator() {
		$info = array(
			'signature' => array( 'string', 4 ),
			'eocdr64 start disk' => 4,
			'eocdr64 offset' => 8,
			'number of disks' => 4,
		);
		$structSize = $this->getStructSize( $info );

		$start = $this->getFileLength() - $this->eocdr['EOCDR size'] - $structSize;
		$block = $this->getBlock( $start, $structSize );
		$this->eocdr64Locator = $data = $this->unpack( $block, $info );

		if ( $data['signature'] !== "PK\x06\x07" ) {
			// Note: Java will allow this and continue to read the
			// EOCDR64, so we have to reject the upload, we can't
			// just use the EOCDR header instead.
			$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
		}
	}

	/**
	 * Read the header called the "ZIP64 end of central directory record",
	 * found at the offset given by the locator. The result is stored in
	 * $this->eocdr64.
	 *
	 * @throws ZipDirectoryReaderError
	 */
	public function readZip64EndOfCentralDirectoryRecord() {
		if ( $this->eocdr64Locator['eocdr64 start disk'] != 0
			|| $this->eocdr64Locator['number of disks'] != 0
		) {
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
		}

		$info = array(
			'signature' => array( 'string', 4 ),
			'EOCDR64 size' => 8,
			'version made by' => 2,
			'version needed' => 2,
			'disk' => 4,
			'CD start disk' => 4,
			'CD entries this disk' => 8,
			'CD entries total' => 8,
			'CD size' => 8,
			'CD offset' => 8
		);
		$structSize = $this->getStructSize( $info );
		$block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
		$this->eocdr64 = $data = $this->unpack( $block, $info );
		if ( $data['signature'] !== "PK\x06\x06" ) {
			$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
		}
		if ( $data['disk'] !== 0
			|| $data['CD start disk'] !== 0
		) {
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
		}
	}

	/**
	 * Find the location of the central directory, as would be seen by a
	 * non-ZIP64 reader.
	 *
	 * @throws ZipDirectoryReaderError
	 * @return array List containing offset, size and end position.
	 *   NOTE(review): only offset and size are actually returned.
	 */
	public function findOldCentralDirectory() {
		$size = $this->eocdr['CD size'];
		$offset = $this->eocdr['CD offset'];
		$endPos = $this->eocdr['position'];

		// Some readers use the EOCDR position instead of the offset field
		// to find the directory, so to be safe, we check if they both agree.
		if ( $offset + $size != $endPos ) {
			$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
				'of central directory record' );
		}

		return array( $offset, $size );
	}

	/**
	 * Find the location of the central directory, as would be seen by a
	 * ZIP64-compliant reader.
	 *
	 * @throws ZipDirectoryReaderError
	 * @return array List containing the offset and size of the directory
	 */
	public function findZip64CentralDirectory() {
		// The spec is ambiguous about the exact rules of precedence between the
		// ZIP64 headers and the original headers. Here we follow zip_util.c
		// from OpenJDK 7.
		$size = $this->eocdr['CD size'];
		$offset = $this->eocdr['CD offset'];
		$numEntries = $this->eocdr['CD entries total'];
		$endPos = $this->eocdr['position'];
		if ( $size == 0xffffffff
			|| $offset == 0xffffffff
			|| $numEntries == 0xffff
		) {
			$this->readZip64EndOfCentralDirectoryLocator();

			if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
				$this->readZip64EndOfCentralDirectoryRecord();
				if ( isset( $this->eocdr64['CD offset'] ) ) {
					$size = $this->eocdr64['CD size'];
					$offset = $this->eocdr64['CD offset'];
					$endPos = $this->eocdr64Locator['eocdr64 offset'];
				}
			}
		}
		// Some readers use the EOCDR position instead of the offset field
		// to find the directory, so to be safe, we check if they both agree.
		if ( $offset + $size != $endPos ) {
			$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
				'of central directory record' );
		}

		return array( $offset, $size );
	}

	/**
	 * Read the central directory at the given location and invoke the
	 * callback once for every entry in it.
	 *
	 * @param int|float $offset Byte offset of the directory within the file
	 * @param int|float $size Size of the directory in bytes
	 * @throws ZipDirectoryReaderError
	 */
	public function readCentralDirectory( $offset, $size ) {
		$block = $this->getBlock( $offset, $size );

		$fixedInfo = array(
			'signature' => array( 'string', 4 ),
			'version made by' => 2,
			'version needed' => 2,
			'general bits' => 2,
			'compression method' => 2,
			'mod time' => 2,
			'mod date' => 2,
			'crc-32' => 4,
			'compressed size' => 4,
			'uncompressed size' => 4,
			'name length' => 2,
			'extra field length' => 2,
			'comment length' => 2,
			'disk number start' => 2,
			'internal attrs' => 2,
			'external attrs' => 4,
			'local header offset' => 4,
		);
		$fixedSize = $this->getStructSize( $fixedInfo );

		$pos = 0;
		while ( $pos < $size ) {
			$data = $this->unpack( $block, $fixedInfo, $pos );
			$pos += $fixedSize;

			if ( $data['signature'] !== "PK\x01\x02" ) {
				$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
			}

			$variableInfo = array(
				'name' => array( 'string', $data['name length'] ),
				'extra field' => array( 'string', $data['extra field length'] ),
				'comment' => array( 'string', $data['comment length'] ),
			);
			$data += $this->unpack( $block, $variableInfo, $pos );
			$pos += $this->getStructSize( $variableInfo );

			if ( $this->zip64 && (
				$data['compressed size'] == 0xffffffff
				|| $data['uncompressed size'] == 0xffffffff
				|| $data['local header offset'] == 0xffffffff )
			) {
				$zip64Data = $this->unpackZip64Extra( $data['extra field'] );
				if ( $zip64Data ) {
					// Left operand wins with "+", so the ZIP64 values take precedence.
					$data = $zip64Data + $data;
				}
			}

			if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
				$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
			}

			// Convert the timestamp into MediaWiki format
			// For the format, please see the MS-DOS 2.0 Programmer's Reference,
			// pages 3-5 and 3-6.
			$time = $data['mod time'];
			$date = $data['mod date'];

			$year = 1980 + ( $date >> 9 );
			$month = ( $date >> 5 ) & 15;
			$day = $date & 31;
			$hour = ( $time >> 11 ) & 31;
			$minute = ( $time >> 5 ) & 63;
			$second = ( $time & 31 ) * 2;
			$timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
				$year, $month, $day, $hour, $minute, $second );

			// Convert the character set in the file name
			if ( !function_exists( 'iconv' )
				|| $this->testBit( $data['general bits'], self::GENERAL_UTF8 )
			) {
				$name = $data['name'];
			} else {
				$name = iconv( 'CP437', 'UTF-8', $data['name'] );
			}

			// Compile a data array for the user, with a sensible format
			$userData = array(
				'name' => $name,
				'mtime' => $timestamp,
				'size' => $data['uncompressed size'],
			);
			call_user_func( $this->callback, $userData );
		}
	}

	/**
	 * Interpret ZIP64 "extra field" data and return an associative array.
	 *
	 * The extra field is walked as a sequence of (id, size, data) records and
	 * the first record whose id is ZIP64_EXTRA_HEADER is unpacked.
	 *
	 * @param string $extraField Raw extra field bytes of a directory entry
	 * @throws ZipDirectoryReaderError If a record runs past the end of the field
	 * @return array|bool The unpacked ZIP64 values, or false if the field has
	 *   no ZIP64 record
	 */
	public function unpackZip64Extra( $extraField ) {
		$extraHeaderInfo = array(
			'id' => 2,
			'size' => 2,
		);
		$extraHeaderSize = $this->getStructSize( $extraHeaderInfo );

		$zip64ExtraInfo = array(
			'uncompressed size' => 8,
			'compressed size' => 8,
			'local header offset' => 8,
			'disk number start' => 4,
		);

		$extraPos = 0;
		while ( $extraPos < strlen( $extraField ) ) {
			$extra = $this->unpack( $extraField, $extraHeaderInfo, $extraPos );
			$extraPos += $extraHeaderSize;
			$extra += $this->unpack( $extraField,
				array( 'data' => array( 'string', $extra['size'] ) ),
				$extraPos );
			$extraPos += $extra['size'];

			if ( $extra['id'] == self::ZIP64_EXTRA_HEADER ) {
				return $this->unpack( $extra['data'], $zip64ExtraInfo );
			}
		}

		return false;
	}

	/**
	 * Get the length of the file, caching the result after the first call.
	 *
	 * @return int
	 */
	public function getFileLength() {
		if ( $this->fileLength === null ) {
			$stat = fstat( $this->file );
			$this->fileLength = $stat['size'];
		}

		return $this->fileLength;
	}

	/**
	 * Get the file contents from a given offset. If there are not enough
	 * bytes in the file to satisfy the request, an error is thrown.
	 *
	 * @param int|float $start The byte offset of the start of the block
	 * @param int|float|null $length The number of bytes to return. If omitted,
	 *   the remainder of the file is returned.
	 * @throws ZipDirectoryReaderError With code zip-bad if the requested range
	 *   is not entirely inside the file
	 * @return string
	 */
	public function getBlock( $start, $length = null ) {
		$fileLength = $this->getFileLength();
		// A negative offset can be computed for a tiny file which claims to
		// carry ZIP64 headers; reject it here rather than relying on fseek()
		// to fail later.
		if ( $start < 0 ) {
			$this->error( 'zip-bad', "getBlock() requested negative position $start" );
		}
		if ( $start >= $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested position $start, " .
				"file length is $fileLength" );
		}
		if ( $length === null ) {
			$length = $fileLength - $start;
		}
		$end = $start + $length;
		if ( $end > $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested end position $end, " .
				"file length is $fileLength" );
		}
		// floor()/ceil() return floats; segment numbers are used as array
		// keys and to compute seek offsets, so make them integers.
		$startSeg = (int)floor( $start / self::SEGSIZE );
		$endSeg = (int)ceil( $end / self::SEGSIZE );

		$block = '';
		for ( $segIndex = $startSeg; $segIndex <= $endSeg; $segIndex++ ) {
			$block .= $this->getSegment( $segIndex );
		}

		$block = substr( $block,
			$start - $startSeg * self::SEGSIZE,
			$length );

		if ( strlen( $block ) < $length ) {
			$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
		}

		return $block;
	}

	/**
	 * Get a section of the file starting at position $segIndex * self::SEGSIZE,
	 * of length self::SEGSIZE. The result is cached in $this->buffer, so each
	 * segment is read from disk at most once.
	 *
	 * A segment which starts at or beyond the end of the file is returned as
	 * an empty string; the last segment of the file may be short.
	 *
	 * @param int $segIndex
	 * @throws ZipDirectoryReaderError If seeking or reading fails
	 * @return string
	 */
	public function getSegment( $segIndex ) {
		if ( !isset( $this->buffer[$segIndex] ) ) {
			$bytePos = $segIndex * self::SEGSIZE;
			if ( $bytePos >= $this->getFileLength() ) {
				$this->buffer[$segIndex] = '';

				return '';
			}
			if ( fseek( $this->file, $bytePos ) ) {
				$this->error( 'zip-bad', "seek to $bytePos failed" );
			}
			$seg = fread( $this->file, self::SEGSIZE );
			if ( $seg === false ) {
				$this->error( 'zip-bad', "read from $bytePos failed" );
			}
			$this->buffer[$segIndex] = $seg;
		}

		return $this->buffer[$segIndex];
	}

	/**
	 * Get the size of a structure in bytes. See unpack() for the format of $struct.
	 *
	 * @param array $struct
	 * @return int
	 */
	public function getStructSize( $struct ) {
		$size = 0;
		foreach ( $struct as $type ) {
			if ( is_array( $type ) ) {
				list( , $fieldSize ) = $type;
				$size += $fieldSize;
			} else {
				$size += $type;
			}
		}

		return $size;
	}

	/**
	 * Unpack a binary structure. This is like the built-in unpack() function
	 * except nicer.
	 *
	 * @param string $string The binary data input
	 * @param array $struct An associative array giving structure members and
	 *   their types. In the key is the field name. The value may be either an
	 *   integer, in which case the field is a little-endian unsigned integer
	 *   encoded in the given number of bytes, or an array, in which case the
	 *   first element of the array is the type name, and the subsequent
	 *   elements are type-dependent parameters. Only one such type is defined:
	 *     - "string": The second array element gives the length of the string.
	 *       No zero-termination or padding is applied.
	 * @param int $offset The offset into the string at which to start unpacking
	 * @throws MWException If $struct names an unknown type
	 * @throws ZipDirectoryReaderError If the structure runs past the end of
	 *   $string, or an integer is too large to be held exactly
	 * @return array Unpacked associative array. Note that large integers in the
	 *   input may be represented as floating point numbers in the return value,
	 *   so the use of weak comparison is advised.
	 */
	public function unpack( $string, $struct, $offset = 0 ) {
		$size = $this->getStructSize( $struct );
		if ( $offset + $size > strlen( $string ) ) {
			$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
		}

		$data = array();
		$pos = $offset;
		foreach ( $struct as $key => $type ) {
			if ( is_array( $type ) ) {
				list( $typeName, $fieldSize ) = $type;
				switch ( $typeName ) {
					case 'string':
						$data[$key] = substr( $string, $pos, $fieldSize );
						$pos += $fieldSize;
						break;
					default:
						throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
				}
			} else {
				// Unsigned little-endian integer
				$length = intval( $type );

				// Calculate the value. Use an algorithm which automatically
				// upgrades the value to floating point if necessary.
				$value = 0;
				for ( $i = $length - 1; $i >= 0; $i-- ) {
					$value *= 256;
					$value += ord( $string[$pos + $i] );
				}

				// Throw an exception if there was loss of precision
				if ( $value > pow( 2, 52 ) ) {
					$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
						'This could happen if we tried to unpack a 64-bit structure ' .
						'at an invalid location.' );
				}
				$data[$key] = $value;
				$pos += $length;
			}
		}

		return $data;
	}

	/**
	 * Returns a bit from a given position in an integer value, converted to
	 * boolean.
	 *
	 * @param int $value
	 * @param int $bitIndex The index of the bit, where 0 is the LSB.
	 * @return bool
	 */
	public function testBit( $value, $bitIndex ) {
		return (bool)( ( $value >> $bitIndex ) & 1 );
	}

	/**
	 * Debugging helper function which dumps a string in hexdump -C format:
	 * sixteen bytes per row, an offset column, the hex bytes and a printable
	 * ASCII column. The dump is written to standard output.
	 *
	 * @param string $s
	 */
	public function hexDump( $s ) {
		$n = strlen( $s );
		for ( $i = 0; $i < $n; $i += 16 ) {
			printf( "%08X ", $i );
			for ( $j = 0; $j < 16; $j++ ) {
				print " ";
				if ( $j == 8 ) {
					print " ";
				}
				if ( $i + $j >= $n ) {
					// Two characters, matching the width of "%02X", so that
					// the ASCII column of a short final row stays aligned.
					print "  ";
				} else {
					printf( "%02X", ord( $s[$i + $j] ) );
				}
			}

			print " |";
			for ( $j = 0; $j < 16; $j++ ) {
				if ( $i + $j >= $n ) {
					print " ";
				} elseif ( ctype_print( $s[$i + $j] ) ) {
					print $s[$i + $j];
				} else {
					print '.';
				}
			}
			print "|\n";
		}
	}
}

/**
 * Internal exception class. Will be caught by ZipDirectoryReader::execute()
 * and converted to a fatal Status.
 */
class ZipDirectoryReaderError extends Exception {
	/** The error code, suitable for Status::fatal() */
	protected $errorCode;

	/**
	 * @param string $code Error code
	 */
	function __construct( $code ) {
		$this->errorCode = $code;
		parent::__construct( "ZipDirectoryReader error: $code" );
	}

	/**
	 * @return string The error code passed to the constructor
	 */
	function getErrorCode() {
		return $this->errorCode;
	}
}