MediaWiki
REL1_20
|
<?php

/**
 * Reads the central directory of a ZIP file and reports every entry found
 * there to a caller-supplied callback.
 *
 * Only the directory structures are parsed (end of central directory record,
 * the optional ZIP64 records and the central directory itself); file contents
 * are never decompressed. Any structural problem is reported through the
 * returned Status object using one of these error codes:
 *
 *   - zip-file-open-error: the file could not be opened
 *   - zip-wrong-format:    no end of central directory record was found, or
 *                          the file is empty
 *   - zip-bad:             the file is structurally inconsistent
 *   - zip-unsupported:     a feature this reader does not handle (multiple
 *                          disks, central directory encryption, ZIP64 while
 *                          in legacy mode, over-large numbers)
 */
class ZipDirectoryReader {
	/**
	 * Read a ZIP file and call $callback once for each central directory entry.
	 *
	 * @param $fileName String: path of the file to read
	 * @param $callback Callable: invoked via call_user_func() with one
	 *   associative array per entry, containing:
	 *     - name:  the file name (converted from CP437 to UTF-8 when iconv is
	 *              available and the entry's UTF-8 flag is not set)
	 *     - mtime: the modification time as a 14-digit YYYYMMDDHHMMSS string
	 *     - size:  the uncompressed size
	 * @param $options Array: recognised key:
	 *     - zip64: if set, its value enables or disables ZIP64 support
	 *              (disabled by default)
	 *
	 * @return Status as produced by execute()
	 */
	public static function read( $fileName, $callback, $options = array() ) {
		$zdr = new self( $fileName, $callback, $options );
		return $zdr->execute();
	}

	/** The file name */
	public $fileName;

	/** The opened file resource */
	public $file;

	/** The cached length of the file, or null if it has not been determined yet */
	public $fileLength;

	/** Cache of file segments, indexed by segment number (see getSegment()) */
	public $buffer;

	/** The callback invoked for each directory entry */
	public $callback;

	/** The ZIP64 mode */
	public $zip64 = false;

	/** Stored headers: EOCDR, ZIP64 EOCDR and ZIP64 EOCDR locator */
	public $eocdr, $eocdr64, $eocdr64Locator;

	/** Reset to an empty array at the start of execute() */
	public $data;

	/** The "extra field" ID for ZIP64 central directory entries */
	const ZIP64_EXTRA_HEADER = 0x0001;

	/** The segment size used by the file buffering functions */
	const SEGSIZE = 16384;

	/** The index of the "general field" bit for UTF-8 file names */
	const GENERAL_UTF8 = 11;

	/** The index of the "general field" bit for central directory encryption */
	const GENERAL_CD_ENCRYPTED = 13;

	/**
	 * Protected constructor; use the static read() entry point instead.
	 *
	 * @param $fileName String
	 * @param $callback Callable
	 * @param $options Array: see read()
	 */
	protected function __construct( $fileName, $callback, $options ) {
		$this->fileName = $fileName;
		$this->callback = $callback;

		if ( isset( $options['zip64'] ) ) {
			$this->zip64 = $options['zip64'];
		}
	}

	/**
	 * Open the file, locate and walk the central directory, and close the file.
	 *
	 * A ZipDirectoryReaderError raised while parsing is converted into a fatal
	 * Status. Any other exception (for example one thrown by the callback) is
	 * allowed to propagate, but the file handle is closed first.
	 *
	 * @return Status: good on success, fatal with one of the zip-* codes otherwise
	 */
	public function execute() {
		$this->file = fopen( $this->fileName, 'r' );
		$this->data = array();
		if ( !$this->file ) {
			return Status::newFatal( 'zip-file-open-error' );
		}

		$status = Status::newGood();
		try {
			$this->readEndOfCentralDirectoryRecord();
			if ( $this->zip64 ) {
				list( $offset, $size ) = $this->findZip64CentralDirectory();
				$this->readCentralDirectory( $offset, $size );
			} else {
				if ( $this->eocdr['CD size'] == 0xffffffff
					|| $this->eocdr['CD offset'] == 0xffffffff
					|| $this->eocdr['CD entries total'] == 0xffff )
				{
					$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
						'but we are in legacy mode. Rejecting this upload is necessary to avoid '.
						'opening vulnerabilities on clients using OpenJDK 7 or later.' );
				}

				list( $offset, $size ) = $this->findOldCentralDirectory();
				$this->readCentralDirectory( $offset, $size );
			}
		} catch ( ZipDirectoryReaderError $e ) {
			$status->fatal( $e->getErrorCode() );
		} catch ( Exception $e ) {
			// Not one of our own parse errors: release the handle before
			// letting the exception continue up the stack.
			fclose( $this->file );
			throw $e;
		}

		fclose( $this->file );
		return $status;
	}

	/**
	 * Log a debug message and abort parsing.
	 *
	 * @param $code String: error code, later passed to Status::fatal() by execute()
	 * @param $debugMessage String: explanation written to the debug log only
	 * @throws ZipDirectoryReaderError always
	 */
	public function error( $code, $debugMessage ) {
		wfDebug( __CLASS__.": Fatal error: $debugMessage\n" );
		throw new ZipDirectoryReaderError( $code );
	}

	/**
	 * Locate and read the end of central directory record (EOCDR).
	 *
	 * The last 64KB plus one record size of the file is searched backwards
	 * for the EOCDR signature. On success $this->eocdr holds the unpacked
	 * fields plus 'EOCDR size', 'file comment' and 'position' (the absolute
	 * file offset of the record).
	 *
	 * @throws ZipDirectoryReaderError zip-wrong-format if the file is empty or
	 *   has no EOCDR signature; zip-bad if there are bytes after the file
	 *   comment; zip-unsupported for multi-disk archives
	 */
	public function readEndOfCentralDirectoryRecord() {
		$info = array(
			'signature' => 4,
			'disk' => 2,
			'CD start disk' => 2,
			'CD entries this disk' => 2,
			'CD entries total' => 2,
			'CD size' => 4,
			'CD offset' => 4,
			'file comment length' => 2,
		);
		$structSize = $this->getStructSize( $info );
		$fileLength = $this->getFileLength();

		// getBlock() rejects any start position in an empty file with
		// 'zip-bad'; an empty file is not a zip at all, so say so explicitly.
		if ( $fileLength === 0 ) {
			$this->error( 'zip-wrong-format', 'The file is empty.' );
		}

		$startPos = $fileLength - 65536 - $structSize;
		if ( $startPos < 0 ) {
			$startPos = 0;
		}

		$block = $this->getBlock( $startPos );
		$sigPos = strrpos( $block, "PK\x05\x06" );
		if ( $sigPos === false ) {
			$this->error( 'zip-wrong-format',
				"zip file lacks EOCDR signature. It probably isn't a zip file." );
		}

		$this->eocdr = $this->unpack( substr( $block, $sigPos ), $info );
		$this->eocdr['EOCDR size'] = $structSize + $this->eocdr['file comment length'];

		// The record, including its comment, must run exactly to the end of the file
		if ( $structSize + $this->eocdr['file comment length'] != strlen( $block ) - $sigPos ) {
			$this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
		}
		if ( $this->eocdr['disk'] !== 0
			|| $this->eocdr['CD start disk'] !== 0 )
		{
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
		}
		$this->eocdr += $this->unpack(
			$block,
			array( 'file comment' => array( 'string', $this->eocdr['file comment length'] ) ),
			$sigPos + $structSize );
		$this->eocdr['position'] = $startPos + $sigPos;
	}

	/**
	 * Read the ZIP64 end of central directory locator, which is expected to
	 * sit immediately before the EOCDR, into $this->eocdr64Locator.
	 *
	 * @throws ZipDirectoryReaderError zip-bad if the signature is wrong
	 */
	public function readZip64EndOfCentralDirectoryLocator() {
		$info = array(
			'signature' => array( 'string', 4 ),
			'eocdr64 start disk' => 4,
			'eocdr64 offset' => 8,
			'number of disks' => 4,
		);
		$structSize = $this->getStructSize( $info );

		$block = $this->getBlock( $this->getFileLength() - $this->eocdr['EOCDR size']
			- $structSize, $structSize );
		$this->eocdr64Locator = $data = $this->unpack( $block, $info );

		if ( $data['signature'] !== "PK\x06\x07" ) {
			// Note: Java will allow this and continue to read the
			// EOCDR64, so we have to reject the upload, we can't
			// just use the EOCDR header instead.
			$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
		}
	}

	/**
	 * Read the ZIP64 end of central directory record from the offset given by
	 * the previously read locator, and store it in $this->eocdr64.
	 *
	 * @throws ZipDirectoryReaderError zip-bad if the signature is wrong;
	 *   zip-unsupported if the locator or the record refer to another disk
	 */
	public function readZip64EndOfCentralDirectoryRecord() {
		if ( $this->eocdr64Locator['eocdr64 start disk'] != 0
			|| $this->eocdr64Locator['number of disks'] != 0 )
		{
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
		}

		$info = array(
			'signature' => array( 'string', 4 ),
			'EOCDR64 size' => 8,
			'version made by' => 2,
			'version needed' => 2,
			'disk' => 4,
			'CD start disk' => 4,
			'CD entries this disk' => 8,
			'CD entries total' => 8,
			'CD size' => 8,
			'CD offset' => 8
		);
		$structSize = $this->getStructSize( $info );
		$block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
		$this->eocdr64 = $data = $this->unpack( $block, $info );
		if ( $data['signature'] !== "PK\x06\x06" ) {
			$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
		}
		if ( $data['disk'] !== 0
			|| $data['CD start disk'] !== 0 )
		{
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
		}
	}

	/**
	 * Find the location of the central directory as described by the
	 * (non-ZIP64) EOCDR.
	 *
	 * @return Array: list containing offset, then size
	 * @throws ZipDirectoryReaderError zip-bad if the directory does not end
	 *   exactly where the EOCDR starts
	 */
	public function findOldCentralDirectory() {
		$size = $this->eocdr['CD size'];
		$offset = $this->eocdr['CD offset'];
		$endPos = $this->eocdr['position'];

		// Some readers use the EOCDR position instead of the offset field
		// to find the directory, so to be safe, we check if they both agree.
		if ( $offset + $size != $endPos ) {
			$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
				'of central directory record' );
		}
		return array( $offset, $size );
	}

	/**
	 * Find the location of the central directory, consulting the ZIP64
	 * records when the EOCDR fields carry the "see ZIP64" marker values.
	 *
	 * @return Array: list containing offset, then size
	 * @throws ZipDirectoryReaderError zip-bad if the directory does not end
	 *   exactly where the relevant end record starts
	 */
	public function findZip64CentralDirectory() {
		// The spec is ambiguous about the exact rules of precedence between the
		// ZIP64 headers and the original headers. Here we follow zip_util.c
		// from OpenJDK 7.
		$size = $this->eocdr['CD size'];
		$offset = $this->eocdr['CD offset'];
		$numEntries = $this->eocdr['CD entries total'];
		$endPos = $this->eocdr['position'];
		if ( $size == 0xffffffff
			|| $offset == 0xffffffff
			|| $numEntries == 0xffff )
		{
			$this->readZip64EndOfCentralDirectoryLocator();

			if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
				$this->readZip64EndOfCentralDirectoryRecord();
				if ( isset( $this->eocdr64['CD offset'] ) ) {
					$size = $this->eocdr64['CD size'];
					$offset = $this->eocdr64['CD offset'];
					$endPos = $this->eocdr64Locator['eocdr64 offset'];
				}
			}
		}
		// Some readers use the EOCDR position instead of the offset field
		// to find the directory, so to be safe, we check if they both agree.
		if ( $offset + $size != $endPos ) {
			$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
				'of central directory record' );
		}
		return array( $offset, $size );
	}

	/**
	 * Read the central directory at the given location and invoke the
	 * callback once per entry.
	 *
	 * @param $offset Integer: file offset of the start of the directory
	 * @param $size Integer: length of the directory in bytes
	 * @throws ZipDirectoryReaderError zip-bad on a wrong entry signature or a
	 *   truncated entry; zip-unsupported if an entry has the central
	 *   directory encryption bit set
	 */
	public function readCentralDirectory( $offset, $size ) {
		$block = $this->getBlock( $offset, $size );

		$fixedInfo = array(
			'signature' => array( 'string', 4 ),
			'version made by' => 2,
			'version needed' => 2,
			'general bits' => 2,
			'compression method' => 2,
			'mod time' => 2,
			'mod date' => 2,
			'crc-32' => 4,
			'compressed size' => 4,
			'uncompressed size' => 4,
			'name length' => 2,
			'extra field length' => 2,
			'comment length' => 2,
			'disk number start' => 2,
			'internal attrs' => 2,
			'external attrs' => 4,
			'local header offset' => 4,
		);
		$fixedSize = $this->getStructSize( $fixedInfo );

		$pos = 0;
		while ( $pos < $size ) {
			$data = $this->unpack( $block, $fixedInfo, $pos );
			$pos += $fixedSize;

			if ( $data['signature'] !== "PK\x01\x02" ) {
				$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
			}

			// The variable-length part follows the fixed header directly
			$variableInfo = array(
				'name' => array( 'string', $data['name length'] ),
				'extra field' => array( 'string', $data['extra field length'] ),
				'comment' => array( 'string', $data['comment length'] ),
			);
			$data += $this->unpack( $block, $variableInfo, $pos );
			$pos += $this->getStructSize( $variableInfo );

			if ( $this->zip64 && (
				$data['compressed size'] == 0xffffffff
				|| $data['uncompressed size'] == 0xffffffff
				|| $data['local header offset'] == 0xffffffff ) )
			{
				$zip64Data = $this->unpackZip64Extra( $data['extra field'] );
				if ( $zip64Data ) {
					// Values from the ZIP64 extra field take precedence
					$data = $zip64Data + $data;
				}
			}

			if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
				$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
			}

			// Convert the timestamp into MediaWiki format
			// For the format, please see the MS-DOS 2.0 Programmer's Reference,
			// pages 3-5 and 3-6.
			$time = $data['mod time'];
			$date = $data['mod date'];

			$year = 1980 + ( $date >> 9 );
			$month = ( $date >> 5 ) & 15;
			$day = $date & 31;
			$hour = ( $time >> 11 ) & 31;
			$minute = ( $time >> 5 ) & 63;
			$second = ( $time & 31 ) * 2;
			$timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
				$year, $month, $day, $hour, $minute, $second );

			// Convert the character set in the file name
			if ( !function_exists( 'iconv' )
				|| $this->testBit( $data['general bits'], self::GENERAL_UTF8 ) )
			{
				$name = $data['name'];
			} else {
				$name = iconv( 'CP437', 'UTF-8', $data['name'] );
			}

			// Compile a data array for the user, with a sensible format
			$userData = array(
				'name' => $name,
				'mtime' => $timestamp,
				'size' => $data['uncompressed size'],
			);
			call_user_func( $this->callback, $userData );
		}
	}

	/**
	 * Scan an "extra field" string for a ZIP64 extended information record
	 * and unpack the first one found.
	 *
	 * @param $extraField String: the raw extra field of a directory entry
	 * @return Array|bool: the unpacked ZIP64 values, or false if the extra
	 *   field contains no record with the ZIP64 header ID
	 */
	public function unpackZip64Extra( $extraField ) {
		$extraHeaderInfo = array(
			'id' => 2,
			'size' => 2,
		);
		$extraHeaderSize = $this->getStructSize( $extraHeaderInfo );

		$zip64ExtraInfo = array(
			'uncompressed size' => 8,
			'compressed size' => 8,
			'local header offset' => 8,
			'disk number start' => 4,
		);

		$extraPos = 0;
		while ( $extraPos < strlen( $extraField ) ) {
			$extra = $this->unpack( $extraField, $extraHeaderInfo, $extraPos );
			$extraPos += $extraHeaderSize;
			$extra += $this->unpack( $extraField,
				array( 'data' => array( 'string', $extra['size'] ) ),
				$extraPos );
			$extraPos += $extra['size'];

			if ( $extra['id'] == self::ZIP64_EXTRA_HEADER ) {
				return $this->unpack( $extra['data'], $zip64ExtraInfo );
			}
		}

		return false;
	}

	/**
	 * Get the length of the file, as reported by fstat(). The value is
	 * cached after the first call.
	 *
	 * @return Integer
	 */
	public function getFileLength() {
		if ( $this->fileLength === null ) {
			$stat = fstat( $this->file );
			$this->fileLength = $stat['size'];
		}
		return $this->fileLength;
	}

	/**
	 * Get the file contents from a given offset.
	 *
	 * @param $start Integer: the byte offset of the start of the block
	 * @param $length Integer|null: the number of bytes to return, or null to
	 *   return everything from $start to the end of the file
	 * @return String
	 * @throws ZipDirectoryReaderError zip-bad if $start is at or beyond the
	 *   end of the file, if the block would extend past the end of the file,
	 *   or if fewer bytes than requested could be read
	 */
	public function getBlock( $start, $length = null ) {
		$fileLength = $this->getFileLength();
		if ( $start >= $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested position $start, " .
				"file length is $fileLength" );
		}
		if ( $length === null ) {
			$length = $fileLength - $start;
		}
		$end = $start + $length;
		if ( $end > $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested end position $end, " .
				"file length is $fileLength" );
		}
		// floor()/ceil() return floats; cast so that the segment cache is
		// indexed by real integers.
		$startSeg = (int)floor( $start / self::SEGSIZE );
		$endSeg = (int)ceil( $end / self::SEGSIZE );

		// The range is inclusive of $endSeg, so this may fetch one segment
		// more than strictly needed; substr() below trims the excess.
		$block = '';
		for ( $segIndex = $startSeg; $segIndex <= $endSeg; $segIndex++ ) {
			$block .= $this->getSegment( $segIndex );
		}

		$block = substr( $block,
			$start - $startSeg * self::SEGSIZE,
			$length );

		if ( strlen( $block ) < $length ) {
			$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
		}

		return $block;
	}

	/**
	 * Get a section of the file starting at position $segIndex * self::SEGSIZE,
	 * of length self::SEGSIZE (or less at the end of the file). The result is
	 * cached in $this->buffer, so each segment is read from disk only once.
	 *
	 * @param $segIndex Integer: the segment number
	 * @return String: the segment contents, or an empty string if the segment
	 *   starts at or beyond the end of the file
	 * @throws ZipDirectoryReaderError zip-bad if seeking or reading fails
	 */
	public function getSegment( $segIndex ) {
		if ( !isset( $this->buffer[$segIndex] ) ) {
			$bytePos = $segIndex * self::SEGSIZE;
			if ( $bytePos >= $this->getFileLength() ) {
				$this->buffer[$segIndex] = '';
				return '';
			}
			// fseek() returns 0 on success, -1 on failure
			if ( fseek( $this->file, $bytePos ) ) {
				$this->error( 'zip-bad', "seek to $bytePos failed" );
			}
			$seg = fread( $this->file, self::SEGSIZE );
			if ( $seg === false ) {
				$this->error( 'zip-bad', "read from $bytePos failed" );
			}
			$this->buffer[$segIndex] = $seg;
		}
		return $this->buffer[$segIndex];
	}

	/**
	 * Get the size in bytes of a structure described in the format accepted
	 * by unpack().
	 *
	 * @param $struct Array: see unpack()
	 * @return Integer
	 */
	public function getStructSize( $struct ) {
		$size = 0;
		foreach ( $struct as $type ) {
			if ( is_array( $type ) ) {
				// array( type name, field size )
				$size += $type[1];
			} else {
				$size += $type;
			}
		}
		return $size;
	}

	/**
	 * Unpack a binary structure. This is like the built-in unpack() function
	 * except nicer.
	 *
	 * @param $string String: the binary data input
	 * @param $struct Array: the structure description, mapping each field
	 *   name to either:
	 *     - an integer N: the field is an N-byte unsigned little-endian integer
	 *     - array( 'string', N ): the field is a raw string of N bytes
	 * @param $offset Integer: the offset into $string at which to start unpacking
	 *
	 * @return Array: the unpacked values, keyed by field name. Integers that
	 *   do not fit into a native int are returned as floats.
	 * @throws ZipDirectoryReaderError zip-bad if the structure would extend
	 *   past the end of $string; zip-unsupported if an integer exceeds 2^52
	 * @throws MWException if $struct names an unknown field type
	 */
	public function unpack( $string, $struct, $offset = 0 ) {
		$size = $this->getStructSize( $struct );
		if ( $offset + $size > strlen( $string ) ) {
			$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
		}

		$data = array();
		$pos = $offset;
		foreach ( $struct as $key => $type ) {
			if ( is_array( $type ) ) {
				list( $typeName, $fieldSize ) = $type;
				switch ( $typeName ) {
				case 'string':
					$data[$key] = substr( $string, $pos, $fieldSize );
					$pos += $fieldSize;
					break;
				default:
					throw new MWException( __METHOD__.": invalid type \"$typeName\"" );
				}
			} else {
				// Unsigned little-endian integer
				$length = intval( $type );

				// Calculate the value. Use an algorithm which automatically
				// upgrades the value to floating point if necessary.
				$value = 0;
				for ( $i = $length - 1; $i >= 0; $i-- ) {
					$value *= 256;
					$value += ord( $string[$pos + $i] );
				}

				// Throw an exception if there was loss of precision
				if ( $value > pow( 2, 52 ) ) {
					$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
						'This could happen if we tried to unpack a 64-bit structure ' .
						'at an invalid location.' );
				}
				$data[$key] = $value;
				$pos += $length;
			}
		}

		return $data;
	}

	/**
	 * Returns a bit from a given position in an integer value, converted to
	 * boolean.
	 *
	 * @param $value Integer
	 * @param $bitIndex Integer: the index of the bit, where 0 is the LSB
	 * @return Boolean
	 */
	public function testBit( $value, $bitIndex ) {
		return (bool)( ( $value >> $bitIndex ) & 1 );
	}

	/**
	 * Debugging helper function which dumps a string in hexdump -C format:
	 * an offset column, up to 16 hex bytes, then the printable characters.
	 * The dump is printed directly to the output.
	 *
	 * @param $s String
	 */
	public function hexDump( $s ) {
		$n = strlen( $s );
		for ( $i = 0; $i < $n; $i += 16 ) {
			printf( "%08X ", $i );
			for ( $j = 0; $j < 16; $j++ ) {
				print " ";
				if ( $j == 8 ) {
					print " ";
				}
				if ( $i + $j >= $n ) {
					print " ";
				} else {
					printf( "%02X", ord( $s[$i + $j] ) );
				}
			}

			print " |";
			for ( $j = 0; $j < 16; $j++ ) {
				if ( $i + $j >= $n ) {
					print " ";
				} elseif ( ctype_print( $s[$i + $j] ) ) {
					print $s[$i + $j];
				} else {
					print '.';
				}
			}
			print "|\n";
		}
	}
}

/**
 * Internal exception class. Will be caught by private code
 * (ZipDirectoryReader::execute()) and turned into a fatal Status.
 */
class ZipDirectoryReaderError extends Exception {
	/** The zip-* error code passed to the constructor */
	public $errorCode;

	/**
	 * @param $code String: the error code
	 */
	public function __construct( $code ) {
		$this->errorCode = $code;
		parent::__construct( "ZipDirectoryReader error: $code" );
	}

	/**
	 * @return String: the error code given at construction time
	 */
	public function getErrorCode() {
		return $this->errorCode;
	}
}