MediaWiki  REL1_20
ZipDirectoryReader.php
Go to the documentation of this file.
00001 <?php
00031 class ZipDirectoryReader {
00089         public static function read( $fileName, $callback, $options = array() ) {
00090                 $zdr = new self( $fileName, $callback, $options );
00091                 return $zdr->execute();
00092         }
00093 
00095         var $fileName;
00096 
00098         var $file;
00099 
00101         var $fileLength;
00102 
00104         var $buffer;
00105 
00107         var $callback;
00108 
00110         var $zip64 = false;
00111 
00113         var $eocdr, $eocdr64, $eocdr64Locator;
00114 
00115         var $data;
00116 
00118         const ZIP64_EXTRA_HEADER = 0x0001;
00119 
00121         const SEGSIZE = 16384;
00122 
00124         const GENERAL_UTF8 = 11;
00125 
00127         const GENERAL_CD_ENCRYPTED = 13;
00128 
00132         protected function __construct( $fileName, $callback, $options ) {
00133                 $this->fileName = $fileName;
00134                 $this->callback = $callback;
00135 
00136                 if ( isset( $options['zip64'] ) ) {
00137                         $this->zip64 = $options['zip64'];
00138                 }
00139         }
00140 
00146         function execute() {
00147                 $this->file = fopen( $this->fileName, 'r' );
00148                 $this->data = array();
00149                 if ( !$this->file ) {
00150                         return Status::newFatal( 'zip-file-open-error' );
00151                 }
00152 
00153                 $status = Status::newGood();
00154                 try {
00155                         $this->readEndOfCentralDirectoryRecord();
00156                         if ( $this->zip64 ) {
00157                                 list( $offset, $size ) = $this->findZip64CentralDirectory();
00158                                 $this->readCentralDirectory( $offset, $size );
00159                         } else {
00160                                 if ( $this->eocdr['CD size'] == 0xffffffff
00161                                         || $this->eocdr['CD offset'] == 0xffffffff
00162                                         || $this->eocdr['CD entries total'] == 0xffff )
00163                                 {
00164                                         $this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
00165                                                 'but we are in legacy mode. Rejecting this upload is necessary to avoid '.
00166                                                 'opening vulnerabilities on clients using OpenJDK 7 or later.' );
00167                                 }
00168 
00169                                 list( $offset, $size ) = $this->findOldCentralDirectory();
00170                                 $this->readCentralDirectory( $offset, $size );
00171                         }
00172                 } catch ( ZipDirectoryReaderError $e ) {
00173                         $status->fatal( $e->getErrorCode() );
00174                 }
00175 
00176                 fclose( $this->file );
00177                 return $status;
00178         }
00179 
00183         function error( $code, $debugMessage ) {
00184                 wfDebug( __CLASS__.": Fatal error: $debugMessage\n" );
00185                 throw new ZipDirectoryReaderError( $code );
00186         }
00187 
00193         function readEndOfCentralDirectoryRecord() {
00194                 $info = array(
00195                         'signature' => 4,
00196                         'disk' => 2,
00197                         'CD start disk' => 2,
00198                         'CD entries this disk' => 2,
00199                         'CD entries total' => 2,
00200                         'CD size' => 4,
00201                         'CD offset' => 4,
00202                         'file comment length' => 2,
00203                 );
00204                 $structSize = $this->getStructSize( $info );
00205                 $startPos = $this->getFileLength() - 65536 - $structSize;
00206                 if ( $startPos < 0 ) {
00207                         $startPos = 0;
00208                 }
00209 
00210                 $block = $this->getBlock( $startPos );
00211                 $sigPos = strrpos( $block, "PK\x05\x06" );
00212                 if ( $sigPos === false ) {
00213                         $this->error( 'zip-wrong-format',
00214                                 "zip file lacks EOCDR signature. It probably isn't a zip file." );
00215                 }
00216 
00217                 $this->eocdr = $this->unpack( substr( $block, $sigPos ), $info );
00218                 $this->eocdr['EOCDR size'] = $structSize + $this->eocdr['file comment length'];
00219 
00220                 if ( $structSize + $this->eocdr['file comment length'] != strlen( $block ) - $sigPos ) {
00221                         $this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
00222                 }
00223                 if (   $this->eocdr['disk'] !== 0
00224                         || $this->eocdr['CD start disk'] !== 0 )
00225                 {
00226                         $this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
00227                 }
00228                 $this->eocdr += $this->unpack(
00229                         $block,
00230                         array( 'file comment' => array( 'string', $this->eocdr['file comment length'] ) ),
00231                         $sigPos + $structSize );
00232                 $this->eocdr['position'] = $startPos + $sigPos;
00233         }
00234 
00239         function readZip64EndOfCentralDirectoryLocator() {
00240                 $info = array(
00241                         'signature' => array( 'string', 4 ),
00242                         'eocdr64 start disk' => 4,
00243                         'eocdr64 offset' => 8,
00244                         'number of disks' => 4,
00245                 );
00246                 $structSize = $this->getStructSize( $info );
00247 
00248                 $block = $this->getBlock( $this->getFileLength() - $this->eocdr['EOCDR size']
00249                         - $structSize, $structSize );
00250                 $this->eocdr64Locator = $data = $this->unpack( $block, $info );
00251 
00252                 if ( $data['signature'] !== "PK\x06\x07" ) {
00253                         // Note: Java will allow this and continue to read the
00254                         // EOCDR64, so we have to reject the upload, we can't
00255                         // just use the EOCDR header instead.
00256                         $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
00257                 }
00258         }
00259 
00264         function readZip64EndOfCentralDirectoryRecord() {
00265                 if (   $this->eocdr64Locator['eocdr64 start disk'] != 0
00266                         || $this->eocdr64Locator['number of disks'] != 0 )
00267                 {
00268                         $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
00269                 }
00270 
00271                 $info = array(
00272                         'signature' => array( 'string', 4 ),
00273                         'EOCDR64 size' => 8,
00274                         'version made by' => 2,
00275                         'version needed' => 2,
00276                         'disk' => 4,
00277                         'CD start disk' => 4,
00278                         'CD entries this disk' => 8,
00279                         'CD entries total' => 8,
00280                         'CD size' => 8,
00281                         'CD offset' => 8
00282                 );
00283                 $structSize = $this->getStructSize( $info );
00284                 $block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
00285                 $this->eocdr64 = $data = $this->unpack( $block, $info );
00286                 if ( $data['signature'] !== "PK\x06\x06" ) {
00287                         $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
00288                 }
00289                 if (   $data['disk'] !== 0
00290                         || $data['CD start disk'] !== 0 )
00291                 {
00292                         $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
00293                 }
00294         }
00295 
00302         function findOldCentralDirectory() {
00303                 $size = $this->eocdr['CD size'];
00304                 $offset = $this->eocdr['CD offset'];
00305                 $endPos = $this->eocdr['position'];
00306 
00307                 // Some readers use the EOCDR position instead of the offset field
00308                 // to find the directory, so to be safe, we check if they both agree.
00309                 if ( $offset + $size != $endPos ) {
00310                         $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
00311                                 'of central directory record' );
00312                 }
00313                 return array( $offset, $size );
00314         }
00315 
00322         function findZip64CentralDirectory() {
00323                 // The spec is ambiguous about the exact rules of precedence between the
00324                 // ZIP64 headers and the original headers. Here we follow zip_util.c
00325                 // from OpenJDK 7.
00326                 $size = $this->eocdr['CD size'];
00327                 $offset = $this->eocdr['CD offset'];
00328                 $numEntries = $this->eocdr['CD entries total'];
00329                 $endPos = $this->eocdr['position'];
00330                 if (   $size == 0xffffffff
00331                         || $offset == 0xffffffff
00332                         || $numEntries == 0xffff )
00333                 {
00334                         $this->readZip64EndOfCentralDirectoryLocator();
00335 
00336                         if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
00337                                 $this->readZip64EndOfCentralDirectoryRecord();
00338                                 if ( isset( $this->eocdr64['CD offset'] ) ) {
00339                                         $size = $this->eocdr64['CD size'];
00340                                         $offset = $this->eocdr64['CD offset'];
00341                                         $endPos = $this->eocdr64Locator['eocdr64 offset'];
00342                                 }
00343                         }
00344                 }
00345                 // Some readers use the EOCDR position instead of the offset field
00346                 // to find the directory, so to be safe, we check if they both agree.
00347                 if ( $offset + $size != $endPos ) {
00348                         $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
00349                                 'of central directory record' );
00350                 }
00351                 return array( $offset, $size );
00352         }
00353 
00357         function readCentralDirectory( $offset, $size ) {
00358                 $block = $this->getBlock( $offset, $size );
00359 
00360                 $fixedInfo = array(
00361                         'signature' => array( 'string', 4 ),
00362                         'version made by' => 2,
00363                         'version needed' => 2,
00364                         'general bits' => 2,
00365                         'compression method' => 2,
00366                         'mod time' => 2,
00367                         'mod date' => 2,
00368                         'crc-32' => 4,
00369                         'compressed size' => 4,
00370                         'uncompressed size' => 4,
00371                         'name length' => 2,
00372                         'extra field length' => 2,
00373                         'comment length' => 2,
00374                         'disk number start' => 2,
00375                         'internal attrs' => 2,
00376                         'external attrs' => 4,
00377                         'local header offset' => 4,
00378                 );
00379                 $fixedSize = $this->getStructSize( $fixedInfo );
00380 
00381                 $pos = 0;
00382                 while ( $pos < $size ) {
00383                         $data = $this->unpack( $block, $fixedInfo, $pos );
00384                         $pos += $fixedSize;
00385 
00386                         if ( $data['signature'] !== "PK\x01\x02" ) {
00387                                 $this->error( 'zip-bad', 'Invalid signature found in directory entry' );
00388                         }
00389 
00390                         $variableInfo = array(
00391                                 'name' => array( 'string', $data['name length'] ),
00392                                 'extra field' => array( 'string', $data['extra field length'] ),
00393                                 'comment' => array( 'string', $data['comment length'] ),
00394                         );
00395                         $data += $this->unpack( $block, $variableInfo, $pos );
00396                         $pos += $this->getStructSize( $variableInfo );
00397 
00398                         if (   $this->zip64 && (
00399                                    $data['compressed size'] == 0xffffffff
00400                                 || $data['uncompressed size'] == 0xffffffff
00401                                 || $data['local header offset'] == 0xffffffff ) )
00402                         {
00403                                 $zip64Data = $this->unpackZip64Extra( $data['extra field'] );
00404                                 if ( $zip64Data ) {
00405                                         $data = $zip64Data + $data;
00406                                 }
00407                         }
00408 
00409                         if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
00410                                 $this->error( 'zip-unsupported', 'central directory encryption is not supported' );
00411                         }
00412 
00413                         // Convert the timestamp into MediaWiki format
00414                         // For the format, please see the MS-DOS 2.0 Programmer's Reference,
00415                         // pages 3-5 and 3-6.
00416                         $time = $data['mod time'];
00417                         $date = $data['mod date'];
00418 
00419                         $year = 1980 + ( $date >> 9 );
00420                         $month = ( $date >> 5 ) & 15;
00421                         $day = $date & 31;
00422                         $hour = ( $time >> 11 ) & 31;
00423                         $minute = ( $time >> 5 ) & 63;
00424                         $second = ( $time & 31 ) * 2;
00425                         $timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
00426                                 $year, $month, $day, $hour, $minute, $second );
00427 
00428                         // Convert the character set in the file name
00429                         if ( !function_exists( 'iconv' )
00430                                 || $this->testBit( $data['general bits'], self::GENERAL_UTF8 ) )
00431                         {
00432                                 $name = $data['name'];
00433                         } else {
00434                                 $name = iconv( 'CP437', 'UTF-8', $data['name'] );
00435                         }
00436 
00437                         // Compile a data array for the user, with a sensible format
00438                         $userData = array(
00439                                 'name' => $name,
00440                                 'mtime' => $timestamp,
00441                                 'size' => $data['uncompressed size'],
00442                         );
00443                         call_user_func( $this->callback, $userData );
00444                 }
00445         }
00446 
00451         function unpackZip64Extra( $extraField ) {
00452                 $extraHeaderInfo = array(
00453                         'id' => 2,
00454                         'size' => 2,
00455                 );
00456                 $extraHeaderSize = $this->getStructSize( $extraHeaderInfo );
00457 
00458                 $zip64ExtraInfo = array(
00459                         'uncompressed size' => 8,
00460                         'compressed size' => 8,
00461                         'local header offset' => 8,
00462                         'disk number start' => 4,
00463                 );
00464 
00465                 $extraPos = 0;
00466                 while ( $extraPos < strlen( $extraField ) ) {
00467                         $extra = $this->unpack( $extraField, $extraHeaderInfo, $extraPos );
00468                         $extraPos += $extraHeaderSize;
00469                         $extra += $this->unpack( $extraField,
00470                                 array( 'data' => array( 'string', $extra['size'] ) ),
00471                                 $extraPos );
00472                         $extraPos += $extra['size'];
00473 
00474                         if ( $extra['id'] == self::ZIP64_EXTRA_HEADER ) {
00475                                 return $this->unpack( $extra['data'], $zip64ExtraInfo );
00476                         }
00477                 }
00478 
00479                 return false;
00480         }
00481 
00485         function getFileLength() {
00486                 if ( $this->fileLength === null ) {
00487                         $stat = fstat( $this->file );
00488                         $this->fileLength = $stat['size'];
00489                 }
00490                 return $this->fileLength;
00491         }
00492 
00503         function getBlock( $start, $length = null ) {
00504                 $fileLength = $this->getFileLength();
00505                 if ( $start >= $fileLength ) {
00506                         $this->error( 'zip-bad', "getBlock() requested position $start, " .
00507                                 "file length is $fileLength" );
00508                 }
00509                 if ( $length === null ) {
00510                         $length = $fileLength - $start;
00511                 }
00512                 $end = $start + $length;
00513                 if ( $end > $fileLength ) {
00514                         $this->error( 'zip-bad', "getBlock() requested end position $end, " .
00515                                 "file length is $fileLength" );
00516                 }
00517                 $startSeg = floor( $start / self::SEGSIZE );
00518                 $endSeg = ceil( $end / self::SEGSIZE );
00519 
00520                 $block = '';
00521                 for ( $segIndex = $startSeg; $segIndex <= $endSeg; $segIndex++ ) {
00522                         $block .= $this->getSegment( $segIndex );
00523                 }
00524 
00525                 $block = substr( $block,
00526                         $start - $startSeg * self::SEGSIZE,
00527                         $length );
00528 
00529                 if ( strlen( $block ) < $length ) {
00530                         $this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
00531                 }
00532 
00533                 return $block;
00534         }
00535 
00546         function getSegment( $segIndex ) {
00547                 if ( !isset( $this->buffer[$segIndex] ) ) {
00548                         $bytePos = $segIndex * self::SEGSIZE;
00549                         if ( $bytePos >= $this->getFileLength() ) {
00550                                 $this->buffer[$segIndex] = '';
00551                                 return '';
00552                         }
00553                         if ( fseek( $this->file, $bytePos ) ) {
00554                                 $this->error( 'zip-bad', "seek to $bytePos failed" );
00555                         }
00556                         $seg = fread( $this->file, self::SEGSIZE );
00557                         if ( $seg === false ) {
00558                                 $this->error( 'zip-bad', "read from $bytePos failed" );
00559                         }
00560                         $this->buffer[$segIndex] = $seg;
00561                 }
00562                 return $this->buffer[$segIndex];
00563         }
00564 
00569         function getStructSize( $struct ) {
00570                 $size = 0;
00571                 foreach ( $struct as $type ) {
00572                         if ( is_array( $type ) ) {
00573                                 list( $typeName, $fieldSize ) = $type;
00574                                 $size += $fieldSize;
00575                         } else {
00576                                 $size += $type;
00577                         }
00578                 }
00579                 return $size;
00580         }
00581 
00603         function unpack( $string, $struct, $offset = 0 ) {
00604                 $size = $this->getStructSize( $struct );
00605                 if ( $offset + $size > strlen( $string ) ) {
00606                         $this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
00607                 }
00608 
00609                 $data = array();
00610                 $pos = $offset;
00611                 foreach ( $struct as $key => $type ) {
00612                         if ( is_array( $type ) ) {
00613                                 list( $typeName, $fieldSize ) = $type;
00614                                 switch ( $typeName ) {
00615                                 case 'string':
00616                                         $data[$key] = substr( $string, $pos, $fieldSize );
00617                                         $pos += $fieldSize;
00618                                         break;
00619                                 default:
00620                                         throw new MWException( __METHOD__.": invalid type \"$typeName\"" );
00621                                 }
00622                         } else {
00623                                 // Unsigned little-endian integer
00624                                 $length = intval( $type );
00625                                 $bytes = substr( $string, $pos, $length );
00626 
00627                                 // Calculate the value. Use an algorithm which automatically
00628                                 // upgrades the value to floating point if necessary.
00629                                 $value = 0;
00630                                 for ( $i = $length - 1; $i >= 0; $i-- ) {
00631                                         $value *= 256;
00632                                         $value += ord( $string[$pos + $i] );
00633                                 }
00634 
00635                                 // Throw an exception if there was loss of precision
00636                                 if ( $value > pow( 2, 52 ) ) {
00637                                         $this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
00638                                                 'This could happen if we tried to unpack a 64-bit structure ' .
00639                                                 'at an invalid location.' );
00640                                 }
00641                                 $data[$key] = $value;
00642                                 $pos += $length;
00643                         }
00644                 }
00645 
00646                 return $data;
00647         }
00648 
00657         function testBit( $value, $bitIndex ) {
00658                 return (bool)( ( $value >> $bitIndex ) & 1 );
00659         }
00660 
00664         function hexDump( $s ) {
00665                 $n = strlen( $s );
00666                 for ( $i = 0; $i < $n; $i += 16 ) {
00667                         printf( "%08X ", $i );
00668                         for ( $j = 0; $j < 16; $j++ ) {
00669                                 print " ";
00670                                 if ( $j == 8 ) {
00671                                         print " ";
00672                                 }
00673                                 if ( $i + $j >= $n ) {
00674                                         print "  ";
00675                                 } else {
00676                                         printf( "%02X", ord( $s[$i + $j] ) );
00677                                 }
00678                         }
00679 
00680                         print "  |";
00681                         for ( $j = 0; $j < 16; $j++ ) {
00682                                 if ( $i + $j >= $n ) {
00683                                         print " ";
00684                                 } elseif ( ctype_print( $s[$i + $j] ) ) {
00685                                         print $s[$i + $j];
00686                                 } else {
00687                                         print '.';
00688                                 }
00689                         }
00690                         print "|\n";
00691                 }
00692         }
00693 }
00694 
/**
 * Exception thrown by ZipDirectoryReader::error() to abort processing.
 * It carries the error code that execute() copies into the returned Status.
 */
class ZipDirectoryReaderError extends Exception {
        /** Error code passed to the constructor */
        public $errorCode;

        /**
         * @param $code string Error code, also embedded in the exception message.
         */
        function __construct( $code ) {
                parent::__construct( "ZipDirectoryReader error: $code" );
                $this->errorCode = $code;
        }

        /**
         * @return string The error code given at construction time.
         */
        function getErrorCode() {
                return $this->errorCode;
        }
}