MediaWiki  REL1_22
ZipDirectoryReader.php
Go to the documentation of this file.
00001 <?php
/**
 * Reader for the central directory of a ZIP file.
 *
 * Only the directory structures at the end of the file are parsed; file
 * contents are never decompressed. For every directory entry found, a
 * user-supplied callback is invoked with a small summary array.
 *
 * Use the static read() method as the entry point; the constructor is
 * protected.
 */
class ZipDirectoryReader {
    /**
     * Read the directory of a ZIP file and report each entry to a callback.
     *
     * @param string $fileName Path handed to fopen()
     * @param callable $callback Called once per directory entry with a single
     *   array argument containing:
     *     - name:  the entry's file name (converted from CP437 to UTF-8 with
     *              iconv unless the entry's UTF-8 flag is set or iconv is
     *              unavailable)
     *     - mtime: modification time as a 14-digit YYYYMMDDHHMMSS string
     *     - size:  the uncompressed size
     * @param array $options Associative array of options. Recognised key:
     *     - zip64: if set, stored as the ZIP64 mode flag (default false)
     * @return Status A good Status, or a fatal one carrying one of the error
     *   codes zip-file-open-error, zip-wrong-format, zip-bad or
     *   zip-unsupported.
     */
    public static function read( $fileName, $callback, $options = array() ) {
        $zdr = new self( $fileName, $callback, $options );
        return $zdr->execute();
    }

    /** The file name, as passed to read() */
    public $fileName;

    /** The file handle opened by execute() */
    public $file;

    /** The cached length of the file, or null if not yet determined */
    public $fileLength;

    /** Cache of file segments already read, indexed by segment number */
    public $buffer = array();

    /** The callback invoked for each directory entry */
    public $callback;

    /** Whether ZIP64 structures should be interpreted */
    public $zip64 = false;

    /** Parsed end-of-central-directory structures */
    public $eocdr, $eocdr64, $eocdr64Locator;

    /** Reset to an empty array by execute() */
    public $data;

    /** The "extra field" header ID for ZIP64 extended information */
    const ZIP64_EXTRA_HEADER = 0x0001;

    /** The size in bytes of one cached file segment */
    const SEGSIZE = 16384;

    /** Bit index in "general bits" marking a UTF-8 file name */
    const GENERAL_UTF8 = 11;

    /** Bit index in "general bits" marking an encrypted central directory */
    const GENERAL_CD_ENCRYPTED = 13;

    /**
     * Protected constructor; use the static read() method instead.
     *
     * @param string $fileName
     * @param callable $callback
     * @param array $options See read()
     */
    protected function __construct( $fileName, $callback, $options ) {
        $this->fileName = $fileName;
        $this->callback = $callback;

        if ( isset( $options['zip64'] ) ) {
            $this->zip64 = $options['zip64'];
        }
    }

    /**
     * Open the file, read its directory and close it again.
     *
     * ZipDirectoryReaderError exceptions raised while parsing are converted
     * into a fatal Status. Any other exception is passed on to the caller
     * after the file handle has been closed.
     *
     * @return Status
     */
    public function execute() {
        $this->file = fopen( $this->fileName, 'r' );
        $this->data = array();
        if ( !$this->file ) {
            return Status::newFatal( 'zip-file-open-error' );
        }

        $status = Status::newGood();
        try {
            $this->readEndOfCentralDirectoryRecord();
            if ( $this->zip64 ) {
                list( $offset, $size ) = $this->findZip64CentralDirectory();
                $this->readCentralDirectory( $offset, $size );
            } else {
                if ( $this->eocdr['CD size'] == 0xffffffff
                    || $this->eocdr['CD offset'] == 0xffffffff
                    || $this->eocdr['CD entries total'] == 0xffff )
                {
                    $this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
                        'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
                        'opening vulnerabilities on clients using OpenJDK 7 or later.' );
                }

                list( $offset, $size ) = $this->findOldCentralDirectory();
                $this->readCentralDirectory( $offset, $size );
            }
        } catch ( ZipDirectoryReaderError $e ) {
            $status->fatal( $e->getErrorCode() );
        } catch ( Exception $e ) {
            // Not a parse error (for example an exception thrown by the
            // callback). Release the handle before letting it propagate.
            fclose( $this->file );
            throw $e;
        }

        fclose( $this->file );
        return $status;
    }

    /**
     * Log a debug message and abort parsing.
     *
     * @param string $code Error code to carry in the exception
     * @param string $debugMessage Text written to the debug log
     * @throws ZipDirectoryReaderError Always
     */
    public function error( $code, $debugMessage ) {
        wfDebug( __CLASS__ . ": Fatal error: $debugMessage\n" );
        throw new ZipDirectoryReaderError( $code );
    }

    /**
     * Locate and parse the end of central directory record (EOCDR).
     *
     * The tail of the file (at most 65536 bytes plus the fixed record size)
     * is searched for the last occurrence of the EOCDR signature. The parsed
     * record is stored in $this->eocdr together with the derived keys
     * 'EOCDR size', 'file comment' and 'position'.
     *
     * @throws ZipDirectoryReaderError If the signature is missing, if bytes
     *   follow the file comment, or if the disk fields are non-zero
     */
    public function readEndOfCentralDirectoryRecord() {
        $info = array(
            'signature' => 4,
            'disk' => 2,
            'CD start disk' => 2,
            'CD entries this disk' => 2,
            'CD entries total' => 2,
            'CD size' => 4,
            'CD offset' => 4,
            'file comment length' => 2,
        );
        $structSize = $this->getStructSize( $info );
        $startPos = $this->getFileLength() - 65536 - $structSize;
        if ( $startPos < 0 ) {
            $startPos = 0;
        }

        $block = $this->getBlock( $startPos );
        $sigPos = strrpos( $block, "PK\x05\x06" );
        if ( $sigPos === false ) {
            $this->error( 'zip-wrong-format',
                "zip file lacks EOCDR signature. It probably isn't a zip file." );
        }

        $this->eocdr = $this->unpack( substr( $block, $sigPos ), $info );
        $this->eocdr['EOCDR size'] = $structSize + $this->eocdr['file comment length'];

        // The record plus its comment must extend exactly to the end of the file
        if ( $structSize + $this->eocdr['file comment length'] != strlen( $block ) - $sigPos ) {
            $this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
        }
        if ( $this->eocdr['disk'] !== 0
            || $this->eocdr['CD start disk'] !== 0 )
        {
            $this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
        }
        $this->eocdr += $this->unpack(
            $block,
            array( 'file comment' => array( 'string', $this->eocdr['file comment length'] ) ),
            $sigPos + $structSize );
        $this->eocdr['position'] = $startPos + $sigPos;
    }

    /**
     * Parse the ZIP64 end of central directory locator, which is expected
     * immediately before the EOCDR, and store it in $this->eocdr64Locator.
     *
     * @throws ZipDirectoryReaderError If the locator signature is wrong
     */
    public function readZip64EndOfCentralDirectoryLocator() {
        $info = array(
            'signature' => array( 'string', 4 ),
            'eocdr64 start disk' => 4,
            'eocdr64 offset' => 8,
            'number of disks' => 4,
        );
        $structSize = $this->getStructSize( $info );

        $block = $this->getBlock( $this->getFileLength() - $this->eocdr['EOCDR size']
            - $structSize, $structSize );
        $this->eocdr64Locator = $data = $this->unpack( $block, $info );

        if ( $data['signature'] !== "PK\x06\x07" ) {
            // Note: Java will allow this and continue to read the
            // EOCDR64, so we have to reject the upload, we can't
            // just use the EOCDR header instead.
            $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
        }
    }

    /**
     * Parse the ZIP64 end of central directory record at the offset given by
     * the locator, and store it in $this->eocdr64.
     *
     * @throws ZipDirectoryReaderError If a signature is wrong or any disk
     *   field is non-zero
     */
    public function readZip64EndOfCentralDirectoryRecord() {
        if ( $this->eocdr64Locator['eocdr64 start disk'] != 0
            || $this->eocdr64Locator['number of disks'] != 0 )
        {
            $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
        }

        $info = array(
            'signature' => array( 'string', 4 ),
            'EOCDR64 size' => 8,
            'version made by' => 2,
            'version needed' => 2,
            'disk' => 4,
            'CD start disk' => 4,
            'CD entries this disk' => 8,
            'CD entries total' => 8,
            'CD size' => 8,
            'CD offset' => 8
        );
        $structSize = $this->getStructSize( $info );
        $block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
        $this->eocdr64 = $data = $this->unpack( $block, $info );
        if ( $data['signature'] !== "PK\x06\x06" ) {
            $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
        }
        if ( $data['disk'] !== 0
            || $data['CD start disk'] !== 0 )
        {
            $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
        }
    }

    /**
     * Find the central directory using only the legacy EOCDR fields.
     *
     * @return array List containing the offset and size of the directory
     * @throws ZipDirectoryReaderError If the directory does not end exactly
     *   where the EOCDR begins
     */
    public function findOldCentralDirectory() {
        $size = $this->eocdr['CD size'];
        $offset = $this->eocdr['CD offset'];
        $endPos = $this->eocdr['position'];

        // Some readers use the EOCDR position instead of the offset field
        // to find the directory, so to be safe, we check if they both agree.
        if ( $offset + $size != $endPos ) {
            $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
                'of central directory record' );
        }
        return array( $offset, $size );
    }

    /**
     * Find the central directory, consulting the ZIP64 structures when any of
     * the legacy EOCDR size, offset or entry-count fields holds its maximum
     * value.
     *
     * @return array List containing the offset and size of the directory
     * @throws ZipDirectoryReaderError If the directory does not end exactly
     *   where the relevant end-of-directory record begins
     */
    public function findZip64CentralDirectory() {
        // The spec is ambiguous about the exact rules of precedence between the
        // ZIP64 headers and the original headers. Here we follow zip_util.c
        // from OpenJDK 7.
        $size = $this->eocdr['CD size'];
        $offset = $this->eocdr['CD offset'];
        $numEntries = $this->eocdr['CD entries total'];
        $endPos = $this->eocdr['position'];
        if ( $size == 0xffffffff
            || $offset == 0xffffffff
            || $numEntries == 0xffff )
        {
            $this->readZip64EndOfCentralDirectoryLocator();

            if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
                $this->readZip64EndOfCentralDirectoryRecord();
                if ( isset( $this->eocdr64['CD offset'] ) ) {
                    $size = $this->eocdr64['CD size'];
                    $offset = $this->eocdr64['CD offset'];
                    $endPos = $this->eocdr64Locator['eocdr64 offset'];
                }
            }
        }
        // Some readers use the EOCDR position instead of the offset field
        // to find the directory, so to be safe, we check if they both agree.
        if ( $offset + $size != $endPos ) {
            $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
                'of central directory record' );
        }
        return array( $offset, $size );
    }

    /**
     * Parse every entry of the central directory and pass a summary of each
     * one to the callback.
     *
     * @param int|float $offset Byte offset of the directory within the file
     * @param int|float $size Size of the directory in bytes
     * @throws ZipDirectoryReaderError If an entry has a bad signature, runs
     *   past the end of the directory, or has the directory-encryption bit set
     */
    public function readCentralDirectory( $offset, $size ) {
        $block = $this->getBlock( $offset, $size );

        $fixedInfo = array(
            'signature' => array( 'string', 4 ),
            'version made by' => 2,
            'version needed' => 2,
            'general bits' => 2,
            'compression method' => 2,
            'mod time' => 2,
            'mod date' => 2,
            'crc-32' => 4,
            'compressed size' => 4,
            'uncompressed size' => 4,
            'name length' => 2,
            'extra field length' => 2,
            'comment length' => 2,
            'disk number start' => 2,
            'internal attrs' => 2,
            'external attrs' => 4,
            'local header offset' => 4,
        );
        $fixedSize = $this->getStructSize( $fixedInfo );

        $pos = 0;
        while ( $pos < $size ) {
            $data = $this->unpack( $block, $fixedInfo, $pos );
            $pos += $fixedSize;

            if ( $data['signature'] !== "PK\x01\x02" ) {
                $this->error( 'zip-bad', 'Invalid signature found in directory entry' );
            }

            // The variable-length fields follow the fixed header directly
            $variableInfo = array(
                'name' => array( 'string', $data['name length'] ),
                'extra field' => array( 'string', $data['extra field length'] ),
                'comment' => array( 'string', $data['comment length'] ),
            );
            $data += $this->unpack( $block, $variableInfo, $pos );
            $pos += $this->getStructSize( $variableInfo );

            if ( $this->zip64 && (
                   $data['compressed size'] == 0xffffffff
                || $data['uncompressed size'] == 0xffffffff
                || $data['local header offset'] == 0xffffffff ) )
            {
                $zip64Data = $this->unpackZip64Extra( $data['extra field'] );
                if ( $zip64Data ) {
                    // Values from the ZIP64 extra field take precedence
                    $data = $zip64Data + $data;
                }
            }

            if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
                $this->error( 'zip-unsupported', 'central directory encryption is not supported' );
            }

            // Convert the timestamp into MediaWiki format
            // For the format, please see the MS-DOS 2.0 Programmer's Reference,
            // pages 3-5 and 3-6.
            $time = $data['mod time'];
            $date = $data['mod date'];

            $year = 1980 + ( $date >> 9 );
            $month = ( $date >> 5 ) & 15;
            $day = $date & 31;
            $hour = ( $time >> 11 ) & 31;
            $minute = ( $time >> 5 ) & 63;
            $second = ( $time & 31 ) * 2;
            $timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
                $year, $month, $day, $hour, $minute, $second );

            // Convert the character set in the file name
            if ( !function_exists( 'iconv' )
                || $this->testBit( $data['general bits'], self::GENERAL_UTF8 ) )
            {
                $name = $data['name'];
            } else {
                $name = iconv( 'CP437', 'UTF-8', $data['name'] );
            }

            // Compile a data array for the user, with a sensible format
            $userData = array(
                'name' => $name,
                'mtime' => $timestamp,
                'size' => $data['uncompressed size'],
            );
            call_user_func( $this->callback, $userData );
        }
    }

    /**
     * Scan an "extra field" for the ZIP64 extended information block and
     * unpack it.
     *
     * @param string $extraField Raw extra field bytes of a directory entry
     * @return array|bool Associative array with the keys 'uncompressed size',
     *   'compressed size', 'local header offset' and 'disk number start', or
     *   false if no block with the ZIP64 header ID is present
     * @throws ZipDirectoryReaderError If a block runs past the end of the
     *   field, or the ZIP64 block is shorter than the full structure
     */
    public function unpackZip64Extra( $extraField ) {
        $extraHeaderInfo = array(
            'id' => 2,
            'size' => 2,
        );
        $extraHeaderSize = $this->getStructSize( $extraHeaderInfo );

        $zip64ExtraInfo = array(
            'uncompressed size' => 8,
            'compressed size' => 8,
            'local header offset' => 8,
            'disk number start' => 4,
        );

        $extraPos = 0;
        while ( $extraPos < strlen( $extraField ) ) {
            $extra = $this->unpack( $extraField, $extraHeaderInfo, $extraPos );
            $extraPos += $extraHeaderSize;
            $extra += $this->unpack( $extraField,
                array( 'data' => array( 'string', $extra['size'] ) ),
                $extraPos );
            $extraPos += $extra['size'];

            if ( $extra['id'] == self::ZIP64_EXTRA_HEADER ) {
                return $this->unpack( $extra['data'], $zip64ExtraInfo );
            }
        }

        return false;
    }

    /**
     * Get the length of the file, querying fstat() on first use and caching
     * the answer in $this->fileLength.
     *
     * @return int
     */
    public function getFileLength() {
        if ( $this->fileLength === null ) {
            $stat = fstat( $this->file );
            $this->fileLength = $stat['size'];
        }
        return $this->fileLength;
    }

    /**
     * Get a run of bytes from the file, assembled from cached segments.
     *
     * @param int|float $start Byte offset of the first byte wanted
     * @param int|float|null $length Number of bytes wanted, or null for
     *   everything from $start to the end of the file
     * @return string Exactly $length bytes
     * @throws ZipDirectoryReaderError If the requested range is negative or
     *   not wholly inside the file, or if less data than expected was read
     */
    public function getBlock( $start, $length = null ) {
        $fileLength = $this->getFileLength();
        if ( $start < 0 ) {
            // Reject this up front instead of relying on a later fseek()
            // failure to catch it.
            $this->error( 'zip-bad', "getBlock() requested negative position $start" );
        }
        if ( $start >= $fileLength ) {
            $this->error( 'zip-bad', "getBlock() requested position $start, " .
                "file length is $fileLength" );
        }
        if ( $length === null ) {
            $length = $fileLength - $start;
        } elseif ( $length < 0 ) {
            $this->error( 'zip-bad', "getBlock() requested negative length $length" );
        }
        $end = $start + $length;
        if ( $end > $fileLength ) {
            $this->error( 'zip-bad', "getBlock() requested end position $end, " .
                "file length is $fileLength" );
        }
        if ( $length == 0 ) {
            // Nothing to read, so do not touch (or cache) any segment
            return '';
        }

        // $startSeg is the first segment needed. $endSeg is an exclusive
        // bound: the first segment lying wholly at or beyond $end.
        $startSeg = (int)floor( $start / self::SEGSIZE );
        $endSeg = (int)ceil( $end / self::SEGSIZE );

        $block = '';
        for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
            $block .= $this->getSegment( $segIndex );
        }

        $block = substr( $block,
            (int)( $start - $startSeg * self::SEGSIZE ),
            $length );

        if ( strlen( $block ) < $length ) {
            $this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
        }

        return $block;
    }

    /**
     * Get one SEGSIZE-sized segment of the file, reading it from disk on
     * first use and serving it from $this->buffer afterwards.
     *
     * @param int $segIndex Segment number; the segment starts at byte
     *   $segIndex * SEGSIZE
     * @return string The segment data; an empty string if the segment starts
     *   at or beyond the end of the file
     * @throws ZipDirectoryReaderError If seeking or reading fails
     */
    public function getSegment( $segIndex ) {
        if ( !isset( $this->buffer[$segIndex] ) ) {
            $bytePos = $segIndex * self::SEGSIZE;
            if ( $bytePos >= $this->getFileLength() ) {
                $this->buffer[$segIndex] = '';
                return '';
            }
            // fseek() returns 0 on success, -1 on failure
            if ( fseek( $this->file, $bytePos ) ) {
                $this->error( 'zip-bad', "seek to $bytePos failed" );
            }
            $seg = fread( $this->file, self::SEGSIZE );
            if ( $seg === false ) {
                $this->error( 'zip-bad', "read from $bytePos failed" );
            }
            $this->buffer[$segIndex] = $seg;
        }
        return $this->buffer[$segIndex];
    }

    /**
     * Get the total size in bytes of a structure described in the format
     * accepted by unpack().
     *
     * @param array $struct Map of field name to either an integer byte count
     *   or an array whose second element is the byte count
     * @return int
     */
    public function getStructSize( $struct ) {
        $size = 0;
        foreach ( $struct as $type ) {
            if ( is_array( $type ) ) {
                list( , $fieldSize ) = $type;
                $size += $fieldSize;
            } else {
                $size += $type;
            }
        }
        return $size;
    }

    /**
     * Unpack a binary structure from a string.
     *
     * @param string $string The binary data
     * @param array $struct Map of field name to type. A type is either an
     *   integer N, meaning an N-byte unsigned little-endian integer, or
     *   array( 'string', N ), meaning N raw bytes.
     * @param int $offset Byte offset in $string at which the structure starts
     * @return array Map of field name to unpacked value, in $struct order
     * @throws ZipDirectoryReaderError If the structure would extend past the
     *   end of $string, or an integer exceeds 2^52
     * @throws MWException If an array type other than 'string' is given
     */
    public function unpack( $string, $struct, $offset = 0 ) {
        $size = $this->getStructSize( $struct );
        if ( $offset + $size > strlen( $string ) ) {
            $this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
        }

        $data = array();
        $pos = $offset;
        foreach ( $struct as $key => $type ) {
            if ( is_array( $type ) ) {
                list( $typeName, $fieldSize ) = $type;
                switch ( $typeName ) {
                case 'string':
                    $data[$key] = substr( $string, $pos, $fieldSize );
                    $pos += $fieldSize;
                    break;
                default:
                    throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
                }
            } else {
                // Unsigned little-endian integer
                $length = intval( $type );

                // Calculate the value. Use an algorithm which automatically
                // upgrades the value to floating point if necessary.
                $value = 0;
                for ( $i = $length - 1; $i >= 0; $i-- ) {
                    $value *= 256;
                    $value += ord( $string[$pos + $i] );
                }

                // Throw an exception if there was loss of precision
                if ( $value > pow( 2, 52 ) ) {
                    $this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
                        'This could happen if we tried to unpack a 64-bit structure ' .
                        'at an invalid location.' );
                }
                $data[$key] = $value;
                $pos += $length;
            }
        }

        return $data;
    }

    /**
     * Test whether a given bit is set in an integer.
     *
     * @param int $value The value to inspect
     * @param int $bitIndex Index of the bit, where 0 is the least significant
     * @return bool
     */
    public function testBit( $value, $bitIndex ) {
        return (bool)( ( $value >> $bitIndex ) & 1 );
    }

    /**
     * Print a hex dump of a string to standard output, 16 bytes per line,
     * with an offset column and a printable-character column. Debugging aid.
     *
     * @param string $s
     */
    public function hexDump( $s ) {
        $n = strlen( $s );
        for ( $i = 0; $i < $n; $i += 16 ) {
            printf( "%08X ", $i );
            for ( $j = 0; $j < 16; $j++ ) {
                print " ";
                if ( $j == 8 ) {
                    // Extra gap between the two groups of eight bytes
                    print " ";
                }
                if ( $i + $j >= $n ) {
                    print "  ";
                } else {
                    printf( "%02X", ord( $s[$i + $j] ) );
                }
            }

            print "  |";
            for ( $j = 0; $j < 16; $j++ ) {
                if ( $i + $j >= $n ) {
                    print " ";
                } elseif ( ctype_print( $s[$i + $j] ) ) {
                    print $s[$i + $j];
                } else {
                    print '.';
                }
            }
            print "|\n";
        }
    }
}
00694 
/**
 * Exception thrown by ZipDirectoryReader::error() to abort parsing. It
 * carries a short error code string in addition to the exception message.
 */
class ZipDirectoryReaderError extends Exception {
    /** The error code given to the constructor */
    public $errorCode;

    /**
     * @param string $code Error code; also embedded in the exception message
     */
    public function __construct( $code ) {
        $this->errorCode = $code;
        parent::__construct( "ZipDirectoryReader error: $code" );
    }

    /**
     * Get the error code given to the constructor.
     *
     * @return string
     */
    public function getErrorCode() {
        return $this->errorCode;
    }
}