MediaWiki  REL1_19
ZipDirectoryReader.php
Go to the documentation of this file.
00001 <?php
00002 
/**
 * Reads the central directory of a ZIP file and reports each entry to a
 * callback, without extracting any file data.
 *
 * The reader is deliberately strict: archives using features it does not
 * implement (multiple disks, central directory encryption, ZIP64 markers while
 * in legacy mode) and archives whose structures disagree with each other are
 * rejected with an error code rather than parsed on a best-effort basis.
 */
class ZipDirectoryReader {
	/**
	 * Read a ZIP file and call a function for each directory entry found in it.
	 *
	 * @param $fileName string Path of the file to open (passed to fopen()).
	 * @param $callback callback Invoked once per central directory entry with
	 *    a single array argument containing:
	 *      - name:  the entry's file name (converted from CP437 to UTF-8 when
	 *               iconv is available and the entry's UTF-8 flag is not set)
	 *      - mtime: the modification time as a 14-digit YYYYMMDDHHMMSS string
	 *      - size:  the uncompressed size
	 * @param $options array Associative array of options. The only key read is
	 *    'zip64', which enables interpretation of ZIP64 structures when true.
	 *
	 * @return Status A good Status on success, or a fatal Status carrying one
	 *    of the error codes 'zip-file-open-error', 'zip-wrong-format',
	 *    'zip-bad' or 'zip-unsupported'.
	 */
	public static function read( $fileName, $callback, $options = array() ) {
		$zdr = new self( $fileName, $callback, $options );
		return $zdr->execute();
	}

	/** Name of the file being read */
	public $fileName;

	/** Stream handle returned by fopen() in execute() */
	public $file;

	/** Cached file length in bytes; null until getFileLength() has run */
	public $fileLength;

	/** Cache of file segments already read, indexed by segment number */
	public $buffer = array();

	/** The per-entry callback supplied by the caller */
	public $callback;

	/** True if ZIP64 structures should be interpreted */
	public $zip64 = false;

	/** Parsed end-of-central-directory structures (arrays keyed by field name) */
	public $eocdr, $eocdr64, $eocdr64Locator;

	/** Reset to an empty array by execute(); not otherwise used in this class */
	public $data;

	/** Extra field header ID that identifies the ZIP64 extended information */
	const ZIP64_EXTRA_HEADER = 0x0001;

	/** Size in bytes of the segments read and cached by getSegment() */
	const SEGSIZE = 16384;

	/** Bit index in "general bits": the file name is already UTF-8 */
	const GENERAL_UTF8 = 11;

	/** Bit index in "general bits": the central directory is encrypted */
	const GENERAL_CD_ENCRYPTED = 13;

	/**
	 * Protected constructor; use the static read() entry point instead.
	 *
	 * @param $fileName string
	 * @param $callback callback
	 * @param $options array Only the 'zip64' key is consulted.
	 */
	protected function __construct( $fileName, $callback, $options ) {
		$this->fileName = $fileName;
		$this->callback = $callback;

		if ( isset( $options['zip64'] ) ) {
			$this->zip64 = $options['zip64'];
		}
	}

	/**
	 * Open the file, locate the central directory and read it, invoking the
	 * callback for every entry.
	 *
	 * ZipDirectoryReaderError exceptions are converted into a fatal Status.
	 * Any other exception (for example one thrown by the callback) propagates
	 * to the caller, but the file handle is closed first so it is not leaked.
	 *
	 * @return Status
	 */
	public function execute() {
		$this->file = fopen( $this->fileName, 'r' );
		$this->data = array();
		if ( !$this->file ) {
			return Status::newFatal( 'zip-file-open-error' );
		}

		$status = Status::newGood();
		try {
			$this->readEndOfCentralDirectoryRecord();
			if ( $this->zip64 ) {
				list( $offset, $size ) = $this->findZip64CentralDirectory();
				$this->readCentralDirectory( $offset, $size );
			} else {
				// In legacy mode the ZIP64 marker values must not appear at all
				if ( $this->eocdr['CD size'] == 0xffffffff
					|| $this->eocdr['CD offset'] == 0xffffffff
					|| $this->eocdr['CD entries total'] == 0xffff )
				{
					$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
						'but we are in legacy mode. Rejecting this upload is necessary to avoid '.
						'opening vulnerabilities on clients using OpenJDK 7 or later.' );
				}

				list( $offset, $size ) = $this->findOldCentralDirectory();
				$this->readCentralDirectory( $offset, $size );
			}
		} catch ( ZipDirectoryReaderError $e ) {
			$status->fatal( $e->getErrorCode() );
		} catch ( Exception $e ) {
			// Not one of ours: release the handle, then let the caller see it.
			// (A catch-and-rethrow is used instead of "finally" to stay
			// compatible with the PHP versions this file otherwise targets.)
			fclose( $this->file );
			throw $e;
		}

		fclose( $this->file );
		return $status;
	}

	/**
	 * Log a debug message and abort reading by throwing an error.
	 *
	 * @param $code string Error code, later passed to Status::fatal()
	 * @param $debugMessage string Detail written to the debug log only
	 * @throws ZipDirectoryReaderError always
	 */
	public function error( $code, $debugMessage ) {
		wfDebug( __CLASS__.": Fatal error: $debugMessage\n" );
		throw new ZipDirectoryReaderError( $code );
	}

	/**
	 * Find and parse the end of central directory record (EOCDR), storing the
	 * result in $this->eocdr.
	 *
	 * The last occurrence of the EOCDR signature within the final
	 * 65536 + 22 bytes of the file is used. The record, including its
	 * comment, must extend exactly to the end of the file.
	 */
	public function readEndOfCentralDirectoryRecord() {
		$info = array(
			'signature' => 4,
			'disk' => 2,
			'CD start disk' => 2,
			'CD entries this disk' => 2,
			'CD entries total' => 2,
			'CD size' => 4,
			'CD offset' => 4,
			'file comment length' => 2,
		);
		$structSize = $this->getStructSize( $info );
		// 65536 leaves room for the largest comment a 2-byte length can describe
		$startPos = $this->getFileLength() - 65536 - $structSize;
		if ( $startPos < 0 ) {
			$startPos = 0;
		}

		$block = $this->getBlock( $startPos );
		$sigPos = strrpos( $block, "PK\x05\x06" );
		if ( $sigPos === false ) {
			$this->error( 'zip-wrong-format',
				"zip file lacks EOCDR signature. It probably isn't a zip file." );
		}

		$this->eocdr = $this->unpack( substr( $block, $sigPos ), $info );
		$this->eocdr['EOCDR size'] = $structSize + $this->eocdr['file comment length'];

		if ( $this->eocdr['EOCDR size'] != strlen( $block ) - $sigPos ) {
			$this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
		}
		if (   $this->eocdr['disk'] !== 0
			|| $this->eocdr['CD start disk'] !== 0 )
		{
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
		}
		$this->eocdr += $this->unpack(
			$block,
			array( 'file comment' => array( 'string', $this->eocdr['file comment length'] ) ),
			$sigPos + $structSize );
		$this->eocdr['position'] = $startPos + $sigPos;
	}

	/**
	 * Parse the ZIP64 end of central directory locator, which is expected to
	 * sit immediately before the EOCDR, and store it in $this->eocdr64Locator.
	 */
	public function readZip64EndOfCentralDirectoryLocator() {
		$info = array(
			'signature' => array( 'string', 4 ),
			'eocdr64 start disk' => 4,
			'eocdr64 offset' => 8,
			'number of disks' => 4,
		);
		$structSize = $this->getStructSize( $info );

		$block = $this->getBlock( $this->getFileLength() - $this->eocdr['EOCDR size']
			- $structSize, $structSize );
		$this->eocdr64Locator = $data = $this->unpack( $block, $info );

		if ( $data['signature'] !== "PK\x06\x07" ) {
			// Note: Java will allow this and continue to read the
			// EOCDR64, so we have to reject the upload, we can't
			// just use the EOCDR header instead.
			$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
		}
	}

	/**
	 * Parse the ZIP64 end of central directory record at the offset given by
	 * the locator, and store it in $this->eocdr64.
	 *
	 * readZip64EndOfCentralDirectoryLocator() must have been called first.
	 */
	public function readZip64EndOfCentralDirectoryRecord() {
		// NOTE(review): this requires the locator's "number of disks" field to
		// be 0. Archivers may well write 1 here for an ordinary single-disk
		// archive -- confirm against the ZIP specification before relying on
		// zip64 mode. The check is left unchanged.
		if (   $this->eocdr64Locator['eocdr64 start disk'] != 0
			|| $this->eocdr64Locator['number of disks'] != 0 )
		{
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
		}

		$info = array(
			'signature' => array( 'string', 4 ),
			'EOCDR64 size' => 8,
			'version made by' => 2,
			'version needed' => 2,
			'disk' => 4,
			'CD start disk' => 4,
			'CD entries this disk' => 8,
			'CD entries total' => 8,
			'CD size' => 8,
			'CD offset' => 8
		);
		$structSize = $this->getStructSize( $info );
		$block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
		$this->eocdr64 = $data = $this->unpack( $block, $info );
		if ( $data['signature'] !== "PK\x06\x06" ) {
			$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
		}
		if (   $data['disk'] !== 0
			|| $data['CD start disk'] !== 0 )
		{
			$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
		}
	}

	/**
	 * Locate the central directory using only the legacy EOCDR fields.
	 *
	 * @return array List containing the offset and size of the central directory
	 */
	public function findOldCentralDirectory() {
		$size = $this->eocdr['CD size'];
		$offset = $this->eocdr['CD offset'];
		$endPos = $this->eocdr['position'];

		// Some readers use the EOCDR position instead of the offset field
		// to find the directory, so to be safe, we check if they both agree.
		if ( $offset + $size != $endPos ) {
			$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
				'of central directory record' );
		}
		return array( $offset, $size );
	}

	/**
	 * Locate the central directory, consulting the ZIP64 structures when the
	 * legacy EOCDR contains one of the ZIP64 marker values.
	 *
	 * @return array List containing the offset and size of the central directory
	 */
	public function findZip64CentralDirectory() {
		// The spec is ambiguous about the exact rules of precedence between the
		// ZIP64 headers and the original headers. Here we follow zip_util.c
		// from OpenJDK 7.
		$size = $this->eocdr['CD size'];
		$offset = $this->eocdr['CD offset'];
		$numEntries = $this->eocdr['CD entries total'];
		$endPos = $this->eocdr['position'];
		if (   $size == 0xffffffff
			|| $offset == 0xffffffff
			|| $numEntries == 0xffff )
		{
			$this->readZip64EndOfCentralDirectoryLocator();

			if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
				$this->readZip64EndOfCentralDirectoryRecord();
				if ( isset( $this->eocdr64['CD offset'] ) ) {
					$size = $this->eocdr64['CD size'];
					$offset = $this->eocdr64['CD offset'];
					// The directory must now end where the EOCDR64 begins
					$endPos = $this->eocdr64Locator['eocdr64 offset'];
				}
			}
		}
		// Some readers use the EOCDR position instead of the offset field
		// to find the directory, so to be safe, we check if they both agree.
		if ( $offset + $size != $endPos ) {
			$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
				'of central directory record' );
		}
		return array( $offset, $size );
	}

	/**
	 * Read the central directory at the given location and call the callback
	 * once for each entry.
	 *
	 * @param $offset int|float Byte offset of the directory within the file
	 * @param $size int|float Size of the directory in bytes
	 */
	public function readCentralDirectory( $offset, $size ) {
		$block = $this->getBlock( $offset, $size );

		$fixedInfo = array(
			'signature' => array( 'string', 4 ),
			'version made by' => 2,
			'version needed' => 2,
			'general bits' => 2,
			'compression method' => 2,
			'mod time' => 2,
			'mod date' => 2,
			'crc-32' => 4,
			'compressed size' => 4,
			'uncompressed size' => 4,
			'name length' => 2,
			'extra field length' => 2,
			'comment length' => 2,
			'disk number start' => 2,
			'internal attrs' => 2,
			'external attrs' => 4,
			'local header offset' => 4,
		);
		$fixedSize = $this->getStructSize( $fixedInfo );

		$pos = 0;
		while ( $pos < $size ) {
			$data = $this->unpack( $block, $fixedInfo, $pos );
			$pos += $fixedSize;

			if ( $data['signature'] !== "PK\x01\x02" ) {
				$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
			}

			// The variable-length part follows the fixed header directly
			$variableInfo = array(
				'name' => array( 'string', $data['name length'] ),
				'extra field' => array( 'string', $data['extra field length'] ),
				'comment' => array( 'string', $data['comment length'] ),
			);
			$data += $this->unpack( $block, $variableInfo, $pos );
			$pos += $this->getStructSize( $variableInfo );

			if (   $this->zip64 && (
				   $data['compressed size'] == 0xffffffff
				|| $data['uncompressed size'] == 0xffffffff
				|| $data['local header offset'] == 0xffffffff ) )
			{
				$zip64Data = $this->unpackZip64Extra( $data['extra field'] );
				if ( $zip64Data ) {
					// Left operand wins, so ZIP64 values override the 32-bit ones
					$data = $zip64Data + $data;
				}
			}

			if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
				$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
			}

			// Convert the timestamp into MediaWiki format
			// For the format, please see the MS-DOS 2.0 Programmer's Reference,
			// pages 3-5 and 3-6.
			$time = $data['mod time'];
			$date = $data['mod date'];

			$year = 1980 + ( $date >> 9 );
			$month = ( $date >> 5 ) & 15;
			$day = $date & 31;
			$hour = ( $time >> 11 ) & 31;
			$minute = ( $time >> 5 ) & 63;
			$second = ( $time & 31 ) * 2;
			$timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
				$year, $month, $day, $hour, $minute, $second );

			// Convert the character set in the file name
			if ( !function_exists( 'iconv' )
				|| $this->testBit( $data['general bits'], self::GENERAL_UTF8 ) )
			{
				$name = $data['name'];
			} else {
				$name = iconv( 'CP437', 'UTF-8', $data['name'] );
			}

			// Compile a data array for the user, with a sensible format
			$userData = array(
				'name' => $name,
				'mtime' => $timestamp,
				'size' => $data['uncompressed size'],
			);
			call_user_func( $this->callback, $userData );
		}
	}

	/**
	 * Scan an entry's extra field for the ZIP64 extended information record
	 * and unpack it.
	 *
	 * @param $extraField string Raw extra field data of a directory entry
	 * @return array|bool The unpacked ZIP64 fields, or false if the extra
	 *    field contains no record with the ZIP64 header ID
	 */
	public function unpackZip64Extra( $extraField ) {
		$extraHeaderInfo = array(
			'id' => 2,
			'size' => 2,
		);
		$extraHeaderSize = $this->getStructSize( $extraHeaderInfo );

		$zip64ExtraInfo = array(
			'uncompressed size' => 8,
			'compressed size' => 8,
			'local header offset' => 8,
			'disk number start' => 4,
		);

		$extraPos = 0;
		while ( $extraPos < strlen( $extraField ) ) {
			$extra = $this->unpack( $extraField, $extraHeaderInfo, $extraPos );
			$extraPos += $extraHeaderSize;
			$extra += $this->unpack( $extraField,
				array( 'data' => array( 'string', $extra['size'] ) ),
				$extraPos );
			$extraPos += $extra['size'];

			if ( $extra['id'] == self::ZIP64_EXTRA_HEADER ) {
				return $this->unpack( $extra['data'], $zip64ExtraInfo );
			}
		}

		return false;
	}

	/**
	 * Get the length of the file in bytes. The value is obtained with fstat()
	 * on first use and cached afterwards.
	 *
	 * @return int
	 */
	public function getFileLength() {
		if ( $this->fileLength === null ) {
			$stat = fstat( $this->file );
			$this->fileLength = $stat['size'];
		}
		return $this->fileLength;
	}

	/**
	 * Get the file contents from a given offset. If there are not enough
	 * bytes in the file to satisfy the request, an error is thrown.
	 *
	 * Data is fetched through getSegment(), so segments touched here stay
	 * cached for later calls. Only the segments that actually overlap the
	 * requested range are read.
	 *
	 * @param $start int|float The byte offset of the start of the block
	 * @param $length int|float|null The number of bytes to return. If omitted,
	 *    the remainder of the file is returned.
	 *
	 * @return string
	 */
	public function getBlock( $start, $length = null ) {
		$fileLength = $this->getFileLength();
		// A negative start can be computed by callers for very small files
		if ( $start < 0 || $start >= $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested position $start, " .
				"file length is $fileLength" );
		}
		if ( $length === null ) {
			$length = $fileLength - $start;
		}
		$end = $start + $length;
		if ( $end > $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested end position $end, " .
				"file length is $fileLength" );
		}
		// floor()/ceil() return floats; cast so the cache keys are integers
		$startSeg = (int)floor( $start / self::SEGSIZE );
		// Exclusive bound: index of the first segment beyond the range
		$endSeg = (int)ceil( $end / self::SEGSIZE );

		$block = '';
		for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
			$block .= $this->getSegment( $segIndex );
		}

		// Older PHP versions return false rather than '' for an empty result
		$block = (string)substr( $block,
			$start - $startSeg * self::SEGSIZE,
			$length );

		if ( strlen( $block ) < $length ) {
			$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
		}

		return $block;
	}

	/**
	 * Get a segment of the file, reading it from disk on first access and
	 * from the in-memory cache afterwards.
	 *
	 * Segments are SEGSIZE bytes long, except possibly the last one. A segment
	 * index that lies entirely beyond the end of the file yields an empty
	 * string.
	 *
	 * @param $segIndex int Zero-based segment number
	 * @return string
	 */
	public function getSegment( $segIndex ) {
		if ( !isset( $this->buffer[$segIndex] ) ) {
			$bytePos = $segIndex * self::SEGSIZE;
			if ( $bytePos >= $this->getFileLength() ) {
				$this->buffer[$segIndex] = '';
				return '';
			}
			// fseek() returns 0 on success and -1 on failure
			if ( fseek( $this->file, $bytePos ) ) {
				$this->error( 'zip-bad', "seek to $bytePos failed" );
			}
			$seg = fread( $this->file, self::SEGSIZE );
			if ( $seg === false ) {
				$this->error( 'zip-bad', "read from $bytePos failed" );
			}
			$this->buffer[$segIndex] = $seg;
		}
		return $this->buffer[$segIndex];
	}

	/**
	 * Get the size in bytes of a structure described in the format accepted
	 * by unpack().
	 *
	 * @param $struct array Structure description
	 * @return int
	 */
	public function getStructSize( $struct ) {
		$size = 0;
		foreach ( $struct as $type ) {
			if ( is_array( $type ) ) {
				// array( typeName, fieldSize )
				$size += $type[1];
			} else {
				$size += $type;
			}
		}
		return $size;
	}

	/**
	 * Unpack a binary structure from a string.
	 *
	 * @param $string string The binary data
	 * @param $struct array Associative array mapping field names to types.
	 *    A type is either:
	 *      - an integer N: the field is an N-byte unsigned little-endian
	 *        integer, or
	 *      - array( 'string', N ): the field is a raw string of N bytes.
	 * @param $offset int The offset into $string at which to start unpacking
	 *
	 * @return array Associative array of unpacked values, keyed like $struct.
	 *    Integer fields too large for a PHP integer are returned as floats.
	 *
	 * @throws MWException if $struct contains an unknown array type name
	 * @throws ZipDirectoryReaderError if the structure would extend past the
	 *    end of $string, or an integer field exceeds 2^52
	 */
	public function unpack( $string, $struct, $offset = 0 ) {
		$size = $this->getStructSize( $struct );
		if ( $offset + $size > strlen( $string ) ) {
			$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
		}

		$data = array();
		$pos = $offset;
		foreach ( $struct as $key => $type ) {
			if ( is_array( $type ) ) {
				list( $typeName, $fieldSize ) = $type;
				switch ( $typeName ) {
				case 'string':
					$data[$key] = substr( $string, $pos, $fieldSize );
					$pos += $fieldSize;
					break;
				default:
					throw new MWException( __METHOD__.": invalid type \"$typeName\"" );
				}
			} else {
				// Unsigned little-endian integer
				$length = intval( $type );

				// Calculate the value. Use an algorithm which automatically
				// upgrades the value to floating point if necessary.
				$value = 0;
				for ( $i = $length - 1; $i >= 0; $i-- ) {
					$value *= 256;
					$value += ord( $string[$pos + $i] );
				}

				// Throw an exception if there was loss of precision
				if ( $value > pow( 2, 52 ) ) {
					$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
						'This could happen if we tried to unpack a 64-bit structure ' .
						'at an invalid location.' );
				}
				$data[$key] = $value;
				$pos += $length;
			}
		}

		return $data;
	}

	/**
	 * Test whether a given bit of an integer is set.
	 *
	 * @param $value int The value to inspect
	 * @param $bitIndex int Zero-based bit index, where 0 is the least
	 *    significant bit
	 * @return bool
	 */
	public function testBit( $value, $bitIndex ) {
		return (bool)( ( $value >> $bitIndex ) & 1 );
	}

	/**
	 * Debugging helper: print a hex dump of a string to standard output,
	 * 16 bytes per line, followed by the printable characters.
	 *
	 * @param $s string
	 */
	public function hexDump( $s ) {
		$n = strlen( $s );
		for ( $i = 0; $i < $n; $i += 16 ) {
			printf( "%08X ", $i );
			for ( $j = 0; $j < 16; $j++ ) {
				print " ";
				if ( $j == 8 ) {
					print " ";
				}
				if ( $i + $j >= $n ) {
					print "  ";
				} else {
					printf( "%02X", ord( $s[$i + $j] ) );
				}
			}

			print "  |";
			for ( $j = 0; $j < 16; $j++ ) {
				if ( $i + $j >= $n ) {
					print " ";
				} elseif ( ctype_print( $s[$i + $j] ) ) {
					print $s[$i + $j];
				} else {
					print '.';
				}
			}
			print "|\n";
		}
	}
}
00670 
/**
 * Exception thrown by ZipDirectoryReader::error() to abort reading.
 * ZipDirectoryReader::execute() catches it and converts the carried error
 * code into a fatal Status.
 */
class ZipDirectoryReaderError extends Exception {
	/**
	 * The error code string. This redeclares Exception's own $code property,
	 * so getCode() returns the same string.
	 */
	public $code;

	/**
	 * @param $code string Error code, e.g. 'zip-bad'
	 */
	public function __construct( $code ) {
		parent::__construct( "ZipDirectoryReader error: $code" );
		// Assign after the parent constructor has run, so the stored value
		// does not depend on whether Exception::__construct() touches $code
		// when no code argument is passed to it.
		$this->code = $code;
	}

	/**
	 * Get the error code string passed to the constructor.
	 *
	 * @return string
	 */
	public function getErrorCode() {
		return $this->code;
	}
}