MediaWiki  REL1_23
ZipDirectoryReader.php
Go to the documentation of this file.
00001 <?php
/**
 * Reads the central directory of a ZIP file and reports every entry in it to
 * a caller-supplied callback. Only the directory structures at the end of the
 * archive are parsed; no member data is decompressed.
 *
 * Parsing is deliberately strict: anything unexpected (trailing bytes, a
 * central directory that does not sit directly before its end record,
 * multi-disk archives, central directory encryption, ...) is turned into a
 * fatal Status instead of being tolerated.
 */
class ZipDirectoryReader {
    /**
     * Read the central directory of a ZIP file and invoke a callback once per
     * directory entry.
     *
     * @param string $fileName Path of the file to open
     * @param callable $callback Called with a single array argument for each
     *   entry. The array has the keys:
     *     - name: the entry name; converted from CP437 to UTF-8 when iconv is
     *       available and the entry's UTF-8 flag is not set
     *     - mtime: modification time formatted as YYYYMMDDHHMMSS
     *     - size: the uncompressed size
     * @param array $options Supported key:
     *     - zip64: when true, ZIP64 structures are parsed. When false (the
     *       default), an end of central directory record that signals ZIP64
     *       causes the file to be rejected.
     * @return Status Good on success, otherwise fatal with one of the keys
     *   zip-file-open-error, zip-wrong-format, zip-bad or zip-unsupported
     */
    public static function read( $fileName, $callback, $options = array() ) {
        $zdr = new self( $fileName, $callback, $options );

        return $zdr->execute();
    }

    /** The name of the file being read */
    protected $fileName;

    /** The handle returned by fopen() for $fileName */
    protected $file;

    /** Cached file size in bytes, or null while it has not been determined */
    protected $fileLength;

    /** Cache of file segments already read, indexed by segment number */
    protected $buffer = array();

    /** The callback invoked for every central directory entry */
    protected $callback;

    /** Whether ZIP64 structures should be parsed */
    protected $zip64 = false;

    /** Parsed end-of-central-directory structures */
    protected $eocdr, $eocdr64, $eocdr64Locator;

    /** Reset to an empty array by execute(); not read anywhere in this class */
    protected $data;

    /** Header ID of the ZIP64 extended information extra field */
    const ZIP64_EXTRA_HEADER = 0x0001;

    /** Size in bytes of the segments the file is read and cached in */
    const SEGSIZE = 16384;

    /** Bit index in "general bits" marking the name as UTF-8 */
    const GENERAL_UTF8 = 11;

    /** Bit index in "general bits" marking an encrypted central directory */
    const GENERAL_CD_ENCRYPTED = 13;

    /**
     * Protected constructor; instances are created through read().
     *
     * @param string $fileName
     * @param callable $callback
     * @param array $options Only the 'zip64' key is consulted
     */
    protected function __construct( $fileName, $callback, $options ) {
        $this->fileName = $fileName;
        $this->callback = $callback;

        if ( isset( $options['zip64'] ) ) {
            $this->zip64 = $options['zip64'];
        }
    }

    /**
     * Open the file, parse its central directory and feed the callback.
     *
     * ZipDirectoryReaderError exceptions raised while parsing are converted
     * into a fatal Status. Any other exception (for instance one thrown by the
     * callback) is passed on to the caller after the file has been closed.
     *
     * @return Status
     */
    public function execute() {
        // 'b' keeps the read byte-exact on platforms that distinguish text
        // and binary streams.
        $this->file = fopen( $this->fileName, 'rb' );
        $this->data = array();
        if ( !$this->file ) {
            return Status::newFatal( 'zip-file-open-error' );
        }

        $status = Status::newGood();
        try {
            $this->readEndOfCentralDirectoryRecord();
            if ( $this->zip64 ) {
                list( $offset, $size ) = $this->findZip64CentralDirectory();
                $this->readCentralDirectory( $offset, $size );
            } else {
                if ( $this->eocdr['CD size'] == 0xffffffff
                    || $this->eocdr['CD offset'] == 0xffffffff
                    || $this->eocdr['CD entries total'] == 0xffff
                ) {
                    $this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
                        'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
                        'opening vulnerabilities on clients using OpenJDK 7 or later.' );
                }

                list( $offset, $size ) = $this->findOldCentralDirectory();
                $this->readCentralDirectory( $offset, $size );
            }
        } catch ( ZipDirectoryReaderError $e ) {
            $status->fatal( $e->getErrorCode() );
        } catch ( Exception $e ) {
            // Unexpected failure: do not leak the handle, then let the
            // exception continue to the caller unchanged.
            fclose( $this->file );
            throw $e;
        }

        fclose( $this->file );

        return $status;
    }

    /**
     * Log a debug message and abort parsing.
     *
     * @param string $code Error code, used by execute() as the Status message key
     * @param string $debugMessage Detail written to the debug log only
     * @throws ZipDirectoryReaderError Always
     */
    public function error( $code, $debugMessage ) {
        wfDebug( __CLASS__ . ": Fatal error: $debugMessage\n" );
        throw new ZipDirectoryReaderError( $code );
    }

    /**
     * Locate and parse the end of central directory record (EOCDR) and store
     * it in $this->eocdr, including the derived keys 'EOCDR size',
     * 'file comment' and 'position'.
     *
     * The search covers the tail of the file: the fixed record size plus
     * 65536 bytes, which is enough for the largest comment a 2-byte length
     * field can describe.
     *
     * @throws ZipDirectoryReaderError If no signature is found, if bytes
     *   follow the file comment, or if the archive spans several disks
     */
    public function readEndOfCentralDirectoryRecord() {
        $info = array(
            'signature' => 4,
            'disk' => 2,
            'CD start disk' => 2,
            'CD entries this disk' => 2,
            'CD entries total' => 2,
            'CD size' => 4,
            'CD offset' => 4,
            'file comment length' => 2,
        );
        $structSize = $this->getStructSize( $info );
        $startPos = $this->getFileLength() - 65536 - $structSize;
        if ( $startPos < 0 ) {
            $startPos = 0;
        }

        $block = $this->getBlock( $startPos );
        // The last occurrence wins; the length check below then rejects the
        // file if that record is not the very end of the file.
        $sigPos = strrpos( $block, "PK\x05\x06" );
        if ( $sigPos === false ) {
            $this->error( 'zip-wrong-format',
                "zip file lacks EOCDR signature. It probably isn't a zip file." );
        }

        $this->eocdr = $this->unpack( substr( $block, $sigPos ), $info );
        $this->eocdr['EOCDR size'] = $structSize + $this->eocdr['file comment length'];

        if ( $structSize + $this->eocdr['file comment length'] != strlen( $block ) - $sigPos ) {
            $this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
        }
        if ( $this->eocdr['disk'] !== 0
            || $this->eocdr['CD start disk'] !== 0
        ) {
            $this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
        }
        $this->eocdr += $this->unpack(
            $block,
            array( 'file comment' => array( 'string', $this->eocdr['file comment length'] ) ),
            $sigPos + $structSize );
        $this->eocdr['position'] = $startPos + $sigPos;
    }

    /**
     * Parse the ZIP64 end of central directory locator, expected directly in
     * front of the EOCDR, and store it in $this->eocdr64Locator.
     *
     * @throws ZipDirectoryReaderError If the locator signature is wrong or the
     *   locator cannot be read
     */
    public function readZip64EndOfCentralDirectoryLocator() {
        $info = array(
            'signature' => array( 'string', 4 ),
            'eocdr64 start disk' => 4,
            'eocdr64 offset' => 8,
            'number of disks' => 4,
        );
        $structSize = $this->getStructSize( $info );

        $start = $this->getFileLength() - $this->eocdr['EOCDR size'] - $structSize;
        $block = $this->getBlock( $start, $structSize );
        $this->eocdr64Locator = $data = $this->unpack( $block, $info );

        if ( $data['signature'] !== "PK\x06\x07" ) {
            // Note: Java will allow this and continue to read the
            // EOCDR64, so we have to reject the upload, we can't
            // just use the EOCDR header instead.
            $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
        }
    }

    /**
     * Parse the ZIP64 end of central directory record at the offset given by
     * the locator and store it in $this->eocdr64.
     *
     * @throws ZipDirectoryReaderError On a wrong signature or when either the
     *   locator or the record refers to more than one disk
     */
    public function readZip64EndOfCentralDirectoryRecord() {
        if ( $this->eocdr64Locator['eocdr64 start disk'] != 0
            || $this->eocdr64Locator['number of disks'] != 0
        ) {
            $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
        }

        $info = array(
            'signature' => array( 'string', 4 ),
            'EOCDR64 size' => 8,
            'version made by' => 2,
            'version needed' => 2,
            'disk' => 4,
            'CD start disk' => 4,
            'CD entries this disk' => 8,
            'CD entries total' => 8,
            'CD size' => 8,
            'CD offset' => 8
        );
        $structSize = $this->getStructSize( $info );
        $block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
        $this->eocdr64 = $data = $this->unpack( $block, $info );
        if ( $data['signature'] !== "PK\x06\x06" ) {
            $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
        }
        if ( $data['disk'] !== 0
            || $data['CD start disk'] !== 0
        ) {
            $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
        }
    }

    /**
     * Determine the location of the central directory from the classic
     * (non-ZIP64) EOCDR.
     *
     * @return array Two elements: the directory offset and its size in bytes
     * @throws ZipDirectoryReaderError If the directory does not end exactly
     *   where the EOCDR begins
     */
    public function findOldCentralDirectory() {
        $size = $this->eocdr['CD size'];
        $offset = $this->eocdr['CD offset'];
        $endPos = $this->eocdr['position'];

        // Some readers use the EOCDR position instead of the offset field
        // to find the directory, so to be safe, we check if they both agree.
        if ( $offset + $size != $endPos ) {
            $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
                'of central directory record' );
        }

        return array( $offset, $size );
    }

    /**
     * Determine the location of the central directory, consulting the ZIP64
     * structures when the classic EOCDR carries one of the ZIP64 marker
     * values (0xffffffff for size or offset, 0xffff for the entry count).
     *
     * @return array Two elements: the directory offset and its size in bytes
     * @throws ZipDirectoryReaderError If the ZIP64 structures are invalid or
     *   the directory does not end exactly where its end record begins
     */
    public function findZip64CentralDirectory() {
        // The spec is ambiguous about the exact rules of precedence between the
        // ZIP64 headers and the original headers. Here we follow zip_util.c
        // from OpenJDK 7.
        $size = $this->eocdr['CD size'];
        $offset = $this->eocdr['CD offset'];
        $numEntries = $this->eocdr['CD entries total'];
        $endPos = $this->eocdr['position'];
        if ( $size == 0xffffffff
            || $offset == 0xffffffff
            || $numEntries == 0xffff
        ) {
            $this->readZip64EndOfCentralDirectoryLocator();

            if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
                $this->readZip64EndOfCentralDirectoryRecord();
                if ( isset( $this->eocdr64['CD offset'] ) ) {
                    $size = $this->eocdr64['CD size'];
                    $offset = $this->eocdr64['CD offset'];
                    $endPos = $this->eocdr64Locator['eocdr64 offset'];
                }
            }
        }
        // Some readers use the EOCDR position instead of the offset field
        // to find the directory, so to be safe, we check if they both agree.
        if ( $offset + $size != $endPos ) {
            $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
                'of central directory record' );
        }

        return array( $offset, $size );
    }

    /**
     * Parse every entry of the central directory and pass a summary of each
     * one to the callback.
     *
     * @param int|float $offset File position at which the directory starts
     * @param int|float $size Size of the directory in bytes
     * @throws ZipDirectoryReaderError On a bad entry signature, a truncated
     *   entry, or an entry flagged as belonging to an encrypted directory
     */
    public function readCentralDirectory( $offset, $size ) {
        $block = $this->getBlock( $offset, $size );

        $fixedInfo = array(
            'signature' => array( 'string', 4 ),
            'version made by' => 2,
            'version needed' => 2,
            'general bits' => 2,
            'compression method' => 2,
            'mod time' => 2,
            'mod date' => 2,
            'crc-32' => 4,
            'compressed size' => 4,
            'uncompressed size' => 4,
            'name length' => 2,
            'extra field length' => 2,
            'comment length' => 2,
            'disk number start' => 2,
            'internal attrs' => 2,
            'external attrs' => 4,
            'local header offset' => 4,
        );
        $fixedSize = $this->getStructSize( $fixedInfo );

        $pos = 0;
        while ( $pos < $size ) {
            $data = $this->unpack( $block, $fixedInfo, $pos );
            $pos += $fixedSize;

            if ( $data['signature'] !== "PK\x01\x02" ) {
                $this->error( 'zip-bad', 'Invalid signature found in directory entry' );
            }

            // The fixed part tells us how long the three trailing
            // variable-length fields are.
            $variableInfo = array(
                'name' => array( 'string', $data['name length'] ),
                'extra field' => array( 'string', $data['extra field length'] ),
                'comment' => array( 'string', $data['comment length'] ),
            );
            $data += $this->unpack( $block, $variableInfo, $pos );
            $pos += $this->getStructSize( $variableInfo );

            if ( $this->zip64 && (
                    $data['compressed size'] == 0xffffffff
                    || $data['uncompressed size'] == 0xffffffff
                    || $data['local header offset'] == 0xffffffff )
            ) {
                $zip64Data = $this->unpackZip64Extra( $data['extra field'] );
                if ( $zip64Data ) {
                    // Left operand wins in an array union, so the 64-bit
                    // values replace the 32-bit placeholders.
                    $data = $zip64Data + $data;
                }
            }

            if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
                $this->error( 'zip-unsupported', 'central directory encryption is not supported' );
            }

            // Convert the timestamp into MediaWiki format
            // For the format, please see the MS-DOS 2.0 Programmer's Reference,
            // pages 3-5 and 3-6.
            $time = $data['mod time'];
            $date = $data['mod date'];

            $year = 1980 + ( $date >> 9 );
            $month = ( $date >> 5 ) & 15;
            $day = $date & 31;
            $hour = ( $time >> 11 ) & 31;
            $minute = ( $time >> 5 ) & 63;
            $second = ( $time & 31 ) * 2;
            $timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
                $year, $month, $day, $hour, $minute, $second );

            // Convert the character set in the file name
            if ( !function_exists( 'iconv' )
                || $this->testBit( $data['general bits'], self::GENERAL_UTF8 )
            ) {
                $name = $data['name'];
            } else {
                $name = iconv( 'CP437', 'UTF-8', $data['name'] );
            }

            // Compile a data array for the user, with a sensible format
            $userData = array(
                'name' => $name,
                'mtime' => $timestamp,
                'size' => $data['uncompressed size'],
            );
            call_user_func( $this->callback, $userData );
        }
    }

    /**
     * Search an entry's extra field for the ZIP64 extended information block
     * and decode it.
     *
     * @param string $extraField The raw extra field of a directory entry
     * @return array|bool The decoded 64-bit fields ('uncompressed size',
     *   'compressed size', 'local header offset', 'disk number start'), or
     *   false if the extra field holds no ZIP64 block
     * @throws ZipDirectoryReaderError If a block is truncated, including a
     *   ZIP64 block that does not contain all four fields
     */
    public function unpackZip64Extra( $extraField ) {
        $extraHeaderInfo = array(
            'id' => 2,
            'size' => 2,
        );
        $extraHeaderSize = $this->getStructSize( $extraHeaderInfo );

        $zip64ExtraInfo = array(
            'uncompressed size' => 8,
            'compressed size' => 8,
            'local header offset' => 8,
            'disk number start' => 4,
        );

        $extraPos = 0;
        while ( $extraPos < strlen( $extraField ) ) {
            $extra = $this->unpack( $extraField, $extraHeaderInfo, $extraPos );
            $extraPos += $extraHeaderSize;
            $extra += $this->unpack( $extraField,
                array( 'data' => array( 'string', $extra['size'] ) ),
                $extraPos );
            $extraPos += $extra['size'];

            if ( $extra['id'] == self::ZIP64_EXTRA_HEADER ) {
                return $this->unpack( $extra['data'], $zip64ExtraInfo );
            }
        }

        return false;
    }

    /**
     * Get the size of the open file in bytes. The value is determined once
     * with fstat() and cached.
     *
     * @return int
     * @throws ZipDirectoryReaderError If the file cannot be stat'ed
     */
    public function getFileLength() {
        if ( $this->fileLength === null ) {
            $stat = fstat( $this->file );
            if ( $stat === false ) {
                $this->error( 'zip-bad', 'unable to determine the file length' );
            }
            $this->fileLength = $stat['size'];
        }

        return $this->fileLength;
    }

    /**
     * Get a range of bytes from the file, reading through the segment cache.
     *
     * @param int|float $start Position of the first byte
     * @param int|float|null $length Number of bytes to return, or null for
     *   everything from $start to the end of the file
     * @return string Exactly $length bytes
     * @throws ZipDirectoryReaderError If the requested range is not fully
     *   inside the file or cannot be read
     */
    public function getBlock( $start, $length = null ) {
        $fileLength = $this->getFileLength();
        if ( $start < 0 ) {
            // Reachable when a structure is expected further in front of the
            // end of the file than the file is long.
            $this->error( 'zip-bad', "getBlock() requested negative position $start" );
        }
        if ( $start >= $fileLength ) {
            $this->error( 'zip-bad', "getBlock() requested position $start, " .
                "file length is $fileLength" );
        }
        if ( $length === null ) {
            $length = $fileLength - $start;
        }
        $end = $start + $length;
        if ( $end > $fileLength ) {
            $this->error( 'zip-bad', "getBlock() requested end position $end, " .
                "file length is $fileLength" );
        }
        if ( $length == 0 ) {
            // Nothing to read; avoids touching the file for empty directories.
            return '';
        }

        // floor()/ceil() return floats; segment numbers are used as array
        // keys, so make them integers. $endSeg is exclusive: it is the first
        // segment that lies completely behind the requested range.
        $startSeg = (int)floor( $start / self::SEGSIZE );
        $endSeg = (int)ceil( $end / self::SEGSIZE );

        $block = '';
        for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
            $block .= $this->getSegment( $segIndex );
        }

        $block = substr( $block,
            $start - $startSeg * self::SEGSIZE,
            $length );

        if ( strlen( $block ) < $length ) {
            $this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
        }

        return $block;
    }

    /**
     * Get one SEGSIZE-sized segment of the file, reading it from disk on
     * first use and serving it from the cache afterwards.
     *
     * @param int $segIndex Segment number; segment n starts at byte n * SEGSIZE
     * @return string The segment contents; an empty string for a segment that
     *   starts at or behind the end of the file
     * @throws ZipDirectoryReaderError If seeking or reading fails
     */
    public function getSegment( $segIndex ) {
        if ( !isset( $this->buffer[$segIndex] ) ) {
            $bytePos = $segIndex * self::SEGSIZE;
            if ( $bytePos >= $this->getFileLength() ) {
                $this->buffer[$segIndex] = '';

                return '';
            }
            // fseek() returns 0 on success and -1 on failure
            if ( fseek( $this->file, $bytePos ) ) {
                $this->error( 'zip-bad', "seek to $bytePos failed" );
            }
            $seg = fread( $this->file, self::SEGSIZE );
            if ( $seg === false ) {
                $this->error( 'zip-bad', "read from $bytePos failed" );
            }
            $this->buffer[$segIndex] = $seg;
        }

        return $this->buffer[$segIndex];
    }

    /**
     * Get the total size in bytes of a structure description as accepted by
     * unpack().
     *
     * @param array $struct Field name => byte count, or field name =>
     *   array( type, byte count )
     * @return int
     */
    public function getStructSize( $struct ) {
        $size = 0;
        foreach ( $struct as $type ) {
            if ( is_array( $type ) ) {
                list( , $fieldSize ) = $type;
                $size += $fieldSize;
            } else {
                $size += $type;
            }
        }

        return $size;
    }

    /**
     * Decode a binary structure.
     *
     * @param string $string The binary data
     * @param array $struct Describes the fields in the order they are stored.
     *   Each value is either
     *     - a byte count: the field is decoded as an unsigned little-endian
     *       integer of that many bytes, or
     *     - array( 'string', byte count ): the field is returned as a raw
     *       string of that many bytes
     * @param int $offset Position in $string at which the structure starts
     * @return array Field name => decoded value, in the order of $struct
     * @throws ZipDirectoryReaderError If the structure extends past the end
     *   of $string or an integer exceeds 2^52
     * @throws MWException If $struct names a type other than 'string'
     */
    public function unpack( $string, $struct, $offset = 0 ) {
        $size = $this->getStructSize( $struct );
        if ( $offset + $size > strlen( $string ) ) {
            $this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
        }

        $data = array();
        $pos = $offset;
        foreach ( $struct as $key => $type ) {
            if ( is_array( $type ) ) {
                list( $typeName, $fieldSize ) = $type;
                switch ( $typeName ) {
                    case 'string':
                        $data[$key] = substr( $string, $pos, $fieldSize );
                        $pos += $fieldSize;
                        break;
                    default:
                        throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
                }
            } else {
                // Unsigned little-endian integer
                $length = intval( $type );

                // Calculate the value. Use an algorithm which automatically
                // upgrades the value to floating point if necessary.
                $value = 0;
                for ( $i = $length - 1; $i >= 0; $i-- ) {
                    $value *= 256;
                    $value += ord( $string[$pos + $i] );
                }

                // Throw an exception if there was loss of precision
                if ( $value > pow( 2, 52 ) ) {
                    $this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
                        'This could happen if we tried to unpack a 64-bit structure ' .
                        'at an invalid location.' );
                }
                $data[$key] = $value;
                $pos += $length;
            }
        }

        return $data;
    }

    /**
     * Test whether a single bit of an integer is set.
     *
     * @param int $value
     * @param int $bitIndex Bit number, 0 being the least significant bit
     * @return bool
     */
    public function testBit( $value, $bitIndex ) {
        return (bool)( ( $value >> $bitIndex ) & 1 );
    }

    /**
     * Debugging aid: print a hex dump of a string to standard output, 16
     * bytes per line with an offset column and a printable-character column.
     *
     * @param string $s
     */
    public function hexDump( $s ) {
        $n = strlen( $s );
        for ( $i = 0; $i < $n; $i += 16 ) {
            printf( "%08X ", $i );
            for ( $j = 0; $j < 16; $j++ ) {
                print " ";
                if ( $j == 8 ) {
                    print " ";
                }
                if ( $i + $j >= $n ) {
                    print "  ";
                } else {
                    printf( "%02X", ord( $s[$i + $j] ) );
                }
            }

            print "  |";
            for ( $j = 0; $j < 16; $j++ ) {
                if ( $i + $j >= $n ) {
                    print " ";
                } elseif ( ctype_print( $s[$i + $j] ) ) {
                    print $s[$i + $j];
                } else {
                    print '.';
                }
            }
            print "|\n";
        }
    }
}
00702 
/**
 * Exception thrown by ZipDirectoryReader when parsing has to be aborted.
 * Besides the regular exception message it carries the short error code that
 * was passed to the constructor.
 */
class ZipDirectoryReaderError extends Exception {
    /** The error code supplied when the exception was created */
    protected $errorCode;

    /**
     * @param string $code Short error code; also embedded in the exception
     *   message
     */
    public function __construct( $code ) {
        parent::__construct( "ZipDirectoryReader error: {$code}" );
        $this->errorCode = $code;
    }

    /**
     * Get the error code this exception was created with.
     *
     * @return string
     */
    public function getErrorCode() {
        return $this->errorCode;
    }
}