MediaWiki  REL1_24
ZipDirectoryReader.php
Go to the documentation of this file.
00001 <?php
00031 class ZipDirectoryReader {
/**
 * Read the central directory of a ZIP file and report each entry to a callback.
 *
 * @param string $fileName Path of the file to open
 * @param callable $callback Invoked once per directory entry with an
 *   associative array holding the keys 'name', 'mtime' and 'size'
 * @param array $options Optional settings; the only key read is 'zip64'
 * @return Status Whatever execute() returns
 */
public static function read( $fileName, $callback, $options = array() ) {
    $reader = new self( $fileName, $callback, $options );
    $result = $reader->execute();

    return $result;
}
00094 
/** Extra-field header ID that marks a ZIP64 extended information block */
const ZIP64_EXTRA_HEADER = 0x0001;

/** Size in bytes of one cached file segment (see getSegment()) */
const SEGSIZE = 16384;

/** Bit index in the "general bits" field: the file name is already UTF-8 */
const GENERAL_UTF8 = 11;

/** Bit index in the "general bits" field: the central directory is encrypted */
const GENERAL_CD_ENCRYPTED = 13;

/** Path of the file being read, as passed to the constructor */
protected $fileName;

/** Handle returned by fopen() in execute() */
protected $file;

/** Cached file size in bytes; null until getFileLength() has been called */
protected $fileLength;

/** Segments already read from the file, indexed by segment number */
protected $buffer;

/** Callback invoked by readCentralDirectory() for each directory entry */
protected $callback;

/** Whether ZIP64 structures are interpreted (set from the 'zip64' option) */
protected $zip64 = false;

/** Unpacked end of central directory record */
protected $eocdr;

/** Unpacked ZIP64 end of central directory record */
protected $eocdr64;

/** Unpacked ZIP64 end of central directory locator */
protected $eocdr64Locator;

/** Reset to an empty array by execute() */
protected $data;
00129 
/**
 * Private-use constructor; callers go through ZipDirectoryReader::read().
 *
 * @param string $fileName Path of the file to read
 * @param callable $callback Receiver for the per-entry data arrays
 * @param array $options Settings; 'zip64' overrides the default of false
 */
protected function __construct( $fileName, $callback, $options ) {
    // Only override the default when the caller supplied a non-null value
    if ( isset( $options['zip64'] ) ) {
        $this->zip64 = $options['zip64'];
    }

    $this->callback = $callback;
    $this->fileName = $fileName;
}
00144 
/**
 * Open the file, locate the central directory and feed every entry to the
 * callback.
 *
 * Errors raised through error() are converted into a fatal Status. Any
 * other exception (for example one thrown by the callback) is re-thrown
 * to the caller, but only after the file handle has been closed.
 *
 * @return Status Good on success, fatal with a zip-* message key otherwise
 * @throws Exception Anything other than ZipDirectoryReaderError raised
 *   while reading
 */
function execute() {
    // 'b' requests binary-safe reading on platforms that distinguish text
    // and binary streams; the archive must be read byte-for-byte.
    $this->file = fopen( $this->fileName, 'rb' );
    $this->data = array();
    if ( !$this->file ) {
        return Status::newFatal( 'zip-file-open-error' );
    }

    $status = Status::newGood();
    try {
        $this->readEndOfCentralDirectoryRecord();
        if ( $this->zip64 ) {
            list( $offset, $size ) = $this->findZip64CentralDirectory();
        } else {
            if ( $this->eocdr['CD size'] == 0xffffffff
                || $this->eocdr['CD offset'] == 0xffffffff
                || $this->eocdr['CD entries total'] == 0xffff
            ) {
                $this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
                    'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
                    'opening vulnerabilities on clients using OpenJDK 7 or later.' );
            }

            list( $offset, $size ) = $this->findOldCentralDirectory();
        }
        $this->readCentralDirectory( $offset, $size );
    } catch ( ZipDirectoryReaderError $e ) {
        $status->fatal( $e->getErrorCode() );
    } catch ( Exception $e ) {
        // No "finally" is used, to stay compatible with the PHP version
        // this file targets: release the handle by hand, then propagate.
        fclose( $this->file );
        throw $e;
    }

    fclose( $this->file );

    return $status;
}
00184 
/**
 * Log a debug message and abort processing by throwing.
 *
 * @param string $code Error code handed to ZipDirectoryReaderError
 * @param string $debugMessage Human-readable detail, sent to wfDebug() only
 * @throws ZipDirectoryReaderError Always
 */
function error( $code, $debugMessage ) {
    $logLine = __CLASS__ . ": Fatal error: $debugMessage\n";
    wfDebug( $logLine );

    throw new ZipDirectoryReaderError( $code );
}
00194 
/**
 * Locate and unpack the end of central directory record (EOCDR), storing
 * the result in $this->eocdr.
 *
 * Besides the raw fields, the stored array gains 'EOCDR size' (fixed part
 * plus comment length), 'file comment' and 'position' (absolute offset of
 * the signature within the file).
 *
 * @throws ZipDirectoryReaderError If no signature is found, the record does
 *   not end exactly at the end of the file, or more than one disk is used
 */
function readEndOfCentralDirectoryRecord() {
    $layout = array(
        'signature' => 4,
        'disk' => 2,
        'CD start disk' => 2,
        'CD entries this disk' => 2,
        'CD entries total' => 2,
        'CD size' => 4,
        'CD offset' => 4,
        'file comment length' => 2,
    );
    $fixedSize = $this->getStructSize( $layout );

    // Only the tail of the file is searched: 64 KiB plus the fixed record
    // size (the comment length is a 2-byte field), clamped to the file start.
    $searchStart = max( 0, $this->getFileLength() - 65536 - $fixedSize );
    $tail = $this->getBlock( $searchStart );

    // The last occurrence of the signature wins
    $sigOffset = strrpos( $tail, "PK\x05\x06" );
    if ( $sigOffset === false ) {
        $this->error( 'zip-wrong-format',
            "zip file lacks EOCDR signature. It probably isn't a zip file." );
    }

    $this->eocdr = $this->unpack( substr( $tail, $sigOffset ), $layout );
    $commentLength = $this->eocdr['file comment length'];
    $recordSize = $fixedSize + $commentLength;
    $this->eocdr['EOCDR size'] = $recordSize;

    // The record, including its comment, must end exactly where the file ends
    if ( strlen( $tail ) - $sigOffset != $recordSize ) {
        $this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
    }

    $spansDisks = $this->eocdr['disk'] !== 0 || $this->eocdr['CD start disk'] !== 0;
    if ( $spansDisks ) {
        $this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
    }

    $commentLayout = array( 'file comment' => array( 'string', $commentLength ) );
    $this->eocdr += $this->unpack( $tail, $commentLayout, $sigOffset + $fixedSize );
    $this->eocdr['position'] = $searchStart + $sigOffset;
}
00241 
/**
 * Unpack the ZIP64 end of central directory locator into
 * $this->eocdr64Locator.
 *
 * The locator is read from the bytes immediately preceding the EOCDR, so
 * readEndOfCentralDirectoryRecord() must have run first.
 *
 * @throws ZipDirectoryReaderError If the locator signature is wrong
 */
function readZip64EndOfCentralDirectoryLocator() {
    $layout = array(
        'signature' => array( 'string', 4 ),
        'eocdr64 start disk' => 4,
        'eocdr64 offset' => 8,
        'number of disks' => 4,
    );
    $locatorSize = $this->getStructSize( $layout );

    $locatorPos = $this->getFileLength() - $this->eocdr['EOCDR size'] - $locatorSize;
    $raw = $this->getBlock( $locatorPos, $locatorSize );
    $locator = $this->unpack( $raw, $layout );
    $this->eocdr64Locator = $locator;

    if ( $locator['signature'] === "PK\x06\x07" ) {
        return;
    }

    // Note: Java will allow this and continue to read the
    // EOCDR64, so we have to reject the upload, we can't
    // just use the EOCDR header instead.
    $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
}
00266 
/**
 * Unpack the ZIP64 end of central directory record into $this->eocdr64.
 *
 * The record is read from the offset given by the previously loaded
 * locator ($this->eocdr64Locator).
 *
 * @throws ZipDirectoryReaderError If the signature is wrong or any of the
 *   disk fields is non-zero
 */
function readZip64EndOfCentralDirectoryRecord() {
    $locator = $this->eocdr64Locator;

    // NOTE(review): any non-zero 'number of disks' is rejected here; confirm
    // against the ZIP specification whether single-disk archives store 0 or
    // 1 in this field.
    if ( $locator['eocdr64 start disk'] != 0 || $locator['number of disks'] != 0 ) {
        $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
    }

    $layout = array(
        'signature' => array( 'string', 4 ),
        'EOCDR64 size' => 8,
        'version made by' => 2,
        'version needed' => 2,
        'disk' => 4,
        'CD start disk' => 4,
        'CD entries this disk' => 8,
        'CD entries total' => 8,
        'CD size' => 8,
        'CD offset' => 8
    );
    $recordSize = $this->getStructSize( $layout );
    $raw = $this->getBlock( $locator['eocdr64 offset'], $recordSize );
    $record = $this->unpack( $raw, $layout );
    $this->eocdr64 = $record;

    if ( $record['signature'] !== "PK\x06\x06" ) {
        $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
    }

    $spansDisks = $record['disk'] !== 0 || $record['CD start disk'] !== 0;
    if ( $spansDisks ) {
        $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
    }
}
00302 
/**
 * Determine the central directory location from the legacy EOCDR fields.
 *
 * @return array Two elements: the directory offset and its size in bytes
 * @throws ZipDirectoryReaderError If the directory does not end exactly
 *   where the EOCDR begins
 */
function findOldCentralDirectory() {
    $cdOffset = $this->eocdr['CD offset'];
    $cdSize = $this->eocdr['CD size'];

    // Some readers use the EOCDR position instead of the offset field
    // to find the directory, so to be safe, we check if they both agree.
    if ( $this->eocdr['position'] != $cdOffset + $cdSize ) {
        $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
            'of central directory record' );
    }

    return array( $cdOffset, $cdSize );
}
00323 
/**
 * Determine the central directory location, consulting the ZIP64
 * structures when the legacy EOCDR carries a 0xffffffff / 0xffff marker.
 *
 * @return array Two elements: the directory offset and its size in bytes
 * @throws ZipDirectoryReaderError If the directory does not end exactly
 *   where the governing end-of-directory structure begins
 */
function findZip64CentralDirectory() {
    // The spec is ambiguous about the exact rules of precedence between the
    // ZIP64 headers and the original headers. Here we follow zip_util.c
    // from OpenJDK 7.
    $cdSize = $this->eocdr['CD size'];
    $cdOffset = $this->eocdr['CD offset'];
    $expectedEnd = $this->eocdr['position'];

    $wantsZip64 = $cdSize == 0xffffffff
        || $cdOffset == 0xffffffff
        || $this->eocdr['CD entries total'] == 0xffff;

    if ( $wantsZip64 ) {
        $this->readZip64EndOfCentralDirectoryLocator();

        if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
            $this->readZip64EndOfCentralDirectoryRecord();

            if ( isset( $this->eocdr64['CD offset'] ) ) {
                // The 64-bit record overrides the legacy values, and the
                // directory must now end where that record starts.
                $cdSize = $this->eocdr64['CD size'];
                $cdOffset = $this->eocdr64['CD offset'];
                $expectedEnd = $this->eocdr64Locator['eocdr64 offset'];
            }
        }
    }

    // Some readers use the EOCDR position instead of the offset field
    // to find the directory, so to be safe, we check if they both agree.
    if ( $expectedEnd != $cdOffset + $cdSize ) {
        $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
            'of central directory record' );
    }

    return array( $cdOffset, $cdSize );
}
00362 
/**
 * Walk the central directory and invoke the callback once per entry.
 *
 * The callback receives an associative array with 'name' (UTF-8),
 * 'mtime' (14-digit YYYYMMDDHHMMSS string) and 'size' (uncompressed size).
 *
 * @param int $offset File offset at which the central directory starts
 * @param int $size Length of the central directory in bytes
 * @throws ZipDirectoryReaderError On a bad entry signature, on central
 *   directory encryption, or when an entry runs past the directory block
 */
function readCentralDirectory( $offset, $size ) {
    $directory = $this->getBlock( $offset, $size );

    $headerLayout = array(
        'signature' => array( 'string', 4 ),
        'version made by' => 2,
        'version needed' => 2,
        'general bits' => 2,
        'compression method' => 2,
        'mod time' => 2,
        'mod date' => 2,
        'crc-32' => 4,
        'compressed size' => 4,
        'uncompressed size' => 4,
        'name length' => 2,
        'extra field length' => 2,
        'comment length' => 2,
        'disk number start' => 2,
        'internal attrs' => 2,
        'external attrs' => 4,
        'local header offset' => 4,
    );
    $headerSize = $this->getStructSize( $headerLayout );

    for ( $cursor = 0; $cursor < $size; ) {
        // Fixed-size part of the entry
        $entry = $this->unpack( $directory, $headerLayout, $cursor );
        $cursor += $headerSize;

        if ( $entry['signature'] !== "PK\x01\x02" ) {
            $this->error( 'zip-bad', 'Invalid signature found in directory entry' );
        }

        // Variable-size part: its lengths come from the fixed header
        $tailLayout = array(
            'name' => array( 'string', $entry['name length'] ),
            'extra field' => array( 'string', $entry['extra field length'] ),
            'comment' => array( 'string', $entry['comment length'] ),
        );
        $entry += $this->unpack( $directory, $tailLayout, $cursor );
        $cursor += $this->getStructSize( $tailLayout );

        if ( $this->zip64 ) {
            $hasMarker = $entry['compressed size'] == 0xffffffff
                || $entry['uncompressed size'] == 0xffffffff
                || $entry['local header offset'] == 0xffffffff;
            if ( $hasMarker ) {
                $zip64Data = $this->unpackZip64Extra( $entry['extra field'] );
                if ( $zip64Data ) {
                    // Values from the ZIP64 extra field take precedence
                    $entry = $zip64Data + $entry;
                }
            }
        }

        $flags = $entry['general bits'];
        if ( $this->testBit( $flags, self::GENERAL_CD_ENCRYPTED ) ) {
            $this->error( 'zip-unsupported', 'central directory encryption is not supported' );
        }

        // Convert the timestamp into MediaWiki format
        // For the format, please see the MS-DOS 2.0 Programmer's Reference,
        // pages 3-5 and 3-6.
        $dosDate = $entry['mod date'];
        $dosTime = $entry['mod time'];
        $timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
            1980 + ( $dosDate >> 9 ),
            ( $dosDate >> 5 ) & 15,
            $dosDate & 31,
            ( $dosTime >> 11 ) & 31,
            ( $dosTime >> 5 ) & 63,
            ( $dosTime & 31 ) * 2 );

        // Convert the character set in the file name
        $name = $this->testBit( $flags, self::GENERAL_UTF8 )
            ? $entry['name']
            : iconv( 'CP437', 'UTF-8', $entry['name'] );

        // Compile a data array for the user, with a sensible format
        $userData = array(
            'name' => $name,
            'mtime' => $timestamp,
            'size' => $entry['uncompressed size'],
        );
        call_user_func( $this->callback, $userData );
    }
}
00455 
/**
 * Scan an entry's extra field for the ZIP64 block and unpack it.
 *
 * The extra field is treated as a sequence of (id, size, data) records;
 * the first record whose id equals ZIP64_EXTRA_HEADER is decoded.
 *
 * @param string $extraField Raw extra field bytes of a directory entry
 * @return array|bool Unpacked 'uncompressed size', 'compressed size',
 *   'local header offset' and 'disk number start', or false if the extra
 *   field holds no ZIP64 block
 */
function unpackZip64Extra( $extraField ) {
    $headerLayout = array(
        'id' => 2,
        'size' => 2,
    );
    $headerSize = $this->getStructSize( $headerLayout );

    $zip64Layout = array(
        'uncompressed size' => 8,
        'compressed size' => 8,
        'local header offset' => 8,
        'disk number start' => 4,
    );

    $total = strlen( $extraField );
    for ( $cursor = 0; $cursor < $total; ) {
        $header = $this->unpack( $extraField, $headerLayout, $cursor );
        $cursor += $headerSize;

        $payloadLayout = array( 'data' => array( 'string', $header['size'] ) );
        $payload = $this->unpack( $extraField, $payloadLayout, $cursor );
        $cursor += $header['size'];

        if ( $header['id'] == self::ZIP64_EXTRA_HEADER ) {
            return $this->unpack( $payload['data'], $zip64Layout );
        }
    }

    return false;
}
00491 
/**
 * Get the size of the open file in bytes.
 *
 * The value is obtained from fstat() on first use and cached afterwards.
 *
 * @return int File length in bytes
 * @throws ZipDirectoryReaderError If fstat() fails on the open handle
 */
function getFileLength() {
    if ( $this->fileLength === null ) {
        $stat = fstat( $this->file );
        if ( $stat === false ) {
            // Without this check the length would silently stay null and
            // every later bounds check would compare against null.
            $this->error( 'zip-bad', 'fstat() failed on the open file' );
        }
        $this->fileLength = $stat['size'];
    }

    return $this->fileLength;
}
00504 
/**
 * Get a range of bytes from the file, assembled from cached segments.
 *
 * @param int $start Offset of the first byte, relative to the file start
 * @param int|null $length Number of bytes wanted; null means "up to the
 *   end of the file"
 * @return string Exactly $length bytes
 * @throws ZipDirectoryReaderError If the requested range is negative, lies
 *   outside the file, or could not be read completely
 */
function getBlock( $start, $length = null ) {
    $fileLength = $this->getFileLength();
    if ( $start < 0 ) {
        // Positions are derived from untrusted header fields; reject a
        // negative one here instead of relying on a later seek failure.
        $this->error( 'zip-bad', "getBlock() requested negative position $start" );
    }
    if ( $start >= $fileLength ) {
        $this->error( 'zip-bad', "getBlock() requested position $start, " .
            "file length is $fileLength" );
    }
    if ( $length === null ) {
        $length = $fileLength - $start;
    }
    if ( $length < 0 ) {
        $this->error( 'zip-bad', "getBlock() requested negative length $length" );
    }
    $end = $start + $length;
    if ( $end > $fileLength ) {
        $this->error( 'zip-bad', "getBlock() requested end position $end, " .
            "file length is $fileLength" );
    }
    if ( $length == 0 ) {
        // Nothing to read; avoids touching the file at all
        return '';
    }

    // $endSeg is the index one past the last segment containing requested
    // bytes, so the loop bound is exclusive. An inclusive bound would always
    // read one segment more than needed.
    $startSeg = (int)floor( $start / self::SEGSIZE );
    $endSeg = (int)ceil( $end / self::SEGSIZE );

    $block = '';
    for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
        $block .= $this->getSegment( $segIndex );
    }

    $block = substr( $block,
        $start - $startSeg * self::SEGSIZE,
        $length );

    // substr() may return false on older PHP versions when the data is short
    if ( $block === false || strlen( $block ) < $length ) {
        $this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
    }

    return $block;
}
00547 
/**
 * Get one SEGSIZE-byte segment of the file, reading it on first use and
 * serving it from $this->buffer afterwards.
 *
 * A segment that starts at or beyond the end of the file is returned (and
 * cached) as an empty string.
 *
 * @param int $segIndex Zero-based segment number
 * @return string The segment contents
 * @throws ZipDirectoryReaderError If seeking or reading fails
 */
function getSegment( $segIndex ) {
    if ( isset( $this->buffer[$segIndex] ) ) {
        return $this->buffer[$segIndex];
    }

    $bytePos = $segIndex * self::SEGSIZE;
    if ( $bytePos >= $this->getFileLength() ) {
        $seg = '';
    } else {
        // fseek() reports success as 0
        if ( fseek( $this->file, $bytePos ) != 0 ) {
            $this->error( 'zip-bad', "seek to $bytePos failed" );
        }
        $seg = fread( $this->file, self::SEGSIZE );
        if ( $seg === false ) {
            $this->error( 'zip-bad', "read from $bytePos failed" );
        }
    }

    $this->buffer[$segIndex] = $seg;

    return $seg;
}
00581 
/**
 * Get the total size in bytes of a structure description.
 *
 * @param array $struct Field name => field type, in the format accepted by
 *   unpack(): either a byte count, or an array whose second element is the
 *   byte count
 * @return int Sum of all field sizes
 */
function getStructSize( $struct ) {
    $total = 0;
    foreach ( $struct as $fieldType ) {
        // Array types carry their length in the second element
        $total += is_array( $fieldType ) ? $fieldType[1] : $fieldType;
    }

    return $total;
}
00600 
/**
 * Decode a binary structure from a string.
 *
 * Each element of $struct maps a field name to a type:
 *   - a number N: an unsigned little-endian integer occupying N bytes
 *   - array( 'string', N ): N raw bytes, returned as a string
 *
 * @param string $string Source data
 * @param array $struct Structure description as outlined above
 * @param int $offset Position in $string at which decoding starts
 * @return array Field name => decoded value, in the order of $struct
 * @throws ZipDirectoryReaderError If the structure would extend past the
 *   end of $string, or an integer exceeds 2^52
 * @throws MWException If an array type other than 'string' is given
 */
function unpack( $string, $struct, $offset = 0 ) {
    $needed = $this->getStructSize( $struct );
    if ( strlen( $string ) < $offset + $needed ) {
        $this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
    }

    $result = array();
    $cursor = $offset;
    foreach ( $struct as $fieldName => $spec ) {
        if ( is_array( $spec ) ) {
            list( $typeName, $fieldSize ) = $spec;
            if ( $typeName != 'string' ) {
                throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
            }
            $result[$fieldName] = substr( $string, $cursor, $fieldSize );
            $cursor += $fieldSize;
            continue;
        }

        // Unsigned little-endian integer
        $byteCount = intval( $spec );

        // Accumulate from the most significant (last) byte down to the
        // first. Plain arithmetic is used so that the value is upgraded to
        // floating point automatically if it outgrows an integer.
        $value = 0;
        $index = $cursor + $byteCount;
        while ( $index > $cursor ) {
            $index--;
            $value = $value * 256 + ord( $string[$index] );
        }

        // Refuse values that a double may no longer represent exactly
        if ( $value > pow( 2, 52 ) ) {
            $this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
                'This could happen if we tried to unpack a 64-bit structure ' .
                'at an invalid location.' );
        }

        $result[$fieldName] = $value;
        $cursor += $byteCount;
    }

    return $result;
}
00667 
/**
 * Check whether a single bit is set in an integer.
 *
 * @param int $value Value to inspect
 * @param int $bitIndex Zero-based bit position, 0 being the least
 *   significant bit
 * @return bool True if the bit is set
 */
function testBit( $value, $bitIndex ) {
    // Move the wanted bit into the lowest position, then isolate it
    $shifted = $value >> $bitIndex;

    return ( $shifted & 1 ) === 1;
}
00679 
/**
 * Print a hex dump of a string, 16 bytes per line, for debugging.
 *
 * Each line consists of the 8-digit hexadecimal offset, the byte values in
 * hexadecimal (with an extra space after the eighth byte) and the bytes
 * themselves between "|" characters, with non-printable bytes shown as ".".
 *
 * @param string $s Data to dump
 */
function hexDump( $s ) {
    $total = strlen( $s );
    for ( $lineStart = 0; $lineStart < $total; $lineStart += 16 ) {
        $hexPart = '';
        $textPart = '';
        for ( $col = 0; $col < 16; $col++ ) {
            // One separating space per column, two before the ninth column
            $hexPart .= ( $col == 8 ) ? '  ' : ' ';

            $index = $lineStart + $col;
            if ( $index >= $total ) {
                // Past the end of the data: pad both columns
                $hexPart .= '  ';
                $textPart .= ' ';
                continue;
            }

            $char = $s[$index];
            $hexPart .= sprintf( "%02X", ord( $char ) );
            $textPart .= ctype_print( $char ) ? $char : '.';
        }

        print sprintf( "%08X ", $lineStart ) . $hexPart . "  |" . $textPart . "|\n";
    }
}
00713 }
00714 
/**
 * Exception thrown by ZipDirectoryReader::error(). It carries an error code
 * that execute() turns into a fatal Status message.
 */
class ZipDirectoryReaderError extends Exception {
    /** Error code supplied to the constructor */
    protected $errorCode;

    /**
     * @param string $code Error code, also embedded in the exception message
     */
    public function __construct( $code ) {
        parent::__construct( "ZipDirectoryReader error: $code" );
        $this->errorCode = $code;
    }

    /**
     * @return string The error code given at construction time
     */
    public function getErrorCode() {
        return $this->errorCode;
    }
}