MediaWiki  master
ZipDirectoryReader.php
Go to the documentation of this file.
1 <?php
/**
 * Read a ZIP file and call a function for each file discovered in it.
 *
 * A reader instance is created for the file and run immediately; the
 * instance itself is not exposed to the caller.
 *
 * @param string $fileName The file name
 * @param callable $callback The file data callback. It is invoked once per
 *   central directory entry with an array holding the keys 'name', 'mtime'
 *   and 'size'.
 * @param array $options Reader options. The only key read here is 'zip64',
 *   which selects the ZIP64 mode.
 * @return Status The result of execute()
 */
public static function read( $fileName, $callback, $options = [] ) {
	return ( new self( $fileName, $callback, $options ) )->execute();
}
94 
/** The file name. */
96  protected $fileName;
97 
/** The opened file resource. */
99  protected $file;
100 
/** The cached length of the file, or null if it has not been loaded yet. */
102  protected $fileLength;
103 
/** A segmented cache of the file contents, keyed by segment index. */
105  protected $buffer;
106 
/** The file data callback. */
108  protected $callback;
109 
/** The ZIP64 mode; false unless overridden by the 'zip64' option. */
111  protected $zip64 = false;
112 
115 
/** Reset to an empty array at the start of execute(). */
116  protected $data;
117 
/** The "extra field" ID for ZIP64 central directory entries. */
119  const ZIP64_EXTRA_HEADER = 0x0001;
120 
/** The segment size for the file contents cache. */
122  const SEGSIZE = 16384;
123 
/** The index of the "general field" bit for UTF-8 file names. */
125  const GENERAL_UTF8 = 11;
126 
129 
/**
 * Private constructor. Instances are created through the static read()
 * entry point.
 *
 * @param string $fileName The file name
 * @param callable $callback The file data callback
 * @param array $options Reader options; only 'zip64' is consulted
 */
protected function __construct( $fileName, $callback, $options ) {
	$this->callback = $callback;
	$this->fileName = $fileName;

	// Keep the declared default unless the caller explicitly supplied a mode
	$this->zip64 = isset( $options['zip64'] ) ? $options['zip64'] : $this->zip64;
}
144 
/**
 * Read the directory according to settings in $this.
 *
 * Opens the file, reads the end of central directory record, locates the
 * central directory (as a ZIP64 or a legacy reader would, depending on the
 * mode) and feeds every entry to the callback.
 *
 * @return Status Good on success. Fatal with 'zip-file-open-error' if the
 *   file cannot be opened, or with the error code carried by any
 *   ZipDirectoryReaderError raised while parsing.
 */
function execute() {
	$this->file = fopen( $this->fileName, 'r' );
	$this->data = [];
	if ( !$this->file ) {
		return Status::newFatal( 'zip-file-open-error' );
	}

	// The status must exist before the try block: the catch clause and the
	// final return both use it.
	$status = Status::newGood();
	try {
		// Populates $this->eocdr, which every branch below depends on
		$this->readEndOfCentralDirectoryRecord();

		if ( $this->zip64 ) {
			list( $offset, $size ) = $this->findZip64CentralDirectory();
			$this->readCentralDirectory( $offset, $size );
		} else {
			if ( $this->eocdr['CD size'] == 0xffffffff
				|| $this->eocdr['CD offset'] == 0xffffffff
				|| $this->eocdr['CD entries total'] == 0xffff
			) {
				$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
					'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
					'opening vulnerabilities on clients using OpenJDK 7 or later.' );
			}

			list( $offset, $size ) = $this->findOldCentralDirectory();
			$this->readCentralDirectory( $offset, $size );
		}
	} catch ( ZipDirectoryReaderError $e ) {
		$status->fatal( $e->getErrorCode() );
	} finally {
		// Release the handle even when an exception other than
		// ZipDirectoryReaderError (e.g. one thrown by the callback) propagates.
		fclose( $this->file );
	}

	return $status;
}
184 
/**
 * Throw an error, and log a debug message.
 *
 * @param string $code Error code carried by the thrown exception
 * @param string $debugMessage Text written to the debug log only
 * @throws ZipDirectoryReaderError always
 */
function error( $code, $debugMessage ) {
	$logLine = __CLASS__ . ": Fatal error: $debugMessage\n";
	wfDebug( $logLine );

	throw new ZipDirectoryReaderError( $code );
}
195 
202  $info = [
203  'signature' => 4,
204  'disk' => 2,
205  'CD start disk' => 2,
206  'CD entries this disk' => 2,
207  'CD entries total' => 2,
208  'CD size' => 4,
209  'CD offset' => 4,
210  'file comment length' => 2,
211  ];
212  $structSize = $this->getStructSize( $info );
213  $startPos = $this->getFileLength() - 65536 - $structSize;
214  if ( $startPos < 0 ) {
215  $startPos = 0;
216  }
217 
218  if ( $this->getFileLength() === 0 ) {
219  $this->error( 'zip-wrong-format', "The file is empty." );
220  }
221 
222  $block = $this->getBlock( $startPos );
223  $sigPos = strrpos( $block, "PK\x05\x06" );
224  if ( $sigPos === false ) {
225  $this->error( 'zip-wrong-format',
226  "zip file lacks EOCDR signature. It probably isn't a zip file." );
227  }
228 
229  $this->eocdr = $this->unpack( substr( $block, $sigPos ), $info );
230  $this->eocdr['EOCDR size'] = $structSize + $this->eocdr['file comment length'];
231 
232  if ( $structSize + $this->eocdr['file comment length'] != strlen( $block ) - $sigPos ) {
233  $this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
234  }
235  if ( $this->eocdr['disk'] !== 0
236  || $this->eocdr['CD start disk'] !== 0
237  ) {
238  $this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
239  }
240  $this->eocdr += $this->unpack(
241  $block,
242  [ 'file comment' => [ 'string', $this->eocdr['file comment length'] ] ],
243  $sigPos + $structSize );
244  $this->eocdr['position'] = $startPos + $sigPos;
245  }
246 
252  $info = [
253  'signature' => [ 'string', 4 ],
254  'eocdr64 start disk' => 4,
255  'eocdr64 offset' => 8,
256  'number of disks' => 4,
257  ];
258  $structSize = $this->getStructSize( $info );
259 
260  $start = $this->getFileLength() - $this->eocdr['EOCDR size'] - $structSize;
261  $block = $this->getBlock( $start, $structSize );
262  $this->eocdr64Locator = $data = $this->unpack( $block, $info );
263 
264  if ( $data['signature'] !== "PK\x06\x07" ) {
265  // Note: Java will allow this and continue to read the
266  // EOCDR64, so we have to reject the upload, we can't
267  // just use the EOCDR header instead.
268  $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
269  }
270  }
271 
277  if ( $this->eocdr64Locator['eocdr64 start disk'] != 0
278  || $this->eocdr64Locator['number of disks'] != 0
279  ) {
280  $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
281  }
282 
283  $info = [
284  'signature' => [ 'string', 4 ],
285  'EOCDR64 size' => 8,
286  'version made by' => 2,
287  'version needed' => 2,
288  'disk' => 4,
289  'CD start disk' => 4,
290  'CD entries this disk' => 8,
291  'CD entries total' => 8,
292  'CD size' => 8,
293  'CD offset' => 8
294  ];
295  $structSize = $this->getStructSize( $info );
296  $block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
297  $this->eocdr64 = $data = $this->unpack( $block, $info );
298  if ( $data['signature'] !== "PK\x06\x06" ) {
299  $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
300  }
301  if ( $data['disk'] !== 0
302  || $data['CD start disk'] !== 0
303  ) {
304  $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
305  }
306  }
307 
315  $size = $this->eocdr['CD size'];
316  $offset = $this->eocdr['CD offset'];
317  $endPos = $this->eocdr['position'];
318 
319  // Some readers use the EOCDR position instead of the offset field
320  // to find the directory, so to be safe, we check if they both agree.
321  if ( $offset + $size != $endPos ) {
322  $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
323  'of central directory record' );
324  }
325 
326  return [ $offset, $size ];
327  }
328 
336  // The spec is ambiguous about the exact rules of precedence between the
337  // ZIP64 headers and the original headers. Here we follow zip_util.c
338  // from OpenJDK 7.
339  $size = $this->eocdr['CD size'];
340  $offset = $this->eocdr['CD offset'];
341  $numEntries = $this->eocdr['CD entries total'];
342  $endPos = $this->eocdr['position'];
343  if ( $size == 0xffffffff
344  || $offset == 0xffffffff
345  || $numEntries == 0xffff
346  ) {
348 
349  if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
351  if ( isset( $this->eocdr64['CD offset'] ) ) {
352  $size = $this->eocdr64['CD size'];
353  $offset = $this->eocdr64['CD offset'];
354  $endPos = $this->eocdr64Locator['eocdr64 offset'];
355  }
356  }
357  }
358  // Some readers use the EOCDR position instead of the offset field
359  // to find the directory, so to be safe, we check if they both agree.
360  if ( $offset + $size != $endPos ) {
361  $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
362  'of central directory record' );
363  }
364 
365  return [ $offset, $size ];
366  }
367 
/**
 * Read the central directory at the given location.
 *
 * Walks the directory entry by entry and passes a summary of each one
 * (name, modification time, uncompressed size) to the callback.
 *
 * @param int $offset Position of the central directory within the file
 * @param int $size Length of the central directory in bytes
 * @throws ZipDirectoryReaderError via error() on a bad entry signature or
 *   when central directory encryption is flagged
 */
function readCentralDirectory( $offset, $size ) {
	$directory = $this->getBlock( $offset, $size );

	// Fixed-length leading part of every directory entry
	$headerLayout = [
		'signature' => [ 'string', 4 ],
		'version made by' => 2,
		'version needed' => 2,
		'general bits' => 2,
		'compression method' => 2,
		'mod time' => 2,
		'mod date' => 2,
		'crc-32' => 4,
		'compressed size' => 4,
		'uncompressed size' => 4,
		'name length' => 2,
		'extra field length' => 2,
		'comment length' => 2,
		'disk number start' => 2,
		'internal attrs' => 2,
		'external attrs' => 4,
		'local header offset' => 4,
	];
	$headerSize = $this->getStructSize( $headerLayout );

	$cursor = 0;
	while ( $cursor < $size ) {
		$entry = $this->unpack( $directory, $headerLayout, $cursor );
		$cursor += $headerSize;

		if ( $entry['signature'] !== "PK\x01\x02" ) {
			$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
		}

		// The variable-length tail follows; its field sizes come from the header
		$tailLayout = [
			'name' => [ 'string', $entry['name length'] ],
			'extra field' => [ 'string', $entry['extra field length'] ],
			'comment' => [ 'string', $entry['comment length'] ],
		];
		$entry += $this->unpack( $directory, $tailLayout, $cursor );
		$cursor += $this->getStructSize( $tailLayout );

		// An all-ones 32-bit field means the real value is in the ZIP64 extra field
		$hasZip64Marker = $entry['compressed size'] == 0xffffffff
			|| $entry['uncompressed size'] == 0xffffffff
			|| $entry['local header offset'] == 0xffffffff;
		if ( $this->zip64 && $hasZip64Marker ) {
			$zip64Fields = $this->unpackZip64Extra( $entry['extra field'] );
			if ( $zip64Fields ) {
				// Left operand wins, so the ZIP64 values replace the 32-bit ones
				$entry = $zip64Fields + $entry;
			}
		}

		$flags = $entry['general bits'];
		if ( $this->testBit( $flags, self::GENERAL_CD_ENCRYPTED ) ) {
			$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
		}

		// Convert the timestamp into MediaWiki format
		// For the format, please see the MS-DOS 2.0 Programmer's Reference,
		// pages 3-5 and 3-6.
		$dosDate = $entry['mod date'];
		$dosTime = $entry['mod time'];
		$timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
			1980 + ( $dosDate >> 9 ),
			( $dosDate >> 5 ) & 15,
			$dosDate & 31,
			( $dosTime >> 11 ) & 31,
			( $dosTime >> 5 ) & 63,
			// The seconds field is stored in units of two seconds
			( $dosTime & 31 ) * 2 );

		// Convert the character set in the file name
		$name = $this->testBit( $flags, self::GENERAL_UTF8 )
			? $entry['name']
			: iconv( 'CP437', 'UTF-8', $entry['name'] );

		// Compile a data array for the user, with a sensible format
		call_user_func( $this->callback, [
			'name' => $name,
			'mtime' => $timestamp,
			'size' => $entry['uncompressed size'],
		] );
	}
}
460 
/**
 * Interpret ZIP64 "extra field" data and return an associative array.
 *
 * The extra field is scanned as a sequence of blocks, each a 2-byte ID and
 * a 2-byte size followed by that many bytes of data. The first block whose
 * ID is ZIP64_EXTRA_HEADER is unpacked and returned.
 *
 * @param string $extraField Raw extra field of a directory entry
 * @return array|bool The keys 'uncompressed size', 'compressed size',
 *   'local header offset' and 'disk number start', or false if the extra
 *   field holds no ZIP64 block
 */
function unpackZip64Extra( $extraField ) {
	$blockHeaderLayout = [
		'id' => 2,
		'size' => 2,
	];
	$blockHeaderSize = $this->getStructSize( $blockHeaderLayout );

	$zip64Layout = [
		'uncompressed size' => 8,
		'compressed size' => 8,
		'local header offset' => 8,
		'disk number start' => 4,
	];

	$total = strlen( $extraField );
	$cursor = 0;
	while ( $cursor < $total ) {
		$block = $this->unpack( $extraField, $blockHeaderLayout, $cursor );
		$payloadStart = $cursor + $blockHeaderSize;
		$payload = $this->unpack( $extraField,
			[ 'data' => [ 'string', $block['size'] ] ],
			$payloadStart );

		if ( $block['id'] == self::ZIP64_EXTRA_HEADER ) {
			return $this->unpack( $payload['data'], $zip64Layout );
		}

		// Not the block we want: skip over its payload
		$cursor = $payloadStart + $block['size'];
	}

	return false;
}
496 
/**
 * Get the length of the file.
 *
 * The size is taken from fstat() on first use and cached afterwards.
 *
 * @return int
 */
function getFileLength() {
	if ( $this->fileLength !== null ) {
		return $this->fileLength;
	}

	$stat = fstat( $this->file );
	$this->fileLength = $stat['size'];

	return $this->fileLength;
}
509 
/**
 * Get the file contents from a given offset.
 *
 * The data is assembled from the segment cache, so repeated reads of the
 * same region do not touch the file again.
 *
 * @param int $start The byte offset at which to start
 * @param int|null $length The number of bytes to return, or null to return
 *   everything from $start to the end of the file
 * @return string
 * @throws ZipDirectoryReaderError via error() ('zip-bad') if the requested
 *   range does not lie within the file or less data than requested is read
 */
function getBlock( $start, $length = null ) {
	$fileLength = $this->getFileLength();
	// A negative start used to be caught only indirectly, by a failed seek
	if ( $start < 0 || $start >= $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested position $start, " .
			"file length is $fileLength" );
	}
	if ( $length === null ) {
		$length = $fileLength - $start;
	}
	$end = $start + $length;
	if ( $end > $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested end position $end, " .
			"file length is $fileLength" );
	}

	$startSeg = floor( $start / self::SEGSIZE );
	// ceil() yields the index one past the last segment that holds requested
	// bytes, so it is an exclusive bound.
	$endSeg = ceil( $end / self::SEGSIZE );

	$block = '';
	for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
		$block .= $this->getSegment( $segIndex );
	}

	// Trim the leading part of the first segment and anything past $length.
	// The cast normalises the false that substr() can return for an empty
	// result on old PHP versions.
	$block = (string)substr( $block,
		$start - $startSeg * self::SEGSIZE,
		$length );

	if ( strlen( $block ) < $length ) {
		$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
	}

	return $block;
}
552 
/**
 * Get a section of the file starting at position $segIndex * self::SEGSIZE,
 * reading at most self::SEGSIZE bytes. Results are cached per segment index.
 *
 * A segment that starts at or beyond the end of the file is returned (and
 * cached) as an empty string without touching the file.
 *
 * @param int $segIndex
 * @return string
 * @throws ZipDirectoryReaderError via error() ('zip-bad') if seeking or
 *   reading fails
 */
function getSegment( $segIndex ) {
	if ( isset( $this->buffer[$segIndex] ) ) {
		return $this->buffer[$segIndex];
	}

	$bytePos = $segIndex * self::SEGSIZE;
	if ( $bytePos >= $this->getFileLength() ) {
		$seg = '';
	} else {
		// fseek() returns 0 on success, so any truthy result is a failure
		if ( fseek( $this->file, $bytePos ) ) {
			$this->error( 'zip-bad', "seek to $bytePos failed" );
		}
		$seg = fread( $this->file, self::SEGSIZE );
		if ( $seg === false ) {
			$this->error( 'zip-bad', "read from $bytePos failed" );
		}
	}

	$this->buffer[$segIndex] = $seg;

	return $seg;
}
586 
/**
 * Get the size of a structure in bytes.
 *
 * Each element of the structure is either a plain byte count or an array
 * whose second element is the byte count.
 *
 * @param array $struct Structure definition in the format used by unpack()
 * @return int
 */
function getStructSize( $struct ) {
	$total = 0;
	foreach ( $struct as $spec ) {
		$total += is_array( $spec ) ? $spec[1] : $spec;
	}

	return $total;
}
605 
/**
 * Unpack a binary structure.
 *
 * @param string $string The binary data input
 * @param array $struct Map of field name to field type. A type is either
 *   an integer N, meaning an unsigned little-endian integer of N bytes, or
 *   an array [ 'string', N ], meaning N raw bytes.
 * @param int $offset Byte offset in $string at which to start
 * @return array Unpacked values keyed by field name
 * @throws MWException if an array type names anything other than 'string'
 * @throws ZipDirectoryReaderError via error() if the structure would extend
 *   past the end of $string, or an integer exceeds 2^52
 */
function unpack( $string, $struct, $offset = 0 ) {
	$needed = $this->getStructSize( $struct );
	if ( $offset + $needed > strlen( $string ) ) {
		$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
	}

	$fields = [];
	$cursor = $offset;
	foreach ( $struct as $key => $type ) {
		if ( is_array( $type ) ) {
			list( $typeName, $fieldSize ) = $type;
			// Loose comparison, as a switch statement would perform
			if ( $typeName != 'string' ) {
				throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
			}
			$fields[$key] = substr( $string, $cursor, $fieldSize );
			$cursor += $fieldSize;
			continue;
		}

		// Unsigned little-endian integer
		$length = intval( $type );

		// Accumulate from the most significant byte down. PHP upgrades the
		// value to floating point automatically if it outgrows an integer.
		$value = 0;
		$i = $length;
		while ( $i-- > 0 ) {
			$value = $value * 256 + ord( $string[$cursor + $i] );
		}

		// Throw an exception if there was loss of precision
		if ( $value > pow( 2, 52 ) ) {
			$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
				'This could happen if we tried to unpack a 64-bit structure ' .
				'at an invalid location.' );
		}
		$fields[$key] = $value;
		$cursor += $length;
	}

	return $fields;
}
672 
/**
 * Returns a bit from a given position in an integer value, converted to
 * boolean.
 *
 * @param int $value
 * @param int $bitIndex The index of the bit, where 0 is the LSB
 * @return bool
 */
function testBit( $value, $bitIndex ) {
	$shifted = $value >> $bitIndex;

	return ( $shifted & 1 ) === 1;
}
684 
/**
 * Debugging helper function which dumps a string in hexdump -C format.
 *
 * Each output row shows the offset, up to 16 bytes in hexadecimal with an
 * extra gap after the eighth, and the same bytes as text between pipes,
 * with non-printable bytes shown as '.'. The output is printed directly.
 *
 * @param string $s
 */
function hexDump( $s ) {
	$n = strlen( $s );
	for ( $i = 0; $i < $n; $i += 16 ) {
		$hex = '';
		$text = '';
		for ( $j = 0; $j < 16; $j++ ) {
			// Single separator before each byte, doubled in the middle of the row
			$hex .= ( $j == 8 ) ? '  ' : ' ';
			if ( $i + $j >= $n ) {
				// Past the end of the input: pad with the width of a two-digit
				// hex byte so that the text column stays aligned on a short row.
				$hex .= '  ';
				$text .= ' ';
			} else {
				$char = $s[$i + $j];
				$hex .= sprintf( "%02X", ord( $char ) );
				$text .= ctype_print( $char ) ? $char : '.';
			}
		}

		printf( "%08X %s |%s|\n", $i, $hex, $text );
	}
}
718 }
719 
/**
 * Internal exception class. Carries a short error code in addition to the
 * regular exception message.
 */
class ZipDirectoryReaderError extends Exception {
	/** @var string The error code passed to the constructor */
	protected $errorCode;

	/**
	 * @param string $code Error code, also embedded in the exception message
	 */
	function __construct( $code ) {
		parent::__construct( "ZipDirectoryReader error: $code" );
		$this->errorCode = $code;
	}

	/**
	 * @return string The error code passed to the constructor
	 */
	function getErrorCode() {
		return $this->errorCode;
	}
}
findZip64CentralDirectory()
Find the location of the central directory, as would be seen by a ZIP64-compliant reader...
getStructSize($struct)
Get the size of a structure in bytes.
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
static read($fileName, $callback, $options=[])
Read a ZIP file and call a function for each file discovered in it.
$callback
The file data callback.
const GENERAL_CD_ENCRYPTED
The index of the "general field" bit for central directory encryption.
const GENERAL_UTF8
The index of the "general field" bit for UTF-8 file names.
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException'returning false will NOT prevent logging $e
Definition: hooks.txt:1980
$value
and how to run hooks for an and one after Each event has a preferably in CamelCase For ArticleDelete hook A clump of code and data that should be run when an event happens This can be either a function and a chunk of data
Definition: hooks.txt:6
getFileLength()
Get the length of the file.
$zip64
The ZIP64 mode.
execute()
Read the directory according to settings in $this.
static newFatal($message)
Factory function for fatal errors.
Definition: Status.php:89
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition: hooks.txt:1629
wfDebug($text, $dest= 'all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
unpackZip64Extra($extraField)
Interpret ZIP64 "extra field" data and return an associative array.
readZip64EndOfCentralDirectoryRecord()
Read the header called the "ZIP64 end of central directory record".
readZip64EndOfCentralDirectoryLocator()
Read the header called the "ZIP64 end of central directory locator".
error($code, $debugMessage)
Throw an error, and log a debug message.
const SEGSIZE
The segment size for the file contents cache.
We've cleaned up the code here by removing clumps of infrequently used code and moving them off somewhere else. It's much easier for someone working with this code to see what's _really_ going on, and make changes or fix bugs. In addition, we can take all the code that deals with the little-used title-reversing; we can concentrate it all in an extension file.
Definition: hooks.txt:93
readCentralDirectory($offset, $size)
Read the central directory at the given location.
$eocdr
Stored headers.
if($limit) $timestamp
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition: hooks.txt:1020
readEndOfCentralDirectoryRecord()
Read the header which is at the end of the central directory, unimaginatively called the "end of cent...
MediaWiki exception.
Definition: MWException.php:26
A class for reading ZIP file directories, for the purposes of upload verification.
unpack($string, $struct, $offset=0)
Unpack a binary structure.
findOldCentralDirectory()
Find the location of the central directory, as would be seen by a non-ZIP64 reader.
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users. It's targeted particularly at maintainers for Linux distributions, since it's been observed that distribution packages of MediaWiki often break. We've consistently had to recommend that users seeking support use official tarballs instead of their distribution's packages, and this often solves whatever problem the user is having. It would be nice if this could change, such as
Definition: distributors.txt:9
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable & $code
Definition: hooks.txt:776
getBlock($start, $length=null)
Get the file contents from a given offset.
Internal exception class.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
hexDump($s)
Debugging helper function which dumps a string in hexdump -C format.
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set $status
Definition: hooks.txt:1020
$file
The opened file resource.
const ZIP64_EXTRA_HEADER
The "extra field" ID for ZIP64 central directory entries.
getSegment($segIndex)
Get a section of the file starting at position $segIndex * self::SEGSIZE, of length self::SEGSIZE...
__construct($fileName, $callback, $options)
Private constructor.
$fileName
The file name.
testBit($value, $bitIndex)
Returns a bit from a given position in an integer value, converted to boolean.
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2376
$buffer
A segmented cache of the file contents.
static newGood($value=null)
Factory function for good results.
Definition: Status.php:101
$fileLength
The cached length of the file, or null if it has not been loaded yet.
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:310