MediaWiki  master
HistoryBlob.php
Go to the documentation of this file.
1 <?php
/**
 * Contract for text storage objects that hold one or more text items,
 * one of which may be designated as the "default text".
 */
interface HistoryBlob {
	/**
	 * Adds an item of text.
	 *
	 * @param string $text
	 * @return mixed Key under which the item can later be fetched with getItem()
	 */
	public function addItem( $text );

	/**
	 * Get item by key, or false if the key is not present.
	 *
	 * @param mixed $key Key previously returned by addItem()
	 * @return string|bool
	 */
	public function getItem( $key );

	/**
	 * Set the "default text", i.e. the item that getText() returns.
	 *
	 * @param string $text
	 */
	public function setText( $text );

	/**
	 * Get default text.
	 *
	 * @return string|bool
	 */
	public function getText();
}
68 
/** @var int Format version, persisted by __sleep() */
public $mVersion = 0;

/** @var bool True while $mItems holds the deflated, serialized form */
public $mCompressed = false;

/**
 * @var array|string Text items keyed by their md5 hash while uncompressed;
 *  a gzdeflate()d serialized string while compressed
 */
public $mItems = [];

/** @var string Hash of the item returned by getText() */
public $mDefaultHash = '';

/** @var int Sum of the byte lengths of the items added via addItem() */
public $mSize = 0;

/** @var int Size limit checked by isHappy() */
public $mMaxSize = 10000000;

/** @var int Item count limit checked by isHappy() */
public $mMaxCount = 100;
78 
/**
 * Constructor. Verifies that zlib is available, since the object cannot be
 * compressed or uncompressed without it.
 *
 * @throws MWException If gzdeflate() does not exist
 */
public function __construct() {
	if ( function_exists( 'gzdeflate' ) ) {
		return;
	}

	$message = "Need zlib support to read or write this "
		. "kind of history object (ConcatenatedGzipHistoryBlob)\n";
	throw new MWException( $message );
}
88 
/**
 * Adds an item of text and returns the key (md5 hash of the text) under
 * which it is stored. Adding the same text twice stores it only once.
 *
 * @param string $text
 * @return string md5 hash of $text
 */
public function addItem( $text ) {
	$this->uncompress();

	$key = md5( $text );
	if ( isset( $this->mItems[$key] ) ) {
		// Already stored; the size accounting must not count it twice.
		return $key;
	}

	$this->mItems[$key] = $text;
	$this->mSize += strlen( $text );
	return $key;
}
102 
/**
 * Get item by key, or false if the key is not present.
 *
 * @param string $hash Key returned by addItem()
 * @return string|bool
 */
public function getItem( $hash ) {
	$this->uncompress();
	return array_key_exists( $hash, $this->mItems )
		? $this->mItems[$hash]
		: false;
}
115 
/**
 * Set the "default text": stores $text as an item and remembers its hash
 * so that getText() returns it.
 *
 * @param string $text
 */
public function setText( $text ) {
	$this->uncompress();
	$defaultHash = $this->addItem( $text );
	$this->mDefaultHash = $defaultHash;
}
124 
/**
 * Get default text, i.e. the item stored under $mDefaultHash.
 *
 * @return string|bool False if no item is stored under the default hash
 */
public function getText() {
	$this->uncompress();
	$defaultHash = $this->mDefaultHash;
	return $this->getItem( $defaultHash );
}
132 
/**
 * Remove an item.
 *
 * Does nothing if no item is stored under $hash.
 *
 * @param string $hash Key returned by addItem()
 */
public function removeItem( $hash ) {
	// While compressed, $mItems is a deflated string rather than an array,
	// so it has to be expanded first, as every other item accessor does.
	$this->uncompress();

	if ( !array_key_exists( $hash, $this->mItems ) ) {
		// Unknown key: leave the size accounting untouched.
		return;
	}

	$this->mSize -= strlen( $this->mItems[$hash] );
	unset( $this->mItems[$hash] );
}
142 
/**
 * Compress the bulk data in the object: replaces the $mItems array with
 * its serialized, deflated form. Does nothing if already compressed.
 */
public function compress() {
	if ( $this->mCompressed ) {
		return;
	}

	$serialized = serialize( $this->mItems );
	$this->mItems = gzdeflate( $serialized );
	$this->mCompressed = true;
}
152 
/**
 * Uncompress bulk data: the inverse of compress(). Does nothing if the
 * object is not currently compressed.
 */
public function uncompress() {
	if ( !$this->mCompressed ) {
		return;
	}

	$serialized = gzinflate( $this->mItems );
	// NOTE(review): unserialize() is applied to stored data; this is only
	// safe as long as the blob contents are trusted.
	$this->mItems = unserialize( $serialized );
	$this->mCompressed = false;
}
162 
/**
 * Serialization hook: compresses the items and names the properties to
 * persist. $mSize and the limits are not part of the serialized form.
 *
 * @return string[] Names of the properties to serialize
 */
public function __sleep() {
	$this->compress();

	$persisted = [ 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' ];
	return $persisted;
}
170 
/**
 * Unserialization hook: expands the items again so that $mItems is an
 * array immediately after waking up.
 */
public function __wakeup() {
	$this->uncompress();
}
174 
/**
 * Helper function for compression jobs.
 * Returns true until the object is "full", i.e. until either the size
 * limit or the item count limit has been reached.
 *
 * @return bool
 */
public function isHappy() {
	if ( !( $this->mSize < $this->mMaxSize ) ) {
		return false;
	}

	return count( $this->mItems ) < $this->mMaxCount;
}
185 }
186 
/**
 * @var array Single-entry cache holding the most recently loaded blob
 *  object, keyed by old_id; filled and read by getText()
 */
protected static $blobCache = [];

/** @var int Location (old_id) of the main object to which this stub points */
public $mOldId;

/** @var string Content hash of the item within the main object */
public $mHash;

/** @var int Location (old_id) of the referring object */
public $mRef;
207 
/**
 * @param string $hash The content hash of the text
 * @param int $oldid The old_id of the object this stub points to. When
 *  omitted (or 0), the location is left unset and has to be provided
 *  through setLocation().
 */
public function __construct( $hash = '', $oldid = 0 ) {
	$this->mHash = $hash;

	// Honour an explicitly supplied location instead of discarding it.
	// The default leaves $mOldId untouched, so existing callers and the
	// serialized form of stubs built without an old_id do not change.
	if ( $oldid ) {
		$this->mOldId = $oldid;
	}
}
215 
/**
 * Sets the location (old_id) of the main object to which this object
 * points.
 *
 * @param int $id
 */
public function setLocation( $id ) {
	$this->mOldId = $id;
}
224 
/**
 * Sets the location (old_id) of the referring object.
 *
 * @param int $id
 */
public function setReferrer( $id ) {
	$this->mRef = $id;
}
232 
/**
 * Gets the location of the referring object.
 *
 * @return int Value previously stored by setReferrer()
 */
public function getReferrer() {
	return $this->mRef;
}
240 
/**
 * Loads the object this stub points to (from the one-entry cache or from
 * the text table) and returns the item stored under this stub's hash.
 *
 * @return string|bool The item text, or false if the row is missing, is
 *  not flagged as an object, cannot be fetched or decoded, or does not
 *  contain the item
 */
public function getText() {
	if ( isset( self::$blobCache[$this->mOldId] ) ) {
		$obj = self::$blobCache[$this->mOldId];
	} else {
		$dbr = wfGetDB( DB_SLAVE );
		$row = $dbr->selectRow(
			'text',
			[ 'old_flags', 'old_text' ],
			[ 'old_id' => $this->mOldId ]
		);

		if ( !$row ) {
			return false;
		}

		$flags = explode( ',', $row->old_flags );
		if ( in_array( 'external', $flags ) ) {
			// old_text holds a URL of the form "scheme://location"
			$url = $row->old_text;
			$parts = explode( '://', $url, 2 );
			if ( !isset( $parts[1] ) || $parts[1] == '' ) {
				return false;
			}
			$row->old_text = ExternalStore::fetchFromURL( $url );
		}

		if ( !in_array( 'object', $flags ) ) {
			return false;
		}

		$serialized = $row->old_text;
		if ( in_array( 'gzip', $flags ) ) {
			// This shouldn't happen, but a bug in the compress script
			// may at times gzip-compress a HistoryBlob object row.
			$serialized = gzinflate( $serialized );
		}

		if ( !is_string( $serialized ) ) {
			// The external fetch or the inflation failed; there is
			// nothing to decode.
			return false;
		}

		$obj = unserialize( $serialized );

		if ( is_string( $obj ) ) {
			// Correct for old double-serialization bug.
			$obj = unserialize( $obj );
		}

		if ( !is_object( $obj ) ) {
			// Corrupt or unexpected payload: report "no text" instead of
			// failing fatally on the method call below.
			return false;
		}

		// Save this item for reference; if pulling many
		// items in a row we'll likely use it again.
		$obj->uncompress();
		self::$blobCache = [ $this->mOldId => $obj ];
	}

	return $obj->getItem( $this->mHash );
}
295 
/**
 * Get the content hash.
 *
 * @return string
 */
public function getHash() {
	return $this->mHash;
}
304 }
305 
/** @var int Location (cur_id) of the row whose text getText() returns */
public $mCurId;
317 
/**
 * @param int $curid The cur_id this stub points to
 */
public function __construct( $curid = 0 ) {
	$this->mCurId = $curid;
}
324 
/**
 * Sets the location (cur_id) of the main object to which this object
 * points.
 *
 * @param int $id
 */
public function setLocation( $id ) {
	$this->mCurId = $id;
}
334 
/**
 * Fetches cur_text from the row of the cur table identified by $mCurId.
 *
 * @return string|bool The text, or false if no such row exists
 */
public function getText() {
	$row = wfGetDB( DB_SLAVE )->selectRow(
		'cur',
		[ 'cur_text' ],
		[ 'cur_id' => $this->mCurId ]
	);

	return $row ? $row->cur_text : false;
}
346 }
347 
/**
 * Diff-based history compression.
 * Requires xdiff 1.5+ and zlib.
 */
class DiffHistoryBlob implements HistoryBlob {
	/** @var array Uncompressed item cache */
	public $mItems = [];

	/** @var int Total uncompressed size */
	public $mSize = 0;

	/**
	 * @var array|null Array of binary diffs. Applying them in order with
	 *  patch(), starting from an empty string and feeding each result into
	 *  the next step, reproduces the items (see uncompress()). Null until
	 *  compress() has run.
	 */
	public $mDiffs;

	/**
	 * @var array The diff map: $mDiffMap[$i] is the $mItems key of the text
	 *  produced by applying $mDiffs[$i]
	 */
	public $mDiffMap;

	/** @var int The key for getText() */
	public $mDefaultKey;

	/** @var string Compressed storage; the only property that is serialized */
	public $mCompressed;

	/** @var bool True if the object is locked against further writes */
	public $mFrozen = false;

	/**
	 * @var int The maximum uncompressed size before the object becomes sad.
	 *  Should be less than max_allowed_packet.
	 */
	public $mMaxSize = 10000000;

	/** @var int The maximum number of text items before the object becomes sad */
	public $mMaxCount = 100;

	/** Constants from xdiff.h */
	const XDL_BDOP_INS = 1;
	const XDL_BDOP_CPY = 2;
	const XDL_BDOP_INSB = 3;

	/**
	 * @throws MWException If zlib (gzdeflate()) is not available
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
		}
	}

	/**
	 * Adds an item of text and returns its key.
	 *
	 * @param string $text
	 * @return int Index of the new item in $mItems
	 * @throws MWException If the object was restored by __wakeup()
	 */
	public function addItem( $text ) {
		if ( $this->mFrozen ) {
			throw new MWException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
		}

		$this->mItems[] = $text;
		$this->mSize += strlen( $text );
		$this->mDiffs = null; // later
		return count( $this->mItems ) - 1;
	}

	/**
	 * Get item by key, or false if the key is not present.
	 *
	 * @param int $key
	 * @return string|bool
	 */
	public function getItem( $key ) {
		// isset() also rejects a null key (no default text set yet), which
		// would otherwise read an undefined index.
		if ( $key === null || !isset( $this->mItems[$key] ) ) {
			// An item restored as false by a failed patch() is reported as
			// false here as well, so the result matches a direct read.
			return false;
		}
		return $this->mItems[$key];
	}

	/**
	 * Set the "default text", i.e. the item returned by getText().
	 *
	 * @param string $text
	 */
	public function setText( $text ) {
		$this->mDefaultKey = $this->addItem( $text );
	}

	/**
	 * Get default text.
	 *
	 * @return string|bool False if no default text is available
	 */
	public function getText() {
		return $this->getItem( $this->mDefaultKey );
	}

	/**
	 * Compute $mDiffs and $mDiffMap from $mItems.
	 *
	 * @throws MWException If there is something to compress but xdiff's
	 *  xdiff_string_rabdiff() is not available
	 */
	public function compress() {
		if ( isset( $this->mDiffs ) ) {
			// Already compressed
			return;
		}
		if ( !count( $this->mItems ) ) {
			// Empty
			return;
		}
		// Only require xdiff once there is real work to do, so that empty
		// or already-compressed objects can be serialized without it.
		if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
			throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
		}

		// Create two diff sequences: one for main text and one for small text
		$sequences = [
			'small' => [
				'tail' => '',
				'diffs' => [],
				'map' => [],
			],
			'main' => [
				'tail' => '',
				'diffs' => [],
				'map' => [],
			],
		];
		$smallFactor = 0.5;

		$mItemsCount = count( $this->mItems );
		for ( $i = 0; $i < $mItemsCount; $i++ ) {
			$text = $this->mItems[$i];
			if ( $i == 0 ) {
				$seqName = 'main';
			} else {
				// A text much shorter than the latest main text goes into
				// the 'small' sequence.
				$mainTail = $sequences['main']['tail'];
				if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
					$seqName = 'small';
				} else {
					$seqName = 'main';
				}
			}
			$seq =& $sequences[$seqName];
			$tail = $seq['tail'];
			$diff = $this->diff( $tail, $text );
			$seq['diffs'][] = $diff;
			$seq['map'][] = $i;
			$seq['tail'] = $text;
		}
		unset( $seq ); // unlink dangerous alias

		// Knit the sequences together
		$tail = '';
		$this->mDiffs = [];
		$this->mDiffMap = [];
		foreach ( $sequences as $seq ) {
			if ( !count( $seq['diffs'] ) ) {
				continue;
			}
			if ( $tail === '' ) {
				$this->mDiffs[] = $seq['diffs'][0];
			} else {
				// The first diff of this sequence was taken against '';
				// rebase it onto the tail of the previous sequence.
				$head = $this->patch( '', $seq['diffs'][0] );
				$this->mDiffs[] = $this->diff( $tail, $head );
			}
			$this->mDiffMap[] = $seq['map'][0];
			$diffsCount = count( $seq['diffs'] );
			for ( $i = 1; $i < $diffsCount; $i++ ) {
				$this->mDiffs[] = $seq['diffs'][$i];
				$this->mDiffMap[] = $seq['map'][$i];
			}
			$tail = $seq['tail'];
		}
	}

	/**
	 * Compute a binary diff from $t1 to $t2 with xdiff_string_rabdiff().
	 *
	 * @param string $t1
	 * @param string $t2
	 * @return string
	 */
	public function diff( $t1, $t2 ) {
		# Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
		# "String is not zero-terminated"
		MediaWiki\suppressWarnings();
		$diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
		MediaWiki\restoreWarnings();
		return $diff;
	}

	/**
	 * Apply a binary diff to $base. Uses xdiff_string_bpatch() when it is
	 * available and a pure PHP implementation otherwise.
	 *
	 * @param string $base
	 * @param string $diff
	 * @return bool|string The patched text; the pure PHP implementation
	 *  returns false if the diff is malformed or does not match $base
	 */
	public function patch( $base, $diff ) {
		if ( function_exists( 'xdiff_string_bpatch' ) ) {
			MediaWiki\suppressWarnings();
			$text = xdiff_string_bpatch( $base, $diff ) . '';
			MediaWiki\restoreWarnings();
			return $text;
		}

		# Pure PHP implementation

		// The header consists of two 32-bit little-endian fields: the
		// checksum of the base text and its length.
		if ( strlen( $diff ) < 8 ) {
			wfDebug( __METHOD__ . ": diff is too short to contain a header\n" );
			return false;
		}
		$header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );

		# Check the checksum if hash extension is available
		$ofp = $this->xdiffAdler32( $base );
		if ( $ofp !== false && $ofp !== substr( $diff, 0, 4 ) ) {
			wfDebug( __METHOD__ . ": incorrect base checksum\n" );
			return false;
		}
		if ( $header['csize'] != strlen( $base ) ) {
			wfDebug( __METHOD__ . ": incorrect base length\n" );
			return false;
		}

		$p = 8;
		$out = '';
		while ( $p < strlen( $diff ) ) {
			$x = unpack( 'Cop', substr( $diff, $p, 1 ) );
			$op = $x['op'];
			++$p;
			switch ( $op ) {
				case self::XDL_BDOP_INS:
					// Insert with a one-byte length field
					$x = unpack( 'Csize', substr( $diff, $p, 1 ) );
					$p++;
					$out .= substr( $diff, $p, $x['size'] );
					$p += $x['size'];
					break;
				case self::XDL_BDOP_INSB:
					// Insert with a four-byte length field
					$x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
					$p += 4;
					$out .= substr( $diff, $p, $x['csize'] );
					$p += $x['csize'];
					break;
				case self::XDL_BDOP_CPY:
					// Copy a range of the base text
					$x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
					$p += 8;
					$out .= substr( $base, $x['off'], $x['csize'] );
					break;
				default:
					wfDebug( __METHOD__ . ": invalid op\n" );
					return false;
			}
		}
		return $out;
	}

	/**
	 * Compute a binary "Adler-32" checksum as defined by LibXDiff.
	 *
	 * @param string $s
	 * @return string|bool Four-byte binary string, or false if the hash
	 *  extension is not available
	 */
	public function xdiffAdler32( $s ) {
		if ( !function_exists( 'hash' ) ) {
			return false;
		}

		static $init;
		if ( $init === null ) {
			$init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
		}

		// The real Adler-32 checksum of $init is zero, so it initialises the
		// state to zero, as it is at the start of LibXDiff's checksum
		// algorithm. Appending the subject string then simulates LibXDiff.
		return strrev( hash( 'adler32', $init . $s, true ) );
	}

	/**
	 * Rebuild $mItems from $mDiffs and $mDiffMap. Does nothing if there are
	 * no diffs.
	 */
	public function uncompress() {
		if ( !$this->mDiffs ) {
			return;
		}
		$tail = '';
		$mDiffsCount = count( $this->mDiffs );
		for ( $diffKey = 0; $diffKey < $mDiffsCount; $diffKey++ ) {
			$textKey = $this->mDiffMap[$diffKey];
			// Each diff is relative to the text produced by the previous one
			$text = $this->patch( $tail, $this->mDiffs[$diffKey] );
			$this->mItems[$textKey] = $text;
			$tail = $text;
		}
	}

	/**
	 * Serialization hook: packs the diffs, the diff map and the default key
	 * into $mCompressed, which is the only property that gets serialized.
	 *
	 * @return string[]
	 */
	public function __sleep() {
		$this->compress();
		if ( !count( $this->mItems ) ) {
			// Empty object
			$info = false;
		} else {
			// Take forward differences to improve the compression ratio for sequences
			$map = '';
			$prev = 0;
			foreach ( $this->mDiffMap as $i ) {
				if ( $map !== '' ) {
					$map .= ',';
				}
				$map .= $i - $prev;
				$prev = $i;
			}
			$info = [
				'diffs' => $this->mDiffs,
				'map' => $map
			];
		}
		if ( isset( $this->mDefaultKey ) ) {
			if ( $info === false ) {
				// Same result as writing to an offset of false, without
				// relying on the deprecated implicit conversion to array.
				$info = [];
			}
			$info['default'] = $this->mDefaultKey;
		}
		$this->mCompressed = gzdeflate( serialize( $info ) );
		return [ 'mCompressed' ];
	}

	/**
	 * Unserialization hook: unpacks $mCompressed and rebuilds the items.
	 * The object is frozen afterwards, so addItem() will throw.
	 */
	public function __wakeup() {
		// addItem() doesn't work if mItems is partially filled from mDiffs
		$this->mFrozen = true;
		$info = unserialize( gzinflate( $this->mCompressed ) );
		unset( $this->mCompressed );

		if ( !$info ) {
			// Empty object
			return;
		}

		if ( isset( $info['default'] ) ) {
			$this->mDefaultKey = $info['default'];
		}
		$this->mDiffs = $info['diffs'];
		if ( isset( $info['base'] ) ) {
			// Old format: a separate base text followed by diffs. Turn the
			// base into a leading diff that inserts it into an empty string.
			$this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
			array_unshift( $this->mDiffs,
				pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
				$info['base'] );
		} else {
			// New format: the map is a comma-separated list of forward
			// differences, as written by __sleep().
			$map = explode( ',', $info['map'] );
			$cur = 0;
			$this->mDiffMap = [];
			foreach ( $map as $i ) {
				// Cast so that an empty map entry counts as 0 instead of
				// triggering a non-numeric operand error.
				$cur += (int)$i;
				$this->mDiffMap[] = $cur;
			}
		}
		$this->uncompress();
	}

	/**
	 * Helper function for compression jobs.
	 * Returns true until the object is "full", i.e. until either the size
	 * limit or the item count limit has been reached.
	 *
	 * @return bool
	 */
	public function isHappy() {
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}

}
removeItem($hash)
Remove an item.
wfGetDB($db, $groups=[], $wiki=false)
Get a Database object.
isHappy()
Helper function for compression jobs. Returns true until the object is "full" and ready to be committed.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:776
int $mMaxSize
The maximum uncompressed size before the object becomes sad Should be less than max_allowed_packet.
int $mSize
Total uncompressed size.
__construct($hash= '', $oldid=0)
addItem($text)
Adds an item of text, returns a stub object which points to the item.
compress()
Compress the bulk data in the object.
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2588
patch($base, $diff)
array $mItems
Uncompressed item cache.
Pointer object for an item within a CGZ blob stored in the text table.
const XDL_BDOP_INS
Constants from xdiff.h.
setText($text)
Set the "default text" This concept is an odd property of the current DB schema, whereby each text it...
wfDebug($text, $dest= 'all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
array $mDiffMap
The diff map, see above.
Concatenated gzip (CGZ) storage Improves compression ratio by concatenating like objects before gzipp...
Definition: HistoryBlob.php:73
unserialize($serialized)
Definition: ApiMessage.php:102
setLocation($id)
Sets the location (old_id) of the main object to which this object points.
MediaWiki exception.
Definition: MWException.php:26
uncompress()
Uncompress bulk data.
isHappy()
Helper function for compression jobs. Returns true until the object is "full" and ready to be committed.
const DB_SLAVE
Definition: Defines.php:46
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
diff($t1, $t2)
To speed up conversion from 1.4 to 1.5 schema, text rows can refer to the leftover cur table as the backend.
__construct()
Constructor.
Definition: HistoryBlob.php:82
getText()
Get default text.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
setReferrer($id)
Sets the location (old_id) of the referring object.
Diff-based history compression Requires xdiff 1.5+ and zlib.
bool $mFrozen
True if the object is locked against further writes.
static fetchFromURL($url, array $params=[])
Fetch data from given URL.
__construct($curid=0)
getHash()
Get the content hash.
array $mDiffs
Array of diffs.
getItem($key)
Get item by key, or false if the key is not present.
static array $blobCache
One-step cache variable to hold base blobs; operations that pull multiple revisions may often pull mu...
string $mCompressed
Compressed storage.
int $mDefaultKey
The key for getText()
xdiffAdler32($s)
Compute a binary "Adler-32" checksum as defined by LibXDiff, i.e.
serialize()
Definition: ApiMessage.php:94
setLocation($id)
Sets the location (cur_id) of the main object to which this object points.
getReferrer()
Gets the location of the referring object.
int $mMaxCount
The maximum number of text items before the object becomes sad.
Base class for general text storage via the "object" flag in old_flags, or two-part external storage ...
Definition: HistoryBlob.php:28