[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * Efficient concatenated text storage.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

/**
 * Base class for general text storage via the "object" flag in old_flags, or
 * two-part external storage URLs. Used to represent efficient concatenated
 * storage, and migration-related pointer objects.
 */
interface HistoryBlob {
	/**
	 * Adds an item of text, returns a stub object which points to the item.
	 * You must call setLocation() on the stub object before storing it to the
	 * database
	 *
	 * @param string $text
	 *
	 * @return string The key for getItem()
	 */
	public function addItem( $text );

	/**
	 * Get item by key, or false if the key is not present
	 *
	 * @param string $key
	 *
	 * @return string|bool
	 */
	public function getItem( $key );

	/**
	 * Set the "default text"
	 * This concept is an odd property of the current DB schema, whereby each text item has a revision
	 * associated with it. The default text is the text of the associated revision. There may, however,
	 * be other revisions in the same object.
	 *
	 * Default text is not required for two-part external storage URLs.
	 *
	 * @param string $text
	 */
	public function setText( $text );

	/**
	 * Get default text. This is called from Revision::getRevisionText()
	 *
	 * @return string
	 */
	public function getText();
}

/**
 * Concatenated gzip (CGZ) storage
 * Improves compression ratio by concatenating like objects before gzipping
 */
class ConcatenatedGzipHistoryBlob implements HistoryBlob {
	public $mVersion = 0, $mCompressed = false, $mItems = array(), $mDefaultHash = '';
	public $mSize = 0;
	public $mMaxSize = 10000000;
	public $mMaxCount = 100;

	/**
	 * Constructor
	 *
	 * @throws MWException If zlib (gzdeflate) is not available
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write this "
				. "kind of history object (ConcatenatedGzipHistoryBlob)\n" );
		}
	}

	/**
	 * Store a text item, keyed by its MD5 hash. Adding the same text twice
	 * stores it once and does not grow mSize.
	 *
	 * @param string $text
	 * @return string The MD5 hash of $text, usable as the key for getItem()
	 */
	public function addItem( $text ) {
		$this->uncompress();
		$hash = md5( $text );
		if ( !isset( $this->mItems[$hash] ) ) {
			$this->mItems[$hash] = $text;
			$this->mSize += strlen( $text );
		}
		return $hash;
	}

	/**
	 * Get an item by hash, or false if the hash is not present
	 *
	 * @param string $hash
	 * @return string|bool
	 */
	public function getItem( $hash ) {
		$this->uncompress();
		if ( array_key_exists( $hash, $this->mItems ) ) {
			return $this->mItems[$hash];
		} else {
			return false;
		}
	}

	/**
	 * Add $text as an item and remember its hash as the default text
	 *
	 * @param string $text
	 * @return void
	 */
	public function setText( $text ) {
		$this->uncompress();
		$this->mDefaultHash = $this->addItem( $text );
	}

	/**
	 * Get the default text, or false if no item matches the default hash
	 *
	 * @return string|bool
	 */
	public function getText() {
		$this->uncompress();
		return $this->getItem( $this->mDefaultHash );
	}

	/**
	 * Remove an item. Removing a hash that is not present is a no-op.
	 *
	 * @param string $hash
	 */
	public function removeItem( $hash ) {
		// Like every other accessor, make sure mItems is the array form:
		// after compress() it is a deflated string and cannot be indexed
		// by hash.
		$this->uncompress();
		if ( !isset( $this->mItems[$hash] ) ) {
			return;
		}
		$this->mSize -= strlen( $this->mItems[$hash] );
		unset( $this->mItems[$hash] );
	}

	/**
	 * Compress the bulk data in the object
	 *
	 * Replaces the mItems array with its serialized, deflated string form.
	 */
	public function compress() {
		if ( !$this->mCompressed ) {
			$this->mItems = gzdeflate( serialize( $this->mItems ) );
			$this->mCompressed = true;
		}
	}

	/**
	 * Uncompress bulk data
	 *
	 * Restores mItems from its deflated string form to an array.
	 */
	public function uncompress() {
		if ( $this->mCompressed ) {
			$this->mItems = unserialize( gzinflate( $this->mItems ) );
			$this->mCompressed = false;
		}
	}

	/**
	 * Compress the items before serialization.
	 * Note that mSize, mMaxSize and mMaxCount are not part of the
	 * serialized form.
	 *
	 * @return array Names of the properties to serialize
	 */
	public function __sleep() {
		$this->compress();
		return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' );
	}

	/**
	 * Restore the item array after unserialization
	 */
	public function __wakeup() {
		$this->uncompress();
	}

	/**
	 * Helper function for compression jobs
	 * Returns true until the object is "full" and ready to be committed
	 *
	 * @return bool
	 */
	public function isHappy() {
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}
}

/**
 * Pointer object for an item within a CGZ blob stored in the text table.
 */
class HistoryBlobStub {
	/**
	 * @var array One-step cache variable to hold base blobs; operations that
	 * pull multiple revisions may often pull multiple times from the same
	 * blob. By keeping the last-used one open, we avoid redundant
	 * unserialization and decompression overhead.
	 */
	protected static $blobCache = array();

	/** @var int */
	public $mOldId;

	/** @var string */
	public $mHash;

	/** @var string */
	public $mRef;

	/**
	 * @param string $hash The content hash of the text
	 * @param int $oldid The old_id for the CGZ object
	 */
	public function __construct( $hash = '', $oldid = 0 ) {
		$this->mHash = $hash;
		// Honour the documented $oldid argument. When it is omitted, mOldId
		// is left unset exactly as before, so setLocation() remains the way
		// to assign it and the serialized form of such stubs is unchanged.
		if ( $oldid ) {
			$this->mOldId = $oldid;
		}
	}

	/**
	 * Sets the location (old_id) of the main object to which this object
	 * points
	 * @param int $id
	 */
	public function setLocation( $id ) {
		$this->mOldId = $id;
	}

	/**
	 * Sets the location of the referring object
	 * @param string $id
	 */
	public function setReferrer( $id ) {
		$this->mRef = $id;
	}

	/**
	 * Gets the location of the referring object
	 * @return string
	 */
	public function getReferrer() {
		return $this->mRef;
	}

	/**
	 * Fetch the text this stub points to.
	 *
	 * Loads the text row identified by mOldId (resolving an external-store
	 * URL if the row is flagged 'external'), unserializes the blob object it
	 * contains, and returns the item stored under mHash. The most recently
	 * loaded blob is kept in a one-entry static cache.
	 *
	 * @return string|bool The text, or false if the row is missing, is not
	 *   flagged as an object, cannot be unserialized into an object, or does
	 *   not contain the hash
	 */
	public function getText() {
		if ( isset( self::$blobCache[$this->mOldId] ) ) {
			$obj = self::$blobCache[$this->mOldId];
		} else {
			$dbr = wfGetDB( DB_SLAVE );
			$row = $dbr->selectRow(
				'text',
				array( 'old_flags', 'old_text' ),
				array( 'old_id' => $this->mOldId )
			);

			if ( !$row ) {
				return false;
			}

			$flags = explode( ',', $row->old_flags );
			if ( in_array( 'external', $flags ) ) {
				$url = $row->old_text;
				$parts = explode( '://', $url, 2 );
				if ( !isset( $parts[1] ) || $parts[1] == '' ) {
					return false;
				}
				$row->old_text = ExternalStore::fetchFromUrl( $url );

			}

			if ( !in_array( 'object', $flags ) ) {
				return false;
			}

			if ( in_array( 'gzip', $flags ) ) {
				// This shouldn't happen, but a bug in the compress script
				// may at times gzip-compress a HistoryBlob object row.
				$obj = unserialize( gzinflate( $row->old_text ) );
			} else {
				$obj = unserialize( $row->old_text );
			}

			if ( !is_object( $obj ) ) {
				// Correct for old double-serialization bug.
				$obj = unserialize( $obj );
			}

			if ( !is_object( $obj ) ) {
				// The stored text could not be turned into a blob object
				// (corrupt row, or nothing usable came back from the
				// external store). Report failure the same way as the
				// other error paths instead of calling a method on a
				// non-object.
				return false;
			}

			// Save this item for reference; if pulling many
			// items in a row we'll likely use it again.
			$obj->uncompress();
			self::$blobCache = array( $this->mOldId => $obj );
		}

		return $obj->getItem( $this->mHash );
	}

	/**
	 * Get the content hash
	 *
	 * @return string
	 */
	public function getHash() {
		return $this->mHash;
	}
}

/**
 * To speed up conversion from 1.4 to 1.5 schema, text rows can refer to the
 * leftover cur table as the backend. This avoids expensively copying hundreds
 * of megabytes of data during the conversion downtime.
 *
 * Serialized HistoryBlobCurStub objects will be inserted into the text table
 * on conversion if $wgLegacySchemaConversion is set to true.
 */
class HistoryBlobCurStub {
	/** @var int */
	public $mCurId;

	/**
	 * @param int $curid The cur_id pointed to
	 */
	public function __construct( $curid = 0 ) {
		$this->mCurId = $curid;
	}

	/**
	 * Sets the location (cur_id) of the main object to which this object
	 * points
	 *
	 * @param int $id
	 */
	public function setLocation( $id ) {
		$this->mCurId = $id;
	}

	/**
	 * Fetch cur_text from the cur table row whose cur_id is mCurId
	 *
	 * @return string|bool The text, or false if there is no such row
	 */
	public function getText() {
		$dbr = wfGetDB( DB_SLAVE );
		$row = $dbr->selectRow( 'cur', array( 'cur_text' ), array( 'cur_id' => $this->mCurId ) );
		if ( !$row ) {
			return false;
		}
		return $row->cur_text;
	}
}

/**
 * Diff-based history compression
 * Requires xdiff 1.5+ and zlib
 */
class DiffHistoryBlob implements HistoryBlob {
	/** @var array Uncompressed item cache */
	public $mItems = array();

	/** @var int Total uncompressed size */
	public $mSize = 0;

	/**
	 * @var array Array of diffs. If a diff D from A to B is notated D = B - A,
	 * and Z is an empty string:
	 *
	 *              { item[map[i]] - item[map[i-1]]   where i > 0
	 *    diff[i] = {
	 *              { item[map[i]] - Z                where i = 0
	 */
	public $mDiffs;

	/** @var array The diff map, see above */
	public $mDiffMap;

	/** @var int The key for getText()
	 */
	public $mDefaultKey;

	/** @var string Compressed storage */
	public $mCompressed;

	/** @var bool True if the object is locked against further writes */
	public $mFrozen = false;

	/**
	 * @var int The maximum uncompressed size before the object becomes sad
	 * Should be less than max_allowed_packet
	 */
	public $mMaxSize = 10000000;

	/** @var int The maximum number of text items before the object becomes sad */
	public $mMaxCount = 100;

	/** Constants from xdiff.h */
	const XDL_BDOP_INS = 1;
	const XDL_BDOP_CPY = 2;
	const XDL_BDOP_INSB = 3;

	/**
	 * @throws MWException If zlib (gzdeflate) is not available
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
		}
	}

	/**
	 * Append a text item and invalidate any previously computed diffs.
	 *
	 * @throws MWException If the object has been through __wakeup()
	 * @param string $text
	 * @return int The zero-based index of the new item, usable with getItem()
	 */
	public function addItem( $text ) {
		if ( $this->mFrozen ) {
			throw new MWException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
		}

		$this->mItems[] = $text;
		$this->mSize += strlen( $text );
		$this->mDiffs = null; // later
		return count( $this->mItems ) - 1;
	}

	/**
	 * Get item by key, or false if the key is not present
	 *
	 * @param string $key
	 * @return string|bool
	 */
	public function getItem( $key ) {
		// Follow the HistoryBlob contract: an unknown key (including the
		// unset default key of an empty blob) yields false rather than an
		// undefined-index notice and null.
		if ( $key === null || !isset( $this->mItems[$key] ) ) {
			return false;
		}
		return $this->mItems[$key];
	}

	/**
	 * Add $text as an item and remember its key as the default text
	 *
	 * @param string $text
	 */
	public function setText( $text ) {
		$this->mDefaultKey = $this->addItem( $text );
	}

	/**
	 * Get the default text, or false if no default text has been set
	 *
	 * @return string|bool
	 */
	public function getText() {
		return $this->getItem( $this->mDefaultKey );
	}

	/**
	 * Build mDiffs and mDiffMap from mItems.
	 *
	 * Items are split into two chains, 'main' and 'small'; each item is
	 * diffed against the previous item of its own chain. The chains are then
	 * joined into a single diff list, with mDiffMap recording which item
	 * each diff reconstructs.
	 *
	 * @throws MWException If xdiff_string_rabdiff() is not available
	 */
	public function compress() {
		if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
			throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
		}
		if ( isset( $this->mDiffs ) ) {
			// Already compressed
			return;
		}
		if ( !count( $this->mItems ) ) {
			// Empty
			return;
		}

		// Create two diff sequences: one for main text and one for small text
		$sequences = array(
			'small' => array(
				'tail' => '',
				'diffs' => array(),
				'map' => array(),
			),
			'main' => array(
				'tail' => '',
				'diffs' => array(),
				'map' => array(),
			),
		);
		// An item shorter than this fraction of the current main tail goes
		// into the 'small' sequence.
		$smallFactor = 0.5;

		$mItemsCount = count( $this->mItems );
		for ( $i = 0; $i < $mItemsCount; $i++ ) {
			$text = $this->mItems[$i];
			if ( $i == 0 ) {
				$seqName = 'main';
			} else {
				$mainTail = $sequences['main']['tail'];
				if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
					$seqName = 'small';
				} else {
					$seqName = 'main';
				}
			}
			$seq =& $sequences[$seqName];
			$tail = $seq['tail'];
			$diff = $this->diff( $tail, $text );
			$seq['diffs'][] = $diff;
			$seq['map'][] = $i;
			$seq['tail'] = $text;
		}
		unset( $seq ); // unlink dangerous alias

		// Knit the sequences together
		$tail = '';
		$this->mDiffs = array();
		$this->mDiffMap = array();
		foreach ( $sequences as $seq ) {
			if ( !count( $seq['diffs'] ) ) {
				continue;
			}
			if ( $tail === '' ) {
				$this->mDiffs[] = $seq['diffs'][0];
			} else {
				// The first diff of this sequence was taken against the
				// empty string; re-base it onto the tail of the previous
				// sequence so the combined list forms one chain.
				$head = $this->patch( '', $seq['diffs'][0] );
				$this->mDiffs[] = $this->diff( $tail, $head );
			}
			$this->mDiffMap[] = $seq['map'][0];
			$diffsCount = count( $seq['diffs'] );
			for ( $i = 1; $i < $diffsCount; $i++ ) {
				$this->mDiffs[] = $seq['diffs'][$i];
				$this->mDiffMap[] = $seq['map'][$i];
			}
			$tail = $seq['tail'];
		}
	}

	/**
	 * Compute a binary diff from $t1 to $t2 using xdiff_string_rabdiff()
	 *
	 * @param string $t1
	 * @param string $t2
	 * @return string
	 */
	public function diff( $t1, $t2 ) {
		# Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
		# "String is not zero-terminated"
		wfSuppressWarnings();
		$diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
		wfRestoreWarnings();
		return $diff;
	}

	/**
	 * Apply a binary diff to $base.
	 *
	 * Uses xdiff_string_bpatch() when available, otherwise a pure PHP
	 * decoder for the same format: an 8-byte header (base checksum, base
	 * length) followed by insert and copy operations.
	 *
	 * @param string $base
	 * @param string $diff
	 * @return bool|string The patched text; the pure PHP decoder returns
	 *   false on a checksum or length mismatch or an invalid operation
	 */
	public function patch( $base, $diff ) {
		if ( function_exists( 'xdiff_string_bpatch' ) ) {
			wfSuppressWarnings();
			$text = xdiff_string_bpatch( $base, $diff ) . '';
			wfRestoreWarnings();
			return $text;
		}

		# Pure PHP implementation

		$header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );

		# Check the checksum if hash extension is available
		$ofp = $this->xdiffAdler32( $base );
		if ( $ofp !== false && $ofp !== substr( $diff, 0, 4 ) ) {
			wfDebug( __METHOD__ . ": incorrect base checksum\n" );
			return false;
		}
		if ( $header['csize'] != strlen( $base ) ) {
			wfDebug( __METHOD__ . ": incorrect base length\n" );
			return false;
		}

		$p = 8;
		$out = '';
		while ( $p < strlen( $diff ) ) {
			$x = unpack( 'Cop', substr( $diff, $p, 1 ) );
			$op = $x['op'];
			++$p;
			switch ( $op ) {
				case self::XDL_BDOP_INS:
					// Insert with a one-byte length
					$x = unpack( 'Csize', substr( $diff, $p, 1 ) );
					$p++;
					$out .= substr( $diff, $p, $x['size'] );
					$p += $x['size'];
					break;
				case self::XDL_BDOP_INSB:
					// Insert with a four-byte little-endian length
					$x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
					$p += 4;
					$out .= substr( $diff, $p, $x['csize'] );
					$p += $x['csize'];
					break;
				case self::XDL_BDOP_CPY:
					// Copy a range (offset, length) out of the base text
					$x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
					$p += 8;
					$out .= substr( $base, $x['off'], $x['csize'] );
					break;
				default:
					wfDebug( __METHOD__ . ": invalid op\n" );
					return false;
			}
		}
		return $out;
	}

	/**
	 * Compute a binary "Adler-32" checksum as defined by LibXDiff, i.e. with
	 * the bytes backwards and initialised with 0 instead of 1. See bug 34428.
	 *
	 * @param string $s
	 * @return string|bool False if the hash extension is not available
	 */
	public function xdiffAdler32( $s ) {
		if ( !function_exists( 'hash' ) ) {
			return false;
		}

		static $init;
		if ( $init === null ) {
			$init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
		}

		// The real Adler-32 checksum of $init is zero, so it initialises the
		// state to zero, as it is at the start of LibXDiff's checksum
		// algorithm. Appending the subject string then simulates LibXDiff.
		return strrev( hash( 'adler32', $init . $s, true ) );
	}

	/**
	 * Rebuild mItems by applying the diffs in order, each one to the text
	 * produced by the previous diff. Does nothing if there are no diffs.
	 */
	public function uncompress() {
		if ( !$this->mDiffs ) {
			return;
		}
		$tail = '';
		$mDiffsCount = count( $this->mDiffs );
		for ( $diffKey = 0; $diffKey < $mDiffsCount; $diffKey++ ) {
			$textKey = $this->mDiffMap[$diffKey];
			$text = $this->patch( $tail, $this->mDiffs[$diffKey] );
			$this->mItems[$textKey] = $text;
			$tail = $text;
		}
	}

	/**
	 * Compress the object and pack diffs, diff map and default key into
	 * mCompressed, the only property that is serialized.
	 *
	 * @return array Names of the properties to serialize
	 */
	public function __sleep() {
		$this->compress();
		if ( !count( $this->mItems ) ) {
			// Empty object
			$info = false;
		} else {
			// Take forward differences to improve the compression ratio for sequences
			$map = '';
			$prev = 0;
			foreach ( $this->mDiffMap as $i ) {
				if ( $map !== '' ) {
					$map .= ',';
				}
				$map .= $i - $prev;
				$prev = $i;
			}
			$info = array(
				'diffs' => $this->mDiffs,
				'map' => $map
			);
		}
		if ( isset( $this->mDefaultKey ) ) {
			$info['default'] = $this->mDefaultKey;
		}
		$this->mCompressed = gzdeflate( serialize( $info ) );
		return array( 'mCompressed' );
	}

	/**
	 * Unpack mCompressed, restore the diffs, diff map and default key, and
	 * rebuild the items. The object is frozen against further addItem()
	 * calls.
	 */
	public function __wakeup() {
		// addItem() doesn't work if mItems is partially filled from mDiffs
		$this->mFrozen = true;
		$info = unserialize( gzinflate( $this->mCompressed ) );
		unset( $this->mCompressed );

		if ( !$info ) {
			// Empty object
			return;
		}

		if ( isset( $info['default'] ) ) {
			$this->mDefaultKey = $info['default'];
		}
		$this->mDiffs = $info['diffs'];
		if ( isset( $info['base'] ) ) {
			// Old format
			// NOTE(review): the map is built before the base diff is
			// prepended, so it has one entry fewer than mDiffs - confirm
			// against the old serialization format before changing.
			$this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
			array_unshift( $this->mDiffs,
				pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
				$info['base'] );
		} else {
			// New format
			// The map was stored as comma-separated forward differences;
			// accumulate them back into absolute item keys.
			$map = explode( ',', $info['map'] );
			$cur = 0;
			$this->mDiffMap = array();
			foreach ( $map as $i ) {
				$cur += $i;
				$this->mDiffMap[] = $cur;
			}
		}
		$this->uncompress();
	}

	/**
	 * Helper function for compression jobs
	 * Returns true until the object is "full" and ready to be committed
	 *
	 * @return bool
	 */
	public function isHappy() {
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}

}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |