MediaWiki
REL1_24
|
<?php
/**
 * Contract shared by objects that pack the text of several revisions into
 * a single serialisable blob.
 */
interface HistoryBlob {
	/**
	 * Add a text to the blob.
	 *
	 * @param string $text
	 * @return string|int Key that can later be passed to getItem()
	 */
	function addItem( $text );

	/**
	 * Fetch a previously added text by its key.
	 *
	 * @param string|int $key Key returned by addItem()
	 * @return string|bool The text, or false if the key is not present
	 */
	function getItem( $key );

	/**
	 * Add a text and mark it as the blob's default text, i.e. the one
	 * returned by getText().
	 *
	 * @param string $text
	 */
	function setText( $text );

	/**
	 * Fetch the default text set with setText().
	 *
	 * @return string|bool
	 */
	function getText();
}

/**
 * HistoryBlob that keeps its items in an array keyed by the MD5 hash of the
 * text, and stores that array as a single deflated, serialised string while
 * it is in its "compressed" state.
 */
class ConcatenatedGzipHistoryBlob implements HistoryBlob {
	public $mVersion = 0, $mCompressed = false, $mItems = array(), $mDefaultHash = '';

	/** @var int Total byte length of the items added through addItem() */
	public $mSize = 0;

	/** @var int Size threshold used by isHappy() */
	public $mMaxSize = 10000000;

	/** @var int Item-count threshold used by isHappy() */
	public $mMaxCount = 100;

	/**
	 * @throws MWException If the zlib functions are not available
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write this "
				. "kind of history object (ConcatenatedGzipHistoryBlob)\n" );
		}
	}

	/**
	 * Add a text, unless an identical text is already stored.
	 *
	 * @param string $text
	 * @return string MD5 hash of $text, which is the item's key
	 */
	public function addItem( $text ) {
		$this->uncompress();
		$hash = md5( $text );
		if ( !isset( $this->mItems[$hash] ) ) {
			$this->mItems[$hash] = $text;
			$this->mSize += strlen( $text );
		}
		return $hash;
	}

	/**
	 * @param string $hash Key returned by addItem()
	 * @return string|bool The text, or false if the hash is unknown
	 */
	public function getItem( $hash ) {
		$this->uncompress();
		if ( array_key_exists( $hash, $this->mItems ) ) {
			return $this->mItems[$hash];
		} else {
			return false;
		}
	}

	/**
	 * Add a text and remember its hash as the default item.
	 *
	 * @param string $text
	 */
	public function setText( $text ) {
		$this->uncompress();
		$this->mDefaultHash = $this->addItem( $text );
	}

	/**
	 * @return string|bool The default item, or false if there is none
	 */
	public function getText() {
		$this->uncompress();
		return $this->getItem( $this->mDefaultHash );
	}

	/**
	 * Remove an item and subtract its length from the size counter.
	 * Unknown hashes are ignored.
	 *
	 * @param string $hash Key returned by addItem()
	 */
	public function removeItem( $hash ) {
		// mItems is a deflated string while compressed (e.g. right after
		// serialize() has invoked __sleep()), so it has to be expanded
		// before it can be indexed by hash.
		$this->uncompress();
		if ( !isset( $this->mItems[$hash] ) ) {
			return;
		}
		$this->mSize -= strlen( $this->mItems[$hash] );
		unset( $this->mItems[$hash] );
	}

	/**
	 * Replace the item array with its deflated serialisation.
	 * Does nothing if the blob is already compressed.
	 */
	public function compress() {
		if ( !$this->mCompressed ) {
			$this->mItems = gzdeflate( serialize( $this->mItems ) );
			$this->mCompressed = true;
		}
	}

	/**
	 * Restore the item array from its deflated serialisation.
	 * Does nothing if the blob is not compressed.
	 */
	public function uncompress() {
		if ( $this->mCompressed ) {
			$this->mItems = unserialize( gzinflate( $this->mItems ) );
			$this->mCompressed = false;
		}
	}

	/**
	 * Compress before serialisation. Note that mSize is not part of the
	 * serialised state.
	 *
	 * @return array Names of the properties to serialise
	 */
	function __sleep() {
		$this->compress();
		return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' );
	}

	function __wakeup() {
		$this->uncompress();
	}

	/**
	 * Tell whether the blob is still below both its size and its item-count
	 * threshold.
	 *
	 * @return bool
	 */
	public function isHappy() {
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}
}

/**
 * Pointer to one item (identified by hash) inside a blob object that is
 * stored in a row of the text table.
 */
class HistoryBlobStub {
	/**
	 * Most recently loaded blob object, keyed by old_id. Only one entry is
	 * kept at a time.
	 *
	 * @var array
	 */
	protected static $blobCache = array();

	/** @var int old_id of the text row holding the blob; see setLocation() */
	public $mOldId;

	/** @var string Hash of the item inside the blob */
	public $mHash;

	/** @var mixed Value stored by setReferrer() */
	public $mRef;

	/**
	 * @param string $hash Hash of the item inside the blob
	 * @param int $oldid Accepted but not used by this constructor; the row
	 *   id has to be supplied through setLocation()
	 */
	function __construct( $hash = '', $oldid = 0 ) {
		$this->mHash = $hash;
	}

	/**
	 * Set the old_id of the text row that holds the blob object.
	 *
	 * @param int $id
	 */
	function setLocation( $id ) {
		$this->mOldId = $id;
	}

	/**
	 * Store a referrer id on the stub.
	 *
	 * @param mixed $id
	 */
	function setReferrer( $id ) {
		$this->mRef = $id;
	}

	/**
	 * @return mixed The value stored by setReferrer()
	 */
	function getReferrer() {
		return $this->mRef;
	}

	/**
	 * Load the blob object this stub points to (from the one-entry cache
	 * or from the text table) and return the item identified by mHash.
	 *
	 * @return string|bool The text, or false if the row is missing, is not
	 *   flagged as an object, has an unusable external URL, or does not
	 *   unserialise to an object
	 */
	function getText() {
		if ( isset( self::$blobCache[$this->mOldId] ) ) {
			$obj = self::$blobCache[$this->mOldId];
		} else {
			$dbr = wfGetDB( DB_SLAVE );
			$row = $dbr->selectRow(
				'text',
				array( 'old_flags', 'old_text' ),
				array( 'old_id' => $this->mOldId )
			);

			if ( !$row ) {
				return false;
			}

			$flags = explode( ',', $row->old_flags );
			if ( in_array( 'external', $flags ) ) {
				$url = $row->old_text;
				$parts = explode( '://', $url, 2 );
				if ( !isset( $parts[1] ) || $parts[1] == '' ) {
					return false;
				}
				$row->old_text = ExternalStore::fetchFromUrl( $url );

			}

			if ( !in_array( 'object', $flags ) ) {
				return false;
			}

			if ( in_array( 'gzip', $flags ) ) {
				// This shouldn't happen, but a bug in the compress script
				// may at times gzip-compress a HistoryBlob object row.
				$obj = unserialize( gzinflate( $row->old_text ) );
			} else {
				$obj = unserialize( $row->old_text );
			}

			if ( !is_object( $obj ) ) {
				// Correct for old double-serialization bug.
				$obj = is_string( $obj ) ? unserialize( $obj ) : false;
			}

			if ( !is_object( $obj ) ) {
				// The stored data did not yield a blob object (corrupt row,
				// failed inflate, ...). Report failure the same way as the
				// other error paths instead of dying on a method call.
				return false;
			}

			// Save this item for reference; if pulling many
			// items in a row we'll likely use it again.
			$obj->uncompress();
			self::$blobCache = array( $this->mOldId => $obj );
		}

		return $obj->getItem( $this->mHash );
	}

	/**
	 * @return string Hash of the item this stub points to
	 */
	function getHash() {
		return $this->mHash;
	}
}

/**
 * Pointer to the text of a row in the cur table.
 */
class HistoryBlobCurStub {
	/** @var int cur_id of the referenced row */
	public $mCurId;

	/**
	 * @param int $curid cur_id of the referenced row
	 */
	function __construct( $curid = 0 ) {
		$this->mCurId = $curid;
	}

	/**
	 * Set the cur_id of the referenced row.
	 *
	 * @param int $id
	 */
	function setLocation( $id ) {
		$this->mCurId = $id;
	}

	/**
	 * @return string|bool cur_text of the row, or false if there is no
	 *   such row
	 */
	function getText() {
		$dbr = wfGetDB( DB_SLAVE );
		$row = $dbr->selectRow( 'cur', array( 'cur_text' ), array( 'cur_id' => $this->mCurId ) );
		if ( !$row ) {
			return false;
		}
		return $row->cur_text;
	}
}

/**
 * HistoryBlob that stores its items as a chain of binary diffs, each one
 * taken against the previous text in the chain.
 */
class DiffHistoryBlob implements HistoryBlob {
	/** @var array Item texts, indexed by the integer key addItem() returned */
	public $mItems = array();

	/** @var int Total byte length of the items added through addItem() */
	public $mSize = 0;

	/**
	 * Binary diffs. The first one is applied to the empty string, every
	 * following one to the result of the previous one (see uncompress()).
	 *
	 * @var array|null
	 */
	public $mDiffs;

	/** @var array For each entry of mDiffs, the mItems key it reconstructs */
	public $mDiffMap;

	/** @var int Key of the default item; see setText() */
	public $mDefaultKey;

	/** @var string Deflated serialised state; only set between __sleep() and __wakeup() */
	public $mCompressed;

	/** @var bool True once the object went through __wakeup(); addItem() then refuses to work */
	public $mFrozen = false;

	/** @var int Size threshold used by isHappy() */
	public $mMaxSize = 10000000;

	/** @var int Item-count threshold used by isHappy() */
	public $mMaxCount = 100;

	/** Op-codes of the binary diff format understood by patch() */
	const XDL_BDOP_INS = 1;
	const XDL_BDOP_CPY = 2;
	const XDL_BDOP_INSB = 3;

	/**
	 * @throws MWException If the zlib functions are not available
	 */
	function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
		}
	}

	/**
	 * Append a text to the blob.
	 *
	 * @param string $text
	 * @return int Key of the new item
	 * @throws MWException If the object has been through __wakeup()
	 */
	function addItem( $text ) {
		if ( $this->mFrozen ) {
			throw new MWException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
		}

		$this->mItems[] = $text;
		$this->mSize += strlen( $text );
		$this->mDiffs = null; // later
		return count( $this->mItems ) - 1;
	}

	/**
	 * @param int $key Key returned by addItem()
	 * @return string|bool The text, or false if the key is not present
	 *   (same convention as ConcatenatedGzipHistoryBlob::getItem())
	 */
	function getItem( $key ) {
		if ( !isset( $this->mItems[$key] ) ) {
			return false;
		}
		return $this->mItems[$key];
	}

	/**
	 * Add a text and remember its key as the default item.
	 *
	 * @param string $text
	 */
	function setText( $text ) {
		$this->mDefaultKey = $this->addItem( $text );
	}

	/**
	 * @return string|bool The default item, or false if there is none
	 */
	function getText() {
		return $this->getItem( $this->mDefaultKey );
	}

	/**
	 * Build mDiffs and mDiffMap from mItems. Does nothing if the diffs
	 * already exist or if there are no items.
	 *
	 * @throws MWException If xdiff_string_rabdiff() is not available
	 */
	function compress() {
		if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
			throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
		}
		if ( isset( $this->mDiffs ) ) {
			// Already compressed
			return;
		}
		if ( !count( $this->mItems ) ) {
			// Empty
			return;
		}

		// Create two diff sequences: one for main text and one for small text
		$sequences = array(
			'small' => array(
				'tail' => '',
				'diffs' => array(),
				'map' => array(),
			),
			'main' => array(
				'tail' => '',
				'diffs' => array(),
				'map' => array(),
			),
		);
		$smallFactor = 0.5;

		$mItemsCount = count( $this->mItems );
		for ( $i = 0; $i < $mItemsCount; $i++ ) {
			$text = $this->mItems[$i];
			if ( $i == 0 ) {
				$seqName = 'main';
			} else {
				// A text shorter than $smallFactor times the latest "main"
				// text goes into the "small" sequence
				$mainTail = $sequences['main']['tail'];
				if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
					$seqName = 'small';
				} else {
					$seqName = 'main';
				}
			}
			$seq =& $sequences[$seqName];
			$tail = $seq['tail'];
			$diff = $this->diff( $tail, $text );
			$seq['diffs'][] = $diff;
			$seq['map'][] = $i;
			$seq['tail'] = $text;
		}
		unset( $seq ); // unlink dangerous alias

		// Knit the sequences together
		$tail = '';
		$this->mDiffs = array();
		$this->mDiffMap = array();
		foreach ( $sequences as $seq ) {
			if ( !count( $seq['diffs'] ) ) {
				continue;
			}
			if ( $tail === '' ) {
				$this->mDiffs[] = $seq['diffs'][0];
			} else {
				// The first diff of this sequence was taken against '';
				// re-diff its text against the end of the previous sequence
				$head = $this->patch( '', $seq['diffs'][0] );
				$this->mDiffs[] = $this->diff( $tail, $head );
			}
			$this->mDiffMap[] = $seq['map'][0];
			$diffsCount = count( $seq['diffs'] );
			for ( $i = 1; $i < $diffsCount; $i++ ) {
				$this->mDiffs[] = $seq['diffs'][$i];
				$this->mDiffMap[] = $seq['map'][$i];
			}
			$tail = $seq['tail'];
		}
	}

	/**
	 * Compute a binary diff that turns $t1 into $t2.
	 *
	 * @param string $t1
	 * @param string $t2
	 * @return string
	 */
	function diff( $t1, $t2 ) {
		# Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
		# "String is not zero-terminated"
		wfSuppressWarnings();
		$diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
		wfRestoreWarnings();
		return $diff;
	}

	/**
	 * Apply a binary diff to a base text. Uses xdiff_string_bpatch() when
	 * it exists and a pure PHP implementation otherwise.
	 *
	 * @param string $base
	 * @param string $diff
	 * @return string|bool The patched text; the pure PHP implementation
	 *   returns false if the diff is malformed or does not match $base
	 */
	function patch( $base, $diff ) {
		if ( function_exists( 'xdiff_string_bpatch' ) ) {
			wfSuppressWarnings();
			$text = xdiff_string_bpatch( $base, $diff ) . '';
			wfRestoreWarnings();
			return $text;
		}

		# Pure PHP implementation

		# The header is two 32-bit little-endian integers: checksum and
		# length of the base text
		if ( strlen( $diff ) < 8 ) {
			wfDebug( __METHOD__ . ": diff is too short to contain a header\n" );
			return false;
		}
		$header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );

		# Check the checksum if hash extension is available
		$ofp = $this->xdiffAdler32( $base );
		if ( $ofp !== false && $ofp !== substr( $diff, 0, 4 ) ) {
			wfDebug( __METHOD__ . ": incorrect base checksum\n" );
			return false;
		}
		if ( $header['csize'] != strlen( $base ) ) {
			wfDebug( __METHOD__ . ": incorrect base length\n" );
			return false;
		}

		$p = 8;
		$out = '';
		while ( $p < strlen( $diff ) ) {
			$x = unpack( 'Cop', substr( $diff, $p, 1 ) );
			$op = $x['op'];
			++$p;
			switch ( $op ) {
				case self::XDL_BDOP_INS:
					# Literal data with a one-byte length
					$x = unpack( 'Csize', substr( $diff, $p, 1 ) );
					$p++;
					$out .= substr( $diff, $p, $x['size'] );
					$p += $x['size'];
					break;
				case self::XDL_BDOP_INSB:
					# Literal data with a four-byte length
					$x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
					$p += 4;
					$out .= substr( $diff, $p, $x['csize'] );
					$p += $x['csize'];
					break;
				case self::XDL_BDOP_CPY:
					# Copy a range (offset, length) out of the base text
					$x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
					$p += 8;
					$out .= substr( $base, $x['off'], $x['csize'] );
					break;
				default:
					wfDebug( __METHOD__ . ": invalid op\n" );
					return false;
			}
		}
		return $out;
	}

	/**
	 * Compute the checksum of $s in the form found in the first four
	 * bytes of a diff header (see patch()).
	 *
	 * @param string $s
	 * @return string|bool Four raw bytes, or false if the hash extension
	 *   is not available
	 */
	function xdiffAdler32( $s ) {
		if ( !function_exists( 'hash' ) ) {
			return false;
		}

		static $init;
		if ( $init === null ) {
			$init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
		}

		// The real Adler-32 checksum of $init is zero, so it initialises the
		// state to zero, as it is at the start of LibXDiff's checksum
		// algorithm. Appending the subject string then simulates LibXDiff.
		return strrev( hash( 'adler32', $init . $s, true ) );
	}

	/**
	 * Rebuild mItems by applying the diffs in mDiffs one after another,
	 * starting from the empty string. Does nothing if there are no diffs.
	 */
	function uncompress() {
		if ( !$this->mDiffs ) {
			return;
		}
		$tail = '';
		$mDiffsCount = count( $this->mDiffs );
		for ( $diffKey = 0; $diffKey < $mDiffsCount; $diffKey++ ) {
			$textKey = $this->mDiffMap[$diffKey];
			$text = $this->patch( $tail, $this->mDiffs[$diffKey] );
			$this->mItems[$textKey] = $text;
			$tail = $text;
		}
	}

	/**
	 * Compress, then pack diffs, diff map and default key into the single
	 * deflated property mCompressed.
	 *
	 * @return array Names of the properties to serialise
	 */
	function __sleep() {
		$this->compress();
		if ( !count( $this->mItems ) ) {
			// Empty object
			$info = false;
		} else {
			// Take forward differences to improve the compression ratio for sequences
			$map = '';
			$prev = 0;
			foreach ( $this->mDiffMap as $i ) {
				if ( $map !== '' ) {
					$map .= ',';
				}
				$map .= $i - $prev;
				$prev = $i;
			}
			$info = array(
				'diffs' => $this->mDiffs,
				'map' => $map
			);
		}
		if ( isset( $this->mDefaultKey ) ) {
			$info['default'] = $this->mDefaultKey;
		}
		$this->mCompressed = gzdeflate( serialize( $info ) );
		return array( 'mCompressed' );
	}

	/**
	 * Unpack mCompressed, restore diffs, diff map and default key, and
	 * rebuild the items. The object is frozen afterwards.
	 */
	function __wakeup() {
		// addItem() doesn't work if mItems is partially filled from mDiffs
		$this->mFrozen = true;
		$info = unserialize( gzinflate( $this->mCompressed ) );
		unset( $this->mCompressed );

		if ( !$info ) {
			// Empty object
			return;
		}

		if ( isset( $info['default'] ) ) {
			$this->mDefaultKey = $info['default'];
		}
		$this->mDiffs = $info['diffs'];
		if ( isset( $info['base'] ) ) {
			// Old format
			$this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
			// Prepend a diff that turns '' into the stored base text:
			// zero checksum, zero base length, one INSB op
			array_unshift( $this->mDiffs,
				pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
				$info['base'] );
		} else {
			// New format
			// Undo the forward differences taken in __sleep()
			$map = explode( ',', $info['map'] );
			$cur = 0;
			$this->mDiffMap = array();
			foreach ( $map as $i ) {
				$cur += $i;
				$this->mDiffMap[] = $cur;
			}
		}
		$this->uncompress();
	}

	/**
	 * Tell whether the blob is still below both its size and its item-count
	 * threshold.
	 *
	 * @return bool
	 */
	function isHappy() {
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}

}