MediaWiki
REL1_22
|
<?php
/**
 * Containers that pack the text of several revisions into one serialized
 * object, plus small stub objects that point at an item held elsewhere.
 */

/**
 * Contract shared by the multi-item text containers in this file.
 */
interface HistoryBlob
{
    /**
     * Store a piece of text in the container.
     *
     * @param string $text
     * @return string|int Key under which the text can be fetched with getItem()
     */
    public function addItem( $text );

    /**
     * Fetch a previously stored piece of text.
     *
     * @param string|int $key Key returned by addItem()
     * @return string|bool
     */
    public function getItem( $key );

    /**
     * Store a piece of text and remember it as the default item, i.e. the
     * one returned by getText().
     *
     * @param string $text
     */
    public function setText( $text );

    /**
     * Fetch the default item.
     *
     * @return string|bool
     */
    public function getText();
}

/**
 * HistoryBlob that keeps its items in an array keyed by the MD5 hash of the
 * text, and stores that array as a single deflated, serialized string.
 */
class ConcatenatedGzipHistoryBlob implements HistoryBlob
{
    /**
     * $mItems is an array of hash => text while $mCompressed is false and a
     * gzdeflate()d serialized string while it is true.
     */
    public $mVersion = 0, $mCompressed = false, $mItems = array(), $mDefaultHash = '';

    /** Sum of strlen() of the items added to (minus those removed from) this instance */
    public $mSize = 0;

    /** isHappy() turns false once $mSize reaches this value */
    public $mMaxSize = 10000000;

    /** isHappy() turns false once the item count reaches this value */
    public $mMaxCount = 100;

    /**
     * @throws MWException if the zlib functions are unavailable
     */
    public function __construct() {
        if ( !function_exists( 'gzdeflate' ) ) {
            throw new MWException( "Need zlib support to read or write this kind of history object (ConcatenatedGzipHistoryBlob)\n" );
        }
    }

    /**
     * Add a text item. Adding a text that is already present is a no-op
     * that returns the existing key.
     *
     * @param string $text
     * @return string MD5 hash of $text, used as the item key
     */
    public function addItem( $text ) {
        $this->uncompress();
        $hash = md5( $text );
        if ( !isset( $this->mItems[$hash] ) ) {
            $this->mItems[$hash] = $text;
            $this->mSize += strlen( $text );
        }
        return $hash;
    }

    /**
     * @param string $hash Key returned by addItem()
     * @return string|bool The stored text, or false if there is no such item
     */
    public function getItem( $hash ) {
        $this->uncompress();
        if ( array_key_exists( $hash, $this->mItems ) ) {
            return $this->mItems[$hash];
        } else {
            return false;
        }
    }

    /**
     * Add $text and make it the default item.
     *
     * @param string $text
     */
    public function setText( $text ) {
        $this->uncompress();
        $this->mDefaultHash = $this->addItem( $text );
    }

    /**
     * @return string|bool The default item, or false if it does not exist
     */
    public function getText() {
        $this->uncompress();
        return $this->getItem( $this->mDefaultHash );
    }

    /**
     * Remove an item. Unknown hashes are ignored.
     *
     * @param string $hash Key returned by addItem()
     */
    public function removeItem( $hash ) {
        // The object stays compressed after compress() or serialize(), in
        // which case mItems is a string and must be expanded before indexing.
        $this->uncompress();
        if ( !isset( $this->mItems[$hash] ) ) {
            // Nothing to remove; leave mSize untouched.
            return;
        }
        $this->mSize -= strlen( $this->mItems[$hash] );
        unset( $this->mItems[$hash] );
    }

    /**
     * Replace the item array with its deflated serialization. No-op if the
     * object is already compressed.
     */
    public function compress() {
        if ( !$this->mCompressed ) {
            $this->mItems = gzdeflate( serialize( $this->mItems ) );
            $this->mCompressed = true;
        }
    }

    /**
     * Inverse of compress(). No-op if the object is not compressed.
     */
    public function uncompress() {
        if ( $this->mCompressed ) {
            $this->mItems = unserialize( gzinflate( $this->mItems ) );
            $this->mCompressed = false;
        }
    }

    /**
     * Compress before serialization. Note that mSize, mMaxSize and mMaxCount
     * are not part of the serialized form.
     *
     * @return array Names of the properties to serialize
     */
    function __sleep() {
        $this->compress();
        return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' );
    }

    function __wakeup() {
        $this->uncompress();
    }

    /**
     * Whether the blob is still below both its size and item-count limits.
     *
     * @return bool
     */
    public function isHappy() {
        // count() needs the item array, not the deflated string.
        $this->uncompress();
        return $this->mSize < $this->mMaxSize
            && count( $this->mItems ) < $this->mMaxCount;
    }
}

/**
 * Pointer to one item (identified by hash) inside a blob object stored in
 * a row of the text table.
 */
class HistoryBlobStub {
    /**
     * Single-entry cache of the most recently loaded blob object,
     * keyed by the old_id it was loaded from.
     */
    protected static $blobCache = array();

    public $mOldId, $mHash, $mRef;

    /**
     * @param string $hash Key of the item inside the blob
     * @param int $oldid Accepted but not used by this constructor;
     *   the row ID is set through setLocation()
     */
    function __construct( $hash = '', $oldid = 0 ) {
        $this->mHash = $hash;
    }

    /**
     * Set the old_id of the text row that holds the blob object.
     *
     * @param int $id
     */
    function setLocation( $id ) {
        $this->mOldId = $id;
    }

    /**
     * Set the referrer value; this class only stores and returns it.
     *
     * @param mixed $id
     */
    function setReferrer( $id ) {
        $this->mRef = $id;
    }

    /**
     * @return mixed Value previously given to setReferrer()
     */
    function getReferrer() {
        return $this->mRef;
    }

    /**
     * Load the blob object this stub points at and return the stub's item.
     *
     * @return string|bool The text, or false if the row is missing, is not
     *   flagged as an object, has a malformed external URL, or does not
     *   unserialize to an object
     */
    function getText() {
        if ( isset( self::$blobCache[$this->mOldId] ) ) {
            $obj = self::$blobCache[$this->mOldId];
        } else {
            $dbr = wfGetDB( DB_SLAVE );
            $row = $dbr->selectRow( 'text', array( 'old_flags', 'old_text' ), array( 'old_id' => $this->mOldId ) );
            if ( !$row ) {
                return false;
            }
            $flags = explode( ',', $row->old_flags );
            if ( in_array( 'external', $flags ) ) {
                // old_text holds a URL; require a non-empty part after "://"
                $url = $row->old_text;
                $parts = explode( '://', $url, 2 );
                if ( !isset( $parts[1] ) || $parts[1] == '' ) {
                    return false;
                }
                $row->old_text = ExternalStore::fetchFromUrl( $url );
            }
            if ( !in_array( 'object', $flags ) ) {
                return false;
            }

            if ( in_array( 'gzip', $flags ) ) {
                // This shouldn't happen, but a bug in the compress script
                // may at times gzip-compress a HistoryBlob object row.
                $obj = unserialize( gzinflate( $row->old_text ) );
            } else {
                $obj = unserialize( $row->old_text );
            }

            if ( !is_object( $obj ) ) {
                // Correct for old double-serialization bug.
                $obj = unserialize( $obj );
            }

            if ( !is_object( $obj ) ) {
                // Fetch or unserialization failed; report it like the other
                // failure paths instead of calling a method on a non-object.
                return false;
            }

            // Save this item for reference; if pulling many
            // items in a row we'll likely use it again.
            $obj->uncompress();
            self::$blobCache = array( $this->mOldId => $obj );
        }
        return $obj->getItem( $this->mHash );
    }

    /**
     * @return string Key of the item inside the blob
     */
    function getHash() {
        return $this->mHash;
    }
}

/**
 * Pointer to the text of a row in the cur table.
 */
class HistoryBlobCurStub {
    public $mCurId;

    /**
     * @param int $curid cur_id of the row holding the text
     */
    function __construct( $curid = 0 ) {
        $this->mCurId = $curid;
    }

    /**
     * Set the cur_id of the row holding the text.
     *
     * @param int $id
     */
    function setLocation( $id ) {
        $this->mCurId = $id;
    }

    /**
     * @return string|bool cur_text of the row, or false if no row matched
     */
    function getText() {
        $dbr = wfGetDB( DB_SLAVE );
        $row = $dbr->selectRow( 'cur', array( 'cur_text' ), array( 'cur_id' => $this->mCurId ) );
        if ( !$row ) {
            return false;
        }
        return $row->cur_text;
    }
}

/**
 * HistoryBlob that stores its items as a chain of binary diffs, each one
 * taken against the previous text in the chain.
 */
class DiffHistoryBlob implements HistoryBlob {
    /** Uncompressed items, indexed by the integer key returned from addItem() */
    public $mItems = array();

    /** Sum of strlen() of the items added through addItem() */
    public $mSize = 0;

    /**
     * Chain of diffs. The first diff applies to the empty string; every later
     * diff applies to the text produced by the one before it. The text
     * produced by $mDiffs[$n] is item $mDiffMap[$n]. Null until compress()
     * or __wakeup() has filled it in.
     */
    public $mDiffs;

    /** For each entry of $mDiffs, the key in $mItems of the text it produces */
    public $mDiffMap;

    /** Key of the item returned by getText() */
    public $mDefaultKey;

    /** Deflated serialized form; only populated between __sleep() and __wakeup() */
    public $mCompressed;

    /** True once the object has been through __wakeup(); addItem() then throws */
    public $mFrozen = false;

    /** isHappy() turns false once $mSize reaches this value */
    public $mMaxSize = 10000000;

    /** isHappy() turns false once the item count reaches this value */
    public $mMaxCount = 100;

    /** Opcodes of the binary diff format read by patch() */
    const XDL_BDOP_INS = 1;
    const XDL_BDOP_CPY = 2;
    const XDL_BDOP_INSB = 3;

    /**
     * @throws MWException if the zlib functions are unavailable
     */
    function __construct() {
        if ( !function_exists( 'gzdeflate' ) ) {
            throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
        }
    }

    /**
     * Append a text item.
     *
     * @param string $text
     * @return int Key of the new item
     * @throws MWException if the object has been through __wakeup()
     */
    function addItem( $text ) {
        if ( $this->mFrozen ) {
            throw new MWException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
        }

        $this->mItems[] = $text;
        $this->mSize += strlen( $text );
        $this->mDiffs = null; // invalidate; compress() rebuilds it later
        return count( $this->mItems ) - 1;
    }

    /**
     * @param int $key Key returned by addItem()
     * @return string
     */
    function getItem( $key ) {
        return $this->mItems[$key];
    }

    /**
     * Add $text and make it the default item.
     *
     * @param string $text
     */
    function setText( $text ) {
        $this->mDefaultKey = $this->addItem( $text );
    }

    /**
     * @return string The default item
     */
    function getText() {
        return $this->getItem( $this->mDefaultKey );
    }

    /**
     * Build $mDiffs and $mDiffMap from $mItems. Does nothing if the diffs
     * are already present or there are no items.
     *
     * @throws MWException if xdiff_string_rabdiff() is unavailable
     */
    function compress() {
        if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
            throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
        }
        if ( isset( $this->mDiffs ) ) {
            // Already compressed
            return;
        }
        $itemCount = count( $this->mItems );
        if ( !$itemCount ) {
            // Empty
            return;
        }

        // Create two diff sequences: one for main text and one for small text
        $sequences = array(
            'small' => array(
                'tail' => '',
                'diffs' => array(),
                'map' => array(),
            ),
            'main' => array(
                'tail' => '',
                'diffs' => array(),
                'map' => array(),
            ),
        );
        $smallFactor = 0.5;

        for ( $i = 0; $i < $itemCount; $i++ ) {
            $text = $this->mItems[$i];
            if ( $i == 0 ) {
                $seqName = 'main';
            } else {
                // A text shorter than $smallFactor times the latest main
                // text goes into the 'small' sequence.
                $mainTail = $sequences['main']['tail'];
                if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
                    $seqName = 'small';
                } else {
                    $seqName = 'main';
                }
            }
            $seq =& $sequences[$seqName];
            $tail = $seq['tail'];
            $diff = $this->diff( $tail, $text );
            $seq['diffs'][] = $diff;
            $seq['map'][] = $i;
            $seq['tail'] = $text;
        }
        unset( $seq ); // unlink dangerous alias

        // Knit the sequences together
        $tail = '';
        $this->mDiffs = array();
        $this->mDiffMap = array();
        foreach ( $sequences as $seq ) {
            $diffCount = count( $seq['diffs'] );
            if ( !$diffCount ) {
                continue;
            }
            if ( $tail === '' ) {
                $this->mDiffs[] = $seq['diffs'][0];
            } else {
                // The sequence's first diff was taken against the empty
                // string; re-diff its text against the end of the chain.
                $head = $this->patch( '', $seq['diffs'][0] );
                $this->mDiffs[] = $this->diff( $tail, $head );
            }
            $this->mDiffMap[] = $seq['map'][0];
            for ( $i = 1; $i < $diffCount; $i++ ) {
                $this->mDiffs[] = $seq['diffs'][$i];
                $this->mDiffMap[] = $seq['map'][$i];
            }
            $tail = $seq['tail'];
        }
    }

    /**
     * Compute a binary diff that turns $t1 into $t2.
     *
     * @param string $t1
     * @param string $t2
     * @return string
     */
    function diff( $t1, $t2 ) {
        # Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
        # "String is not zero-terminated"
        wfSuppressWarnings();
        $diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
        wfRestoreWarnings();
        return $diff;
    }

    /**
     * Apply a binary diff to $base. Uses xdiff_string_bpatch() when it is
     * available and a pure PHP decoder otherwise.
     *
     * @param string $base
     * @param string $diff
     * @return string|bool The patched text; the pure PHP decoder returns
     *   false on a checksum mismatch, length mismatch or unknown opcode
     */
    function patch( $base, $diff ) {
        if ( function_exists( 'xdiff_string_bpatch' ) ) {
            wfSuppressWarnings();
            $text = xdiff_string_bpatch( $base, $diff ) . '';
            wfRestoreWarnings();
            return $text;
        }

        # Pure PHP implementation

        // Header: 4-byte checksum of the base, then 4-byte base length,
        // both little-endian.
        $header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );

        # Check the checksum if hash extension is available
        $ofp = $this->xdiffAdler32( $base );
        if ( $ofp !== false && $ofp !== substr( $diff, 0, 4 ) ) {
            wfDebug( __METHOD__ . ": incorrect base checksum\n" );
            return false;
        }
        if ( $header['csize'] != strlen( $base ) ) {
            wfDebug( __METHOD__ . ": incorrect base length\n" );
            return false;
        }

        $p = 8;
        $out = '';
        $diffLength = strlen( $diff );
        while ( $p < $diffLength ) {
            $x = unpack( 'Cop', substr( $diff, $p, 1 ) );
            $op = $x['op'];
            ++$p;
            switch ( $op ) {
                case self::XDL_BDOP_INS:
                    // Literal data with a 1-byte length
                    $x = unpack( 'Csize', substr( $diff, $p, 1 ) );
                    $p++;
                    $out .= substr( $diff, $p, $x['size'] );
                    $p += $x['size'];
                    break;
                case self::XDL_BDOP_INSB:
                    // Literal data with a 4-byte length
                    $x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
                    $p += 4;
                    $out .= substr( $diff, $p, $x['csize'] );
                    $p += $x['csize'];
                    break;
                case self::XDL_BDOP_CPY:
                    // Copy a range (4-byte offset, 4-byte length) from the base
                    $x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
                    $p += 8;
                    $out .= substr( $base, $x['off'], $x['csize'] );
                    break;
                default:
                    wfDebug( __METHOD__ . ": invalid op\n" );
                    return false;
            }
        }
        return $out;
    }

    /**
     * Compute the checksum that patch() compares against the first four
     * bytes of a diff.
     *
     * @param string $s
     * @return string|bool Four raw bytes, or false if hash() is unavailable
     */
    function xdiffAdler32( $s ) {
        if ( !function_exists( 'hash' ) ) {
            return false;
        }

        static $init;
        if ( $init === null ) {
            $init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
        }

        // The real Adler-32 checksum of $init is zero, so it initialises the
        // state to zero, as it is at the start of LibXDiff's checksum
        // algorithm. Appending the subject string then simulates LibXDiff.
        return strrev( hash( 'adler32', $init . $s, true ) );
    }

    /**
     * Rebuild $mItems by applying the diff chain. Does nothing if there
     * are no diffs.
     */
    function uncompress() {
        if ( !$this->mDiffs ) {
            return;
        }
        $tail = '';
        $diffCount = count( $this->mDiffs );
        for ( $diffKey = 0; $diffKey < $diffCount; $diffKey++ ) {
            $textKey = $this->mDiffMap[$diffKey];
            $text = $this->patch( $tail, $this->mDiffs[$diffKey] );
            $this->mItems[$textKey] = $text;
            $tail = $text;
        }
    }

    /**
     * Pack the diffs, the diff map and the default key into $mCompressed,
     * which is the only property that gets serialized.
     *
     * @return array Names of the properties to serialize
     */
    function __sleep() {
        $this->compress();
        if ( !count( $this->mItems ) ) {
            // Empty object
            $info = false;
        } else {
            // Take forward differences to improve the compression ratio for sequences
            $map = '';
            $prev = 0;
            foreach ( $this->mDiffMap as $i ) {
                if ( $map !== '' ) {
                    $map .= ',';
                }
                $map .= $i - $prev;
                $prev = $i;
            }
            $info = array(
                'diffs' => $this->mDiffs,
                'map' => $map
            );
        }
        if ( isset( $this->mDefaultKey ) ) {
            if ( $info === false ) {
                // Create the array explicitly rather than relying on
                // auto-vivification from false; the result is the same.
                $info = array();
            }
            $info['default'] = $this->mDefaultKey;
        }
        $this->mCompressed = gzdeflate( serialize( $info ) );
        return array( 'mCompressed' );
    }

    /**
     * Unpack $mCompressed, rebuild the items and freeze the object.
     */
    function __wakeup() {
        // addItem() doesn't work if mItems is partially filled from mDiffs
        $this->mFrozen = true;
        $info = unserialize( gzinflate( $this->mCompressed ) );
        unset( $this->mCompressed );

        if ( !$info ) {
            // Empty object
            return;
        }

        if ( isset( $info['default'] ) ) {
            $this->mDefaultKey = $info['default'];
        }
        $this->mDiffs = $info['diffs'];
        if ( isset( $info['base'] ) ) {
            // Old format: the first text is stored verbatim under 'base'.
            // Turn it into a diff against the empty string (zero checksum,
            // zero base length, one INSB op) and put it first in the chain.
            $this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
            array_unshift( $this->mDiffs,
                pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
                $info['base'] );
        } else {
            // New format: the map is a comma-separated list of forward differences
            $map = explode( ',', $info['map'] );
            $cur = 0;
            $this->mDiffMap = array();
            foreach ( $map as $i ) {
                $cur += $i;
                $this->mDiffMap[] = $cur;
            }
        }
        $this->uncompress();
    }

    /**
     * Whether the blob is still below both its size and item-count limits.
     *
     * @return bool
     */
    function isHappy() {
        return $this->mSize < $this->mMaxSize
            && count( $this->mItems ) < $this->mMaxCount;
    }

}