MediaWiki
REL1_23
|
<?php
/**
 * Storage classes that bundle the text of several revisions into one
 * serialized, compressed object, plus the stub classes that point into
 * such an object.
 */

/**
 * Common interface of the multi-revision blob classes in this file.
 */
interface HistoryBlob {
	/**
	 * Add a text to the blob.
	 *
	 * @param string $text
	 * @return string|int Key under which getItem() will return the text
	 */
	public function addItem( $text );

	/**
	 * Fetch a text previously stored with addItem().
	 *
	 * @param string|int $key Key returned by addItem()
	 * @return string|bool The text, or false if there is no such item
	 */
	public function getItem( $key );

	/**
	 * Add a text and mark it as the blob's default text, i.e. the one
	 * that getText() returns.
	 *
	 * @param string $text
	 */
	public function setText( $text );

	/**
	 * Fetch the default text set with setText().
	 *
	 * @return string|bool
	 */
	public function getText();
}

/**
 * Blob that keeps its items in an array keyed by the MD5 hash of each text
 * and stores that array as gzdeflate( serialize( ... ) ).
 */
class ConcatenatedGzipHistoryBlob implements HistoryBlob {
	/** @var int Serialized with the blob; not otherwise used in this file */
	public $mVersion = 0;

	/** @var bool Whether $mItems currently holds the deflated string form */
	public $mCompressed = false;

	/** @var array|string Map of hash => text, or the deflated string while compressed */
	public $mItems = array();

	/** @var string Hash of the default text returned by getText() */
	public $mDefaultHash = '';

	/** @var int Sum of the byte lengths of the items added; not serialized */
	public $mSize = 0;

	/** @var int Size limit, in bytes, used by isHappy() */
	public $mMaxSize = 10000000;

	/** @var int Item count limit used by isHappy() */
	public $mMaxCount = 100;

	/**
	 * @throws MWException If zlib support is not available
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write this kind of history object (ConcatenatedGzipHistoryBlob)\n" );
		}
	}

	/**
	 * Add a text to the blob. Adding a text that is already present is a
	 * no-op apart from returning its hash.
	 *
	 * @param string $text
	 * @return string MD5 hash of $text, usable as a key for getItem()
	 */
	public function addItem( $text ) {
		$this->uncompress();
		$hash = md5( $text );
		if ( !isset( $this->mItems[$hash] ) ) {
			$this->mItems[$hash] = $text;
			$this->mSize += strlen( $text );
		}
		return $hash;
	}

	/**
	 * @param string $hash Key returned by addItem()
	 * @return string|bool The text, or false if no item has that hash
	 */
	public function getItem( $hash ) {
		$this->uncompress();
		if ( array_key_exists( $hash, $this->mItems ) ) {
			return $this->mItems[$hash];
		}
		return false;
	}

	/**
	 * Add a text and make it the default text of the blob.
	 *
	 * @param string $text
	 */
	public function setText( $text ) {
		$this->uncompress();
		$this->mDefaultHash = $this->addItem( $text );
	}

	/**
	 * @return string|bool The default text, or false if it is not present
	 */
	public function getText() {
		$this->uncompress();
		return $this->getItem( $this->mDefaultHash );
	}

	/**
	 * Remove an item and subtract its length from the size counter.
	 * Unknown hashes are ignored.
	 *
	 * @param string $hash Key returned by addItem()
	 */
	public function removeItem( $hash ) {
		// mItems is a plain string while compressed; indexing it would be wrong.
		$this->uncompress();
		if ( !array_key_exists( $hash, $this->mItems ) ) {
			return;
		}
		$this->mSize -= strlen( $this->mItems[$hash] );
		unset( $this->mItems[$hash] );
	}

	/**
	 * Replace the item array with its deflated serialization, unless that
	 * has already been done.
	 */
	public function compress() {
		if ( !$this->mCompressed ) {
			$this->mItems = gzdeflate( serialize( $this->mItems ) );
			$this->mCompressed = true;
		}
	}

	/**
	 * Restore the item array from its deflated serialization, if the blob
	 * is currently compressed.
	 */
	public function uncompress() {
		if ( $this->mCompressed ) {
			$this->mItems = unserialize( gzinflate( $this->mItems ) );
			$this->mCompressed = false;
		}
	}

	/**
	 * Compress before serialization. The size counter and the limits are
	 * deliberately left out of the serialized form.
	 *
	 * @return array Names of the properties to serialize
	 */
	public function __sleep() {
		$this->compress();
		return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' );
	}

	public function __wakeup() {
		$this->uncompress();
	}

	/**
	 * Whether the blob is still below both its size and its item count
	 * limit, i.e. whether more items may reasonably be added.
	 *
	 * @return bool
	 */
	public function isHappy() {
		// count() must see the item array, not the compressed string.
		$this->uncompress();
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}
}

/**
 * Pointer to one item inside a blob object stored in a row of the text table.
 */
class HistoryBlobStub {
	/**
	 * Most recently loaded blob object, keyed by old_id. getText() replaces
	 * the whole array on every load, so it never holds more than one entry.
	 *
	 * @var array
	 */
	protected static $blobCache = array();

	/** @var int old_id of the text row holding the blob object */
	public $mOldId;

	/** @var string Key of this stub's item within the blob object */
	public $mHash;

	/** @var mixed Referrer id as stored by setReferrer() */
	public $mRef;

	/**
	 * @param string $hash Key of the item within the blob object
	 * @param int $oldid old_id of the text row holding the blob object.
	 *  The default of 0 leaves the location unset, as before; use
	 *  setLocation() to set it later.
	 */
	public function __construct( $hash = '', $oldid = 0 ) {
		$this->mHash = $hash;
		if ( $oldid ) {
			$this->mOldId = $oldid;
		}
	}

	/**
	 * Set the old_id of the text row that holds the blob object.
	 *
	 * @param int $id
	 */
	public function setLocation( $id ) {
		$this->mOldId = $id;
	}

	/**
	 * @param mixed $id
	 */
	public function setReferrer( $id ) {
		$this->mRef = $id;
	}

	/**
	 * @return mixed The value passed to setReferrer(), if any
	 */
	public function getReferrer() {
		return $this->mRef;
	}

	/**
	 * Load the blob object this stub points to (from the one-entry cache
	 * or from the text table) and return this stub's item from it.
	 *
	 * @return string|bool The text, or false if the row is missing, is not
	 *  flagged as an object, or cannot be fetched or decoded
	 */
	public function getText() {
		if ( isset( self::$blobCache[$this->mOldId] ) ) {
			$obj = self::$blobCache[$this->mOldId];
		} else {
			$dbr = wfGetDB( DB_SLAVE );
			$row = $dbr->selectRow(
				'text',
				array( 'old_flags', 'old_text' ),
				array( 'old_id' => $this->mOldId )
			);
			if ( !$row ) {
				return false;
			}

			$flags = explode( ',', $row->old_flags );
			$text = $row->old_text;

			if ( in_array( 'external', $flags, true ) ) {
				// For external rows old_text is a URL of the form scheme://path
				$parts = explode( '://', $text, 2 );
				if ( !isset( $parts[1] ) || $parts[1] === '' ) {
					return false;
				}
				$text = ExternalStore::fetchFromUrl( $text );
				if ( $text === false ) {
					return false;
				}
			}

			if ( !in_array( 'object', $flags, true ) ) {
				return false;
			}

			if ( in_array( 'gzip', $flags, true ) ) {
				// This shouldn't happen, but a bug in the compress script
				// may at times gzip-compress a HistoryBlob object row.
				$text = gzinflate( $text );
				if ( $text === false ) {
					return false;
				}
			}
			$obj = unserialize( $text );

			if ( !is_object( $obj ) ) {
				// Correct for old double-serialization bug.
				$obj = is_string( $obj ) ? unserialize( $obj ) : false;
			}
			if ( !is_object( $obj ) ) {
				// Undecodable row: report failure instead of calling a
				// method on a non-object below.
				return false;
			}

			// Save this item for reference; if pulling many
			// items in a row we'll likely use it again.
			$obj->uncompress();
			self::$blobCache = array( $this->mOldId => $obj );
		}
		return $obj->getItem( $this->mHash );
	}

	/**
	 * @return string Key of this stub's item within the blob object
	 */
	public function getHash() {
		return $this->mHash;
	}
}

/**
 * Pointer to a row of the cur table whose cur_text is the wanted text.
 */
class HistoryBlobCurStub {
	/** @var int cur_id of the row to read */
	public $mCurId;

	/**
	 * @param int $curid cur_id of the row to read
	 */
	public function __construct( $curid = 0 ) {
		$this->mCurId = $curid;
	}

	/**
	 * Set the cur_id of the row to read.
	 *
	 * @param int $id
	 */
	public function setLocation( $id ) {
		$this->mCurId = $id;
	}

	/**
	 * @return string|bool cur_text of the row, or false if there is no such row
	 */
	public function getText() {
		$dbr = wfGetDB( DB_SLAVE );
		$row = $dbr->selectRow( 'cur', array( 'cur_text' ), array( 'cur_id' => $this->mCurId ) );
		if ( !$row ) {
			return false;
		}
		return $row->cur_text;
	}
}

/**
 * Blob that stores its items as a chain of binary diffs, each one against
 * the text reconstructed by the previous diff. Writing requires the xdiff
 * extension; reading falls back to a pure PHP patcher.
 */
class DiffHistoryBlob implements HistoryBlob {
	/** @var array Uncompressed item texts, keyed by item number */
	public $mItems = array();

	/** @var int Sum of the byte lengths of the items added */
	public $mSize = 0;

	/**
	 * Chain of binary diffs; the first one applies to the empty string,
	 * each later one to the result of the one before it. Null until
	 * compress() or __wakeup() has filled it.
	 *
	 * @var array|null
	 */
	public $mDiffs;

	/** @var array For each entry of $mDiffs, the key in $mItems it produces */
	public $mDiffMap;

	/** @var int Key of the default text returned by getText() */
	public $mDefaultKey;

	/** @var string Deflated serialized payload; only set between __sleep() and __wakeup() */
	public $mCompressed;

	/** @var bool True once the object went through __wakeup(); addItem() then refuses */
	public $mFrozen = false;

	/** @var int Size limit, in bytes, used by isHappy() */
	public $mMaxSize = 10000000;

	/** @var int Item count limit used by isHappy() */
	public $mMaxCount = 100;

	/** Opcodes of the binary diff format as interpreted by patch() */
	const XDL_BDOP_INS = 1;
	const XDL_BDOP_CPY = 2;
	const XDL_BDOP_INSB = 3;

	/**
	 * @throws MWException If zlib support is not available
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
		}
	}

	/**
	 * Append a text to the blob.
	 *
	 * @param string $text
	 * @return int Key of the new item
	 * @throws MWException If the object has been through sleep/wakeup
	 */
	public function addItem( $text ) {
		if ( $this->mFrozen ) {
			throw new MWException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
		}

		$this->mItems[] = $text;
		$this->mSize += strlen( $text );
		$this->mDiffs = null; // recomputed by the next compress()
		return count( $this->mItems ) - 1;
	}

	/**
	 * @param int $key Key returned by addItem()
	 * @return string|bool The text, or false if there is no such item
	 */
	public function getItem( $key ) {
		if ( $key === null || !array_key_exists( $key, $this->mItems ) ) {
			return false;
		}
		return $this->mItems[$key];
	}

	/**
	 * Add a text and make it the default text of the blob.
	 *
	 * @param string $text
	 */
	public function setText( $text ) {
		$this->mDefaultKey = $this->addItem( $text );
	}

	/**
	 * @return string|bool The default text, or false if none has been set
	 */
	public function getText() {
		return $this->getItem( $this->mDefaultKey );
	}

	/**
	 * Build $mDiffs and $mDiffMap from $mItems. Does nothing if the diffs
	 * already exist or if there are no items.
	 *
	 * @throws MWException If diffs have to be computed and xdiff is missing
	 */
	public function compress() {
		if ( isset( $this->mDiffs ) ) {
			// Already compressed
			return;
		}
		$itemCount = count( $this->mItems );
		if ( !$itemCount ) {
			// Empty
			return;
		}
		// Only demand xdiff once there is actual diffing to do.
		if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
			throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
		}

		// Create two diff sequences: one for main text and one for small text
		$sequences = array(
			'small' => array(
				'tail' => '',
				'diffs' => array(),
				'map' => array(),
			),
			'main' => array(
				'tail' => '',
				'diffs' => array(),
				'map' => array(),
			),
		);
		$smallFactor = 0.5;

		for ( $i = 0; $i < $itemCount; $i++ ) {
			$text = $this->mItems[$i];
			// The first item always starts the main sequence; later items go to
			// the small sequence when they are less than $smallFactor times as
			// long as the current tail of the main sequence.
			$seqName = 'main';
			if ( $i !== 0 ) {
				$mainTail = $sequences['main']['tail'];
				if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
					$seqName = 'small';
				}
			}
			$seq =& $sequences[$seqName];
			$seq['diffs'][] = $this->diff( $seq['tail'], $text );
			$seq['map'][] = $i;
			$seq['tail'] = $text;
		}
		unset( $seq ); // unlink dangerous alias

		// Knit the sequences together
		$tail = '';
		$this->mDiffs = array();
		$this->mDiffMap = array();
		foreach ( $sequences as $seq ) {
			$diffCount = count( $seq['diffs'] );
			if ( !$diffCount ) {
				continue;
			}
			if ( $tail === '' ) {
				$this->mDiffs[] = $seq['diffs'][0];
			} else {
				// The sequence's first diff was made against the empty string;
				// rebuild its text and diff it against the previous sequence's tail.
				$head = $this->patch( '', $seq['diffs'][0] );
				$this->mDiffs[] = $this->diff( $tail, $head );
			}
			$this->mDiffMap[] = $seq['map'][0];
			for ( $i = 1; $i < $diffCount; $i++ ) {
				$this->mDiffs[] = $seq['diffs'][$i];
				$this->mDiffMap[] = $seq['map'][$i];
			}
			$tail = $seq['tail'];
		}
	}

	/**
	 * Compute a binary diff with xdiff_string_rabdiff().
	 *
	 * @param string $t1 Base text
	 * @param string $t2 Target text
	 * @return string
	 */
	public function diff( $t1, $t2 ) {
		# Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
		# "String is not zero-terminated"
		wfSuppressWarnings();
		$diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
		wfRestoreWarnings();
		return $diff;
	}

	/**
	 * Apply a binary diff to a base text, using xdiff_string_bpatch() when
	 * available and a pure PHP implementation otherwise.
	 *
	 * @param string $base
	 * @param string $diff
	 * @return string|bool The patched text; the pure PHP implementation
	 *  returns false when the diff is malformed or does not match $base
	 */
	public function patch( $base, $diff ) {
		if ( function_exists( 'xdiff_string_bpatch' ) ) {
			wfSuppressWarnings();
			$text = xdiff_string_bpatch( $base, $diff ) . '';
			wfRestoreWarnings();
			return $text;
		}

		# Pure PHP implementation

		# The header is 4 bytes of base checksum followed by 4 bytes of base length
		if ( strlen( $diff ) < 8 ) {
			wfDebug( __METHOD__ . ": diff too short\n" );
			return false;
		}
		$header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );

		# Check the checksum if hash extension is available
		$ofp = $this->xdiffAdler32( $base );
		if ( $ofp !== false && $ofp !== substr( $diff, 0, 4 ) ) {
			wfDebug( __METHOD__ . ": incorrect base checksum\n" );
			return false;
		}
		if ( $header['csize'] != strlen( $base ) ) {
			wfDebug( __METHOD__ . ": incorrect base length\n" );
			return false;
		}

		$p = 8;
		$diffLength = strlen( $diff );
		$out = '';
		while ( $p < $diffLength ) {
			$x = unpack( 'Cop', substr( $diff, $p, 1 ) );
			$op = $x['op'];
			++$p;
			switch ( $op ) {
				case self::XDL_BDOP_INS:
					# Insert literal bytes, 1-byte length
					$x = unpack( 'Csize', substr( $diff, $p, 1 ) );
					$p++;
					$out .= substr( $diff, $p, $x['size'] );
					$p += $x['size'];
					break;
				case self::XDL_BDOP_INSB:
					# Insert literal bytes, 4-byte length
					$x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
					$p += 4;
					$out .= substr( $diff, $p, $x['csize'] );
					$p += $x['csize'];
					break;
				case self::XDL_BDOP_CPY:
					# Copy a range of the base text
					$x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
					$p += 8;
					$out .= substr( $base, $x['off'], $x['csize'] );
					break;
				default:
					wfDebug( __METHOD__ . ": invalid op\n" );
					return false;
			}
		}
		return $out;
	}

	/**
	 * Compute the checksum that patch() compares against the first four
	 * bytes of a diff.
	 *
	 * @param string $s
	 * @return string|bool 4-byte binary checksum, or false if the hash
	 *  extension is not available
	 */
	public function xdiffAdler32( $s ) {
		if ( !function_exists( 'hash' ) ) {
			return false;
		}

		static $init;
		if ( $init === null ) {
			$init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
		}

		// The real Adler-32 checksum of $init is zero, so it initialises the
		// state to zero, as it is at the start of LibXDiff's checksum
		// algorithm. Appending the subject string then simulates LibXDiff.
		return strrev( hash( 'adler32', $init . $s, true ) );
	}

	/**
	 * Rebuild $mItems by applying the diff chain in $mDiffs, storing each
	 * result under the key that $mDiffMap gives for it.
	 */
	public function uncompress() {
		if ( !$this->mDiffs ) {
			return;
		}
		$tail = '';
		foreach ( array_values( $this->mDiffs ) as $diffKey => $diff ) {
			$text = $this->patch( $tail, $diff );
			$this->mItems[$this->mDiffMap[$diffKey]] = $text;
			$tail = $text;
		}
	}

	/**
	 * Compress, then pack diffs, diff map and default key into the single
	 * deflated property $mCompressed.
	 *
	 * @return array Names of the properties to serialize
	 */
	public function __sleep() {
		$this->compress();
		if ( !count( $this->mItems ) ) {
			// Empty object
			$info = false;
		} else {
			// Take forward differences to improve the compression ratio for sequences
			$deltas = array();
			$prev = 0;
			foreach ( $this->mDiffMap as $i ) {
				$deltas[] = $i - $prev;
				$prev = $i;
			}
			$info = array(
				'diffs' => $this->mDiffs,
				'map' => implode( ',', $deltas )
			);
			if ( isset( $this->mDefaultKey ) ) {
				$info['default'] = $this->mDefaultKey;
			}
		}
		$this->mCompressed = gzdeflate( serialize( $info ) );
		return array( 'mCompressed' );
	}

	/**
	 * Unpack $mCompressed, restore diffs, diff map and default key, and
	 * rebuild the item texts.
	 */
	public function __wakeup() {
		// addItem() doesn't work if mItems is partially filled from mDiffs
		$this->mFrozen = true;
		$info = unserialize( gzinflate( $this->mCompressed ) );
		unset( $this->mCompressed );

		if ( !$info ) {
			// Empty object
			return;
		}

		if ( isset( $info['default'] ) ) {
			$this->mDefaultKey = $info['default'];
		}
		$this->mDiffs = $info['diffs'];
		if ( isset( $info['base'] ) ) {
			// Old format: the first text is stored in full as 'base'. Turn it
			// into a diff against the empty string (zero checksum, zero base
			// length, one INSB op) so that it heads the chain.
			array_unshift( $this->mDiffs,
				pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
				$info['base'] );
			// Build the map only now, so that it has one entry per diff
			// including the base. NOTE(review): assumes old-format items are
			// numbered consecutively from 0 with the base as item 0 — confirm.
			$this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
		} else {
			// New format: the map is a comma-separated list of forward differences
			$cur = 0;
			$this->mDiffMap = array();
			foreach ( explode( ',', $info['map'] ) as $delta ) {
				$cur += (int)$delta;
				$this->mDiffMap[] = $cur;
			}
		}
		$this->uncompress();
	}

	/**
	 * Whether the blob is still below both its size and its item count
	 * limit, i.e. whether more items may reasonably be added.
	 *
	 * @return bool
	 */
	public function isHappy() {
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}
}