MediaWiki
REL1_20
|
<?php
/**
 * Containers that pack several revision texts into one storage object,
 * plus the stub objects that point into such containers.
 */

/**
 * Common interface of the multi-text containers in this file.
 */
interface HistoryBlob
{
	/**
	 * Add a text to the container.
	 *
	 * @param $text string Text to store
	 * @return mixed Key under which the text can later be fetched with getItem()
	 */
	function addItem( $text );

	/**
	 * Fetch a previously added text.
	 *
	 * @param $key mixed Key returned by addItem()
	 * @return string|bool The stored text
	 */
	function getItem( $key );

	/**
	 * Add a text and remember it as the container's default text,
	 * i.e. the one returned by getText().
	 *
	 * @param $text string
	 */
	function setText( $text );

	/**
	 * Fetch the default text registered through setText().
	 *
	 * @return string|bool
	 */
	function getText();
}

/**
 * Container that keeps its texts in an array keyed by MD5 hash and stores
 * that array as a single gzdeflate()d, serialized string.
 */
class ConcatenatedGzipHistoryBlob implements HistoryBlob
{
	// $mItems is an array( hash => text ) while uncompressed and a deflated
	// string while $mCompressed is true.
	public $mVersion = 0, $mCompressed = false, $mItems = array(), $mDefaultHash = '';
	// Running total of strlen() of the stored texts. Not part of the
	// serialized state (see __sleep()).
	public $mSize = 0;
	// Limits consulted by isHappy().
	public $mMaxSize = 10000000;
	public $mMaxCount = 100;

	/**
	 * @throws MWException if zlib's gzdeflate() is not available
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write this kind of history object (ConcatenatedGzipHistoryBlob)\n" );
		}
	}

	/**
	 * Store a text under its MD5 hash. Adding a text whose hash is already
	 * present changes nothing.
	 *
	 * @param $text string
	 * @return string MD5 hash of $text, usable with getItem()
	 */
	public function addItem( $text ) {
		$this->uncompress();
		$hash = md5( $text );
		if ( !isset( $this->mItems[$hash] ) ) {
			$this->mItems[$hash] = $text;
			$this->mSize += strlen( $text );
		}
		return $hash;
	}

	/**
	 * @param $hash string Hash returned by addItem()
	 * @return string|bool The stored text, or false if the hash is unknown
	 */
	public function getItem( $hash ) {
		$this->uncompress();
		return array_key_exists( $hash, $this->mItems )
			? $this->mItems[$hash]
			: false;
	}

	/**
	 * Store a text and make it the default text of this blob.
	 *
	 * @param $text string
	 */
	public function setText( $text ) {
		$this->uncompress();
		$this->mDefaultHash = $this->addItem( $text );
	}

	/**
	 * @return string|bool The default text, or false if none is stored
	 */
	public function getText() {
		$this->uncompress();
		return $this->getItem( $this->mDefaultHash );
	}

	/**
	 * Remove a text from the blob.
	 *
	 * The blob is uncompressed first, like every other accessor, because
	 * $mItems is a deflated string while compressed. Unknown hashes are
	 * ignored so that the size accounting stays correct.
	 *
	 * @param $hash string Hash returned by addItem()
	 */
	public function removeItem( $hash ) {
		$this->uncompress();
		if ( !array_key_exists( $hash, $this->mItems ) ) {
			return;
		}
		$this->mSize -= strlen( $this->mItems[$hash] );
		unset( $this->mItems[$hash] );
	}

	/**
	 * Replace the item array by its serialized, deflated form.
	 * Does nothing if the blob is already compressed.
	 */
	public function compress() {
		if ( !$this->mCompressed ) {
			$this->mItems = gzdeflate( serialize( $this->mItems ) );
			$this->mCompressed = true;
		}
	}

	/**
	 * Restore the item array from its deflated form.
	 * Does nothing if the blob is not compressed.
	 */
	public function uncompress() {
		if ( $this->mCompressed ) {
			$this->mItems = unserialize( gzinflate( $this->mItems ) );
			$this->mCompressed = false;
		}
	}

	/**
	 * Compress before serialization and list the properties to serialize.
	 *
	 * @return array
	 */
	function __sleep() {
		$this->compress();
		return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' );
	}

	/**
	 * Uncompress right after unserialization.
	 */
	function __wakeup() {
		$this->uncompress();
	}

	/**
	 * Tell whether the blob is still below both its size limit and its
	 * item count limit.
	 *
	 * @return bool
	 */
	public function isHappy() {
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}
}


/**
 * Pointer to one item inside a blob object stored in a row of the
 * 'text' table.
 */
class HistoryBlobStub {
	/**
	 * Holds at most one blob: the most recently loaded one, keyed by the
	 * old_id it was loaded from.
	 */
	protected static $blobCache = array();

	public $mOldId, $mHash, $mRef;

	/**
	 * @param $hash string Key of the item inside the blob
	 * @param $oldid int Accepted but not used by this constructor; the row
	 *  to read is set with setLocation()
	 */
	function __construct( $hash = '', $oldid = 0 ) {
		$this->mHash = $hash;
	}

	/**
	 * Set the old_id of the 'text' row that holds the blob.
	 *
	 * @param $id int
	 */
	function setLocation( $id ) {
		$this->mOldId = $id;
	}

	/**
	 * Remember a referrer id for this stub.
	 *
	 * @param $id mixed
	 */
	function setReferrer( $id ) {
		$this->mRef = $id;
	}

	/**
	 * @return mixed The value passed to setReferrer()
	 */
	function getReferrer() {
		return $this->mRef;
	}

	/**
	 * Load the blob this stub points to and return the referenced item.
	 *
	 * @return string|bool Whatever the blob's getItem() returns for the
	 *  stored hash, or false if the row is missing, an external URL is
	 *  malformed, the row lacks the 'object' flag, or its content cannot be
	 *  unserialized into an object
	 */
	function getText() {
		if ( isset( self::$blobCache[$this->mOldId] ) ) {
			$obj = self::$blobCache[$this->mOldId];
		} else {
			$dbr = wfGetDB( DB_SLAVE );
			$row = $dbr->selectRow( 'text', array( 'old_flags', 'old_text' ), array( 'old_id' => $this->mOldId ) );
			if ( !$row ) {
				return false;
			}
			$flags = explode( ',', $row->old_flags );
			if ( in_array( 'external', $flags, true ) ) {
				// old_text holds a URL rather than the data itself
				$url = $row->old_text;
				$parts = explode( '://', $url, 2 );
				if ( !isset( $parts[1] ) || $parts[1] === '' ) {
					return false;
				}
				$row->old_text = ExternalStore::fetchFromUrl( $url );
			}
			if ( !in_array( 'object', $flags, true ) ) {
				return false;
			}

			if ( in_array( 'gzip', $flags, true ) ) {
				// This shouldn't happen, but a bug in the compress script
				// may at times gzip-compress a HistoryBlob object row.
				$obj = unserialize( gzinflate( $row->old_text ) );
			} else {
				$obj = unserialize( $row->old_text );
			}

			if ( !is_object( $obj ) ) {
				// Correct for old double-serialization bug.
				$obj = unserialize( $obj );
			}

			if ( !is_object( $obj ) ) {
				// Corrupt row or failed external fetch: nothing to read from
				return false;
			}

			// Save this item for reference; if pulling many
			// items in a row we'll likely use it again.
			$obj->uncompress();
			self::$blobCache = array( $this->mOldId => $obj );
		}
		return $obj->getItem( $this->mHash );
	}

	/**
	 * @return string Key of the referenced item inside the blob
	 */
	function getHash() {
		return $this->mHash;
	}
}


/**
 * Pointer to a text held in the cur_text column of a row of the
 * 'cur' table.
 */
class HistoryBlobCurStub {
	public $mCurId;

	/**
	 * @param $curid int cur_id of the row holding the text
	 */
	function __construct( $curid = 0 ) {
		$this->mCurId = $curid;
	}

	/**
	 * Set the cur_id of the row holding the text.
	 *
	 * @param $id int
	 */
	function setLocation( $id ) {
		$this->mCurId = $id;
	}

	/**
	 * @return string|bool cur_text of the row, or false if no row matches
	 */
	function getText() {
		$dbr = wfGetDB( DB_SLAVE );
		$row = $dbr->selectRow( 'cur', array( 'cur_text' ), array( 'cur_id' => $this->mCurId ) );
		if ( !$row ) {
			return false;
		}
		return $row->cur_text;
	}
}

/**
 * Container that stores its texts as a chain of binary diffs, each one
 * taken against the previous text in the chain.
 */
class DiffHistoryBlob implements HistoryBlob {
	/** Uncompressed item texts, indexed by the key addItem() returned */
	public $mItems = array();

	/** Running total of strlen() of the added texts */
	public $mSize = 0;

	/**
	 * Chain of binary diffs. Applying them one after another, starting from
	 * an empty string, yields the stored texts (see uncompress()). Reset to
	 * null by addItem() and rebuilt by compress().
	 */
	public $mDiffs;

	/** For each entry of $mDiffs, the key in $mItems of the text it produces */
	public $mDiffMap;

	/** Key of the default text, set by setText() */
	public $mDefaultKey;

	/** Deflated, serialized payload; only populated around serialization */
	public $mCompressed;

	/** True once the object went through __wakeup(); addItem() then refuses */
	public $mFrozen = false;

	/** Limits consulted by isHappy() */
	public $mMaxSize = 10000000;
	public $mMaxCount = 100;

	/** Opcodes understood by the pure PHP patch() implementation */
	const XDL_BDOP_INS = 1;
	const XDL_BDOP_CPY = 2;
	const XDL_BDOP_INSB = 3;

	/**
	 * @throws MWException if zlib's gzdeflate() is not available
	 */
	function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
		}
	}

	/**
	 * Append a text and invalidate any previously computed diffs.
	 *
	 * @param $text string
	 * @return int Index of the new item
	 * @throws MWException if the object was restored through __wakeup()
	 */
	function addItem( $text ) {
		if ( $this->mFrozen ) {
			throw new MWException( __METHOD__.": Cannot add more items after sleep/wakeup" );
		}

		$this->mItems[] = $text;
		$this->mSize += strlen( $text );
		$this->mDiffs = null; // later
		return count( $this->mItems ) - 1;
	}

	/**
	 * @param $key int Index returned by addItem()
	 * @return string
	 */
	function getItem( $key ) {
		return $this->mItems[$key];
	}

	/**
	 * Append a text and make it the default text.
	 *
	 * @param $text string
	 */
	function setText( $text ) {
		$this->mDefaultKey = $this->addItem( $text );
	}

	/**
	 * @return string The default text
	 */
	function getText() {
		return $this->getItem( $this->mDefaultKey );
	}

	/**
	 * Build $mDiffs and $mDiffMap from $mItems.
	 *
	 * The "nothing to do" cases are checked before the xdiff requirement,
	 * so a blob that is empty or already diffed can be serialized again
	 * on a host without the xdiff extension.
	 *
	 * @throws MWException if diffs have to be computed and
	 *  xdiff_string_rabdiff() is not available
	 */
	function compress() {
		if ( isset( $this->mDiffs ) ) {
			// Already compressed
			return;
		}
		if ( !count( $this->mItems ) ) {
			// Empty
			return;
		}
		if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
			throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
		}

		// Create two diff sequences: one for main text and one for small text
		$sequences = array(
			'small' => array(
				'tail' => '',
				'diffs' => array(),
				'map' => array(),
			),
			'main' => array(
				'tail' => '',
				'diffs' => array(),
				'map' => array(),
			),
		);
		$smallFactor = 0.5;

		$itemCount = count( $this->mItems );
		for ( $i = 0; $i < $itemCount; $i++ ) {
			$text = $this->mItems[$i];
			// The first text always opens the main sequence. A later text
			// goes to the small sequence when it is shorter than
			// $smallFactor times the current end of the main sequence.
			$seqName = 'main';
			if ( $i != 0 ) {
				$mainTail = $sequences['main']['tail'];
				if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
					$seqName = 'small';
				}
			}
			$seq =& $sequences[$seqName];
			$seq['diffs'][] = $this->diff( $seq['tail'], $text );
			$seq['map'][] = $i;
			$seq['tail'] = $text;
		}
		unset( $seq ); // unlink dangerous alias

		// Knit the sequences together
		$tail = '';
		$this->mDiffs = array();
		$this->mDiffMap = array();
		foreach ( $sequences as $seq ) {
			$diffCount = count( $seq['diffs'] );
			if ( !$diffCount ) {
				continue;
			}
			if ( $tail === '' ) {
				$this->mDiffs[] = $seq['diffs'][0];
			} else {
				// The first diff of this sequence was taken against an empty
				// string; rebase it onto the end of the previous sequence.
				$head = $this->patch( '', $seq['diffs'][0] );
				$this->mDiffs[] = $this->diff( $tail, $head );
			}
			$this->mDiffMap[] = $seq['map'][0];
			for ( $i = 1; $i < $diffCount; $i++ ) {
				$this->mDiffs[] = $seq['diffs'][$i];
				$this->mDiffMap[] = $seq['map'][$i];
			}
			$tail = $seq['tail'];
		}
	}

	/**
	 * Compute a binary diff with xdiff_string_rabdiff().
	 *
	 * @param $t1 string Base text
	 * @param $t2 string Target text
	 * @return string
	 */
	function diff( $t1, $t2 ) {
		# Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
		# "String is not zero-terminated"
		wfSuppressWarnings();
		$diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
		wfRestoreWarnings();
		return $diff;
	}

	/**
	 * Apply a binary diff to a base text. Uses xdiff_string_bpatch() when
	 * it exists and a pure PHP implementation otherwise.
	 *
	 * @param $base string
	 * @param $diff string
	 * @return string|bool The patched text. The pure PHP implementation
	 *  returns false on a checksum mismatch, a base length mismatch or an
	 *  unknown opcode.
	 */
	function patch( $base, $diff ) {
		if ( function_exists( 'xdiff_string_bpatch' ) ) {
			wfSuppressWarnings();
			$text = xdiff_string_bpatch( $base, $diff ) . '';
			wfRestoreWarnings();
			return $text;
		}

		# Pure PHP implementation

		// Header: 4 byte checksum of the base, then 4 byte base length,
		// both unpacked as little-endian 32 bit integers
		$header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );

		# Check the checksum if hash/mhash is available
		$ofp = $this->xdiffAdler32( $base );
		if ( $ofp !== false && $ofp !== substr( $diff, 0, 4 ) ) {
			wfDebug( __METHOD__. ": incorrect base checksum\n" );
			return false;
		}
		if ( $header['csize'] != strlen( $base ) ) {
			wfDebug( __METHOD__. ": incorrect base length\n" );
			return false;
		}

		$p = 8;
		$out = '';
		$diffLength = strlen( $diff );
		while ( $p < $diffLength ) {
			$x = unpack( 'Cop', substr( $diff, $p, 1 ) );
			$op = $x['op'];
			++$p;
			switch ( $op ) {
				case self::XDL_BDOP_INS:
					// Literal data with a 1 byte length
					$x = unpack( 'Csize', substr( $diff, $p, 1 ) );
					$p++;
					$out .= substr( $diff, $p, $x['size'] );
					$p += $x['size'];
					break;
				case self::XDL_BDOP_INSB:
					// Literal data with a 4 byte length
					$x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
					$p += 4;
					$out .= substr( $diff, $p, $x['csize'] );
					$p += $x['csize'];
					break;
				case self::XDL_BDOP_CPY:
					// Copy a range of the base: 4 byte offset, 4 byte length
					$x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
					$p += 8;
					$out .= substr( $base, $x['off'], $x['csize'] );
					break;
				default:
					wfDebug( __METHOD__.": invalid op\n" );
					return false;
			}
		}
		return $out;
	}

	/**
	 * Compute the base checksum that patch() compares against the first
	 * four bytes of a diff.
	 *
	 * @param $s string
	 * @return string|bool Four byte binary string, or false if neither
	 *  hash() nor mhash() is available
	 */
	function xdiffAdler32( $s ) {
		static $init;
		if ( $init === null ) {
			$init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
		}
		// The real Adler-32 checksum of $init is zero, so it initialises the
		// state to zero, as it is at the start of LibXDiff's checksum
		// algorithm. Appending the subject string then simulates LibXDiff.
		if ( function_exists( 'hash' ) ) {
			$hash = hash( 'adler32', $init . $s, true );
		} elseif ( function_exists( 'mhash' ) ) {
			$hash = mhash( MHASH_ADLER32, $init . $s );
		} else {
			return false;
		}
		// Reverse the byte order to match what the diff header stores
		return strrev( $hash );
	}

	/**
	 * Rebuild $mItems by applying the diff chain, starting from an empty
	 * string. Does nothing when there are no diffs.
	 */
	function uncompress() {
		if ( !$this->mDiffs ) {
			return;
		}
		$tail = '';
		$diffCount = count( $this->mDiffs );
		for ( $diffKey = 0; $diffKey < $diffCount; $diffKey++ ) {
			$textKey = $this->mDiffMap[$diffKey];
			$text = $this->patch( $tail, $this->mDiffs[$diffKey] );
			$this->mItems[$textKey] = $text;
			$tail = $text;
		}
	}

	/**
	 * Compute the diffs and pack them, the diff map and the default key
	 * into $mCompressed, the only property that gets serialized.
	 *
	 * @return array
	 */
	function __sleep() {
		$this->compress();
		if ( !count( $this->mItems ) ) {
			// Empty object
			$info = false;
		} else {
			// Take forward differences to improve the compression ratio for sequences
			$map = '';
			$prev = 0;
			foreach ( $this->mDiffMap as $i ) {
				if ( $map !== '' ) {
					$map .= ',';
				}
				$map .= ( $i - $prev );
				$prev = $i;
			}
			$info = array(
				'diffs' => $this->mDiffs,
				'map' => $map
			);
		}
		if ( isset( $this->mDefaultKey ) ) {
			$info['default'] = $this->mDefaultKey;
		}
		$this->mCompressed = gzdeflate( serialize( $info ) );
		return array( 'mCompressed' );
	}

	/**
	 * Unpack $mCompressed, restore the diff chain and rebuild the items.
	 * Understands both the current payload (with a 'map' entry) and an
	 * older one that carries the first text in a 'base' entry.
	 */
	function __wakeup() {
		// addItem() doesn't work if mItems is partially filled from mDiffs
		$this->mFrozen = true;
		$info = unserialize( gzinflate( $this->mCompressed ) );
		unset( $this->mCompressed );

		if ( !$info ) {
			// Empty object
			return;
		}

		if ( isset( $info['default'] ) ) {
			$this->mDefaultKey = $info['default'];
		}
		$this->mDiffs = $info['diffs'];
		if ( isset( $info['base'] ) ) {
			// Old format
			$this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
			// Turn the base text into a leading diff against an empty
			// string: zero checksum, zero base length, one INSB operation
			array_unshift( $this->mDiffs,
				pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
				$info['base'] );
		} else {
			// New format
			// The map is stored as forward differences; sum them back up
			$map = explode( ',', $info['map'] );
			$cur = 0;
			$this->mDiffMap = array();
			foreach ( $map as $i ) {
				$cur += $i;
				$this->mDiffMap[] = $cur;
			}
		}
		$this->uncompress();
	}

	/**
	 * Tell whether the blob is still below both its size limit and its
	 * item count limit.
	 *
	 * @return bool
	 */
	function isHappy() {
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}

}