MediaWiki  REL1_20
HistoryBlob.php
Go to the documentation of this file.
00001 <?php
/**
 * Common contract for objects that bundle several text items into one
 * storable unit.
 */
interface HistoryBlob {
	/**
	 * Add an item of text to the blob.
	 *
	 * @param $text string
	 * @return mixed Key under which the text can be fetched with getItem().
	 *   The implementations in this file return either an MD5 hash string
	 *   or an integer index.
	 */
	public function addItem( $text );

	/**
	 * Fetch a previously added item.
	 *
	 * @param $key mixed Key returned by addItem()
	 * @return mixed The stored text
	 */
	public function getItem( $key );

	/**
	 * Add a text and remember it as the blob's "default" text, i.e. the one
	 * that getText() returns.
	 *
	 * @param $text string
	 */
	public function setText( $text );

	/**
	 * Fetch the default text registered through setText().
	 *
	 * @return mixed
	 */
	public function getText();
}
00069 
/**
 * HistoryBlob implementation that keeps its items in one array, keyed by the
 * MD5 hash of each text, and stores that array serialized and deflated.
 */
class ConcatenatedGzipHistoryBlob implements HistoryBlob
{
	/**
	 * $mItems is the hash => text array while the object is uncompressed, and
	 * the deflated serialized form of that array while $mCompressed is true.
	 * $mDefaultHash is the key of the text registered through setText().
	 */
	public $mVersion = 0, $mCompressed = false, $mItems = array(), $mDefaultHash = '';

	/**
	 * Running total of strlen() over the items added to this instance.
	 * It is not part of the serialized state (see __sleep()), so it starts
	 * again from 0 on an unserialized object.
	 */
	public $mSize = 0;

	/** Size threshold used by isHappy() */
	public $mMaxSize = 10000000;

	/** Item count threshold used by isHappy() */
	public $mMaxCount = 100;

	/**
	 * @throws MWException if zlib's gzdeflate() is not available
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write this kind of history object (ConcatenatedGzipHistoryBlob)\n" );
		}
	}

	/**
	 * Add a text to the blob. Adding the same text twice stores it only once.
	 *
	 * @param $text string
	 * @return string MD5 hash of the text, usable as key for getItem()
	 */
	public function addItem( $text ) {
		$this->uncompress();
		$hash = md5( $text );
		if ( !isset( $this->mItems[$hash] ) ) {
			$this->mItems[$hash] = $text;
			$this->mSize += strlen( $text );
		}
		return $hash;
	}

	/**
	 * @param $hash string Key returned by addItem()
	 * @return string|bool The stored text, or false if the key is unknown
	 */
	public function getItem( $hash ) {
		$this->uncompress();
		if ( array_key_exists( $hash, $this->mItems ) ) {
			return $this->mItems[$hash];
		} else {
			return false;
		}
	}

	/**
	 * Add a text and make it the default text returned by getText().
	 *
	 * @param $text string
	 */
	public function setText( $text ) {
		$this->uncompress();
		$this->mDefaultHash = $this->addItem( $text );
	}

	/**
	 * @return string|bool The default text, or false if none is stored
	 */
	public function getText() {
		$this->uncompress();
		return $this->getItem( $this->mDefaultHash );
	}

	/**
	 * Remove an item. Unknown keys are ignored.
	 *
	 * @param $hash string Key returned by addItem()
	 */
	public function removeItem( $hash ) {
		// mItems is a deflated string while compressed; it has to be an
		// array again before it can be indexed by hash.
		$this->uncompress();
		if ( !isset( $this->mItems[$hash] ) ) {
			return;
		}
		$this->mSize -= strlen( $this->mItems[$hash] );
		unset( $this->mItems[$hash] );
	}

	/**
	 * Replace the item array by its deflated serialized form.
	 * Does nothing if the object is already compressed.
	 */
	public function compress() {
		if ( !$this->mCompressed  ) {
			$this->mItems = gzdeflate( serialize( $this->mItems ) );
			$this->mCompressed = true;
		}
	}

	/**
	 * Restore the item array from its deflated serialized form.
	 * Does nothing if the object is not compressed.
	 */
	public function uncompress() {
		if ( $this->mCompressed ) {
			$this->mItems = unserialize( gzinflate( $this->mItems ) );
			$this->mCompressed = false;
		}
	}

	/**
	 * Compress the items and name the properties to serialize.
	 * The object is left in compressed state afterwards.
	 *
	 * @return array
	 */
	public function __sleep() {
		$this->compress();
		return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' );
	}

	/**
	 * Expand the items again after unserialization.
	 */
	public function __wakeup() {
		$this->uncompress();
	}

	/**
	 * Tell whether the blob is still below both the size and the item count
	 * threshold.
	 *
	 * @return bool
	 */
	public function isHappy() {
		// After serialize() the object stays compressed and mItems is a
		// string, which cannot be counted.
		$this->uncompress();
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}
}
00185 
00186 
/**
 * Pointer object for one item inside a blob object that is stored in a row
 * of the text table.
 */
class HistoryBlobStub {
	/**
	 * Most recently loaded blob object, keyed by its old_id. getText()
	 * replaces the whole array on each load, so it never holds more than
	 * one entry.
	 */
	protected static $blobCache = array();

	/**
	 * $mOldId: old_id of the text row holding the blob (see setLocation()).
	 * $mHash: key of this item inside the blob.
	 * $mRef: referrer id (see setReferrer()).
	 */
	public $mOldId, $mHash, $mRef;

	/**
	 * @param $hash string Key of the item inside the blob
	 * @param $oldid int Accepted but not stored; the location is set
	 *   through setLocation()
	 */
	public function __construct( $hash = '', $oldid = 0 ) {
		$this->mHash = $hash;
	}

	/**
	 * Set the old_id of the text row that holds the blob object.
	 *
	 * @param $id int
	 */
	public function setLocation( $id ) {
		$this->mOldId = $id;
	}

	/**
	 * Set the referrer id.
	 *
	 * @param $id int
	 */
	public function setReferrer( $id ) {
		$this->mRef = $id;
	}

	/**
	 * @return int The id given to setReferrer()
	 */
	public function getReferrer() {
		return $this->mRef;
	}

	/**
	 * Load the blob object this stub points to (from the cache or from the
	 * text table) and return this stub's item from it.
	 *
	 * @return string|bool The text, or false if the row is missing, is not
	 *   flagged as an object, has a malformed external URL, or does not
	 *   unserialize to an object
	 */
	public function getText() {
		if ( isset( self::$blobCache[$this->mOldId] ) ) {
			$obj = self::$blobCache[$this->mOldId];
		} else {
			$dbr = wfGetDB( DB_SLAVE );
			$row = $dbr->selectRow( 'text', array( 'old_flags', 'old_text' ), array( 'old_id' => $this->mOldId ) );
			if ( !$row ) {
				return false;
			}
			$flags = explode( ',', $row->old_flags );
			if ( in_array( 'external', $flags ) ) {
				$url = $row->old_text;
				$parts = explode( '://', $url, 2 );
				if ( !isset( $parts[1] ) || $parts[1] === '' ) {
					// No profiling section is opened in this method, so
					// there is nothing to close before bailing out.
					return false;
				}
				$row->old_text = ExternalStore::fetchFromUrl( $url );
			}
			if ( !in_array( 'object', $flags ) ) {
				return false;
			}

			if ( in_array( 'gzip', $flags ) ) {
				// This shouldn't happen, but a bug in the compress script
				// may at times gzip-compress a HistoryBlob object row.
				$obj = unserialize( gzinflate( $row->old_text ) );
			} else {
				$obj = unserialize( $row->old_text );
			}

			if ( is_string( $obj ) ) {
				// Correct for old double-serialization bug.
				$obj = unserialize( $obj );
			}

			if ( !is_object( $obj ) ) {
				// Fetch or unserialization failed; calling uncompress() on
				// a non-object would be a fatal error.
				return false;
			}

			// Save this item for reference; if pulling many
			// items in a row we'll likely use it again.
			$obj->uncompress();
			self::$blobCache = array( $this->mOldId => $obj );
		}
		return $obj->getItem( $this->mHash );
	}

	/**
	 * @return string Key of the item inside the blob
	 */
	public function getHash() {
		return $this->mHash;
	}
}
00290 
00291 
/**
 * Stub that resolves to the text of a row in the cur table, looked up by
 * cur_id.
 */
class HistoryBlobCurStub {
	/** cur_id of the row this stub points to */
	public $mCurId;

	/**
	 * @param $curid int The cur_id pointed to
	 */
	public function __construct( $curid = 0 ) {
		$this->mCurId = $curid;
	}

	/**
	 * Point the stub at another cur row.
	 *
	 * @param $id int The cur_id to point to
	 */
	public function setLocation( $id ) {
		$this->mCurId = $id;
	}

	/**
	 * Read cur_text of the referenced row.
	 *
	 * @return string|bool The text, or false if the row was not found
	 */
	public function getText() {
		$db = wfGetDB( DB_SLAVE );
		$conds = array( 'cur_id' => $this->mCurId );
		$curRow = $db->selectRow( 'cur', array( 'cur_text' ), $conds );
		if ( $curRow ) {
			return $curRow->cur_text;
		}
		return false;
	}
}
00332 
/**
 * HistoryBlob implementation that stores its items as a chain of binary
 * diffs, each one taken against the previously stored item.
 */
class DiffHistoryBlob implements HistoryBlob {
	/** Uncompressed item texts, indexed by item key */
	public $mItems = array();

	/** Running total of strlen() over the items passed to addItem() */
	public $mSize = 0;

	/**
	 * Array of diffs, or null while nothing has been compressed yet.
	 * uncompress() applies the first diff to an empty string and every
	 * further diff to the result of the previous one.
	 */
	public $mDiffs;

	/** Maps each index of $mDiffs to the key of the item it produces */
	public $mDiffMap;

	/** Key of the default text, as set by setText() */
	public $mDefaultKey;

	/** Deflated serialized state; only populated between __sleep() and __wakeup() */
	public $mCompressed;

	/** True once the object went through __wakeup(); addItem() then refuses to work */
	public $mFrozen = false;

	/** Size threshold used by isHappy() */
	public $mMaxSize = 10000000;

	/** Item count threshold used by isHappy() */
	public $mMaxCount = 100;

	/** Opcodes of the binary diff format, as interpreted by patch() */
	const XDL_BDOP_INS = 1;
	const XDL_BDOP_CPY = 2;
	const XDL_BDOP_INSB = 3;

	/**
	 * @throws MWException if zlib's gzdeflate() is not available
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
		}
	}

	/**
	 * Append a text to the blob.
	 *
	 * @param $text string
	 * @return int Key of the new item
	 * @throws MWException if the object was restored by unserialization
	 */
	public function addItem( $text ) {
		if ( $this->mFrozen ) {
			throw new MWException( __METHOD__.": Cannot add more items after sleep/wakeup" );
		}

		$this->mItems[] = $text;
		$this->mSize += strlen( $text );
		$this->mDiffs = null; // later
		return count( $this->mItems ) - 1;
	}

	/**
	 * @param $key int Key returned by addItem()
	 * @return string|bool The stored text, or false if the key is unknown
	 */
	public function getItem( $key ) {
		// Return false rather than raising an undefined-index notice, the
		// same way ConcatenatedGzipHistoryBlob::getItem() reports a miss.
		if ( $key === null || !isset( $this->mItems[$key] ) ) {
			return false;
		}
		return $this->mItems[$key];
	}

	/**
	 * Add a text and make it the default text returned by getText().
	 *
	 * @param $text string
	 */
	public function setText( $text ) {
		$this->mDefaultKey = $this->addItem( $text );
	}

	/**
	 * @return string|bool The default text, or false if none is stored
	 */
	public function getText() {
		return $this->getItem( $this->mDefaultKey );
	}

	/**
	 * Build $mDiffs and $mDiffMap from $mItems. Does nothing if the diffs
	 * already exist or if there are no items.
	 *
	 * @throws MWException if xdiff_string_rabdiff() is not available
	 */
	public function compress() {
		if ( !function_exists( 'xdiff_string_rabdiff' ) ){
			throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
		}
		if ( isset( $this->mDiffs ) ) {
			// Already compressed
			return;
		}
		$itemCount = count( $this->mItems );
		if ( !$itemCount ) {
			// Empty
			return;
		}

		// Create two diff sequences: one for main text and one for small text
		$sequences = array(
			'small' => array(
				'tail' => '',
				'diffs' => array(),
				'map' => array(),
			),
			'main' => array(
				'tail' => '',
				'diffs' => array(),
				'map' => array(),
			),
		);
		$smallFactor = 0.5;

		for ( $i = 0; $i < $itemCount; $i++ ) {
			$text = $this->mItems[$i];
			if ( $i == 0 ) {
				$seqName = 'main';
			} else {
				// A text shorter than half of the latest main text goes
				// into the 'small' sequence.
				$mainTail = $sequences['main']['tail'];
				if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
					$seqName = 'small';
				} else {
					$seqName = 'main';
				}
			}
			$seq =& $sequences[$seqName];
			$tail = $seq['tail'];
			$diff = $this->diff( $tail, $text );
			$seq['diffs'][] = $diff;
			$seq['map'][] = $i;
			$seq['tail'] = $text;
		}
		unset( $seq ); // unlink dangerous alias

		// Knit the sequences together
		$tail = '';
		$this->mDiffs = array();
		$this->mDiffMap = array();
		foreach ( $sequences as $seq ) {
			$diffCount = count( $seq['diffs'] );
			if ( !$diffCount ) {
				continue;
			}
			if ( $tail === '' ) {
				$this->mDiffs[] = $seq['diffs'][0];
			} else {
				// The first diff of this sequence was taken against an
				// empty string; redo it against the tail of the previous
				// sequence so the chain stays continuous.
				$head = $this->patch( '', $seq['diffs'][0] );
				$this->mDiffs[] = $this->diff( $tail, $head );
			}
			$this->mDiffMap[] = $seq['map'][0];
			for ( $i = 1; $i < $diffCount; $i++ ) {
				$this->mDiffs[] = $seq['diffs'][$i];
				$this->mDiffMap[] = $seq['map'][$i];
			}
			$tail = $seq['tail'];
		}
	}

	/**
	 * Compute a binary diff that turns $t1 into $t2.
	 *
	 * @param $t1 string
	 * @param $t2 string
	 * @return string
	 */
	public function diff( $t1, $t2 ) {
		# Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
		# "String is not zero-terminated"
		wfSuppressWarnings();
		$diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
		wfRestoreWarnings();
		return $diff;
	}

	/**
	 * Apply a binary diff to a base text. Uses xdiff_string_bpatch() when
	 * available and a pure PHP implementation otherwise.
	 *
	 * @param $base string
	 * @param $diff string
	 * @return string|bool The patched text; the pure PHP implementation
	 *   returns false if the diff is malformed or does not match $base
	 */
	public function patch( $base, $diff ) {
		if ( function_exists( 'xdiff_string_bpatch' ) ) {
			wfSuppressWarnings();
			$text = xdiff_string_bpatch( $base, $diff ) . '';
			wfRestoreWarnings();
			return $text;
		}

		# Pure PHP implementation

		# The diff starts with an 8 byte header: base checksum and base length
		if ( strlen( $diff ) < 8 ) {
			wfDebug( __METHOD__. ": truncated diff header\n" );
			return false;
		}
		$header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );

		# Check the checksum if hash/mhash is available
		$ofp = $this->xdiffAdler32( $base );
		if ( $ofp !== false && $ofp !== substr( $diff, 0, 4 ) ) {
			wfDebug( __METHOD__. ": incorrect base checksum\n" );
			return false;
		}
		if ( $header['csize'] != strlen( $base ) ) {
			wfDebug( __METHOD__. ": incorrect base length\n" );
			return false;
		}

		$p = 8;
		$out = '';
		$diffLength = strlen( $diff );
		while ( $p < $diffLength ) {
			$x = unpack( 'Cop', substr( $diff, $p, 1 ) );
			$op = $x['op'];
			++$p;
			switch ( $op ) {
			case self::XDL_BDOP_INS:
				# Literal insert with a one byte length
				$x = unpack( 'Csize', substr( $diff, $p, 1 ) );
				$p++;
				$out .= substr( $diff, $p, $x['size'] );
				$p += $x['size'];
				break;
			case self::XDL_BDOP_INSB:
				# Literal insert with a four byte length
				$x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
				$p += 4;
				$out .= substr( $diff, $p, $x['csize'] );
				$p += $x['csize'];
				break;
			case self::XDL_BDOP_CPY:
				# Copy a range (offset, length) out of the base text
				$x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
				$p += 8;
				$out .= substr( $base, $x['off'], $x['csize'] );
				break;
			default:
				wfDebug( __METHOD__.": invalid op\n" );
				return false;
			}
		}
		return $out;
	}

	/**
	 * Compute the checksum that is compared against the first four bytes
	 * of a diff in patch().
	 *
	 * @param $s string
	 * @return string|bool Four byte binary string, or false if neither
	 *   hash() nor mhash() is available
	 */
	public function xdiffAdler32( $s ) {
		static $init;
		if ( $init === null ) {
			$init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
		}
		// The real Adler-32 checksum of $init is zero, so it initialises the
		// state to zero, as it is at the start of LibXDiff's checksum
		// algorithm. Appending the subject string then simulates LibXDiff.
		if ( function_exists( 'hash' ) ) {
			$hash = hash( 'adler32', $init . $s, true );
		} elseif ( function_exists( 'mhash' ) ) {
			$hash = mhash( MHASH_ADLER32, $init . $s );
		} else {
			return false;
		}
		// Reverse the byte order so it can be compared with the value
		// stored in the diff header.
		return strrev( $hash );
	}

	/**
	 * Rebuild $mItems by applying the diffs in $mDiffs one after another.
	 * Does nothing if there are no diffs.
	 */
	public function uncompress() {
		if ( !$this->mDiffs ) {
			return;
		}
		$tail = '';
		$diffCount = count( $this->mDiffs );
		for ( $diffKey = 0; $diffKey < $diffCount; $diffKey++ ) {
			$textKey = $this->mDiffMap[$diffKey];
			$text = $this->patch( $tail, $this->mDiffs[$diffKey] );
			$this->mItems[$textKey] = $text;
			$tail = $text;
		}
	}

	/**
	 * Compress the items and pack diffs, map and default key into
	 * $mCompressed, which is the only property that gets serialized.
	 *
	 * @return array
	 */
	public function __sleep() {
		$this->compress();
		if ( !count( $this->mItems ) ) {
			// Empty object
			$info = false;
		} else {
			// Take forward differences to improve the compression ratio for sequences
			$map = '';
			$prev = 0;
			foreach ( $this->mDiffMap as $i ) {
				if ( $map !== '' ) {
					$map .= ',';
				}
				$map .= $i - $prev;
				$prev = $i;
			}
			$info = array(
				'diffs' => $this->mDiffs,
				'map' => $map
			);
		}
		if ( isset( $this->mDefaultKey ) ) {
			$info['default'] = $this->mDefaultKey;
		}
		$this->mCompressed = gzdeflate( serialize( $info ) );
		return array( 'mCompressed' );
	}

	/**
	 * Unpack $mCompressed, restore diffs, map and default key, and rebuild
	 * the items. Marks the object as frozen.
	 */
	public function __wakeup() {
		// addItem() doesn't work if mItems is partially filled from mDiffs
		$this->mFrozen = true;
		$info = unserialize( gzinflate( $this->mCompressed ) );
		unset( $this->mCompressed );

		if ( !$info ) {
			// Empty object
			return;
		}

		if ( isset( $info['default'] ) ) {
			$this->mDefaultKey = $info['default'];
		}
		$this->mDiffs = $info['diffs'];
		if ( isset( $info['base'] ) ) {
			// Old format: the first text is stored verbatim under 'base'.
			// Wrap it into a diff against the empty string (zero checksum,
			// zero base length, one INSB op) so uncompress() can treat it
			// like any other entry.
			$this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
			array_unshift( $this->mDiffs,
				pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
				$info['base'] );
		} else {
			// New format: the map is a comma separated list of forward
			// differences, see __sleep().
			$map = explode( ',', $info['map'] );
			$cur = 0;
			$this->mDiffMap = array();
			foreach ( $map as $i ) {
				$cur += $i;
				$this->mDiffMap[] = $cur;
			}
		}
		$this->uncompress();
	}

	/**
	 * Tell whether the blob is still below both the size and the item count
	 * threshold.
	 *
	 * @return bool
	 */
	public function isHappy() {
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}

}