MediaWiki  REL1_21
HistoryBlob.php
Go to the documentation of this file.
00001 <?php
/**
 * Contract for containers that hold one or more texts, hand them back by
 * key, and designate one of them as the default text.
 */
interface HistoryBlob
{
	/**
	 * Store a text in the container.
	 *
	 * @param string $text
	 * @return string|int Key that getItem() accepts to fetch the text again
	 */
	public function addItem( $text );

	/**
	 * Fetch a previously stored text.
	 *
	 * @param string|int $key Key returned by addItem()
	 * @return string|bool The stored text
	 */
	public function getItem( $key );

	/**
	 * Store a text and make it the default text returned by getText().
	 *
	 * @param string $text
	 */
	public function setText( $text );

	/**
	 * Fetch the default text.
	 *
	 * @return string|bool
	 */
	public function getText();
}
00069 
/**
 * HistoryBlob implementation that keeps several texts in one array, keyed
 * by the MD5 hash of each text, and stores that array serialized and
 * deflate-compressed.
 */
class ConcatenatedGzipHistoryBlob implements HistoryBlob
{
	/**
	 * $mItems is an array of hash => text while uncompressed, and a deflated
	 * string while $mCompressed is true. $mDefaultHash is the key of the
	 * default text set through setText().
	 */
	public $mVersion = 0, $mCompressed = false, $mItems = array(), $mDefaultHash = '';

	/** Sum of strlen() of the texts added to this instance */
	public $mSize = 0;

	/** Size threshold used by isHappy() */
	public $mMaxSize = 10000000;

	/** Item-count threshold used by isHappy() */
	public $mMaxCount = 100;

	/**
	 * @throws MWException if the zlib functions are unavailable
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write this kind of history object (ConcatenatedGzipHistoryBlob)\n" );
		}
	}

	/**
	 * Add a text; identical texts are stored only once.
	 *
	 * @param string $text
	 * @return string MD5 hash of the text, usable as key for getItem()
	 */
	public function addItem( $text ) {
		$this->uncompress();
		$hash = md5( $text );
		if ( !isset( $this->mItems[$hash] ) ) {
			$this->mItems[$hash] = $text;
			$this->mSize += strlen( $text );
		}
		return $hash;
	}

	/**
	 * @param string $hash Key returned by addItem()
	 * @return string|bool The text, or false if the hash is unknown
	 */
	public function getItem( $hash ) {
		$this->uncompress();
		if ( array_key_exists( $hash, $this->mItems ) ) {
			return $this->mItems[$hash];
		}
		return false;
	}

	/**
	 * Add a text and make it the default text.
	 *
	 * @param string $text
	 */
	public function setText( $text ) {
		$this->uncompress();
		$this->mDefaultHash = $this->addItem( $text );
	}

	/**
	 * @return string|bool The default text, or false if none is stored
	 */
	public function getText() {
		$this->uncompress();
		return $this->getItem( $this->mDefaultHash );
	}

	/**
	 * Remove an item and subtract its length from the size counter.
	 * Unknown hashes are ignored.
	 *
	 * @param string $hash Key returned by addItem()
	 */
	public function removeItem( $hash ) {
		// mItems is a deflated string while compressed, so it has to be
		// expanded before it can be indexed like every other accessor does.
		$this->uncompress();
		if ( !array_key_exists( $hash, $this->mItems ) ) {
			return;
		}
		$this->mSize -= strlen( $this->mItems[$hash] );
		unset( $this->mItems[$hash] );
	}

	/**
	 * Replace the item array by its serialized, deflated form.
	 * Does nothing if already compressed.
	 */
	public function compress() {
		if ( !$this->mCompressed ) {
			$this->mItems = gzdeflate( serialize( $this->mItems ) );
			$this->mCompressed = true;
		}
	}

	/**
	 * Restore the item array from its deflated form.
	 * Does nothing if not compressed.
	 */
	public function uncompress() {
		if ( $this->mCompressed ) {
			$this->mItems = unserialize( gzinflate( $this->mItems ) );
			$this->mCompressed = false;
		}
	}

	/**
	 * Compress before serialization. mSize and the limits are not part of
	 * the serialized form.
	 *
	 * @return array Names of the properties to serialize
	 */
	public function __sleep() {
		$this->compress();
		return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' );
	}

	public function __wakeup() {
		$this->uncompress();
	}

	/**
	 * Tell whether the blob is still below both the size and the item-count
	 * thresholds.
	 *
	 * @return bool
	 */
	public function isHappy() {
		// Counting only makes sense on the item array, not on the deflated
		// string that mItems holds while compressed.
		$this->uncompress();
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}
}
00185 
/**
 * Pointer to one item inside a blob object that is stored in another row
 * of the text table.
 */
class HistoryBlobStub {
	/**
	 * One-element cache: old_id => unserialized, uncompressed blob object.
	 * getText() replaces the whole array each time it loads a blob.
	 */
	protected static $blobCache = array();

	/**
	 * $mOldId: old_id of the text row holding the blob object.
	 * $mHash:  key of this stub's item inside that blob.
	 * $mRef:   referrer id as set by setReferrer().
	 */
	public $mOldId, $mHash, $mRef;

	/**
	 * @param string $hash Key of the item inside the blob object
	 * @param int $oldid old_id of the text row holding the blob object;
	 *   the default of 0 leaves the location unset until setLocation()
	 */
	public function __construct( $hash = '', $oldid = 0 ) {
		$this->mHash = $hash;
		if ( $oldid ) {
			// Honour the argument instead of silently dropping it. The
			// default keeps mOldId untouched, as before.
			$this->mOldId = $oldid;
		}
	}

	/**
	 * Set the old_id of the text row that holds the blob object.
	 *
	 * @param int $id
	 */
	public function setLocation( $id ) {
		$this->mOldId = $id;
	}

	/**
	 * @param int $id
	 */
	public function setReferrer( $id ) {
		$this->mRef = $id;
	}

	/**
	 * @return int Value last passed to setReferrer()
	 */
	public function getReferrer() {
		return $this->mRef;
	}

	/**
	 * Load the blob object this stub points to and return this stub's item.
	 *
	 * @return string|bool The text, or false if the row is missing, is not
	 *   flagged as an object, cannot be decoded, or lacks the item
	 */
	public function getText() {
		if ( isset( self::$blobCache[$this->mOldId] ) ) {
			$obj = self::$blobCache[$this->mOldId];
		} else {
			$dbr = wfGetDB( DB_SLAVE );
			$row = $dbr->selectRow( 'text', array( 'old_flags', 'old_text' ), array( 'old_id' => $this->mOldId ) );
			if ( !$row ) {
				return false;
			}
			$flags = explode( ',', $row->old_flags );
			if ( in_array( 'external', $flags ) ) {
				// old_text holds a URL of the form scheme://location
				$url = $row->old_text;
				$parts = explode( '://', $url, 2 );
				if ( !isset( $parts[1] ) || $parts[1] == '' ) {
					return false;
				}
				$row->old_text = ExternalStore::fetchFromUrl( $url );
			}
			if ( !in_array( 'object', $flags ) ) {
				return false;
			}

			// NOTE(review): unserialize() instantiates whatever class the
			// stored data names; this is only safe as long as the text
			// table and the external store hold trusted data.
			if ( in_array( 'gzip', $flags ) ) {
				// This shouldn't happen, but a bug in the compress script
				// may at times gzip-compress a HistoryBlob object row.
				$obj = unserialize( gzinflate( $row->old_text ) );
			} else {
				$obj = unserialize( $row->old_text );
			}

			if ( is_string( $obj ) ) {
				// Correct for old double-serialization bug.
				$obj = unserialize( $obj );
			}

			if ( !is_object( $obj ) ) {
				// Corrupt or unreadable data: report failure the same way
				// as a missing row instead of calling a method on a
				// non-object.
				return false;
			}

			// Save this item for reference; if pulling many
			// items in a row we'll likely use it again.
			$obj->uncompress();
			self::$blobCache = array( $this->mOldId => $obj );
		}
		return $obj->getItem( $this->mHash );
	}

	/**
	 * @return string Key of the item inside the blob object
	 */
	public function getHash() {
		return $this->mHash;
	}
}
00286 
/**
 * Stub whose text lives in a row of the cur table.
 */
class HistoryBlobCurStub {
	/** cur_id of the row holding the text */
	public $mCurId;

	/**
	 * @param int $curid cur_id of the row holding the text
	 */
	public function __construct( $curid = 0 ) {
		$this->mCurId = $curid;
	}

	/**
	 * Point the stub at another cur row.
	 *
	 * @param int $id
	 */
	public function setLocation( $id ) {
		$this->mCurId = $id;
	}

	/**
	 * Read cur_text of the referenced row.
	 *
	 * @return string|bool The text, or false if no such row was found
	 */
	public function getText() {
		$db = wfGetDB( DB_SLAVE );
		$fields = array( 'cur_text' );
		$conds = array( 'cur_id' => $this->mCurId );
		$curRow = $db->selectRow( 'cur', $fields, $conds );

		return $curRow ? $curRow->cur_text : false;
	}
}
00327 
/**
 * HistoryBlob implementation that stores its items as a chain of binary
 * diffs, each one relative to the text produced by the previous diff.
 */
class DiffHistoryBlob implements HistoryBlob {
	/** Item texts, indexed by item key */
	public $mItems = array();

	/** Sum of strlen() of the texts passed to addItem() */
	public $mSize = 0;

	/**
	 * List of binary diffs. The first entry patches the empty string, every
	 * later entry patches the text produced by the entry before it. Null
	 * until compress() has run or the object has been woken up.
	 */
	public $mDiffs;

	/** For each index of $mDiffs, the item key whose text that diff yields */
	public $mDiffMap;

	/** Key of the default text, as set by setText() */
	public $mDefaultKey;

	/** Deflated serialized form, filled by __sleep() and consumed by __wakeup() */
	public $mCompressed;

	/** True once __wakeup() has run; addItem() refuses to work afterwards */
	public $mFrozen = false;

	/** Size threshold used by isHappy() */
	public $mMaxSize = 10000000;

	/** Item-count threshold used by isHappy() */
	public $mMaxCount = 100;

	/** Opcodes of the binary diff format interpreted by patch() */
	const XDL_BDOP_INS = 1;
	const XDL_BDOP_CPY = 2;
	const XDL_BDOP_INSB = 3;

	/**
	 * @throws MWException if the zlib functions are unavailable
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
		}
	}

	/**
	 * Append a text.
	 *
	 * @param string $text
	 * @return int Key of the new item
	 * @throws MWException if the object has been through __wakeup()
	 */
	public function addItem( $text ) {
		if ( $this->mFrozen ) {
			throw new MWException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
		}

		$this->mItems[] = $text;
		$this->mSize += strlen( $text );
		$this->mDiffs = null; // later
		return count( $this->mItems ) - 1;
	}

	/**
	 * @param int $key Key returned by addItem()
	 * @return string|bool The text, or false if there is no item under
	 *   that key (same convention as ConcatenatedGzipHistoryBlob::getItem())
	 */
	public function getItem( $key ) {
		if ( $key === null || !array_key_exists( $key, $this->mItems ) ) {
			return false;
		}
		return $this->mItems[$key];
	}

	/**
	 * Append a text and make it the default text.
	 *
	 * @param string $text
	 */
	public function setText( $text ) {
		$this->mDefaultKey = $this->addItem( $text );
	}

	/**
	 * @return string|bool The default text, or false if none has been set
	 */
	public function getText() {
		return $this->getItem( $this->mDefaultKey );
	}

	/**
	 * Build $mDiffs and $mDiffMap from $mItems. Does nothing if the diffs
	 * already exist or if there are no items.
	 *
	 * @throws MWException if diffs have to be computed and xdiff is missing
	 */
	public function compress() {
		if ( isset( $this->mDiffs ) ) {
			// Already compressed
			return;
		}
		$itemCount = count( $this->mItems );
		if ( !$itemCount ) {
			// Empty
			return;
		}
		// Only demand xdiff once there really is something to diff, so that
		// woken-up or empty objects can be serialized again without it.
		if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
			throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
		}

		// Create two diff sequences: one for main text and one for small text
		$sequences = array(
			'small' => array(
				'tail' => '',
				'diffs' => array(),
				'map' => array(),
			),
			'main' => array(
				'tail' => '',
				'diffs' => array(),
				'map' => array(),
			),
		);
		$smallFactor = 0.5;

		for ( $i = 0; $i < $itemCount; $i++ ) {
			$text = $this->mItems[$i];
			if ( $i == 0 ) {
				$seqName = 'main';
			} else {
				// A text much shorter than the last main text goes to the
				// 'small' sequence so it does not become the diff base of
				// the next full-size text.
				$mainTail = $sequences['main']['tail'];
				if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
					$seqName = 'small';
				} else {
					$seqName = 'main';
				}
			}
			$seq =& $sequences[$seqName];
			$tail = $seq['tail'];
			$diff = $this->diff( $tail, $text );
			$seq['diffs'][] = $diff;
			$seq['map'][] = $i;
			$seq['tail'] = $text;
		}
		unset( $seq ); // unlink dangerous alias

		// Knit the sequences together
		$tail = '';
		$this->mDiffs = array();
		$this->mDiffMap = array();
		foreach ( $sequences as $seq ) {
			$diffCount = count( $seq['diffs'] );
			if ( !$diffCount ) {
				continue;
			}
			if ( $tail === '' ) {
				$this->mDiffs[] = $seq['diffs'][0];
			} else {
				// The first diff of a sequence is relative to the empty
				// string; re-base it onto the tail of the previous sequence.
				$head = $this->patch( '', $seq['diffs'][0] );
				$this->mDiffs[] = $this->diff( $tail, $head );
			}
			$this->mDiffMap[] = $seq['map'][0];
			for ( $i = 1; $i < $diffCount; $i++ ) {
				$this->mDiffs[] = $seq['diffs'][$i];
				$this->mDiffMap[] = $seq['map'][$i];
			}
			$tail = $seq['tail'];
		}
	}

	/**
	 * Compute a binary diff that turns $t1 into $t2.
	 *
	 * @param string $t1
	 * @param string $t2
	 * @return string
	 */
	public function diff( $t1, $t2 ) {
		# Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
		# "String is not zero-terminated"
		wfSuppressWarnings();
		$diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
		wfRestoreWarnings();
		return $diff;
	}

	/**
	 * Apply a binary diff to a base text. Uses xdiff_string_bpatch() when
	 * available and a pure PHP interpreter of the diff format otherwise.
	 *
	 * @param string $base
	 * @param string $diff
	 * @return string|bool The patched text; the pure PHP path returns false
	 *   if the diff is truncated, does not match the base, or contains an
	 *   unknown opcode
	 */
	public function patch( $base, $diff ) {
		if ( function_exists( 'xdiff_string_bpatch' ) ) {
			wfSuppressWarnings();
			$text = xdiff_string_bpatch( $base, $diff ) . '';
			wfRestoreWarnings();
			return $text;
		}

		# Pure PHP implementation

		# Header: 4 bytes base checksum, 4 bytes base length (little-endian)
		$diffLength = strlen( $diff );
		if ( $diffLength < 8 ) {
			wfDebug( __METHOD__ . ": truncated diff header\n" );
			return false;
		}
		$header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );

		# Check the checksum if hash/mhash is available
		$ofp = $this->xdiffAdler32( $base );
		if ( $ofp !== false && $ofp !== substr( $diff, 0, 4 ) ) {
			wfDebug( __METHOD__ . ": incorrect base checksum\n" );
			return false;
		}
		if ( $header['csize'] != strlen( $base ) ) {
			wfDebug( __METHOD__ . ": incorrect base length\n" );
			return false;
		}

		$p = 8;
		$out = '';
		while ( $p < $diffLength ) {
			$x = unpack( 'Cop', substr( $diff, $p, 1 ) );
			$op = $x['op'];
			++$p;
			switch ( $op ) {
			case self::XDL_BDOP_INS:
				# Literal insert, 1-byte length
				if ( $diffLength - $p < 1 ) {
					wfDebug( __METHOD__ . ": truncated diff\n" );
					return false;
				}
				$x = unpack( 'Csize', substr( $diff, $p, 1 ) );
				$p++;
				$out .= substr( $diff, $p, $x['size'] );
				$p += $x['size'];
				break;
			case self::XDL_BDOP_INSB:
				# Literal insert, 4-byte length
				if ( $diffLength - $p < 4 ) {
					wfDebug( __METHOD__ . ": truncated diff\n" );
					return false;
				}
				$x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
				$p += 4;
				$out .= substr( $diff, $p, $x['csize'] );
				$p += $x['csize'];
				break;
			case self::XDL_BDOP_CPY:
				# Copy a range of the base text: 4-byte offset, 4-byte length
				if ( $diffLength - $p < 8 ) {
					wfDebug( __METHOD__ . ": truncated diff\n" );
					return false;
				}
				$x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
				$p += 8;
				$out .= substr( $base, $x['off'], $x['csize'] );
				break;
			default:
				wfDebug( __METHOD__ . ": invalid op\n" );
				return false;
			}
		}
		return $out;
	}

	/**
	 * Compute the base checksum as stored in the first four bytes of a diff.
	 *
	 * @param string $s
	 * @return string|bool Four checksum bytes, or false if neither hash()
	 *   nor mhash() is available
	 */
	public function xdiffAdler32( $s ) {
		static $init;
		if ( $init === null ) {
			$init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
		}
		// The real Adler-32 checksum of $init is zero, so it initialises the
		// state to zero, as it is at the start of LibXDiff's checksum
		// algorithm. Appending the subject string then simulates LibXDiff.
		if ( function_exists( 'hash' ) ) {
			$hash = hash( 'adler32', $init . $s, true );
		} elseif ( function_exists( 'mhash' ) ) {
			$hash = mhash( MHASH_ADLER32, $init . $s );
		} else {
			return false;
		}
		// Reverse the byte order to match the order used in the diff header
		return strrev( $hash );
	}

	/**
	 * Rebuild $mItems by applying the diffs in order, each to the result of
	 * the previous one. Does nothing if there are no diffs.
	 */
	public function uncompress() {
		if ( !$this->mDiffs ) {
			return;
		}
		$tail = '';
		$diffCount = count( $this->mDiffs );
		for ( $diffKey = 0; $diffKey < $diffCount; $diffKey++ ) {
			$textKey = $this->mDiffMap[$diffKey];
			$text = $this->patch( $tail, $this->mDiffs[$diffKey] );
			$this->mItems[$textKey] = $text;
			$tail = $text;
		}
	}

	/**
	 * Pack diffs, diff map and default key into $mCompressed, which is the
	 * only property that gets serialized.
	 *
	 * @return array Names of the properties to serialize
	 */
	public function __sleep() {
		$this->compress();
		if ( !count( $this->mItems ) ) {
			// Empty object
			$info = false;
		} else {
			// Take forward differences to improve the compression ratio for sequences
			$map = '';
			$prev = 0;
			foreach ( $this->mDiffMap as $i ) {
				if ( $map !== '' ) {
					$map .= ',';
				}
				$map .= $i - $prev;
				$prev = $i;
			}
			$info = array(
				'diffs' => $this->mDiffs,
				'map' => $map
			);
		}
		if ( isset( $this->mDefaultKey ) ) {
			if ( $info === false ) {
				// Do not rely on false being silently turned into an array
				$info = array();
			}
			$info['default'] = $this->mDefaultKey;
		}
		$this->mCompressed = gzdeflate( serialize( $info ) );
		return array( 'mCompressed' );
	}

	/**
	 * Unpack $mCompressed, restore diffs, diff map and default key, and
	 * rebuild the item texts. The object is frozen afterwards.
	 */
	public function __wakeup() {
		// addItem() doesn't work if mItems is partially filled from mDiffs
		$this->mFrozen = true;
		$info = unserialize( gzinflate( $this->mCompressed ) );
		unset( $this->mCompressed );

		if ( !$info ) {
			// Empty object
			return;
		}

		if ( isset( $info['default'] ) ) {
			$this->mDefaultKey = $info['default'];
		}
		$this->mDiffs = $info['diffs'];
		if ( isset( $info['base'] ) ) {
			// Old format
			// The base text is stored verbatim; wrap it in a diff against
			// the empty string (zero checksum, zero length, one INSB op).
			$this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
			array_unshift( $this->mDiffs,
				pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
				$info['base'] );
		} else {
			// New format
			// The map is stored as comma-separated forward differences
			$map = explode( ',', $info['map'] );
			$cur = 0;
			$this->mDiffMap = array();
			foreach ( $map as $i ) {
				$cur += $i;
				$this->mDiffMap[] = $cur;
			}
		}
		$this->uncompress();
	}

	/**
	 * Tell whether the blob is still below both the size and the item-count
	 * thresholds.
	 *
	 * @return bool
	 */
	public function isHappy() {
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}

}
00689 }