MediaWiki  REL1_19
HistoryBlob.php
Go to the documentation of this file.
00001 <?php
00002 
/**
 * Common contract for containers that hold one or more revision texts
 * and designate one of them as the "default" text.
 */
interface HistoryBlob
{
	/**
	 * Store a text in the container.
	 *
	 * @param $text string The text to store
	 * @return mixed The key under which the text can later be fetched
	 *   with getItem(). The key type is implementation-defined (the
	 *   implementations in this file use an MD5 hash or an integer index).
	 */
	public function addItem( $text );

	/**
	 * Fetch a previously stored text.
	 *
	 * @param $key mixed A key as returned by addItem()
	 * @return mixed The stored text
	 */
	public function getItem( $key );

	/**
	 * Store a text and remember it as the default text, i.e. the one
	 * that getText() will return.
	 *
	 * @param $text string The text to store as default
	 */
	public function setText( $text );

	/**
	 * Fetch the default text, as designated by setText().
	 *
	 * @return mixed The default text
	 */
	public function getText();
}
00049 
/**
 * HistoryBlob implementation that keeps several texts in one array keyed by
 * their MD5 hash. When the object is serialized, the array is itself
 * serialized and compressed with gzdeflate(); it is expanded again on wakeup
 * or on first access.
 */
class ConcatenatedGzipHistoryBlob implements HistoryBlob
{
	/**
	 * mItems is an array (hash => text) while uncompressed, and a deflated
	 * string while mCompressed is true. mDefaultHash is the key of the text
	 * returned by getText().
	 */
	public $mVersion = 0, $mCompressed = false, $mItems = array(), $mDefaultHash = '';

	/**
	 * Running total of the byte lengths of stored texts. It is not part of
	 * the serialized state (see __sleep()), so it restarts at 0 after
	 * unserialization.
	 */
	public $mSize = 0;

	/** Size threshold (bytes) used by isHappy() */
	public $mMaxSize = 10000000;

	/** Item-count threshold used by isHappy() */
	public $mMaxCount = 100;

	/**
	 * Constructor
	 *
	 * @throws MWException if zlib (gzdeflate) is not available
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write this kind of history object (ConcatenatedGzipHistoryBlob)\n" );
		}
	}

	/**
	 * Add a text to the blob. A text whose MD5 hash is already present is
	 * not stored (or counted towards mSize) a second time.
	 *
	 * @param $text string
	 * @return string MD5 hash of the text, usable as key for getItem()
	 */
	public function addItem( $text ) {
		$this->uncompress();
		$hash = md5( $text );
		if ( !isset( $this->mItems[$hash] ) ) {
			$this->mItems[$hash] = $text;
			$this->mSize += strlen( $text );
		}
		return $hash;
	}

	/**
	 * Fetch a text by its hash.
	 *
	 * @param $hash string Key as returned by addItem()
	 * @return string|bool The text, or false if no such item exists
	 */
	public function getItem( $hash ) {
		$this->uncompress();
		if ( array_key_exists( $hash, $this->mItems ) ) {
			return $this->mItems[$hash];
		} else {
			return false;
		}
	}

	/**
	 * Add a text and make it the default text.
	 *
	 * @param $text string
	 */
	public function setText( $text ) {
		$this->uncompress();
		$this->mDefaultHash = $this->addItem( $text );
	}

	/**
	 * Fetch the default text.
	 *
	 * @return string|bool The default text, or false if none is stored
	 */
	public function getText() {
		$this->uncompress();
		return $this->getItem( $this->mDefaultHash );
	}

	/**
	 * Remove an item and subtract its length from mSize.
	 * Removing a hash that is not present is a no-op.
	 *
	 * @param $hash string Key as returned by addItem()
	 */
	public function removeItem( $hash ) {
		// mItems is a deflated string while compressed; it must be expanded
		// to an array before it can be indexed or have entries unset.
		$this->uncompress();
		if ( !isset( $this->mItems[$hash] ) ) {
			return;
		}
		$this->mSize -= strlen( $this->mItems[$hash] );
		unset( $this->mItems[$hash] );
	}

	/**
	 * Replace the item array by its serialized, deflated form.
	 * Does nothing if the blob is already compressed.
	 */
	public function compress() {
		if ( !$this->mCompressed  ) {
			$this->mItems = gzdeflate( serialize( $this->mItems ) );
			$this->mCompressed = true;
		}
	}

	/**
	 * Expand the deflated form back into the item array.
	 * Does nothing if the blob is not compressed.
	 */
	public function uncompress() {
		if ( $this->mCompressed ) {
			$this->mItems = unserialize( gzinflate( $this->mItems ) );
			$this->mCompressed = false;
		}
	}

	/**
	 * Compress the items before serialization.
	 *
	 * @return array Names of the properties to serialize
	 */
	function __sleep() {
		$this->compress();
		return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' );
	}

	/**
	 * Expand the items again after unserialization.
	 */
	function __wakeup() {
		$this->uncompress();
	}

	/**
	 * Tell whether the blob is still below both its size and its item-count
	 * threshold.
	 *
	 * @return bool
	 */
	public function isHappy() {
		// count() must see the item array, not the deflated string
		$this->uncompress();
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}
}
00165 
00166 
/**
 * Pointer to an item inside a blob object that is stored in another row of
 * the text table. The stub records the old_id of that row and the hash of
 * the item within the blob.
 */
class HistoryBlobStub {
	/**
	 * One-entry cache (old_id => uncompressed blob object) holding the blob
	 * most recently loaded by getText(); it is replaced wholesale on every
	 * cache miss.
	 */
	protected static $blobCache = array();

	/**
	 * mOldId: old_id of the text row holding the blob object.
	 * mHash:  key of this stub's item inside that blob.
	 * mRef:   referrer ID as set with setReferrer().
	 */
	var $mOldId, $mHash, $mRef;

	/**
	 * @param $hash string Key of the item inside the blob
	 * @param $oldid int old_id of the text row holding the blob object
	 */
	function __construct( $hash = '', $oldid = 0 ) {
		$this->mHash = $hash;
		// Previously this argument was accepted but silently discarded
		$this->mOldId = $oldid;
	}

	/**
	 * Set the old_id of the text row that holds the blob object.
	 *
	 * @param $id int
	 */
	function setLocation( $id ) {
		$this->mOldId = $id;
	}

	/**
	 * Set the referrer ID.
	 *
	 * @param $id int
	 */
	function setReferrer( $id ) {
		$this->mRef = $id;
	}

	/**
	 * Get the referrer ID.
	 *
	 * @return int
	 */
	function getReferrer() {
		return $this->mRef;
	}

	/**
	 * Load the blob object this stub points to (from the one-entry cache
	 * or from the text table) and return this stub's item from it.
	 *
	 * @return string|bool The text, or false if the row is missing, the
	 *   external URL is malformed, the row is not flagged 'object', or the
	 *   stored data does not unserialize to an object.
	 */
	function getText() {
		if ( isset( self::$blobCache[$this->mOldId] ) ) {
			$obj = self::$blobCache[$this->mOldId];
		} else {
			$dbr = wfGetDB( DB_SLAVE );
			$row = $dbr->selectRow( 'text', array( 'old_flags', 'old_text' ), array( 'old_id' => $this->mOldId ) );
			if ( !$row ) {
				return false;
			}
			$flags = explode( ',', $row->old_flags );
			if ( in_array( 'external', $flags ) ) {
				// old_text holds a URL of the form scheme://location
				$url = $row->old_text;
				$parts = explode( '://', $url, 2 );
				if ( !isset( $parts[1] ) || $parts[1] == '' ) {
					// No wfProfileOut() here: this method never calls
					// wfProfileIn(), so an "out" would be unbalanced.
					return false;
				}
				$row->old_text = ExternalStore::fetchFromUrl( $url );
			}
			if ( !in_array( 'object', $flags ) ) {
				return false;
			}

			if ( in_array( 'gzip', $flags ) ) {
				// This shouldn't happen, but a bug in the compress script
				// may at times gzip-compress a HistoryBlob object row.
				$obj = unserialize( gzinflate( $row->old_text ) );
			} else {
				$obj = unserialize( $row->old_text );
			}

			if ( is_string( $obj ) ) {
				// Correct for old double-serialization bug.
				$obj = unserialize( $obj );
			}

			if ( !is_object( $obj ) ) {
				// Fetch or unserialization failed; calling methods on the
				// result would be a fatal error.
				return false;
			}

			// Save this item for reference; if pulling many
			// items in a row we'll likely use it again.
			$obj->uncompress();
			self::$blobCache = array( $this->mOldId => $obj );
		}
		return $obj->getItem( $this->mHash );
	}

	/**
	 * Get the key of this stub's item inside the blob.
	 *
	 * @return string
	 */
	function getHash() {
		return $this->mHash;
	}
}
00270 
00271 
/**
 * Stub standing in for the text of a row in the `cur` table: it stores only
 * the row ID and reads the text from the database on demand.
 */
class HistoryBlobCurStub {
	/** cur_id of the row whose cur_text this stub resolves to */
	public $mCurId;

	/**
	 * @param $curid int The cur_id this stub points to
	 */
	function __construct( $curid = 0 ) {
		$this->mCurId = $curid;
	}

	/**
	 * Point the stub at a different cur row.
	 *
	 * @param $id int The cur_id to point to
	 */
	function setLocation( $id ) {
		$this->mCurId = $id;
	}

	/**
	 * Look up the row in the `cur` table and return its text.
	 *
	 * @return string|bool The cur_text value, or false if no row matched
	 */
	function getText() {
		$conds = array( 'cur_id' => $this->mCurId );
		$fields = array( 'cur_text' );
		$db = wfGetDB( DB_SLAVE );
		$curRow = $db->selectRow( 'cur', $fields, $conds );
		return $curRow ? $curRow->cur_text : false;
	}
}
00312 
/**
 * HistoryBlob implementation that stores a set of texts as a chain of
 * binary diffs: the first diff is taken against the empty string and each
 * following diff against the text produced by the previous one. The chain is
 * serialized and deflated on sleep and expanded back into texts on wakeup.
 */
class DiffHistoryBlob implements HistoryBlob {
	/** Uncompressed item texts, indexed by the integer key from addItem() */
	var $mItems = array();

	/** Total byte length of all texts passed to addItem() */
	var $mSize = 0;

	/**
	 * Array of binary diffs, or null/unset while the items have not been
	 * diffed yet. uncompress() applies them in order, each one to the text
	 * produced by the previous one, starting from the empty string.
	 */
	var $mDiffs;

	/** Maps an index in mDiffs to the key in mItems that the diff produces */
	var $mDiffMap;

	/** Key of the default text, i.e. the one returned by getText() */
	var $mDefaultKey;

	/**
	 * Deflated, serialized form of the object. Only set between __sleep()
	 * and __wakeup(); it is the sole property that gets serialized.
	 */
	var $mCompressed;

	/** True after __wakeup(); addItem() refuses to work on a frozen object */
	var $mFrozen = false;

	/** Size threshold (bytes) used by isHappy() */
	var $mMaxSize = 10000000;

	/** Item-count threshold used by isHappy() */
	var $mMaxCount = 100;

	/**
	 * Opcodes of the binary diff format, as interpreted by patch():
	 * INS  - 1-byte length followed by that many literal bytes
	 * CPY  - 4-byte offset and 4-byte length: copy that range from the base
	 * INSB - 4-byte length followed by that many literal bytes
	 */
	const XDL_BDOP_INS = 1;
	const XDL_BDOP_CPY = 2;
	const XDL_BDOP_INSB = 3;

	/**
	 * @throws MWException if zlib (gzdeflate) is not available
	 */
	function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
		}
	}

	/**
	 * Append a text to the blob. Any previously computed diffs are
	 * discarded; they are recomputed by the next compress().
	 *
	 * @param $text string
	 * @return int Key of the new item, usable with getItem()
	 * @throws MWException if the object has been through __wakeup()
	 */
	function addItem( $text ) {
		if ( $this->mFrozen ) {
			throw new MWException( __METHOD__.": Cannot add more items after sleep/wakeup" );
		}

		$this->mItems[] = $text;
		$this->mSize += strlen( $text );
		$this->mDiffs = null; // later
		return count( $this->mItems ) - 1;
	}

	/**
	 * Fetch a text by key.
	 *
	 * @param $key int Key as returned by addItem()
	 * @return string|bool The stored item, or false if there is no item
	 *   under that key
	 */
	function getItem( $key ) {
		// Report a missing key as false, as ConcatenatedGzipHistoryBlob
		// does, instead of raising an undefined-index notice.
		if ( !isset( $this->mItems[$key] ) ) {
			return false;
		}
		return $this->mItems[$key];
	}

	/**
	 * Append a text and make it the default text.
	 *
	 * @param $text string
	 */
	function setText( $text ) {
		$this->mDefaultKey = $this->addItem( $text );
	}

	/**
	 * Fetch the default text.
	 *
	 * @return string|bool The default text, or false if none is stored
	 */
	function getText() {
		return $this->getItem( $this->mDefaultKey );
	}

	/**
	 * Compute mDiffs and mDiffMap from mItems. Does nothing if the diffs
	 * already exist or if there are no items.
	 *
	 * @throws MWException if diffs have to be computed but xdiff's
	 *   xdiff_string_rabdiff() is not available
	 */
	function compress() {
		if ( isset( $this->mDiffs ) ) {
			// Already compressed
			return;
		}
		if ( !count( $this->mItems ) ) {
			// Empty
			return;
		}
		// Only insist on xdiff once there really is something to diff, so
		// that already-diffed or empty objects can be serialized without it.
		if ( !function_exists( 'xdiff_string_rabdiff' ) ){
			throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
		}

		// Create two diff sequences: one for main text and one for small text
		$sequences = array(
			'small' => array(
				'tail' => '',
				'diffs' => array(),
				'map' => array(),
			),
			'main' => array(
				'tail' => '',
				'diffs' => array(),
				'map' => array(),
			),
		);
		// A text shorter than this fraction of the last main text's length
		// goes into the 'small' sequence.
		$smallFactor = 0.5;

		for ( $i = 0; $i < count( $this->mItems ); $i++ ) {
			$text = $this->mItems[$i];
			if ( $i == 0 ) {
				$seqName = 'main';
			} else {
				$mainTail = $sequences['main']['tail'];
				if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
					$seqName = 'small';
				} else {
					$seqName = 'main';
				}
			}
			$seq =& $sequences[$seqName];
			$tail = $seq['tail'];
			$diff = $this->diff( $tail, $text );
			$seq['diffs'][] = $diff;
			$seq['map'][] = $i;
			$seq['tail'] = $text;
		}
		unset( $seq ); // unlink dangerous alias

		// Knit the sequences together
		$tail = '';
		$this->mDiffs = array();
		$this->mDiffMap = array();
		foreach ( $sequences as $seq ) {
			if ( !count( $seq['diffs'] ) ) {
				continue;
			}
			if ( $tail === '' ) {
				$this->mDiffs[] = $seq['diffs'][0];
			} else {
				// The sequence's first diff was taken against ''; rebuild
				// its first text and re-diff it against the end of the
				// previous sequence so the chain stays continuous.
				$head = $this->patch( '', $seq['diffs'][0] );
				$this->mDiffs[] = $this->diff( $tail, $head );
			}
			$this->mDiffMap[] = $seq['map'][0];
			for ( $i = 1; $i < count( $seq['diffs'] ); $i++ ) {
				$this->mDiffs[] = $seq['diffs'][$i];
				$this->mDiffMap[] = $seq['map'][$i];
			}
			$tail = $seq['tail'];
		}
	}

	/**
	 * Compute a binary diff between two strings with xdiff.
	 *
	 * @param $t1 string Base text
	 * @param $t2 string Target text
	 * @return string The diff, as returned by xdiff_string_rabdiff()
	 */
	function diff( $t1, $t2 ) {
		# Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
		# "String is not zero-terminated"
		wfSuppressWarnings();
		$diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
		wfRestoreWarnings();
		return $diff;
	}

	/**
	 * Apply a binary diff to a base text. Uses xdiff_string_bpatch() when
	 * available and a pure PHP implementation otherwise.
	 *
	 * @param $base string Text the diff was computed against
	 * @param $diff string Binary diff
	 * @return string|bool The patched text. The pure PHP path returns false
	 *   when the checksum or length of the base does not match the diff
	 *   header, or when the diff contains an unknown opcode.
	 */
	function patch( $base, $diff ) {
		if ( function_exists( 'xdiff_string_bpatch' ) ) {
			wfSuppressWarnings();
			$text = xdiff_string_bpatch( $base, $diff ) . '';
			wfRestoreWarnings();
			return $text;
		}

		# Pure PHP implementation

		# Header: 4-byte checksum of the base, 4-byte length of the base
		# (both little-endian 32-bit)
		$header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );

		# Check the checksum if mhash is available
		# NOTE(review): this compares a standard Adler-32 of the base with
		# the header bytes; if xdiff seeds or byte-orders its checksum
		# differently, valid diffs would be rejected here - confirm.
		if ( extension_loaded( 'mhash' ) ) {
			$ofp = mhash( MHASH_ADLER32, $base );
			if ( $ofp !== substr( $diff, 0, 4 ) ) {
				wfDebug( __METHOD__. ": incorrect base checksum\n" );
				return false;
			}
		}
		if ( $header['csize'] != strlen( $base ) ) {
			wfDebug( __METHOD__. ": incorrect base length\n" );
			return false;
		}

		# Walk the opcode stream that follows the 8-byte header
		$p = 8;
		$out = '';
		while ( $p < strlen( $diff ) ) {
			$x = unpack( 'Cop', substr( $diff, $p, 1 ) );
			$op = $x['op'];
			++$p;
			switch ( $op ) {
			case self::XDL_BDOP_INS:
				$x = unpack( 'Csize', substr( $diff, $p, 1 ) );
				$p++;
				$out .= substr( $diff, $p, $x['size'] );
				$p += $x['size'];
				break;
			case self::XDL_BDOP_INSB:
				$x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
				$p += 4;
				$out .= substr( $diff, $p, $x['csize'] );
				$p += $x['csize'];
				break;
			case self::XDL_BDOP_CPY:
				$x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
				$p += 8;
				$out .= substr( $base, $x['off'], $x['csize'] );
				break;
			default:
				wfDebug( __METHOD__.": invalid op\n" );
				return false;
			}
		}
		return $out;
	}

	/**
	 * Rebuild mItems from mDiffs by applying the diffs in order, each one
	 * to the text produced by the previous one. Does nothing if there are
	 * no diffs.
	 */
	function uncompress() {
		if ( !$this->mDiffs ) {
			return;
		}
		$tail = '';
		for ( $diffKey = 0; $diffKey < count( $this->mDiffs ); $diffKey++ ) {
			$textKey = $this->mDiffMap[$diffKey];
			$text = $this->patch( $tail, $this->mDiffs[$diffKey] );
			$this->mItems[$textKey] = $text;
			$tail = $text;
		}
	}

	/**
	 * Prepare for serialization: compute the diffs and pack them, the diff
	 * map and the default key into the deflated mCompressed string.
	 *
	 * @return array Names of the properties to serialize (mCompressed only)
	 */
	function __sleep() {
		$this->compress();
		if ( !count( $this->mItems ) ) {
			// Empty object
			$info = false;
		} else {
			// Take forward differences to improve the compression ratio for sequences
			$map = '';
			$prev = 0;
			foreach ( $this->mDiffMap as $i ) {
				if ( $map !== '' ) {
					$map .= ',';
				}
				$map .= $i - $prev;
				$prev = $i;
			}
			$info = array(
				'diffs' => $this->mDiffs,
				'map' => $map
			);
		}
		if ( isset( $this->mDefaultKey ) ) {
			$info['default'] = $this->mDefaultKey;
		}
		$this->mCompressed = gzdeflate( serialize( $info ) );
		return array( 'mCompressed' );
	}

	/**
	 * Restore the object after unserialization: unpack mCompressed, rebuild
	 * the diff map and expand the diffs into mItems. The object is frozen
	 * afterwards.
	 */
	function __wakeup() {
		// addItem() doesn't work if mItems is partially filled from mDiffs
		$this->mFrozen = true;
		$info = unserialize( gzinflate( $this->mCompressed ) );
		unset( $this->mCompressed );

		if ( !$info ) {
			// Empty object
			return;
		}

		if ( isset( $info['default'] ) ) {
			$this->mDefaultKey = $info['default'];
		}
		$this->mDiffs = $info['diffs'];
		if ( isset( $info['base'] ) ) {
			// Old format: the first text is stored verbatim as 'base'.
			// Turn it into a diff against '' (zeroed 8-byte header plus one
			// INSB op carrying the text) and put it at the head of the chain.
			$this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
			array_unshift( $this->mDiffs,
				pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
				$info['base'] );
		} else {
			// New format: 'map' is a comma-separated list of forward
			// differences, see __sleep()
			$map = explode( ',', $info['map'] );
			$cur = 0;
			$this->mDiffMap = array();
			foreach ( $map as $i ) {
				$cur += $i;
				$this->mDiffMap[] = $cur;
			}
		}
		$this->uncompress();
	}

	/**
	 * Tell whether the blob is still below both its size and its item-count
	 * threshold.
	 *
	 * @return bool
	 */
	function isHappy() {
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}

}