MediaWiki  REL1_22
HistoryBlob.php
Go to the documentation of this file.
00001 <?php
/**
 * Contract for objects that store one or more pieces of text and hand them
 * back by key. One of the stored texts may be designated as the default.
 */
interface HistoryBlob {
    /**
     * Store a piece of text in the blob.
     *
     * @param string $text
     * @return mixed Key under which the text can later be fetched with getItem()
     */
    public function addItem( $text );

    /**
     * Fetch a previously stored piece of text.
     *
     * @param mixed $key Key as returned by addItem()
     * @return mixed The stored text
     */
    public function getItem( $key );

    /**
     * Store a piece of text and make it the default text of the blob,
     * i.e. the one returned by getText().
     *
     * @param string $text
     */
    public function setText( $text );

    /**
     * Fetch the default text of the blob.
     *
     * @return mixed
     */
    public function getText();
}
00069 
/**
 * HistoryBlob implementation that keeps all items in a single array keyed by
 * the MD5 hash of each text, and stores that array serialized and deflated.
 *
 * While "compressed", $mItems holds the deflated string; while "uncompressed"
 * it holds the hash => text array. Every accessor inflates on demand.
 */
class ConcatenatedGzipHistoryBlob implements HistoryBlob
{
    /**
     * $mVersion, $mCompressed, $mItems and $mDefaultHash are the only
     * properties written out by __sleep().
     */
    public $mVersion = 0, $mCompressed = false, $mItems = array(), $mDefaultHash = '';

    /**
     * Running total of strlen() of the items added to this instance.
     * Not serialized, so it restarts from 0 on an unserialized object.
     */
    public $mSize = 0;

    /** Size threshold (bytes) checked by isHappy() */
    public $mMaxSize = 10000000;

    /** Item count threshold checked by isHappy() */
    public $mMaxCount = 100;

    /**
     * @throws MWException If zlib's gzdeflate() is not available
     */
    public function __construct() {
        if ( !function_exists( 'gzdeflate' ) ) {
            throw new MWException( "Need zlib support to read or write this kind of history object (ConcatenatedGzipHistoryBlob)\n" );
        }
    }

    /**
     * Add a text item, unless an identical text is already stored.
     *
     * @param string $text
     * @return string MD5 hash of $text, which is the item's key
     */
    public function addItem( $text ) {
        $this->uncompress();
        $hash = md5( $text );
        if ( !isset( $this->mItems[$hash] ) ) {
            $this->mItems[$hash] = $text;
            $this->mSize += strlen( $text );
        }
        return $hash;
    }

    /**
     * Get an item by its hash.
     *
     * @param string $hash
     * @return string|bool The text, or false if there is no such item
     */
    public function getItem( $hash ) {
        $this->uncompress();
        if ( array_key_exists( $hash, $this->mItems ) ) {
            return $this->mItems[$hash];
        } else {
            return false;
        }
    }

    /**
     * Add a text item and make it the default text.
     *
     * @param string $text
     */
    public function setText( $text ) {
        $this->uncompress();
        $this->mDefaultHash = $this->addItem( $text );
    }

    /**
     * Get the default text.
     *
     * @return string|bool The text, or false if no default item is stored
     */
    public function getText() {
        $this->uncompress();
        return $this->getItem( $this->mDefaultHash );
    }

    /**
     * Remove an item and subtract its length from the running size.
     * Does nothing if there is no item with the given hash.
     *
     * @param string $hash
     */
    public function removeItem( $hash ) {
        // $mItems is a deflated string while compressed; it must be an array
        // before it can be indexed by hash.
        $this->uncompress();
        if ( !array_key_exists( $hash, $this->mItems ) ) {
            return;
        }
        $this->mSize -= strlen( $this->mItems[$hash] );
        unset( $this->mItems[$hash] );
    }

    /**
     * Replace the item array with its serialized, deflated form.
     * No-op if already compressed.
     */
    public function compress() {
        if ( !$this->mCompressed ) {
            $this->mItems = gzdeflate( serialize( $this->mItems ) );
            $this->mCompressed = true;
        }
    }

    /**
     * Restore the item array from its deflated form.
     * No-op if not compressed.
     */
    public function uncompress() {
        if ( $this->mCompressed ) {
            $this->mItems = unserialize( gzinflate( $this->mItems ) );
            $this->mCompressed = false;
        }
    }

    /**
     * Compress before serialization. Note that the object is left in the
     * compressed state afterwards.
     *
     * @return array Names of the properties to serialize
     */
    public function __sleep() {
        $this->compress();
        return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' );
    }

    /**
     * Inflate the items again after unserialization.
     */
    public function __wakeup() {
        $this->uncompress();
    }

    /**
     * Tell whether the blob is still below both its size and its item count
     * threshold.
     *
     * @return bool
     */
    public function isHappy() {
        // count() is only meaningful on the item array, not on the deflated
        // string that $mItems holds while compressed.
        $this->uncompress();
        return $this->mSize < $this->mMaxSize
            && count( $this->mItems ) < $this->mMaxCount;
    }
}
00185 
/**
 * Pointer to one item inside a blob object that is stored in another row of
 * the text table. The stub records the row (old_id) holding the blob and the
 * key (hash) of the item within it.
 */
class HistoryBlobStub {
    /**
     * Single-entry cache: old_id => most recently loaded blob object.
     * Replaced wholesale each time a different blob is loaded.
     *
     * @var array
     */
    protected static $blobCache = array();

    /**
     * $mOldId: old_id of the text row holding the blob object.
     * $mHash: key of the item inside that blob.
     * $mRef: referrer ID as given to setReferrer().
     */
    public $mOldId, $mHash, $mRef;

    /**
     * @param string $hash Key of the item inside the blob
     * @param int $oldid old_id of the text row holding the blob. The default
     *   of 0 leaves the location unset, to be provided via setLocation().
     */
    public function __construct( $hash = '', $oldid = 0 ) {
        $this->mHash = $hash;
        // Honour an explicitly given location instead of silently dropping it;
        // the default keeps $mOldId unset exactly as before.
        if ( $oldid ) {
            $this->mOldId = $oldid;
        }
    }

    /**
     * Set the old_id of the text row holding the blob object.
     *
     * @param int $id
     */
    public function setLocation( $id ) {
        $this->mOldId = $id;
    }

    /**
     * Record the referrer ID.
     *
     * @param int $id
     */
    public function setReferrer( $id ) {
        $this->mRef = $id;
    }

    /**
     * Get the referrer ID recorded with setReferrer().
     *
     * @return int
     */
    public function getReferrer() {
        return $this->mRef;
    }

    /**
     * Load the blob object this stub points to (from the one-entry cache or
     * from the text table) and return the referenced item.
     *
     * @return string|bool The item text; false if the row is missing, is not
     *   flagged as an object, has a malformed external URL, or cannot be
     *   unserialized into an object
     */
    public function getText() {
        if ( isset( self::$blobCache[$this->mOldId] ) ) {
            $obj = self::$blobCache[$this->mOldId];
        } else {
            $dbr = wfGetDB( DB_SLAVE );
            $row = $dbr->selectRow( 'text', array( 'old_flags', 'old_text' ), array( 'old_id' => $this->mOldId ) );
            if ( !$row ) {
                return false;
            }
            $flags = explode( ',', $row->old_flags );
            if ( in_array( 'external', $flags ) ) {
                // old_text holds a URL; the real content lives in external storage
                $url = $row->old_text;
                $parts = explode( '://', $url, 2 );
                if ( !isset( $parts[1] ) || $parts[1] == '' ) {
                    return false;
                }
                $row->old_text = ExternalStore::fetchFromUrl( $url );

            }
            if ( !in_array( 'object', $flags ) ) {
                return false;
            }

            if ( in_array( 'gzip', $flags ) ) {
                // This shouldn't happen, but a bug in the compress script
                // may at times gzip-compress a HistoryBlob object row.
                $obj = unserialize( gzinflate( $row->old_text ) );
            } else {
                $obj = unserialize( $row->old_text );
            }

            if ( !is_object( $obj ) ) {
                // Correct for old double-serialization bug. Only a string can
                // be a second serialization layer; anything else is a failure.
                $obj = is_string( $obj ) ? unserialize( $obj ) : false;
            }

            if ( !is_object( $obj ) ) {
                // Fetch, inflate or unserialize failed; report it like the
                // other failure paths rather than calling a method on a
                // non-object.
                return false;
            }

            // Save this item for reference; if pulling many
            // items in a row we'll likely use it again.
            $obj->uncompress();
            self::$blobCache = array( $this->mOldId => $obj );
        }
        return $obj->getItem( $this->mHash );
    }

    /**
     * Get the key of the item inside the blob.
     *
     * @return string
     */
    public function getHash() {
        return $this->mHash;
    }
}
00286 
/**
 * Pointer to the text of a row in the cur table, identified by cur_id.
 */
class HistoryBlobCurStub {
    /** ID of the referenced cur row */
    public $mCurId;

    /**
     * @param int $curid ID of the cur row holding the text
     */
    public function __construct( $curid = 0 ) {
        $this->mCurId = $curid;
    }

    /**
     * Point the stub at a different cur row.
     *
     * @param int $id
     */
    public function setLocation( $id ) {
        $this->mCurId = $id;
    }

    /**
     * Read the text of the referenced cur row.
     *
     * @return string|bool The value of cur_text, or false if no row matches
     */
    public function getText() {
        $db = wfGetDB( DB_SLAVE );
        $conds = array( 'cur_id' => $this->mCurId );
        $curRow = $db->selectRow( 'cur', array( 'cur_text' ), $conds );

        return $curRow ? $curRow->cur_text : false;
    }
}
00327 
/**
 * HistoryBlob implementation that stores its items as a chain of binary
 * diffs, which is then serialized and deflated.
 *
 * Writing requires the xdiff extension (xdiff_string_rabdiff); reading falls
 * back to a pure PHP patch implementation when xdiff is not available.
 */
class DiffHistoryBlob implements HistoryBlob {
    /** Uncompressed item texts, indexed by item key */
    public $mItems = array();

    /** Running total of strlen() of the items passed to addItem() */
    public $mSize = 0;

    /**
     * Chain of diffs, or null when it still has to be computed. Starting from
     * an empty string, applying diff i to the text reconstructed by diff i-1
     * yields the item whose key is $mDiffMap[i] (see uncompress()).
     */
    public $mDiffs;

    /** For each entry of $mDiffs, the key of the item it reconstructs */
    public $mDiffMap;

    /** Key of the default text, as set by setText() */
    public $mDefaultKey;

    /** Deflated serialized state; the only property written by __sleep() */
    public $mCompressed;

    /** True once the object has been through __wakeup(); addItem() then refuses */
    public $mFrozen = false;

    /** Size threshold (bytes) checked by isHappy() */
    public $mMaxSize = 10000000;

    /** Item count threshold checked by isHappy() */
    public $mMaxCount = 100;

    /** Diff opcodes understood by patch() */
    const XDL_BDOP_INS = 1;  // insert literal bytes, 1-byte length
    const XDL_BDOP_CPY = 2;  // copy a range of the base text
    const XDL_BDOP_INSB = 3; // insert literal bytes, 4-byte length

    /**
     * @throws MWException If zlib's gzdeflate() is not available
     */
    public function __construct() {
        if ( !function_exists( 'gzdeflate' ) ) {
            throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
        }
    }

    /**
     * Append a text item.
     *
     * @param string $text
     * @return int Key (index) of the new item
     * @throws MWException If the object has been unserialized
     */
    public function addItem( $text ) {
        if ( $this->mFrozen ) {
            throw new MWException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
        }

        $this->mItems[] = $text;
        $this->mSize += strlen( $text );
        $this->mDiffs = null; // later
        return count( $this->mItems ) - 1;
    }

    /**
     * Get an item by key.
     *
     * @param int $key
     * @return string|bool The text, or false if there is no such item
     */
    public function getItem( $key ) {
        // Report a missing key as false, matching
        // ConcatenatedGzipHistoryBlob::getItem(), instead of raising an
        // undefined-index notice and returning null.
        if ( !isset( $this->mItems[$key] ) ) {
            return false;
        }
        return $this->mItems[$key];
    }

    /**
     * Append a text item and make it the default text.
     *
     * @param string $text
     */
    public function setText( $text ) {
        $this->mDefaultKey = $this->addItem( $text );
    }

    /**
     * Get the default text.
     *
     * @return string|bool The text, or false if no default item is stored
     */
    public function getText() {
        return $this->getItem( $this->mDefaultKey );
    }

    /**
     * Compute $mDiffs and $mDiffMap from $mItems. No-op if the diffs are
     * already present or there are no items.
     *
     * @throws MWException If diffs have to be computed and the xdiff
     *   extension is not available
     */
    public function compress() {
        if ( isset( $this->mDiffs ) ) {
            // Already compressed
            return;
        }
        if ( !count( $this->mItems ) ) {
            // Empty
            return;
        }
        // Only require xdiff once it is known that diffs must be computed, so
        // that re-serializing an already diffed or empty object does not
        // depend on the extension.
        if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
            throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
        }

        // Create two diff sequences: one for main text and one for small text
        $sequences = array(
            'small' => array(
                'tail' => '',
                'diffs' => array(),
                'map' => array(),
            ),
            'main' => array(
                'tail' => '',
                'diffs' => array(),
                'map' => array(),
            ),
        );
        $smallFactor = 0.5;

        for ( $i = 0; $i < count( $this->mItems ); $i++ ) {
            $text = $this->mItems[$i];
            if ( $i == 0 ) {
                $seqName = 'main';
            } else {
                // An item shorter than $smallFactor times the current main
                // tail goes to the small sequence.
                $mainTail = $sequences['main']['tail'];
                if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
                    $seqName = 'small';
                } else {
                    $seqName = 'main';
                }
            }
            $seq =& $sequences[$seqName];
            $tail = $seq['tail'];
            $diff = $this->diff( $tail, $text );
            $seq['diffs'][] = $diff;
            $seq['map'][] = $i;
            $seq['tail'] = $text;
        }
        unset( $seq ); // unlink dangerous alias

        // Knit the sequences together
        $tail = '';
        $this->mDiffs = array();
        $this->mDiffMap = array();
        foreach ( $sequences as $seq ) {
            if ( !count( $seq['diffs'] ) ) {
                continue;
            }
            if ( $tail === '' ) {
                $this->mDiffs[] = $seq['diffs'][0];
            } else {
                // The first diff of this sequence was taken against an empty
                // string; rebase it onto the tail of the preceding sequence.
                $head = $this->patch( '', $seq['diffs'][0] );
                $this->mDiffs[] = $this->diff( $tail, $head );
            }
            $this->mDiffMap[] = $seq['map'][0];
            for ( $i = 1; $i < count( $seq['diffs'] ); $i++ ) {
                $this->mDiffs[] = $seq['diffs'][$i];
                $this->mDiffMap[] = $seq['map'][$i];
            }
            $tail = $seq['tail'];
        }
    }

    /**
     * Compute a binary diff that turns $t1 into $t2, using xdiff.
     *
     * @param string $t1 Base text
     * @param string $t2 Target text
     * @return string
     */
    public function diff( $t1, $t2 ) {
        # Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
        # "String is not zero-terminated"
        wfSuppressWarnings();
        $diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
        wfRestoreWarnings();
        return $diff;
    }

    /**
     * Apply a binary diff to a base text. Uses xdiff_string_bpatch() when
     * available, otherwise a pure PHP implementation.
     *
     * @param string $base
     * @param string $diff
     * @return string|bool The patched text; the pure PHP implementation
     *   returns false on a checksum or length mismatch or an unknown opcode
     */
    public function patch( $base, $diff ) {
        if ( function_exists( 'xdiff_string_bpatch' ) ) {
            wfSuppressWarnings();
            $text = xdiff_string_bpatch( $base, $diff ) . '';
            wfRestoreWarnings();
            return $text;
        }

        # Pure PHP implementation

        // The diff starts with an 8 byte header: base checksum, base length
        $header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );

        # Check the checksum if hash extension is available
        $ofp = $this->xdiffAdler32( $base );
        if ( $ofp !== false && $ofp !== substr( $diff, 0, 4 ) ) {
            wfDebug( __METHOD__ . ": incorrect base checksum\n" );
            return false;
        }
        if ( $header['csize'] != strlen( $base ) ) {
            wfDebug( __METHOD__ . ": incorrect base length\n" );
            return false;
        }

        $p = 8;
        $out = '';
        while ( $p < strlen( $diff ) ) {
            $x = unpack( 'Cop', substr( $diff, $p, 1 ) );
            $op = $x['op'];
            ++$p;
            switch ( $op ) {
            case self::XDL_BDOP_INS:
                // 1 byte length, followed by that many literal bytes
                $x = unpack( 'Csize', substr( $diff, $p, 1 ) );
                $p++;
                $out .= substr( $diff, $p, $x['size'] );
                $p += $x['size'];
                break;
            case self::XDL_BDOP_INSB:
                // 4 byte length, followed by that many literal bytes
                $x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
                $p += 4;
                $out .= substr( $diff, $p, $x['csize'] );
                $p += $x['csize'];
                break;
            case self::XDL_BDOP_CPY:
                // 4 byte offset and 4 byte length of a range in the base text
                $x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
                $p += 8;
                $out .= substr( $base, $x['off'], $x['csize'] );
                break;
            default:
                wfDebug( __METHOD__ . ": invalid op\n" );
                return false;
            }
        }
        return $out;
    }

    /**
     * Compute the checksum LibXDiff uses for the base text: Adler-32 with the
     * state starting at zero.
     *
     * @param string $s
     * @return string|bool Four raw bytes in the order patch() compares against
     *   the diff header, or false if the hash extension is not available
     */
    public function xdiffAdler32( $s ) {
        if ( !function_exists( 'hash' ) ) {
            return false;
        }

        static $init;
        if ( $init === null ) {
            $init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
        }

        // The real Adler-32 checksum of $init is zero, so it initialises the
        // state to zero, as it is at the start of LibXDiff's checksum
        // algorithm. Appending the subject string then simulates LibXDiff.
        return strrev( hash( 'adler32', $init . $s, true ) );
    }

    /**
     * Rebuild $mItems by applying the diff chain, starting from an empty
     * string. No-op if there are no diffs.
     */
    public function uncompress() {
        if ( !$this->mDiffs ) {
            return;
        }
        $tail = '';
        for ( $diffKey = 0; $diffKey < count( $this->mDiffs ); $diffKey++ ) {
            $textKey = $this->mDiffMap[$diffKey];
            $text = $this->patch( $tail, $this->mDiffs[$diffKey] );
            $this->mItems[$textKey] = $text;
            $tail = $text;
        }
    }

    /**
     * Compute the diffs if necessary and pack the state into $mCompressed.
     *
     * @return array Names of the properties to serialize
     */
    public function __sleep() {
        $this->compress();
        if ( !count( $this->mItems ) ) {
            // Empty object
            $info = false;
        } else {
            // Take forward differences to improve the compression ratio for sequences
            $map = '';
            $prev = 0;
            foreach ( $this->mDiffMap as $i ) {
                if ( $map !== '' ) {
                    $map .= ',';
                }
                $map .= $i - $prev;
                $prev = $i;
            }
            $info = array(
                'diffs' => $this->mDiffs,
                'map' => $map
            );
        }
        if ( isset( $this->mDefaultKey ) ) {
            $info['default'] = $this->mDefaultKey;
        }
        $this->mCompressed = gzdeflate( serialize( $info ) );
        return array( 'mCompressed' );
    }

    /**
     * Unpack $mCompressed, restore the diff chain and rebuild the items.
     * The object is frozen afterwards: addItem() will throw.
     */
    public function __wakeup() {
        // addItem() doesn't work if mItems is partially filled from mDiffs
        $this->mFrozen = true;
        $info = unserialize( gzinflate( $this->mCompressed ) );
        unset( $this->mCompressed );

        if ( !$info ) {
            // Empty object
            return;
        }

        if ( isset( $info['default'] ) ) {
            $this->mDefaultKey = $info['default'];
        }
        $this->mDiffs = $info['diffs'];
        if ( isset( $info['base'] ) ) {
            // Old format: the first text is stored verbatim as 'base'. Turn it
            // into a diff against the empty string (zero checksum, zero base
            // length, one INSB op) and put it at the head of the chain.
            // NOTE(review): the map is built before the base diff is
            // prepended, so it ends up one entry shorter than $mDiffs --
            // confirm against the old on-disk format.
            $this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
            array_unshift( $this->mDiffs,
                pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
                $info['base'] );
        } else {
            // New format: 'map' is a comma-separated list of forward
            // differences; accumulate them back into absolute item keys.
            $map = explode( ',', $info['map'] );
            $cur = 0;
            $this->mDiffMap = array();
            foreach ( $map as $i ) {
                $cur += $i;
                $this->mDiffMap[] = $cur;
            }
        }
        $this->uncompress();
    }

    /**
     * Tell whether the blob is still below both its size and its item count
     * threshold.
     *
     * @return bool
     */
    public function isHappy() {
        return $this->mSize < $this->mMaxSize
            && count( $this->mItems ) < $this->mMaxCount;
    }

}