MediaWiki  REL1_23
HistoryBlob.php
Go to the documentation of this file.
00001 <?php
/**
 * Common contract for the blob containers in this file: an object that
 * stores several text items, hands back a key for each item added, and
 * remembers one item as its "default" text.
 */
interface HistoryBlob {
    /**
     * Store a text item in the blob.
     *
     * @param string $text
     * @return string|int Key that can later be passed to getItem()
     */
    public function addItem( $text );

    /**
     * Fetch a previously stored item.
     *
     * @param string|int $key Key returned by addItem()
     * @return string|bool
     */
    public function getItem( $key );

    /**
     * Store a text item and make it the blob's default text, i.e. the one
     * returned by getText().
     *
     * @param string $text
     */
    public function setText( $text );

    /**
     * Fetch the default text.
     *
     * @return string|bool
     */
    public function getText();
}
00069 
/**
 * HistoryBlob implementation that keeps its items in an array keyed by the
 * MD5 hash of each text. While "compressed" (see compress()), the whole
 * array is held as a single gzdeflate()'d serialized string instead.
 */
class ConcatenatedGzipHistoryBlob implements HistoryBlob
{
    /** Written out by __sleep(); never modified inside this class. */
    public $mVersion = 0;

    /** @var bool True while $mItems holds the compressed string, not the array */
    public $mCompressed = false;

    /** @var array|string Items keyed by MD5 hash, or their compressed form */
    public $mItems = array();

    /** @var string Key of the default text, as set by setText() */
    public $mDefaultHash = '';

    /** @var int Total byte length of the distinct items currently stored */
    public $mSize = 0;

    /** @var int isHappy() reports false once $mSize reaches this many bytes */
    public $mMaxSize = 10000000;

    /** @var int isHappy() reports false once this many items are stored */
    public $mMaxCount = 100;

    /**
     * @throws MWException if zlib (gzdeflate) is not available
     */
    public function __construct() {
        if ( !function_exists( 'gzdeflate' ) ) {
            throw new MWException( "Need zlib support to read or write this kind of history object (ConcatenatedGzipHistoryBlob)\n" );
        }
    }

    /**
     * Store a text item. Identical texts share a single entry because the
     * key is the MD5 hash of the text.
     *
     * @param string $text
     * @return string MD5 hash under which the text is stored
     */
    public function addItem( $text ) {
        $this->uncompress();
        $hash = md5( $text );
        if ( !isset( $this->mItems[$hash] ) ) {
            $this->mItems[$hash] = $text;
            $this->mSize += strlen( $text );
        }
        return $hash;
    }

    /**
     * @param string $hash Key returned by addItem()
     * @return string|bool The stored text, or false if there is no such item
     */
    public function getItem( $hash ) {
        $this->uncompress();
        if ( array_key_exists( $hash, $this->mItems ) ) {
            return $this->mItems[$hash];
        } else {
            return false;
        }
    }

    /**
     * Store a text item and remember it as the default text.
     *
     * @param string $text
     */
    public function setText( $text ) {
        $this->uncompress();
        $this->mDefaultHash = $this->addItem( $text );
    }

    /**
     * @return string|bool The default text, or false if it is not present
     */
    public function getText() {
        $this->uncompress();
        return $this->getItem( $this->mDefaultHash );
    }

    /**
     * Remove an item and subtract its length from the size counter.
     * Unknown hashes are ignored.
     *
     * @param string $hash
     */
    public function removeItem( $hash ) {
        // The item array is only available in uncompressed form.
        $this->uncompress();
        if ( !isset( $this->mItems[$hash] ) ) {
            return;
        }
        $this->mSize -= strlen( $this->mItems[$hash] );
        unset( $this->mItems[$hash] );
    }

    /**
     * Replace the item array with its serialized, gzdeflate()'d form.
     * Does nothing if already compressed.
     */
    public function compress() {
        if ( !$this->mCompressed ) {
            $this->mItems = gzdeflate( serialize( $this->mItems ) );
            $this->mCompressed = true;
        }
    }

    /**
     * Restore the item array from its compressed form.
     * Does nothing if not compressed.
     */
    public function uncompress() {
        if ( $this->mCompressed ) {
            $this->mItems = unserialize( gzinflate( $this->mItems ) );
            $this->mCompressed = false;
        }
    }

    /**
     * Compress before serialization. Note that this leaves the live object
     * in compressed state; the other methods uncompress on demand.
     *
     * @return array Names of the properties to serialize
     */
    public function __sleep() {
        $this->compress();
        return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' );
    }

    /**
     * Expand the items again after unserialization and rebuild the size
     * counter, which is not among the properties saved by __sleep().
     */
    public function __wakeup() {
        $this->uncompress();
        $this->mSize = 0;
        if ( is_array( $this->mItems ) ) {
            foreach ( $this->mItems as $item ) {
                $this->mSize += strlen( $item );
            }
        }
    }

    /**
     * Tell whether the blob is still below both its size limit and its
     * item-count limit.
     *
     * @return bool
     */
    public function isHappy() {
        // count() must see the item array, not the compressed string that
        // __sleep() leaves behind.
        $this->uncompress();
        return $this->mSize < $this->mMaxSize
            && count( $this->mItems ) < $this->mMaxCount;
    }
}
00187 
/**
 * Pointer to one item inside a blob object that is stored in a row of the
 * 'text' table: $mOldId identifies the row and $mHash the item within the
 * blob.
 */
class HistoryBlobStub {
    /**
     * Most recently loaded blob, keyed by its old_id. The whole array is
     * replaced on every load, so it never holds more than one entry.
     *
     * @var array
     */
    protected static $blobCache = array();

    /** old_id of the text row holding the blob; set through setLocation() */
    public $mOldId;

    /** Key of the item inside the blob */
    public $mHash;

    /** Referrer id, as given to setReferrer() */
    public $mRef;

    /**
     * @param string $hash Key of the item inside the blob
     * @param int $oldid Accepted for signature compatibility but not stored
     *   by this constructor; use setLocation() to set the row id.
     */
    function __construct( $hash = '', $oldid = 0 ) {
        $this->mHash = $hash;
    }

    /**
     * Set the old_id of the text row that holds the blob.
     *
     * @param int $id
     */
    function setLocation( $id ) {
        $this->mOldId = $id;
    }

    /**
     * @param int $id
     */
    function setReferrer( $id ) {
        $this->mRef = $id;
    }

    /**
     * @return int|null Value last passed to setReferrer()
     */
    function getReferrer() {
        return $this->mRef;
    }

    /**
     * Load the blob this stub points at (from the one-entry cache or from
     * the database) and return the item identified by $mHash.
     *
     * @return string|bool The item text, or false if the row is missing,
     *   is not flagged as an object, has an unusable external URL, or does
     *   not unserialize to an object.
     */
    function getText() {
        if ( isset( self::$blobCache[$this->mOldId] ) ) {
            $obj = self::$blobCache[$this->mOldId];
        } else {
            $dbr = wfGetDB( DB_SLAVE );
            $row = $dbr->selectRow( 'text', array( 'old_flags', 'old_text' ), array( 'old_id' => $this->mOldId ) );
            if ( !$row ) {
                return false;
            }
            $flags = explode( ',', $row->old_flags );
            if ( in_array( 'external', $flags, true ) ) {
                // old_text holds a URL pointing at the real content
                $url = $row->old_text;
                $parts = explode( '://', $url, 2 );
                if ( !isset( $parts[1] ) || $parts[1] === '' ) {
                    return false;
                }
                $row->old_text = ExternalStore::fetchFromUrl( $url );
            }
            if ( !in_array( 'object', $flags, true ) ) {
                return false;
            }

            if ( in_array( 'gzip', $flags, true ) ) {
                // This shouldn't happen, but a bug in the compress script
                // may at times gzip-compress a HistoryBlob object row.
                $obj = unserialize( gzinflate( $row->old_text ) );
            } else {
                $obj = unserialize( $row->old_text );
            }

            if ( is_string( $obj ) ) {
                // Correct for old double-serialization bug.
                $obj = unserialize( $obj );
            }

            if ( !is_object( $obj ) ) {
                // Corrupt row or failed fetch: report failure instead of
                // calling a method on a non-object.
                return false;
            }

            // Save this item for reference; if pulling many
            // items in a row we'll likely use it again.
            $obj->uncompress();
            self::$blobCache = array( $this->mOldId => $obj );
        }
        return $obj->getItem( $this->mHash );
    }

    /**
     * @return string Key of the item inside the blob
     */
    function getHash() {
        return $this->mHash;
    }
}
00288 
/**
 * Pointer to the text of a row in the 'cur' table, identified by cur_id.
 */
class HistoryBlobCurStub {
    /** cur_id of the row this stub points at */
    public $mCurId;

    /**
     * @param int $curid cur_id of the row to point at
     */
    function __construct( $curid = 0 ) {
        $this->mCurId = $curid;
    }

    /**
     * Point the stub at a different row.
     *
     * @param int $id New cur_id
     */
    function setLocation( $id ) {
        $this->mCurId = $id;
    }

    /**
     * Read cur_text for the referenced row.
     *
     * @return string|bool The text, or false when no matching row is found
     */
    function getText() {
        $conds = array( 'cur_id' => $this->mCurId );
        $found = wfGetDB( DB_SLAVE )->selectRow( 'cur', array( 'cur_text' ), $conds );
        return $found ? $found->cur_text : false;
    }
}
00329 
/**
 * HistoryBlob implementation that stores its items as a chain of binary
 * diffs, each taken against the previous text in the chain, and serializes
 * the chain as a gzdeflate()'d payload.
 */
class DiffHistoryBlob implements HistoryBlob {
    /** @var array Uncompressed items, indexed by the integer key from addItem() */
    var $mItems = array();

    /** @var int Total byte length of the items passed to addItem() */
    var $mSize = 0;

    /**
     * @var array|null Binary diffs built by compress() or loaded by
     *   __wakeup(); null while there are items not yet diffed.
     */
    var $mDiffs;

    /** @var array For each entry of $mDiffs, the key in $mItems it reconstructs */
    var $mDiffMap;

    /** @var int Key of the default text, set by setText() */
    var $mDefaultKey;

    /** @var string Serialized, gzdeflate()'d payload; only set between __sleep() and __wakeup() */
    var $mCompressed;

    /** @var bool True once the object has been through __wakeup(); addItem() then refuses to run */
    var $mFrozen = false;

    /** @var int isHappy() reports false once $mSize reaches this many bytes */
    var $mMaxSize = 10000000;

    /** @var int isHappy() reports false once this many items are stored */
    var $mMaxCount = 100;

    /** Opcodes recognised in a binary diff by the pure-PHP patch() code */
    const XDL_BDOP_INS = 1;
    const XDL_BDOP_CPY = 2;
    const XDL_BDOP_INSB = 3;

    /**
     * @throws MWException if zlib (gzdeflate) is not available
     */
    function __construct() {
        if ( !function_exists( 'gzdeflate' ) ) {
            throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
        }
    }

    /**
     * Append a text item.
     *
     * @param string $text
     * @return int Key of the new item
     * @throws MWException if the object has already been through __wakeup()
     */
    function addItem( $text ) {
        if ( $this->mFrozen ) {
            throw new MWException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
        }

        $this->mItems[] = $text;
        $this->mSize += strlen( $text );
        $this->mDiffs = null; // later
        return count( $this->mItems ) - 1;
    }

    /**
     * @param int $key Key returned by addItem()
     * @return string|bool The item, or false if there is no item with that
     *   key (matching ConcatenatedGzipHistoryBlob::getItem())
     */
    function getItem( $key ) {
        if ( !array_key_exists( $key, $this->mItems ) ) {
            return false;
        }
        return $this->mItems[$key];
    }

    /**
     * Append a text item and remember it as the default text.
     *
     * @param string $text
     */
    function setText( $text ) {
        $this->mDefaultKey = $this->addItem( $text );
    }

    /**
     * @return string|bool The default text, or false if it is not present
     */
    function getText() {
        return $this->getItem( $this->mDefaultKey );
    }

    /**
     * Build $mDiffs and $mDiffMap from $mItems. Does nothing when the diffs
     * already exist or when there are no items.
     *
     * @throws MWException if there is work to do but xdiff_string_rabdiff()
     *   is not available
     */
    function compress() {
        if ( isset( $this->mDiffs ) ) {
            // Already compressed
            return;
        }
        $itemCount = count( $this->mItems );
        if ( !$itemCount ) {
            // Empty
            return;
        }
        // Only demand xdiff once there really is something to diff.
        if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
            throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
        }

        // Create two diff sequences: one for main text and one for small text
        $sequences = array(
            'small' => array(
                'tail' => '',
                'diffs' => array(),
                'map' => array(),
            ),
            'main' => array(
                'tail' => '',
                'diffs' => array(),
                'map' => array(),
            ),
        );
        $smallFactor = 0.5;

        for ( $i = 0; $i < $itemCount; $i++ ) {
            $text = $this->mItems[$i];
            if ( $i == 0 ) {
                $seqName = 'main';
            } else {
                // An item much shorter than the last main-sequence text goes
                // into the 'small' sequence instead.
                $mainTail = $sequences['main']['tail'];
                if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
                    $seqName = 'small';
                } else {
                    $seqName = 'main';
                }
            }
            $seq =& $sequences[$seqName];
            $tail = $seq['tail'];
            $diff = $this->diff( $tail, $text );
            $seq['diffs'][] = $diff;
            $seq['map'][] = $i;
            $seq['tail'] = $text;
        }
        unset( $seq ); // unlink dangerous alias

        // Knit the sequences together
        $tail = '';
        $this->mDiffs = array();
        $this->mDiffMap = array();
        foreach ( $sequences as $seq ) {
            $diffCount = count( $seq['diffs'] );
            if ( !$diffCount ) {
                continue;
            }
            if ( $tail === '' ) {
                $this->mDiffs[] = $seq['diffs'][0];
            } else {
                // The first diff of this sequence was taken against '';
                // rebuild its text and re-diff it against the end of the
                // previous sequence so the chain stays continuous.
                $head = $this->patch( '', $seq['diffs'][0] );
                $this->mDiffs[] = $this->diff( $tail, $head );
            }
            $this->mDiffMap[] = $seq['map'][0];
            for ( $i = 1; $i < $diffCount; $i++ ) {
                $this->mDiffs[] = $seq['diffs'][$i];
                $this->mDiffMap[] = $seq['map'][$i];
            }
            $tail = $seq['tail'];
        }
    }

    /**
     * Compute a binary diff with xdiff_string_rabdiff().
     *
     * @param string $t1 Base text
     * @param string $t2 Target text
     * @return string
     */
    function diff( $t1, $t2 ) {
        # Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
        # "String is not zero-terminated"
        wfSuppressWarnings();
        $diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
        wfRestoreWarnings();
        return $diff;
    }

    /**
     * Apply a binary diff to a base text. Uses xdiff_string_bpatch() when
     * available, otherwise a pure-PHP interpreter of the diff opcodes.
     *
     * @param string $base
     * @param string $diff
     * @return string|bool The patched text; the pure-PHP path returns false
     *   when the diff is truncated, does not match the base, or contains an
     *   unknown opcode
     */
    function patch( $base, $diff ) {
        if ( function_exists( 'xdiff_string_bpatch' ) ) {
            wfSuppressWarnings();
            $text = xdiff_string_bpatch( $base, $diff ) . '';
            wfRestoreWarnings();
            return $text;
        }

        # Pure PHP implementation

        $diffLength = strlen( $diff );
        if ( $diffLength < 8 ) {
            // Too short to hold the 4-byte checksum and 4-byte base length
            wfDebug( __METHOD__ . ": truncated diff header\n" );
            return false;
        }

        $header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );

        # Check the checksum if hash extension is available
        $ofp = $this->xdiffAdler32( $base );
        if ( $ofp !== false && $ofp !== substr( $diff, 0, 4 ) ) {
            wfDebug( __METHOD__ . ": incorrect base checksum\n" );
            return false;
        }
        if ( $header['csize'] != strlen( $base ) ) {
            wfDebug( __METHOD__ . ": incorrect base length\n" );
            return false;
        }

        $p = 8;
        $out = '';
        while ( $p < $diffLength ) {
            $x = unpack( 'Cop', substr( $diff, $p, 1 ) );
            $op = $x['op'];
            ++$p;
            switch ( $op ) {
            case self::XDL_BDOP_INS:
                // 1-byte length followed by that many literal bytes
                $x = unpack( 'Csize', substr( $diff, $p, 1 ) );
                $p++;
                $out .= substr( $diff, $p, $x['size'] );
                $p += $x['size'];
                break;
            case self::XDL_BDOP_INSB:
                // 4-byte little-endian length followed by literal bytes
                $x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
                $p += 4;
                $out .= substr( $diff, $p, $x['csize'] );
                $p += $x['csize'];
                break;
            case self::XDL_BDOP_CPY:
                // 4-byte offset and 4-byte length of a range copied from the base
                $x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
                $p += 8;
                $out .= substr( $base, $x['off'], $x['csize'] );
                break;
            default:
                wfDebug( __METHOD__ . ": invalid op\n" );
                return false;
            }
        }
        return $out;
    }

    /**
     * Compute the base checksum in the byte order patch() compares against
     * the first four bytes of a diff.
     *
     * @param string $s
     * @return string|bool 4-byte binary checksum, or false if hash() is unavailable
     */
    function xdiffAdler32( $s ) {
        if ( !function_exists( 'hash' ) ) {
            return false;
        }

        static $init;
        if ( $init === null ) {
            $init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
        }

        // The real Adler-32 checksum of $init is zero, so it initialises the
        // state to zero, as it is at the start of LibXDiff's checksum
        // algorithm. Appending the subject string then simulates LibXDiff.
        return strrev( hash( 'adler32', $init . $s, true ) );
    }

    /**
     * Rebuild $mItems by applying the diffs in order, each to the result of
     * the previous one. Does nothing if there are no diffs.
     */
    function uncompress() {
        if ( !$this->mDiffs ) {
            return;
        }
        $tail = '';
        $diffCount = count( $this->mDiffs );
        for ( $diffKey = 0; $diffKey < $diffCount; $diffKey++ ) {
            $textKey = $this->mDiffMap[$diffKey];
            $text = $this->patch( $tail, $this->mDiffs[$diffKey] );
            $this->mItems[$textKey] = $text;
            $tail = $text;
        }
    }

    /**
     * Compress the items and pack diffs, map and default key into
     * $mCompressed, the only property that gets serialized.
     *
     * @return array Names of the properties to serialize
     */
    function __sleep() {
        $this->compress();
        if ( !count( $this->mItems ) ) {
            // Empty object
            $info = false;
        } else {
            // Take forward differences to improve the compression ratio for sequences
            $map = '';
            $prev = 0;
            foreach ( $this->mDiffMap as $i ) {
                if ( $map !== '' ) {
                    $map .= ',';
                }
                $map .= $i - $prev;
                $prev = $i;
            }
            $info = array(
                'diffs' => $this->mDiffs,
                'map' => $map
            );
        }
        if ( isset( $this->mDefaultKey ) ) {
            if ( !is_array( $info ) ) {
                // Do not rely on implicit conversion of false to an array
                $info = array();
            }
            $info['default'] = $this->mDefaultKey;
        }
        $this->mCompressed = gzdeflate( serialize( $info ) );
        return array( 'mCompressed' );
    }

    /**
     * Unpack $mCompressed, restore the diffs and their map, and rebuild the
     * items. The object is frozen afterwards: addItem() will throw.
     */
    function __wakeup() {
        // addItem() doesn't work if mItems is partially filled from mDiffs
        $this->mFrozen = true;
        $info = unserialize( gzinflate( $this->mCompressed ) );
        unset( $this->mCompressed );

        if ( !$info ) {
            // Empty object
            return;
        }

        if ( isset( $info['default'] ) ) {
            $this->mDefaultKey = $info['default'];
        }
        $this->mDiffs = $info['diffs'];
        if ( isset( $info['base'] ) ) {
            // Old format
            // NOTE(review): the map is sized before the base diff is
            // prepended below, so it ends up one entry shorter than
            // $mDiffs - confirm against the old serialization format.
            $this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
            // Express the base text as a diff against '': zero checksum,
            // zero base length, then one INSB op carrying the whole text.
            array_unshift( $this->mDiffs,
                pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
                $info['base'] );
        } else {
            // New format
            // The map is stored as comma-separated forward differences;
            // accumulate them back into absolute item keys.
            $map = explode( ',', $info['map'] );
            $cur = 0;
            $this->mDiffMap = array();
            foreach ( $map as $i ) {
                $cur += (int)$i;
                $this->mDiffMap[] = $cur;
            }
        }
        $this->uncompress();
    }

    /**
     * Tell whether the blob is still below both its size limit and its
     * item-count limit.
     *
     * @return bool
     */
    function isHappy() {
        return $this->mSize < $this->mMaxSize
            && count( $this->mItems ) < $this->mMaxCount;
    }

}