MediaWiki  REL1_24
HistoryBlob.php
Go to the documentation of this file.
00001 <?php
/**
 * Common contract for objects that bundle several revision texts into a
 * single stored blob.
 *
 * Both implementations in this file (ConcatenatedGzipHistoryBlob and
 * DiffHistoryBlob) hand out a key from addItem() and accept that key back in
 * getItem(); one of the items can additionally be marked as the "default"
 * text via setText() and read back with getText().
 */
interface HistoryBlob {
    /**
     * Add a text item to the blob.
     *
     * @param string $text Text to store
     * @return string|int Key under which the item can later be fetched with
     *   getItem(). The key type is implementation-defined (an MD5 hash for
     *   ConcatenatedGzipHistoryBlob, a numeric index for DiffHistoryBlob).
     */
    public function addItem( $text );

    /**
     * Fetch a previously added item.
     *
     * @param string|int $key Key returned by addItem()
     * @return string|bool The stored text. Implementations are expected to
     *   return false when the key is not present (ConcatenatedGzipHistoryBlob
     *   does so).
     */
    public function getItem( $key );

    /**
     * Add a text item and remember it as the blob's default text.
     *
     * @param string $text Text to store as the default item
     */
    public function setText( $text );

    /**
     * Fetch the default text, i.e. the item registered through setText().
     *
     * @return string|bool
     */
    public function getText();
}
00068 
/**
 * HistoryBlob implementation that keeps its items in an array keyed by the
 * MD5 hash of each text, and stores that array as a single serialized,
 * gzdeflate()-compressed string while "compressed".
 *
 * $mItems is therefore either an array (uncompressed state) or a binary
 * string (compressed state); $mCompressed tells which. Every accessor calls
 * uncompress() first so that it always works on the array form.
 */
class ConcatenatedGzipHistoryBlob implements HistoryBlob {
    public $mVersion = 0, $mCompressed = false, $mItems = array(), $mDefaultHash = '';

    /** @var int Sum of strlen() of the items added through addItem(); not serialized by __sleep() */
    public $mSize = 0;

    /** @var int Size threshold used by isHappy() */
    public $mMaxSize = 10000000;

    /** @var int Item-count threshold used by isHappy() */
    public $mMaxCount = 100;

    /**
     * @throws MWException If the zlib functions are not available
     */
    public function __construct() {
        if ( !function_exists( 'gzdeflate' ) ) {
            throw new MWException( "Need zlib support to read or write this "
                . "kind of history object (ConcatenatedGzipHistoryBlob)\n" );
        }
    }

    /**
     * Add a text item, de-duplicated by its MD5 hash.
     *
     * @param string $text
     * @return string MD5 hash of $text, usable as key for getItem()
     */
    public function addItem( $text ) {
        $this->uncompress();
        $hash = md5( $text );
        // Identical text is stored (and counted towards mSize) only once.
        if ( !isset( $this->mItems[$hash] ) ) {
            $this->mItems[$hash] = $text;
            $this->mSize += strlen( $text );
        }
        return $hash;
    }

    /**
     * Fetch an item by hash.
     *
     * @param string $hash Key returned by addItem()
     * @return string|bool The text, or false if there is no such item
     */
    public function getItem( $hash ) {
        $this->uncompress();
        if ( array_key_exists( $hash, $this->mItems ) ) {
            return $this->mItems[$hash];
        } else {
            return false;
        }
    }

    /**
     * Add a text item and make it the default text returned by getText().
     *
     * @param string $text
     */
    public function setText( $text ) {
        $this->uncompress();
        $this->mDefaultHash = $this->addItem( $text );
    }

    /**
     * Fetch the default text.
     *
     * @return string|bool The text, or false if no default item is stored
     */
    public function getText() {
        $this->uncompress();
        return $this->getItem( $this->mDefaultHash );
    }

    /**
     * Remove an item and subtract its length from mSize.
     *
     * Works regardless of the compression state, and is a no-op when the
     * hash is unknown.
     *
     * @param string $hash Key returned by addItem()
     */
    public function removeItem( $hash ) {
        // mItems is a deflated string while compressed; it must be an array
        // before it can be indexed or have an element unset.
        $this->uncompress();
        if ( !array_key_exists( $hash, $this->mItems ) ) {
            return;
        }
        $this->mSize -= strlen( $this->mItems[$hash] );
        unset( $this->mItems[$hash] );
    }

    /**
     * Replace the item array by its serialized, deflated form.
     * Does nothing if the object is already compressed.
     */
    public function compress() {
        if ( !$this->mCompressed ) {
            $this->mItems = gzdeflate( serialize( $this->mItems ) );
            $this->mCompressed = true;
        }
    }

    /**
     * Restore the item array from its serialized, deflated form.
     * Does nothing if the object is not compressed.
     */
    public function uncompress() {
        if ( $this->mCompressed ) {
            $this->mItems = unserialize( gzinflate( $this->mItems ) );
            $this->mCompressed = false;
        }
    }

    /**
     * Compress before serialization and select the properties to store.
     * Note that mSize, mMaxSize and mMaxCount are not part of the stored form.
     *
     * @return array Property names to serialize
     */
    public function __sleep() {
        $this->compress();
        return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' );
    }

    /**
     * Uncompress right after unserialization.
     */
    public function __wakeup() {
        $this->uncompress();
    }

    /**
     * Tell whether the blob is still below both the size and the item-count
     * thresholds (mMaxSize, mMaxCount).
     *
     * @return bool
     */
    public function isHappy() {
        // Counting only makes sense on the array form of mItems.
        $this->uncompress();
        return $this->mSize < $this->mMaxSize
            && count( $this->mItems ) < $this->mMaxCount;
    }
}
00186 
/**
 * Pointer to one item inside a blob object that is stored in another row of
 * the text table. getText() loads that row, unserializes the blob object and
 * asks it for the item identified by the stored hash.
 */
class HistoryBlobStub {
    /**
     * @var array Cache of the most recently loaded blob object, keyed by the
     *   row ID it was loaded from. getText() replaces the whole array on each
     *   load, so it holds at most one entry.
     */
    protected static $blobCache = array();

    /** @var int old_id of the text-table row that holds the blob object */
    public $mOldId;

    /** @var string Key passed to the blob object's getItem() */
    public $mHash;

    /** @var int Referrer ID as given to setReferrer(); not used inside this class */
    public $mRef;

    /**
     * @param string $hash Key of the item inside the blob object
     * @param int $oldid Accepted for signature compatibility but not stored;
     *   use setLocation() to set the row ID.
     */
    function __construct( $hash = '', $oldid = 0 ) {
        $this->mHash = $hash;
    }

    /**
     * Set the ID of the text-table row that holds the blob object.
     *
     * @param int $id
     */
    function setLocation( $id ) {
        $this->mOldId = $id;
    }

    /**
     * Set the referrer ID.
     *
     * @param int $id
     */
    function setReferrer( $id ) {
        $this->mRef = $id;
    }

    /**
     * Get the referrer ID.
     *
     * @return int
     */
    function getReferrer() {
        return $this->mRef;
    }

    /**
     * Load the blob object this stub points to and return the referenced item.
     *
     * @return string|bool Whatever the blob's getItem() returns for the
     *   stored hash, or false if the row is missing, has a malformed external
     *   URL, is not flagged as 'object', or does not unserialize to an object.
     */
    function getText() {
        if ( isset( self::$blobCache[$this->mOldId] ) ) {
            $obj = self::$blobCache[$this->mOldId];
        } else {
            $dbr = wfGetDB( DB_SLAVE );
            $row = $dbr->selectRow(
                'text',
                array( 'old_flags', 'old_text' ),
                array( 'old_id' => $this->mOldId )
            );

            if ( !$row ) {
                return false;
            }

            $flags = explode( ',', $row->old_flags );
            if ( in_array( 'external', $flags ) ) {
                // old_text holds a URL; require a non-empty part after '://'
                $url = $row->old_text;
                $parts = explode( '://', $url, 2 );
                if ( !isset( $parts[1] ) || $parts[1] == '' ) {
                    return false;
                }
                $row->old_text = ExternalStore::fetchFromUrl( $url );
            }

            if ( !in_array( 'object', $flags ) ) {
                return false;
            }

            if ( in_array( 'gzip', $flags ) ) {
                // This shouldn't happen, but a bug in the compress script
                // may at times gzip-compress a HistoryBlob object row.
                $obj = unserialize( gzinflate( $row->old_text ) );
            } else {
                $obj = unserialize( $row->old_text );
            }

            if ( is_string( $obj ) ) {
                // Correct for old double-serialization bug.
                $obj = unserialize( $obj );
            }

            if ( !is_object( $obj ) ) {
                // Unreadable or corrupt content: fail like the other error
                // paths instead of calling a method on a non-object.
                return false;
            }

            // Save this item for reference; if pulling many
            // items in a row we'll likely use it again.
            $obj->uncompress();
            self::$blobCache = array( $this->mOldId => $obj );
        }

        return $obj->getItem( $this->mHash );
    }

    /**
     * Get the key of the referenced item.
     *
     * @return string
     */
    function getHash() {
        return $this->mHash;
    }
}
00305 
/**
 * Pointer to the text of a row in the 'cur' table; getText() reads
 * cur_text from the row whose cur_id equals the stored ID.
 */
class HistoryBlobCurStub {
    /** @var int cur_id of the referenced row */
    public $mCurId;

    /**
     * @param int $curid cur_id of the referenced row
     */
    function __construct( $curid = 0 ) {
        $this->mCurId = $curid;
    }

    /**
     * Point the stub at another row.
     *
     * @param int $id New cur_id
     */
    function setLocation( $id ) {
        $this->mCurId = $id;
    }

    /**
     * Read the text of the referenced row.
     *
     * @return string|bool The row's cur_text, or false if no row was found
     */
    function getText() {
        $conds = array( 'cur_id' => $this->mCurId );
        $row = wfGetDB( DB_SLAVE )->selectRow( 'cur', array( 'cur_text' ), $conds );

        return $row ? $row->cur_text : false;
    }
}
00347 
/**
 * HistoryBlob implementation that stores its items as a chain of binary
 * diffs (xdiff "rabdiff" format), each diff being taken against the
 * previous item of the chain, the first one against the empty string.
 *
 * Writing (compress()) needs the xdiff extension; reading falls back to a
 * pure-PHP patch implementation when xdiff_string_bpatch() is unavailable.
 */
class DiffHistoryBlob implements HistoryBlob {
    /** @var array Uncompressed items, indexed by the key returned from addItem() */
    public $mItems = array();

    /** @var int Sum of strlen() of all items added through addItem() */
    public $mSize = 0;

    /**
     * @var array|null Chain of diffs. mDiffs[0] is a diff from the empty
     *   string; mDiffs[i] (i > 0) is a diff from the text reconstructed by
     *   mDiffs[i-1]. Null until compress() or __wakeup() fills it.
     */
    public $mDiffs;

    /** @var array Maps a position in mDiffs to the item key it reconstructs */
    public $mDiffMap;

    /** @var int Key of the default item, as used by getText() */
    public $mDefaultKey;

    /** @var string Deflated serialized form; only set between __sleep() and __wakeup() */
    public $mCompressed;

    /** @var bool True once the object went through __wakeup(); addItem() is then refused */
    public $mFrozen = false;

    /** @var int Size threshold used by isHappy() */
    public $mMaxSize = 10000000;

    /** @var int Item-count threshold used by isHappy() */
    public $mMaxCount = 100;

    /** Opcodes of the binary diff format handled by patch() */
    const XDL_BDOP_INS = 1;
    const XDL_BDOP_CPY = 2;
    const XDL_BDOP_INSB = 3;

    /**
     * @throws MWException If the zlib functions are not available
     */
    public function __construct() {
        if ( !function_exists( 'gzdeflate' ) ) {
            throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
        }
    }

    /**
     * Append a text item.
     *
     * @param string $text
     * @return int Index of the new item, usable as key for getItem()
     * @throws MWException If the object has been through sleep/wakeup
     */
    public function addItem( $text ) {
        if ( $this->mFrozen ) {
            throw new MWException( __METHOD__ . ": Cannot add more items after sleep/wakeup" );
        }

        $this->mItems[] = $text;
        $this->mSize += strlen( $text );
        $this->mDiffs = null; // later
        return count( $this->mItems ) - 1;
    }

    /**
     * Fetch an item by key.
     *
     * @param int $key Key returned by addItem()
     * @return string|bool The text, or false if there is no such item
     *   (same convention as ConcatenatedGzipHistoryBlob::getItem())
     */
    public function getItem( $key ) {
        if ( !array_key_exists( $key, $this->mItems ) ) {
            return false;
        }
        return $this->mItems[$key];
    }

    /**
     * Add a text item and make it the default text returned by getText().
     *
     * @param string $text
     */
    public function setText( $text ) {
        $this->mDefaultKey = $this->addItem( $text );
    }

    /**
     * Fetch the default text.
     *
     * @return string|bool The text, or false if no default item is stored
     */
    public function getText() {
        return $this->getItem( $this->mDefaultKey );
    }

    /**
     * Build mDiffs and mDiffMap from mItems.
     *
     * Items are split into two chains: 'main', and 'small' for items shorter
     * than half of the current main tail. Each chain is diffed internally,
     * then the chains are concatenated ('small' first) with the first diff
     * of a later chain re-based onto the tail of the preceding one.
     *
     * @throws MWException If xdiff_string_rabdiff() is not available
     */
    public function compress() {
        if ( !function_exists( 'xdiff_string_rabdiff' ) ) {
            throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
        }
        if ( isset( $this->mDiffs ) ) {
            // Already compressed
            return;
        }
        if ( !count( $this->mItems ) ) {
            // Empty
            return;
        }

        // Create two diff sequences: one for main text and one for small text
        $sequences = array(
            'small' => array(
                'tail' => '',
                'diffs' => array(),
                'map' => array(),
            ),
            'main' => array(
                'tail' => '',
                'diffs' => array(),
                'map' => array(),
            ),
        );
        $smallFactor = 0.5;

        $mItemsCount = count( $this->mItems );
        for ( $i = 0; $i < $mItemsCount; $i++ ) {
            $text = $this->mItems[$i];
            if ( $i == 0 ) {
                $seqName = 'main';
            } else {
                $mainTail = $sequences['main']['tail'];
                if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
                    $seqName = 'small';
                } else {
                    $seqName = 'main';
                }
            }
            $seq =& $sequences[$seqName];
            $tail = $seq['tail'];
            $diff = $this->diff( $tail, $text );
            $seq['diffs'][] = $diff;
            $seq['map'][] = $i;
            $seq['tail'] = $text;
        }
        unset( $seq ); // unlink dangerous alias

        // Knit the sequences together
        $tail = '';
        $this->mDiffs = array();
        $this->mDiffMap = array();
        foreach ( $sequences as $seq ) {
            if ( !count( $seq['diffs'] ) ) {
                continue;
            }
            if ( $tail === '' ) {
                $this->mDiffs[] = $seq['diffs'][0];
            } else {
                // The chain's first diff is relative to the empty string;
                // rebuild its text and re-diff it against the previous tail.
                $head = $this->patch( '', $seq['diffs'][0] );
                $this->mDiffs[] = $this->diff( $tail, $head );
            }
            $this->mDiffMap[] = $seq['map'][0];
            $diffsCount = count( $seq['diffs'] );
            for ( $i = 1; $i < $diffsCount; $i++ ) {
                $this->mDiffs[] = $seq['diffs'][$i];
                $this->mDiffMap[] = $seq['map'][$i];
            }
            $tail = $seq['tail'];
        }
    }

    /**
     * Create a binary diff that turns $t1 into $t2.
     *
     * @param string $t1 Base text
     * @param string $t2 Target text
     * @return string Binary diff
     */
    public function diff( $t1, $t2 ) {
        # Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
        # "String is not zero-terminated"
        wfSuppressWarnings();
        $diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
        wfRestoreWarnings();
        return $diff;
    }

    /**
     * Apply a binary diff to a base text.
     *
     * Uses xdiff_string_bpatch() when available, otherwise a pure-PHP
     * interpreter of the diff format: an 8-byte header (little-endian
     * 32-bit base checksum and base length) followed by INS / INSB / CPY
     * operations.
     *
     * @param string $base Base text
     * @param string $diff Binary diff
     * @return string|bool Patched text; the pure-PHP path returns false when
     *   the diff is truncated, does not match the base, or contains an
     *   unknown opcode
     */
    public function patch( $base, $diff ) {
        if ( function_exists( 'xdiff_string_bpatch' ) ) {
            wfSuppressWarnings();
            $text = xdiff_string_bpatch( $base, $diff ) . '';
            wfRestoreWarnings();
            return $text;
        }

        # Pure PHP implementation

        // Anything shorter than the header cannot be a valid diff; bail out
        // before unpack() is fed incomplete data.
        if ( strlen( $diff ) < 8 ) {
            wfDebug( __METHOD__ . ": truncated diff header\n" );
            return false;
        }

        $header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );

        # Check the checksum if hash extension is available
        $ofp = $this->xdiffAdler32( $base );
        if ( $ofp !== false && $ofp !== substr( $diff, 0, 4 ) ) {
            wfDebug( __METHOD__ . ": incorrect base checksum\n" );
            return false;
        }
        if ( $header['csize'] != strlen( $base ) ) {
            wfDebug( __METHOD__ . ": incorrect base length\n" );
            return false;
        }

        $p = 8;
        $out = '';
        while ( $p < strlen( $diff ) ) {
            $x = unpack( 'Cop', substr( $diff, $p, 1 ) );
            $op = $x['op'];
            ++$p;
            switch ( $op ) {
            case self::XDL_BDOP_INS:
                // 1-byte length followed by literal data
                $x = unpack( 'Csize', substr( $diff, $p, 1 ) );
                $p++;
                $out .= substr( $diff, $p, $x['size'] );
                $p += $x['size'];
                break;
            case self::XDL_BDOP_INSB:
                // 4-byte length followed by literal data
                $x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
                $p += 4;
                $out .= substr( $diff, $p, $x['csize'] );
                $p += $x['csize'];
                break;
            case self::XDL_BDOP_CPY:
                // 4-byte offset and 4-byte length of a range copied from the base
                $x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
                $p += 8;
                $out .= substr( $base, $x['off'], $x['csize'] );
                break;
            default:
                wfDebug( __METHOD__ . ": invalid op\n" );
                return false;
            }
        }
        return $out;
    }

    /**
     * Compute the base checksum the way LibXDiff does: Adler-32 with the
     * state starting at zero, returned as 4 raw bytes in little-endian order.
     *
     * @param string $s
     * @return string|bool Checksum bytes, or false if hash() is not available
     */
    public function xdiffAdler32( $s ) {
        if ( !function_exists( 'hash' ) ) {
            return false;
        }

        static $init;
        if ( $init === null ) {
            $init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
        }

        // The real Adler-32 checksum of $init is zero, so it initialises the
        // state to zero, as it is at the start of LibXDiff's checksum
        // algorithm. Appending the subject string then simulates LibXDiff.
        return strrev( hash( 'adler32', $init . $s, true ) );
    }

    /**
     * Rebuild mItems by applying the diffs in mDiffs one after another.
     * Does nothing when mDiffs is empty or unset.
     */
    public function uncompress() {
        if ( !$this->mDiffs ) {
            return;
        }
        $tail = '';
        $mDiffsCount = count( $this->mDiffs );
        for ( $diffKey = 0; $diffKey < $mDiffsCount; $diffKey++ ) {
            $textKey = $this->mDiffMap[$diffKey];
            $text = $this->patch( $tail, $this->mDiffs[$diffKey] );
            $this->mItems[$textKey] = $text;
            $tail = $text;
        }
    }

    /**
     * Compress, pack diffs / map / default key into mCompressed and have
     * only that property serialized.
     *
     * @return array Property names to serialize
     */
    public function __sleep() {
        $this->compress();
        if ( !count( $this->mItems ) ) {
            // Empty object
            $info = false;
        } else {
            // Take forward differences to improve the compression ratio for sequences
            $map = '';
            $prev = 0;
            foreach ( $this->mDiffMap as $i ) {
                if ( $map !== '' ) {
                    $map .= ',';
                }
                $map .= $i - $prev;
                $prev = $i;
            }
            $info = array(
                'diffs' => $this->mDiffs,
                'map' => $map
            );
        }
        if ( isset( $this->mDefaultKey ) ) {
            $info['default'] = $this->mDefaultKey;
        }
        $this->mCompressed = gzdeflate( serialize( $info ) );
        return array( 'mCompressed' );
    }

    /**
     * Unpack mCompressed, restore mDiffs / mDiffMap / mDefaultKey and
     * rebuild the items. Handles both the format written by __sleep()
     * ('map' key) and an older one carrying a 'base' text.
     */
    public function __wakeup() {
        // addItem() doesn't work if mItems is partially filled from mDiffs
        $this->mFrozen = true;
        $info = unserialize( gzinflate( $this->mCompressed ) );
        unset( $this->mCompressed );

        if ( !$info ) {
            // Empty object
            return;
        }

        if ( isset( $info['default'] ) ) {
            $this->mDefaultKey = $info['default'];
        }
        $this->mDiffs = $info['diffs'];
        if ( isset( $info['base'] ) ) {
            // Old format
            $this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
            // Turn the base text into a leading diff against the empty
            // string: zero checksum, zero base length, one INSB operation.
            array_unshift( $this->mDiffs,
                pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
                $info['base'] );
        } else {
            // New format
            $map = explode( ',', $info['map'] );
            $cur = 0;
            $this->mDiffMap = array();
            // Undo the forward differences taken in __sleep()
            foreach ( $map as $i ) {
                $cur += $i;
                $this->mDiffMap[] = $cur;
            }
        }
        $this->uncompress();
    }

    /**
     * Tell whether the blob is still below both the size and the item-count
     * thresholds (mMaxSize, mMaxCount).
     *
     * @return bool
     */
    public function isHappy() {
        return $this->mSize < $this->mMaxSize
            && count( $this->mItems ) < $this->mMaxCount;
    }

}