MediaWiki  REL1_24
UIDGenerator.php
Go to the documentation of this file.
00001 <?php
/**
 * Generator of unique identifiers.
 *
 * Provides three families of IDs:
 *  - timestamp-based 88 and 128 bit IDs (newTimestampedUID88/128), built from a
 *    millisecond timestamp, a per-millisecond counter and a node ID;
 *  - random RFC 4122 version 4 UUIDs (newUUIDv4/newRawUUIDv4);
 *  - sequential per-node counters (newSequentialPerNodeID/IDs).
 *
 * State shared between processes on the same host is kept in small files in
 * wfTempDir() which are guarded with flock().
 */
class UIDGenerator {
    /** @var UIDGenerator|null Lazily created instance used by the static entry points */
    protected static $instance = null;

    protected $nodeIdFile; // string; local file path
    protected $nodeId32; // string; node ID in binary (32 bits)
    protected $nodeId48; // string; node ID in binary (48 bits)

    protected $lockFile88; // string; local file path
    protected $lockFile128; // string; local file path

    /** @var array Map of (file path => open handle, or null if there is no usable handle) */
    protected $fileHandles = array(); // cache file handles

    const QUICK_RAND = 1; // get randomness from fast and insecure sources
    const QUICK_VOLATILE = 2; // use an APC like in-memory counter if available

    /**
     * Determine the node ID of this host and the paths of the lock files.
     *
     * The node ID is read from a cache file if that holds 12 hex digits. Otherwise
     * it is taken from the first MAC address reported by `getmac` (Windows) or
     * `/sbin/ifconfig -a`, falling back to 12 random hex digits, and then cached.
     */
    protected function __construct() {
        $this->nodeIdFile = wfTempDir() . '/mw-' . __CLASS__ . '-UID-nodeid';
        $nodeId = '';
        if ( is_file( $this->nodeIdFile ) ) {
            // A failed read yields false; normalize it to '' so the check below rejects it
            $nodeId = (string)file_get_contents( $this->nodeIdFile );
        }
        // Try to get some ID that uniquely identifies this machine (RFC 4122)...
        if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
            wfSuppressWarnings();
            if ( wfIsWindows() ) {
                // http://technet.microsoft.com/en-us/library/bb490913.aspx
                $csv = trim( wfShellExec( 'getmac /NH /FO CSV' ) );
                $line = substr( $csv, 0, strcspn( $csv, "\n" ) );
                $info = str_getcsv( $line );
                $nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : '';
            } elseif ( is_executable( '/sbin/ifconfig' ) ) { // Linux/BSD/Solaris/OS X
                // See http://linux.die.net/man/8/ifconfig
                $m = array();
                // Case-insensitive, like the validation patterns: some ifconfig
                // versions print the hardware address in upper case.
                preg_match( '/\s([0-9a-f]{2}(:[0-9a-f]{2}){5})\s/i',
                    wfShellExec( '/sbin/ifconfig -a' ), $m );
                $nodeId = isset( $m[1] ) ? strtolower( str_replace( ':', '', $m[1] ) ) : '';
            }
            wfRestoreWarnings();
            if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
                // No usable MAC address: use a random node ID instead
                $nodeId = MWCryptRand::generateHex( 12, true );
                $nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); // set multicast bit
            }
            file_put_contents( $this->nodeIdFile, $nodeId ); // cache
        }
        // 32 bit form: leading 8 hex digits of the SHA-1 of the node ID
        $this->nodeId32 = wfBaseConvert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 );
        // 48 bit form: the node ID itself
        $this->nodeId48 = wfBaseConvert( $nodeId, 16, 2, 48 );
        // If different processes run as different users, they may have different temp dirs.
        // This is dealt with by initializing the clock sequence number and counters randomly.
        $this->lockFile88 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-88';
        $this->lockFile128 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-128';
    }

    /**
     * Get the shared generator instance, creating it on first use.
     *
     * @return UIDGenerator
     */
    protected static function singleton() {
        if ( self::$instance === null ) {
            self::$instance = new self();
        }

        return self::$instance;
    }

    /**
     * Get a new 88 bit ID made of a 46 bit millisecond timestamp, a 10 bit
     * counter and the 32 bit node ID, rendered in the given base.
     *
     * @param int $base Numeric base of the result (2-36)
     * @return string
     * @throws MWException If $base is invalid or no ID could be generated
     */
    public static function newTimestampedUID88( $base = 10 ) {
        if ( !is_int( $base ) || $base > 36 || $base < 2 ) {
            throw new MWException( "Base must be an integer between 2 and 36" );
        }
        $gen = self::singleton();
        $time = $gen->getTimestampAndDelay( 'lockFile88', 1, 1024 );

        return wfBaseConvert( $gen->getTimestampedID88( $time ), 2, $base );
    }

    /**
     * Assemble the 88 bit binary string for newTimestampedUID88().
     *
     * @param array $info Result of getTimestampAndDelay(): (time, counter, clock sequence)
     * @return string 88 characters of '0'/'1'
     * @throws MWException If the assembled string is not 88 bits long
     */
    protected function getTimestampedID88( array $info ) {
        list( $time, $counter ) = $info;
        // Start with the 46 bit "milliseconds since epoch" field
        $id_bin = $this->millisecondsSinceEpochBinary( $time );
        // Add a 10 bit counter resulting in 56 bits total
        $id_bin .= str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT );
        // Add the 32 bit node ID resulting in 88 bits total
        $id_bin .= $this->nodeId32;
        // Sanity check: any field wider than expected shows up as a wrong total length
        if ( strlen( $id_bin ) !== 88 ) {
            throw new MWException( "Detected overflow for millisecond timestamp." );
        }

        return $id_bin;
    }

    /**
     * Get a new 128 bit ID made of a 46 bit millisecond timestamp, a 20 bit
     * counter, a 14 bit clock sequence number and the 48 bit node ID, rendered
     * in the given base.
     *
     * @param int $base Numeric base of the result (2-36)
     * @return string
     * @throws MWException If $base is invalid or no ID could be generated
     */
    public static function newTimestampedUID128( $base = 10 ) {
        if ( !is_int( $base ) || $base > 36 || $base < 2 ) {
            throw new MWException( "Base must be an integer between 2 and 36" );
        }
        $gen = self::singleton();
        $time = $gen->getTimestampAndDelay( 'lockFile128', 16384, 1048576 );

        return wfBaseConvert( $gen->getTimestampedID128( $time ), 2, $base );
    }

    /**
     * Assemble the 128 bit binary string for newTimestampedUID128().
     *
     * @param array $info Result of getTimestampAndDelay(): (time, counter, clock sequence)
     * @return string 128 characters of '0'/'1'
     * @throws MWException If the assembled string is not 128 bits long
     */
    protected function getTimestampedID128( array $info ) {
        list( $time, $counter, $clkSeq ) = $info;
        // Start with the 46 bit "milliseconds since epoch" field
        $id_bin = $this->millisecondsSinceEpochBinary( $time );
        // Add a 20 bit counter resulting in 66 bits total
        $id_bin .= str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT );
        // Add a 14 bit clock sequence number resulting in 80 bits total
        $id_bin .= str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT );
        // Add the 48 bit node ID resulting in 128 bits total
        $id_bin .= $this->nodeId48;
        // Sanity check: any field wider than expected shows up as a wrong total length
        if ( strlen( $id_bin ) !== 128 ) {
            throw new MWException( "Detected overflow for millisecond timestamp." );
        }

        return $id_bin;
    }

    /**
     * Get a random version 4 UUID in the usual 8-4-4-4-12 hex layout.
     *
     * @param int $flags Bitfield; QUICK_RAND takes the digits from wfRandomString()
     *   instead of MWCryptRand
     * @return string
     */
    public static function newUUIDv4( $flags = 0 ) {
        // 31 random hex digits; the 32nd digit of the UUID is the fixed version "4"
        $hex = ( $flags & self::QUICK_RAND )
            ? wfRandomString( 31 )
            : MWCryptRand::generateHex( 31 );

        return sprintf( '%s-%s-%s-%s-%s',
            // "time_low" (32 bits)
            substr( $hex, 0, 8 ),
            // "time_mid" (16 bits)
            substr( $hex, 8, 4 ),
            // "time_hi_and_version" (16 bits)
            '4' . substr( $hex, 12, 3 ),
            // "clk_seq_hi_res" (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits)
            dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) ) . $hex[16] . substr( $hex, 17, 2 ),
            // "node" (48 bits)
            substr( $hex, 19, 12 )
        );
    }

    /**
     * Get a random version 4 UUID as 32 hex digits without the dashes.
     *
     * @param int $flags Bitfield, see newUUIDv4()
     * @return string
     */
    public static function newRawUUIDv4( $flags = 0 ) {
        return str_replace( '-', '', self::newUUIDv4( $flags ) );
    }

    /**
     * Get the next value of the persistent counter named by $bucket.
     *
     * @param string $bucket Counter name
     * @param int $bits Width of the result in bits (16-48)
     * @param int $flags Bitfield; QUICK_VOLATILE allows an in-memory accelerator cache
     * @return float Integer value carried as a float
     * @throws MWException
     */
    public static function newSequentialPerNodeID( $bucket, $bits = 48, $flags = 0 ) {
        return current( self::newSequentialPerNodeIDs( $bucket, $bits, 1, $flags ) );
    }

    /**
     * Get the next $count values of the persistent counter named by $bucket.
     *
     * @param string $bucket Counter name
     * @param int $bits Width of each result in bits (16-48)
     * @param int $count Number of IDs to return (at most 10000)
     * @param int $flags Bitfield; QUICK_VOLATILE allows an in-memory accelerator cache
     * @return array List of integer values carried as floats
     * @throws MWException
     */
    public static function newSequentialPerNodeIDs( $bucket, $bits, $count, $flags = 0 ) {
        $gen = self::singleton();
        return $gen->getSequentialPerNodeIDs( $bucket, $bits, $count, $flags );
    }

    /**
     * Advance the counter named by $bucket by $count and return the values consumed.
     *
     * The counter lives in the accelerator cache when QUICK_VOLATILE is set and such
     * a cache is usable, and otherwise in a flock()-guarded file in wfTempDir().
     *
     * @param string $bucket Counter name
     * @param int $bits Width of each result in bits (16-48)
     * @param int $count Number of IDs to return (at most 10000)
     * @param int $flags Bitfield of class constants
     * @return array List of integer values carried as floats; empty if $count <= 0
     * @throws MWException If $count or $bits is out of range or the counter is unusable
     */
    protected function getSequentialPerNodeIDs( $bucket, $bits, $count, $flags ) {
        if ( $count <= 0 ) {
            return array(); // nothing to do
        } elseif ( $count > 10000 ) {
            throw new MWException( "Number of requested IDs ($count) is too high." );
        } elseif ( $bits < 16 || $bits > 48 ) {
            throw new MWException( "Requested bit size ($bits) is out of range." );
        }

        $counter = null; // post-increment persistent counter value

        // Use APC/eAccelerator/xcache if requested, available, and not in CLI mode;
        // Counter values would not survive across script instances in CLI mode.
        $cache = null;
        if ( ( $flags & self::QUICK_VOLATILE ) && PHP_SAPI !== 'cli' ) {
            try {
                $cache = ObjectCache::newAccelerator( array() );
            } catch ( MWException $e ) {
                // not supported
            }
        }
        if ( $cache ) {
            $counter = $cache->incr( $bucket, $count );
            if ( $counter === false ) {
                // Key does not exist yet: create it with the post-increment value
                if ( !$cache->add( $bucket, (int)$count ) ) {
                    throw new MWException( 'Unable to set value to ' . get_class( $cache ) );
                }
                $counter = $count;
            }
        }

        // Note: use of fmod() avoids "division by zero" on 32 bit machines
        if ( $counter === null ) {
            $path = wfTempDir() . '/mw-' . __CLASS__ . '-' . rawurlencode( $bucket ) . '-48';
            // Open (or reuse) and lock the counter file
            $handle = $this->getLockedFileHandle( $path );
            // Fetch the counter value and increment it...
            rewind( $handle );
            // Explicit casts: a new or empty file yields false/'' which must count as 0
            // and which floor() rejects with a TypeError on PHP 8.
            $counter = floor( (float)trim( (string)fgets( $handle ) ) ) + $count;
            // Write back the new counter value
            ftruncate( $handle, 0 );
            rewind( $handle );
            // Wrap around as needed. Format explicitly: a plain float-to-string cast
            // follows the "precision" ini setting and switches to lossy scientific
            // notation for large values.
            fwrite( $handle, sprintf( '%.0F', fmod( $counter, pow( 2, 48 ) ) ) );
            fflush( $handle );
            // Release the UID lock file
            flock( $handle, LOCK_UN );
        }

        $ids = array();
        $divisor = pow( 2, $bits );
        $currentId = floor( $counter - $count ); // pre-increment counter value
        for ( $i = 0; $i < $count; ++$i ) {
            $ids[] = fmod( ++$currentId, $divisor );
        }

        return $ids;
    }

    /**
     * Get an exclusively locked read/write handle for a state file.
     *
     * Handles are cached in $this->fileHandles and reused by later calls. The
     * caller is responsible for releasing the lock with flock( $handle, LOCK_UN ).
     *
     * @param string $path Local file path; the file is created if missing
     * @return resource
     * @throws MWException If the file cannot be opened or locked
     */
    protected function getLockedFileHandle( $path ) {
        if ( isset( $this->fileHandles[$path] ) ) {
            $handle = $this->fileHandles[$path];
        } else {
            $handle = fopen( $path, 'cb+' );
            $this->fileHandles[$path] = $handle ?: null; // cache
        }
        if ( $handle === false ) {
            throw new MWException( "Could not open '{$path}'." );
        } elseif ( !flock( $handle, LOCK_EX ) ) {
            fclose( $handle );
            // Drop the closed handle from the cache so that it is neither reused by
            // the next call nor closed a second time in __destruct().
            $this->fileHandles[$path] = null;
            throw new MWException( "Could not acquire '{$path}'." );
        }

        return $handle;
    }

    /**
     * Get the timestamp, counter and clock sequence number for the next
     * timestamp-based ID, and record them in the given lock file.
     *
     * The lock file holds "<clk seq> <sec> <msec> <counter> <offset>" for the last
     * ID issued. If the clock is slightly behind the recorded time this waits for it
     * to catch up; if it is far behind, the clock sequence number is bumped.
     *
     * @param string $lockFile Name of the property holding the lock file path
     * @param int $clockSeqSize Number of distinct clock sequence values
     * @param int $counterSize Number of distinct counter values
     * @return array ( array( sec, msec ), counter, clock sequence number )
     * @throws MWException On file errors, counter overflow or clock drift
     */
    protected function getTimestampAndDelay( $lockFile, $clockSeqSize, $counterSize ) {
        // Open (or reuse) and lock the UID lock file
        $handle = $this->getLockedFileHandle( $this->$lockFile );
        // Get the current timestamp, clock sequence number, last time, and counter
        rewind( $handle );
        // "<clk seq> <sec> <msec> <counter> <offset>"
        $data = explode( ' ', (string)fgets( $handle ) );
        $clockChanged = false; // clock set back significantly?
        if ( count( $data ) == 5 ) { // last UID info already initialized
            $clkSeq = (int)$data[0] % $clockSeqSize;
            $prevTime = array( (int)$data[1], (int)$data[2] );
            $offset = (int)$data[4] % $counterSize; // random counter offset
            $counter = 0; // counter for UIDs with the same timestamp
            // Delay until the clock reaches the time of the last ID.
            // This detects any microtime() drift among processes.
            $time = $this->timeWaitUntil( $prevTime );
            if ( !$time ) { // too long to delay?
                $clockChanged = true; // bump clock sequence number
                $time = self::millitime();
            } elseif ( $time == $prevTime ) {
                // Bump the counter if there are timestamp collisions
                $counter = (int)$data[3] % $counterSize;
                if ( ++$counter >= $counterSize ) { // sanity (starts at 0)
                    flock( $handle, LOCK_UN ); // abort
                    throw new MWException( "Counter overflow for timestamp value." );
                }
            }
        } else { // last UID info not initialized
            $clkSeq = mt_rand( 0, $clockSeqSize - 1 );
            $counter = 0;
            $offset = mt_rand( 0, $counterSize - 1 );
            $time = self::millitime();
        }
        // microtime() and gettimeofday() can drift from time() at least on Windows.
        // The drift is immediate for processes running while the system clock changes.
        // time() does not have this problem. See https://bugs.php.net/bug.php?id=42659.
        if ( abs( time() - $time[0] ) >= 2 ) {
            // We don't want processes using too high or low timestamps to avoid duplicate
            // UIDs and clock sequence number churn. This process should just be restarted.
            flock( $handle, LOCK_UN ); // abort
            throw new MWException( "Process clock is outdated or drifted." );
        }
        // If microtime() is synced and a clock change was detected, then the clock went back
        if ( $clockChanged ) {
            // Bump the clock sequence number and also randomize the counter offset,
            // which is useful for UIDs that do not include the clock sequence number.
            $clkSeq = ( $clkSeq + 1 ) % $clockSeqSize;
            $offset = mt_rand( 0, $counterSize - 1 );
            trigger_error( "Clock was set back; sequence number incremented." );
        }
        // Update the (clock sequence number, timestamp, counter)
        ftruncate( $handle, 0 );
        rewind( $handle );
        fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" );
        fflush( $handle );
        // Release the UID lock file
        flock( $handle, LOCK_UN );

        return array( $time, ( $counter + $offset ) % $counterSize, $clkSeq );
    }

    /**
     * Busy-wait until the current time has reached $time.
     *
     * Gives up as soon as the remaining wait is more than 10 milliseconds.
     *
     * @param array $time Target time as ( seconds, milliseconds )
     * @return array|bool Current ( seconds, milliseconds ), or false if the wait is too long
     */
    protected function timeWaitUntil( array $time ) {
        do {
            $ct = self::millitime();
            // Arrays of equal size are compared element by element, see
            // http://php.net/manual/en/language.operators.comparison.php
            if ( $ct >= $time ) {
                return $ct; // current timestamp has reached $time
            }
        } while ( ( ( $time[0] - $ct[0] ) * 1000 + ( $time[1] - $ct[1] ) ) <= 10 );

        return false;
    }

    /**
     * Render a ( seconds, milliseconds ) pair as the 46 bit timestamp field.
     *
     * Values that need more than 46 bits keep only their 46 lowest bits; only
     * values above 2^52 are rejected.
     *
     * @param array $time ( seconds, milliseconds )
     * @return string Binary digits; 46 of them provided wfBaseConvert() pads to the
     *   requested width (it is defined outside this class)
     * @throws MWException If the millisecond timestamp exceeds 2^52
     */
    protected function millisecondsSinceEpochBinary( array $time ) {
        list( $sec, $msec ) = $time;
        $ts = 1000 * $sec + $msec;
        if ( $ts > pow( 2, 52 ) ) {
            throw new MWException( __METHOD__ .
                ': sorry, this function doesn\'t work after the year 144680' );
        }

        return substr( wfBaseConvert( $ts, 10, 2, 46 ), -46 );
    }

    /**
     * Get the current time with millisecond resolution.
     *
     * @return array ( seconds since epoch, milliseconds within that second )
     */
    protected static function millitime() {
        // microtime() returns "<fraction of a second> <seconds>"
        list( $fraction, $sec ) = explode( ' ', microtime() );

        return array( (int)$sec, (int)( $fraction * 1000 ) );
    }

    /**
     * Close all cached file handles and delete the files this instance knows of,
     * including the node ID cache file.
     */
    protected function deleteCacheFiles() {
        // Bug: 44850
        foreach ( $this->fileHandles as $path => $handle ) {
            if ( $handle !== null ) {
                fclose( $handle );
            }
            if ( is_file( $path ) ) {
                unlink( $path );
            }
            unset( $this->fileHandles[$path] );
        }
        if ( is_file( $this->nodeIdFile ) ) {
            unlink( $this->nodeIdFile );
        }
    }

    /**
     * Remove the state files of the shared instance; meant for unit test clean-up.
     */
    public static function unitTestTearDown() {
        // Bug: 44850
        $gen = self::singleton();
        $gen->deleteCacheFiles();
    }

    /**
     * Close every file handle that is still cached.
     */
    public function __destruct() {
        // array_filter() skips the null entries that mark unusable handles
        array_map( 'fclose', array_filter( $this->fileHandles ) );
    }
}
00507 }