MediaWiki
REL1_24
|
<?php

/**
 * Generator of unique identifiers of several kinds:
 *   - 88 and 128 bit IDs built from a millisecond timestamp, a counter and a node ID
 *   - random version 4 UUIDs
 *   - sequential counters, kept per bucket in a local file or an in-memory cache
 *
 * State shared between processes on the same machine (last timestamp, counters,
 * cached node ID) is kept in files under wfTempDir() and guarded with flock().
 */
class UIDGenerator {
	/** @var UIDGenerator|null Lazily created instance returned by singleton() */
	protected static $instance = null;

	protected $nodeIdFile; // string; local file path
	protected $nodeId32; // string; node ID in binary (32 bits)
	protected $nodeId48; // string; node ID in binary (48 bits)

	protected $lockFile88; // string; local file path
	protected $lockFile128; // string; local file path

	/** @var array Map of (file path => open file handle, or null if it could not be used) */
	protected $fileHandles = array(); // cache file handles

	const QUICK_RAND = 1; // get randomness from fast and insecure sources
	const QUICK_VOLATILE = 2; // use an APC like in-memory counter if available

	/**
	 * Determine the node ID of this machine and set up the lock file paths.
	 *
	 * The node ID is read from a cache file when that file holds 12 hex digits.
	 * Otherwise it is taken from the output of "getmac" (Windows) or
	 * "/sbin/ifconfig -a", falling back to 12 random hex digits, and is then
	 * written to the cache file.
	 */
	protected function __construct() {
		$this->nodeIdFile = wfTempDir() . '/mw-' . __CLASS__ . '-UID-nodeid';
		$nodeId = '';
		if ( is_file( $this->nodeIdFile ) ) {
			$nodeId = file_get_contents( $this->nodeIdFile );
		}
		// Try to get some ID that uniquely identifies this machine (RFC 4122)...
		if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
			wfSuppressWarnings();
			if ( wfIsWindows() ) {
				// http://technet.microsoft.com/en-us/library/bb490913.aspx
				$csv = trim( wfShellExec( 'getmac /NH /FO CSV' ) );
				// Only the first line of output is used
				$line = substr( $csv, 0, strcspn( $csv, "\n" ) );
				$info = str_getcsv( $line );
				$nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : '';
			} elseif ( is_executable( '/sbin/ifconfig' ) ) { // Linux/BSD/Solaris/OS X
				// See http://linux.die.net/man/8/ifconfig
				$m = array();
				preg_match( '/\s([0-9a-f]{2}(:[0-9a-f]{2}){5})\s/',
					wfShellExec( '/sbin/ifconfig -a' ), $m );
				$nodeId = isset( $m[1] ) ? str_replace( ':', '', $m[1] ) : '';
			}
			wfRestoreWarnings();
			if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
				// No usable hardware address was found; make up a random node ID
				$nodeId = MWCryptRand::generateHex( 12, true );
				$nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); // set multicast bit
			}
			file_put_contents( $this->nodeIdFile, $nodeId ); // cache
		}
		// The 32 bit node ID is derived from the first 8 hex digits of the SHA-1 of the node ID
		$this->nodeId32 = wfBaseConvert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 );
		$this->nodeId48 = wfBaseConvert( $nodeId, 16, 2, 48 );
		// If different processes run as different users, they may have different temp dirs.
		// This is dealt with by initializing the clock sequence number and counters randomly.
		$this->lockFile88 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-88';
		$this->lockFile128 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-128';
	}

	/**
	 * Get the shared generator instance, creating it on first use.
	 *
	 * @return UIDGenerator
	 */
	protected static function singleton() {
		if ( self::$instance === null ) {
			self::$instance = new self();
		}

		return self::$instance;
	}

	/**
	 * Get a new 88 bit ID made of a 46 bit millisecond timestamp, a 10 bit
	 * counter and the 32 bit node ID, expressed in the given base.
	 *
	 * @param int $base Numeric base of the returned string (2-36)
	 * @return string
	 * @throws MWException If $base is not an integer between 2 and 36
	 */
	public static function newTimestampedUID88( $base = 10 ) {
		if ( !is_integer( $base ) || $base > 36 || $base < 2 ) {
			throw new MWException( "Base must be an integer between 2 and 36" );
		}
		$gen = self::singleton();
		$time = $gen->getTimestampAndDelay( 'lockFile88', 1, 1024 );

		return wfBaseConvert( $gen->getTimestampedID88( $time ), 2, $base );
	}

	/**
	 * Assemble the 88 character binary string for newTimestampedUID88().
	 *
	 * @param array $info (UIDGenerator::millitime(), counter) and optionally more
	 *   elements, which are ignored; see getTimestampAndDelay()
	 * @return string 88 characters of '0'/'1'
	 * @throws MWException If the assembled string is not exactly 88 characters
	 */
	protected function getTimestampedID88( array $info ) {
		list( $time, $counter ) = $info;
		// Start with the 46 bit form of "milliseconds since epoch"
		$id_bin = $this->millisecondsSinceEpochBinary( $time );
		// Add a 10 bit counter resulting in 56 bits total
		$id_bin .= str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT );
		// Add the 32 bit node ID resulting in 88 bits total
		$id_bin .= $this->nodeId32;
		// Any part wider than expected makes the total length wrong
		if ( strlen( $id_bin ) !== 88 ) {
			throw new MWException( "Detected overflow for millisecond timestamp." );
		}

		return $id_bin;
	}

	/**
	 * Get a new 128 bit ID made of a 46 bit millisecond timestamp, a 20 bit
	 * counter, a 14 bit clock sequence number and the 48 bit node ID,
	 * expressed in the given base.
	 *
	 * @param int $base Numeric base of the returned string (2-36)
	 * @return string
	 * @throws MWException If $base is not an integer between 2 and 36
	 */
	public static function newTimestampedUID128( $base = 10 ) {
		if ( !is_integer( $base ) || $base > 36 || $base < 2 ) {
			throw new MWException( "Base must be an integer between 2 and 36" );
		}
		$gen = self::singleton();
		$time = $gen->getTimestampAndDelay( 'lockFile128', 16384, 1048576 );

		return wfBaseConvert( $gen->getTimestampedID128( $time ), 2, $base );
	}

	/**
	 * Assemble the 128 character binary string for newTimestampedUID128().
	 *
	 * @param array $info (UIDGenerator::millitime(), counter, clock sequence);
	 *   see getTimestampAndDelay()
	 * @return string 128 characters of '0'/'1'
	 * @throws MWException If the assembled string is not exactly 128 characters
	 */
	protected function getTimestampedID128( array $info ) {
		list( $time, $counter, $clkSeq ) = $info;
		// Start with the 46 bit form of "milliseconds since epoch"
		$id_bin = $this->millisecondsSinceEpochBinary( $time );
		// Add a 20 bit counter resulting in 66 bits total
		$id_bin .= str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT );
		// Add a 14 bit clock sequence number resulting in 80 bits total
		$id_bin .= str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT );
		// Add the 48 bit node ID resulting in 128 bits total
		$id_bin .= $this->nodeId48;
		// Any part wider than expected makes the total length wrong
		if ( strlen( $id_bin ) !== 128 ) {
			throw new MWException( "Detected overflow for millisecond timestamp." );
		}

		return $id_bin;
	}

	/**
	 * Return an RFC 4122 style version 4 UUID in 8-4-4-4-12 hex form.
	 *
	 * @param int $flags Bitfield (supports UIDGenerator::QUICK_RAND)
	 * @return string
	 */
	public static function newUUIDv4( $flags = 0 ) {
		$hex = ( $flags & self::QUICK_RAND )
			? wfRandomString( 31 )
			: MWCryptRand::generateHex( 31 );

		return sprintf( '%s-%s-%s-%s-%s',
			// "time_low" (32 bits)
			substr( $hex, 0, 8 ),
			// "time_mid" (16 bits)
			substr( $hex, 8, 4 ),
			// "time_hi_and_version" (16 bits)
			'4' . substr( $hex, 12, 3 ),
			// "clk_seq_hi_res" (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits)
			dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) ) . $hex[16] . substr( $hex, 17, 2 ),
			// "node" (48 bits)
			substr( $hex, 19, 12 )
		);
	}

	/**
	 * Return a version 4 UUID as 32 hex digits, without the dashes.
	 *
	 * @param int $flags Bitfield (supports UIDGenerator::QUICK_RAND)
	 * @return string
	 */
	public static function newRawUUIDv4( $flags = 0 ) {
		return str_replace( '-', '', self::newUUIDv4( $flags ) );
	}

	/**
	 * Return the next ID of the per-node sequence kept for a bucket.
	 *
	 * @param string $bucket Arbitrary bucket name (should be ASCII)
	 * @param int $bits Bit size (<=48) of resulting numbers before wrap-around
	 * @param int $flags Bitfield (supports UIDGenerator::QUICK_VOLATILE)
	 * @return float Integer value as float
	 * @throws MWException
	 */
	public static function newSequentialPerNodeID( $bucket, $bits = 48, $flags = 0 ) {
		return current( self::newSequentialPerNodeIDs( $bucket, $bits, 1, $flags ) );
	}

	/**
	 * Return the next $count IDs of the per-node sequence kept for a bucket.
	 *
	 * @param string $bucket Arbitrary bucket name (should be ASCII)
	 * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around
	 * @param int $count Number of IDs to return (1 to 10000)
	 * @param int $flags Bitfield (supports UIDGenerator::QUICK_VOLATILE)
	 * @return array Ordered list of float integer values
	 * @throws MWException
	 */
	public static function newSequentialPerNodeIDs( $bucket, $bits, $count, $flags = 0 ) {
		$gen = self::singleton();
		return $gen->getSequentialPerNodeIDs( $bucket, $bits, $count, $flags );
	}

	/**
	 * Advance the persistent counter of a bucket by $count and return the
	 * values that were consumed, each reduced modulo 2^$bits.
	 *
	 * The counter lives in an accelerator cache when QUICK_VOLATILE is set,
	 * one is available and PHP is not running in CLI mode; otherwise it lives
	 * in a locked file under wfTempDir().
	 *
	 * @param string $bucket Arbitrary bucket name (should be ASCII)
	 * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around
	 * @param int $count Number of IDs to return (1 to 10000)
	 * @param int $flags Bitfield (supports UIDGenerator::QUICK_VOLATILE)
	 * @return array Ordered list of float integer values; empty if $count <= 0
	 * @throws MWException If an argument is out of range or the counter cannot be stored
	 */
	protected function getSequentialPerNodeIDs( $bucket, $bits, $count, $flags ) {
		if ( $count <= 0 ) {
			return array(); // nothing to do
		} elseif ( $count > 10000 ) {
			throw new MWException( "Number of requested IDs ($count) is too high." );
		} elseif ( $bits < 16 || $bits > 48 ) {
			throw new MWException( "Requested bit size ($bits) is out of range." );
		}

		$counter = null; // post-increment persistent counter value

		// Use APC/eAccelerator/xcache if requested, available, and not in CLI mode;
		// Counter values would not survive across script instances in CLI mode.
		$cache = null;
		if ( ( $flags & self::QUICK_VOLATILE ) && PHP_SAPI !== 'cli' ) {
			try {
				$cache = ObjectCache::newAccelerator( array() );
			} catch ( MWException $e ) {
				// not supported
			}
		}
		if ( $cache ) {
			$counter = $cache->incr( $bucket, $count );
			if ( $counter === false ) {
				// No counter to increment yet; start it at $count
				if ( !$cache->add( $bucket, (int)$count ) ) {
					throw new MWException( 'Unable to set value to ' . get_class( $cache ) );
				}
				$counter = $count;
			}
		}

		// Note: use of fmod() avoids "division by zero" on 32 bit machines
		if ( $counter === null ) {
			$path = wfTempDir() . '/mw-' . __CLASS__ . '-' . rawurlencode( $bucket ) . '-48';
			// Get and acquire the UID lock file
			$handle = $this->acquireLockedHandle( $path );
			// Fetch the counter value and increment it...
			rewind( $handle );
			// A new (empty) file reads as 0; the value is kept as a float
			$counter = floor( (float)trim( (string)fgets( $handle ) ) ) + $count;
			// Write back the new counter value
			ftruncate( $handle, 0 );
			rewind( $handle );
			// Wrap around as needed. The value is formatted explicitly since the default
			// float to string conversion switches to a lossy exponent form for long numbers.
			fwrite( $handle, sprintf( '%.0F', fmod( $counter, pow( 2, 48 ) ) ) );
			fflush( $handle );
			// Release the UID lock file
			flock( $handle, LOCK_UN );
		}

		$ids = array();
		$divisor = pow( 2, $bits );
		$currentId = floor( $counter - $count ); // pre-increment counter value
		for ( $i = 0; $i < $count; ++$i ) {
			$ids[] = fmod( ++$currentId, $divisor );
		}

		return $ids;
	}

	/**
	 * Get an exclusively locked handle for a lock file, opening the file and
	 * caching the handle in $this->fileHandles on first use.
	 *
	 * The caller must release the lock with flock( $handle, LOCK_UN ).
	 *
	 * @param string $path Local file path
	 * @return resource Open file handle holding an exclusive lock
	 * @throws MWException If the file cannot be opened or the lock cannot be acquired
	 */
	protected function acquireLockedHandle( $path ) {
		if ( isset( $this->fileHandles[$path] ) ) {
			$handle = $this->fileHandles[$path];
		} else {
			$handle = fopen( $path, 'cb+' );
			$this->fileHandles[$path] = $handle ?: null; // cache
		}
		if ( $handle === false ) {
			throw new MWException( "Could not open '{$path}'." );
		} elseif ( !flock( $handle, LOCK_EX ) ) {
			fclose( $handle );
			// Forget the closed handle so that it is neither reused nor closed again
			$this->fileHandles[$path] = null;
			throw new MWException( "Could not acquire '{$path}'." );
		}

		return $handle;
	}

	/**
	 * Get a timestamp, counter and clock sequence number for a new ID, and
	 * record them in the given lock file while holding its lock.
	 *
	 * If the last recorded timestamp is ahead of the current time, this waits
	 * for the clock to catch up (see timeWaitUntil()); if that would take too
	 * long, the clock sequence number is bumped instead.
	 *
	 * @param string $lockFile Name of the property holding the lock file path
	 * @param int $clockSeqSize The number of possible clock sequence values
	 * @param int $counterSize The number of possible counter values
	 * @return array (result of UIDGenerator::millitime(), counter, clock sequence)
	 * @throws MWException
	 */
	protected function getTimestampAndDelay( $lockFile, $clockSeqSize, $counterSize ) {
		// Get and acquire the UID lock file
		$handle = $this->acquireLockedHandle( $this->$lockFile );
		// Get the current timestamp, clock sequence number, last time, and counter
		rewind( $handle );
		// "<clk seq> <sec> <msec> <counter> <offset>"
		$data = explode( ' ', (string)fgets( $handle ) );
		$clockChanged = false; // clock set back significantly?
		if ( count( $data ) == 5 ) { // last UID info already initialized
			$clkSeq = (int)$data[0] % $clockSeqSize;
			$prevTime = array( (int)$data[1], (int)$data[2] );
			$offset = (int)$data[4] % $counterSize; // random counter offset
			$counter = 0; // counter for UIDs with the same timestamp
			// Delay until the clock reaches the time of the last ID.
			// This detects any microtime() drift among processes.
			$time = $this->timeWaitUntil( $prevTime );
			if ( !$time ) { // too long to delay?
				$clockChanged = true; // bump clock sequence number
				$time = self::millitime();
			} elseif ( $time == $prevTime ) {
				// Bump the counter if there are timestamp collisions
				$counter = (int)$data[3] % $counterSize;
				if ( ++$counter >= $counterSize ) { // sanity (starts at 0)
					flock( $handle, LOCK_UN ); // abort
					throw new MWException( "Counter overflow for timestamp value." );
				}
			}
		} else { // last UID info not initialized
			$clkSeq = mt_rand( 0, $clockSeqSize - 1 );
			$counter = 0;
			$offset = mt_rand( 0, $counterSize - 1 );
			$time = self::millitime();
		}
		// microtime() and gettimeofday() can drift from time() at least on Windows.
		// The drift is immediate for processes running while the system clock changes.
		// time() does not have this problem. See https://bugs.php.net/bug.php?id=42659.
		if ( abs( time() - $time[0] ) >= 2 ) {
			// We don't want processes using too high or low timestamps to avoid duplicate
			// UIDs and clock sequence number churn. This process should just be restarted.
			flock( $handle, LOCK_UN ); // abort
			throw new MWException( "Process clock is outdated or drifted." );
		}
		// If microtime() is synced and a clock change was detected, then the clock went back
		if ( $clockChanged ) {
			// Bump the clock sequence number and also randomize the counter offset,
			// which is useful for UIDs that do not include the clock sequence number.
			$clkSeq = ( $clkSeq + 1 ) % $clockSeqSize;
			$offset = mt_rand( 0, $counterSize - 1 );
			trigger_error( "Clock was set back; sequence number incremented." );
		}
		// Update the (clock sequence number, timestamp, counter)
		ftruncate( $handle, 0 );
		rewind( $handle );
		fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" );
		fflush( $handle );
		// Release the UID lock file
		flock( $handle, LOCK_UN );

		return array( $time, ( $counter + $offset ) % $counterSize, $clkSeq );
	}

	/**
	 * Busy-wait until the current time reaches the given time, giving up once
	 * the remaining gap is more than 10 milliseconds.
	 *
	 * @param array $time Result of UIDGenerator::millitime()
	 * @return array|bool UIDGenerator::millitime() result on success; false on failure
	 */
	protected function timeWaitUntil( array $time ) {
		do {
			$ct = self::millitime();
			if ( $ct >= $time ) { // http://php.net/manual/en/language.operators.comparison.php
				return $ct; // current timestamp is higher than $time
			}
		} while ( ( ( $time[0] - $ct[0] ) * 1000 + ( $time[1] - $ct[1] ) ) <= 10 );

		return false;
	}

	/**
	 * Convert a (seconds, milliseconds) pair into a 46 character binary string
	 * of the number of milliseconds since the epoch.
	 *
	 * @param array $time Result of UIDGenerator::millitime()
	 * @return string 46 characters of '0'/'1'
	 * @throws MWException If the millisecond count is greater than 2^52
	 */
	protected function millisecondsSinceEpochBinary( array $time ) {
		list( $sec, $msec ) = $time;
		$ts = 1000 * $sec + $msec;
		if ( $ts > pow( 2, 52 ) ) {
			throw new MWException( __METHOD__ .
				': sorry, this function doesn\'t work after the year 144680' );
		}

		// Only the last (lowest order) 46 binary digits are kept
		return substr( wfBaseConvert( $ts, 10, 2, 46 ), -46 );
	}

	/**
	 * Get the current time split into whole seconds and milliseconds.
	 *
	 * @return array (current time in seconds, milliseconds since then)
	 */
	protected static function millitime() {
		// microtime() returns "<fraction of a second> <seconds>"
		list( $msec, $sec ) = explode( ' ', microtime() );

		return array( (int)$sec, (int)( $msec * 1000 ) );
	}

	/**
	 * Close all cached file handles and delete the files they refer to, as
	 * well as the node ID cache file.
	 *
	 * This is called by unitTestTearDown().
	 */
	protected function deleteCacheFiles() {
		// Bug: 44850
		foreach ( $this->fileHandles as $path => $handle ) {
			if ( $handle !== null ) {
				fclose( $handle );
			}
			if ( is_file( $path ) ) {
				unlink( $path );
			}
			unset( $this->fileHandles[$path] );
		}
		if ( is_file( $this->nodeIdFile ) ) {
			unlink( $this->nodeIdFile );
		}
	}

	/**
	 * Remove the files created by the generator; see deleteCacheFiles().
	 *
	 * Note that this creates the singleton instance if it does not exist yet.
	 */
	public static function unitTestTearDown() {
		// Bug: 44850
		$gen = self::singleton();
		$gen->deleteCacheFiles();
	}

	/**
	 * Close any file handles that are still cached.
	 */
	public function __destruct() {
		// array_filter() drops the null entries left by failed opens or locks
		array_map( 'fclose', array_filter( $this->fileHandles ) );
	}
}