[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * This file deals with UID generation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @author Aaron Schulz
 */

/**
 * Class for getting statistically unique IDs
 *
 * State that must survive across processes (node ID, last timestamp/counter,
 * per-bucket sequence counters) is kept in small files under wfTempDir() and
 * is protected with flock().
 *
 * @since 1.21
 */
class UIDGenerator {
	/** @var UIDGenerator */
	protected static $instance = null;

	protected $nodeIdFile; // string; local file path
	protected $nodeId32; // string; node ID in binary (32 bits)
	protected $nodeId48; // string; node ID in binary (48 bits)

	protected $lockFile88; // string; local file path
	protected $lockFile128; // string; local file path

	/** @var array Map of (file path => open handle, or null if unusable) */
	protected $fileHandles = array(); // cache file handles

	const QUICK_RAND = 1; // get randomness from fast and insecure sources
	const QUICK_VOLATILE = 2; // use an APC like in-memory counter if available

	/**
	 * Load the cached node ID, or detect/generate one and cache it,
	 * then derive the 32 and 48 bit binary node IDs and the lock file paths.
	 */
	protected function __construct() {
		$this->nodeIdFile = wfTempDir() . '/mw-' . __CLASS__ . '-UID-nodeid';
		$nodeId = '';
		if ( is_file( $this->nodeIdFile ) ) {
			$nodeId = file_get_contents( $this->nodeIdFile );
		}
		// Try to get some ID that uniquely identifies this machine (RFC 4122)...
		if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
			wfSuppressWarnings();
			if ( wfIsWindows() ) {
				// http://technet.microsoft.com/en-us/library/bb490913.aspx
				$csv = trim( wfShellExec( 'getmac /NH /FO CSV' ) );
				$line = substr( $csv, 0, strcspn( $csv, "\n" ) );
				$info = str_getcsv( $line );
				$nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : '';
			} elseif ( is_executable( '/sbin/ifconfig' ) ) { // Linux/BSD/Solaris/OS X
				// See http://linux.die.net/man/8/ifconfig
				$m = array();
				preg_match( '/\s([0-9a-f]{2}(:[0-9a-f]{2}){5})\s/',
					wfShellExec( '/sbin/ifconfig -a' ), $m );
				$nodeId = isset( $m[1] ) ? str_replace( ':', '', $m[1] ) : '';
			}
			wfRestoreWarnings();
			if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
				// No usable hardware address; fall back to a random node ID
				$nodeId = MWCryptRand::generateHex( 12, true );
				$nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); // set multicast bit
			}
			file_put_contents( $this->nodeIdFile, $nodeId ); // cache
		}
		$this->nodeId32 = wfBaseConvert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 );
		$this->nodeId48 = wfBaseConvert( $nodeId, 16, 2, 48 );
		// If different processes run as different users, they may have different temp dirs.
		// This is dealt with by initializing the clock sequence number and counters randomly.
		$this->lockFile88 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-88';
		$this->lockFile128 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-128';
	}

	/**
	 * Get the shared generator instance, creating it on first use
	 *
	 * @return UIDGenerator
	 */
	protected static function singleton() {
		if ( self::$instance === null ) {
			self::$instance = new self();
		}

		return self::$instance;
	}

	/**
	 * Get a statistically unique 88-bit unsigned integer ID string.
	 * The bits of the UID are prefixed with the time (down to the millisecond).
	 *
	 * These IDs are suitable as values for the shard key of distributed data.
	 * If a column uses these as values, it should be declared UNIQUE to handle collisions.
	 * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
	 * They can also be stored "DECIMAL(27) UNSIGNED" or BINARY(11) in MySQL.
	 *
	 * UID generation is serialized on each server (as the node ID is for the whole machine).
	 *
	 * @param int $base Specifies a base other than 10
	 * @return string Number
	 * @throws MWException
	 */
	public static function newTimestampedUID88( $base = 10 ) {
		if ( !is_integer( $base ) || $base > 36 || $base < 2 ) {
			throw new MWException( "Base must be an integer between 2 and 36" );
		}
		$gen = self::singleton();
		$time = $gen->getTimestampAndDelay( 'lockFile88', 1, 1024 );

		return wfBaseConvert( $gen->getTimestampedID88( $time ), 2, $base );
	}

	/**
	 * @param array $info (UIDGenerator::millitime(), counter, clock sequence)
	 * @return string 88 bits
	 * @throws MWException
	 */
	protected function getTimestampedID88( array $info ) {
		list( $time, $counter ) = $info;
		// Take the 46 MSBs of "milliseconds since epoch"
		$id_bin = $this->millisecondsSinceEpochBinary( $time );
		// Add a 10 bit counter resulting in 56 bits total
		$id_bin .= str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT );
		// Add the 32 bit node ID resulting in 88 bits total
		$id_bin .= $this->nodeId32;
		// Sanity check the length; the caller converts this to a 1-27 digit integer string
		if ( strlen( $id_bin ) !== 88 ) {
			throw new MWException( "Detected overflow for millisecond timestamp." );
		}

		return $id_bin;
	}

	/**
	 * Get a statistically unique 128-bit unsigned integer ID string.
	 * The bits of the UID are prefixed with the time (down to the millisecond).
	 *
	 * These IDs are suitable as globally unique IDs, without any enforced uniqueness.
	 * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
	 * They can also be stored as "DECIMAL(39) UNSIGNED" or BINARY(16) in MySQL.
	 *
	 * UID generation is serialized on each server (as the node ID is for the whole machine).
	 *
	 * @param int $base Specifies a base other than 10
	 * @return string Number
	 * @throws MWException
	 */
	public static function newTimestampedUID128( $base = 10 ) {
		if ( !is_integer( $base ) || $base > 36 || $base < 2 ) {
			throw new MWException( "Base must be an integer between 2 and 36" );
		}
		$gen = self::singleton();
		$time = $gen->getTimestampAndDelay( 'lockFile128', 16384, 1048576 );

		return wfBaseConvert( $gen->getTimestampedID128( $time ), 2, $base );
	}

	/**
	 * @param array $info (UIDGenerator::millitime(), counter, clock sequence)
	 * @return string 128 bits
	 * @throws MWException
	 */
	protected function getTimestampedID128( array $info ) {
		list( $time, $counter, $clkSeq ) = $info;
		// Take the 46 MSBs of "milliseconds since epoch"
		$id_bin = $this->millisecondsSinceEpochBinary( $time );
		// Add a 20 bit counter resulting in 66 bits total
		$id_bin .= str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT );
		// Add a 14 bit clock sequence number resulting in 80 bits total
		$id_bin .= str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT );
		// Add the 48 bit node ID resulting in 128 bits total
		$id_bin .= $this->nodeId48;
		// Sanity check the length; the caller converts this to a 1-39 digit integer string
		if ( strlen( $id_bin ) !== 128 ) {
			throw new MWException( "Detected overflow for millisecond timestamp." );
		}

		return $id_bin;
	}

	/**
	 * Return an RFC4122 compliant v4 UUID
	 *
	 * @param int $flags Bitfield (supports UIDGenerator::QUICK_RAND)
	 * @return string
	 * @throws MWException
	 */
	public static function newUUIDv4( $flags = 0 ) {
		// 31 random hex digits; the 32nd digit of the UUID is the fixed version nibble
		$hex = ( $flags & self::QUICK_RAND )
			? wfRandomString( 31 )
			: MWCryptRand::generateHex( 31 );

		return sprintf( '%s-%s-%s-%s-%s',
			// "time_low" (32 bits)
			substr( $hex, 0, 8 ),
			// "time_mid" (16 bits)
			substr( $hex, 8, 4 ),
			// "time_hi_and_version" (16 bits)
			'4' . substr( $hex, 12, 3 ),
			// "clk_seq_hi_res" (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits)
			dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) ) . $hex[16] . substr( $hex, 17, 2 ),
			// "node" (48 bits)
			substr( $hex, 19, 12 )
		);
	}

	/**
	 * Return an RFC4122 compliant v4 UUID
	 *
	 * @param int $flags Bitfield (supports UIDGenerator::QUICK_RAND)
	 * @return string 32 hex characters with no hyphens
	 * @throws MWException
	 */
	public static function newRawUUIDv4( $flags = 0 ) {
		return str_replace( '-', '', self::newUUIDv4( $flags ) );
	}

	/**
	 * Return an ID that is sequential *only* for this node and bucket
	 *
	 * These IDs are suitable for per-host sequence numbers, e.g. for some packet protocols.
	 * If UIDGenerator::QUICK_VOLATILE is used the counter might reset on server restart.
	 *
	 * @param string $bucket Arbitrary bucket name (should be ASCII)
	 * @param int $bits Bit size (<=48) of resulting numbers before wrap-around
	 * @param int $flags (supports UIDGenerator::QUICK_VOLATILE)
	 * @return float Integer value as float
	 * @since 1.23
	 */
	public static function newSequentialPerNodeID( $bucket, $bits = 48, $flags = 0 ) {
		return current( self::newSequentialPerNodeIDs( $bucket, $bits, 1, $flags ) );
	}

	/**
	 * Return IDs that are sequential *only* for this node and bucket
	 *
	 * @see UIDGenerator::newSequentialPerNodeID()
	 * @param string $bucket Arbitrary bucket name (should be ASCII)
	 * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around
	 * @param int $count Number of IDs to return (1 to 10000)
	 * @param int $flags (supports UIDGenerator::QUICK_VOLATILE)
	 * @return array Ordered list of float integer values
	 * @since 1.23
	 */
	public static function newSequentialPerNodeIDs( $bucket, $bits, $count, $flags = 0 ) {
		$gen = self::singleton();
		return $gen->getSequentialPerNodeIDs( $bucket, $bits, $count, $flags );
	}

	/**
	 * Return IDs that are sequential *only* for this node and bucket
	 *
	 * @see UIDGenerator::newSequentialPerNodeID()
	 * @param string $bucket Arbitrary bucket name (should be ASCII)
	 * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around
	 * @param int $count Number of IDs to return (1 to 10000)
	 * @param int $flags (supports UIDGenerator::QUICK_VOLATILE)
	 * @return array Ordered list of float integer values
	 * @throws MWException
	 */
	protected function getSequentialPerNodeIDs( $bucket, $bits, $count, $flags ) {
		if ( $count <= 0 ) {
			return array(); // nothing to do
		} elseif ( $count > 10000 ) {
			throw new MWException( "Number of requested IDs ($count) is too high." );
		} elseif ( $bits < 16 || $bits > 48 ) {
			throw new MWException( "Requested bit size ($bits) is out of range." );
		}

		$counter = null; // post-increment persistent counter value

		// Use APC/eAccelerator/xcache if requested, available, and not in CLI mode;
		// Counter values would not survive across script instances in CLI mode.
		$cache = null;
		if ( ( $flags & self::QUICK_VOLATILE ) && PHP_SAPI !== 'cli' ) {
			try {
				$cache = ObjectCache::newAccelerator( array() );
			} catch ( MWException $e ) {
				// not supported
			}
		}
		if ( $cache ) {
			$counter = $cache->incr( $bucket, $count );
			if ( $counter === false ) {
				if ( !$cache->add( $bucket, (int)$count ) ) {
					throw new MWException( 'Unable to set value to ' . get_class( $cache ) );
				}
				$counter = $count;
			}
		}

		// Note: use of fmod() avoids "division by zero" on 32 bit machines
		if ( $counter === null ) {
			$path = wfTempDir() . '/mw-' . __CLASS__ . '-' . rawurlencode( $bucket ) . '-48';
			// Get the UID lock file handle and acquire the lock
			$handle = $this->acquireLockedHandle( $path );
			// Fetch the counter value and increment it...
			rewind( $handle );
			// An empty/new file reads as 0; the explicit casts keep floor() from
			// ever being handed a non-numeric string.
			$counter = floor( (float)trim( (string)fgets( $handle ) ) ) + $count; // fetch as float
			// Write back the new counter value
			ftruncate( $handle, 0 );
			rewind( $handle );
			// Format explicitly: an implicit float-to-string cast is subject to the
			// "precision" ini setting and may emit lossy exponent notation (e.g.
			// "2.0E+14") for large counters, corrupting the persisted value.
			fwrite( $handle, sprintf( '%.0f', fmod( $counter, pow( 2, 48 ) ) ) ); // wrap-around as needed
			fflush( $handle );
			// Release the UID lock file
			flock( $handle, LOCK_UN );
		}

		$ids = array();
		$divisor = pow( 2, $bits );
		$currentId = floor( $counter - $count ); // pre-increment counter value
		for ( $i = 0; $i < $count; ++$i ) {
			$ids[] = fmod( ++$currentId, $divisor );
		}

		return $ids;
	}

	/**
	 * Get a (time,counter,clock sequence) where (time,counter) is higher
	 * than any previous (time,counter) value for the given clock sequence.
	 * This is useful for making UIDs sequential on a per-node basis.
	 *
	 * @param string $lockFile Name of a local lock file
	 * @param int $clockSeqSize The number of possible clock sequence values
	 * @param int $counterSize The number of possible counter values
	 * @return array (result of UIDGenerator::millitime(), counter, clock sequence)
	 * @throws MWException
	 */
	protected function getTimestampAndDelay( $lockFile, $clockSeqSize, $counterSize ) {
		// Get the UID lock file handle ($lockFile names a property holding the path)
		// and acquire the lock
		$path = $this->$lockFile;
		$handle = $this->acquireLockedHandle( $path );
		// Get the current timestamp, clock sequence number, last time, and counter
		rewind( $handle );
		$data = explode( ' ', fgets( $handle ) ); // "<clk seq> <sec> <msec> <counter> <offset>"
		$clockChanged = false; // clock set back significantly?
		if ( count( $data ) == 5 ) { // last UID info already initialized
			$clkSeq = (int)$data[0] % $clockSeqSize;
			$prevTime = array( (int)$data[1], (int)$data[2] );
			$offset = (int)$data[4] % $counterSize; // random counter offset
			$counter = 0; // counter for UIDs with the same timestamp
			// Delay until the clock reaches the time of the last ID.
			// This detects any microtime() drift among processes.
			$time = $this->timeWaitUntil( $prevTime );
			if ( !$time ) { // too long to delay?
				$clockChanged = true; // bump clock sequence number
				$time = self::millitime();
			} elseif ( $time == $prevTime ) {
				// Bump the counter if there are timestamp collisions
				$counter = (int)$data[3] % $counterSize;
				if ( ++$counter >= $counterSize ) { // sanity (starts at 0)
					flock( $handle, LOCK_UN ); // abort
					throw new MWException( "Counter overflow for timestamp value." );
				}
			}
		} else { // last UID info not initialized
			$clkSeq = mt_rand( 0, $clockSeqSize - 1 );
			$counter = 0;
			$offset = mt_rand( 0, $counterSize - 1 );
			$time = self::millitime();
		}
		// microtime() and gettimeofday() can drift from time() at least on Windows.
		// The drift is immediate for processes running while the system clock changes.
		// time() does not have this problem. See https://bugs.php.net/bug.php?id=42659.
		if ( abs( time() - $time[0] ) >= 2 ) {
			// We don't want processes using too high or low timestamps to avoid duplicate
			// UIDs and clock sequence number churn. This process should just be restarted.
			flock( $handle, LOCK_UN ); // abort
			throw new MWException( "Process clock is outdated or drifted." );
		}
		// If microtime() is synced and a clock change was detected, then the clock went back
		if ( $clockChanged ) {
			// Bump the clock sequence number and also randomize the counter offset,
			// which is useful for UIDs that do not include the clock sequence number.
			$clkSeq = ( $clkSeq + 1 ) % $clockSeqSize;
			$offset = mt_rand( 0, $counterSize - 1 );
			trigger_error( "Clock was set back; sequence number incremented." );
		}
		// Update the (clock sequence number, timestamp, counter)
		ftruncate( $handle, 0 );
		rewind( $handle );
		fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" );
		fflush( $handle );
		// Release the UID lock file
		flock( $handle, LOCK_UN );

		return array( $time, ( $counter + $offset ) % $counterSize, $clkSeq );
	}

	/**
	 * Get an exclusively locked handle for a state file, opening (and caching)
	 * the handle if this instance has not opened it yet.
	 *
	 * The caller must release the lock with flock( $handle, LOCK_UN ) and must
	 * not close the handle, since it stays cached for reuse.
	 *
	 * @param string $path Local file path
	 * @return resource File handle holding LOCK_EX
	 * @throws MWException If the file cannot be opened or locked
	 */
	protected function acquireLockedHandle( $path ) {
		if ( isset( $this->fileHandles[$path] ) ) {
			$handle = $this->fileHandles[$path];
		} else {
			$handle = fopen( $path, 'cb+' );
			$this->fileHandles[$path] = $handle ?: null; // cache
		}
		if ( $handle === false ) {
			throw new MWException( "Could not open '{$path}'." );
		} elseif ( !flock( $handle, LOCK_EX ) ) {
			// Do not leave a closed handle in the cache; later calls would try to
			// reuse it and __destruct() would try to close it a second time.
			$this->fileHandles[$path] = null;
			fclose( $handle );
			throw new MWException( "Could not acquire '{$path}'." );
		}

		return $handle;
	}

	/**
	 * Wait till the current timestamp reaches $time and return the current
	 * timestamp. This returns false if it would have to wait more than 10ms.
	 *
	 * @param array $time Result of UIDGenerator::millitime()
	 * @return array|bool UIDGenerator::millitime() result or false
	 */
	protected function timeWaitUntil( array $time ) {
		do {
			$ct = self::millitime();
			if ( $ct >= $time ) { // http://php.net/manual/en/language.operators.comparison.php
				return $ct; // current timestamp is higher than $time
			}
		} while ( ( ( $time[0] - $ct[0] ) * 1000 + ( $time[1] - $ct[1] ) ) <= 10 );

		return false;
	}

	/**
	 * @param array $time Result of UIDGenerator::millitime()
	 * @return string 46 MSBs of "milliseconds since epoch" in binary (rolls over in 4201)
	 * @throws MWException
	 */
	protected function millisecondsSinceEpochBinary( array $time ) {
		list( $sec, $msec ) = $time;
		$ts = 1000 * $sec + $msec;
		if ( $ts > pow( 2, 52 ) ) {
			throw new MWException( __METHOD__ .
				': sorry, this function doesn\'t work after the year 144680' );
		}

		return substr( wfBaseConvert( $ts, 10, 2, 46 ), -46 );
	}

	/**
	 * @return array (current time in seconds, milliseconds since then)
	 */
	protected static function millitime() {
		// microtime() returns "<fractional seconds> <whole seconds>"
		list( $msec, $sec ) = explode( ' ', microtime() );

		return array( (int)$sec, (int)( $msec * 1000 ) );
	}

	/**
	 * Delete all cache files that have been created.
	 *
	 * This is a cleanup method primarily meant to be used from unit tests to
	 * avoid polluting the local filesystem. If used outside of a unit test
	 * environment it should be used with caution as it may destroy state saved
	 * in the files.
	 *
	 * @see unitTestTearDown
	 * @since 1.23
	 */
	protected function deleteCacheFiles() {
		// Bug: 44850
		foreach ( $this->fileHandles as $path => $handle ) {
			if ( $handle !== null ) {
				fclose( $handle );
			}
			if ( is_file( $path ) ) {
				unlink( $path );
			}
			unset( $this->fileHandles[$path] );
		}
		if ( is_file( $this->nodeIdFile ) ) {
			unlink( $this->nodeIdFile );
		}
	}

	/**
	 * Cleanup resources when tearing down after a unit test.
	 *
	 * This is a cleanup method primarily meant to be used from unit tests to
	 * avoid polluting the local filesystem. If used outside of a unit test
	 * environment it should be used with caution as it may destroy state saved
	 * in the files.
	 *
	 * @see deleteCacheFiles
	 * @since 1.23
	 */
	public static function unitTestTearDown() {
		// Bug: 44850
		$gen = self::singleton();
		$gen->deleteCacheFiles();
	}

	/**
	 * Close any file handles that are still open (null entries are skipped)
	 */
	public function __destruct() {
		array_map( 'fclose', array_filter( $this->fileHandles ) );
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |