MediaWiki  master
UIDGenerator.php
Go to the documentation of this file.
1 <?php
23 use Wikimedia\Assert\Assert;
24 
30 class UIDGenerator {
32  protected static $instance = null;
33 
34  protected $nodeIdFile; // string; local file path
35  protected $nodeId32; // string; node ID in binary (32 bits)
36  protected $nodeId48; // string; node ID in binary (48 bits)
37 
38  protected $lockFile88; // string; local file path
39  protected $lockFile128; // string; local file path
40  protected $lockFileUUID; // string; local file path
41 
43  protected $fileHandles = []; // cache file handles
44 
45  const QUICK_RAND = 1; // get randomness from fast and insecure sources
46  const QUICK_VOLATILE = 2; // use an APC like in-memory counter if available
47 
48  protected function __construct() {
49  $this->nodeIdFile = wfTempDir() . '/mw-' . __CLASS__ . '-UID-nodeid';
50  $nodeId = '';
51  if ( is_file( $this->nodeIdFile ) ) {
52  $nodeId = file_get_contents( $this->nodeIdFile );
53  }
54  // Try to get some ID that uniquely identifies this machine (RFC 4122)...
55  if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
56  MediaWiki\suppressWarnings();
57  if ( wfIsWindows() ) {
58  // http://technet.microsoft.com/en-us/library/bb490913.aspx
59  $csv = trim( wfShellExec( 'getmac /NH /FO CSV' ) );
60  $line = substr( $csv, 0, strcspn( $csv, "\n" ) );
61  $info = str_getcsv( $line );
62  $nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : '';
63  } elseif ( is_executable( '/sbin/ifconfig' ) ) { // Linux/BSD/Solaris/OS X
64  // See http://linux.die.net/man/8/ifconfig
65  $m = [];
66  preg_match( '/\s([0-9a-f]{2}(:[0-9a-f]{2}){5})\s/',
67  wfShellExec( '/sbin/ifconfig -a' ), $m );
68  $nodeId = isset( $m[1] ) ? str_replace( ':', '', $m[1] ) : '';
69  }
70  MediaWiki\restoreWarnings();
71  if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
72  $nodeId = MWCryptRand::generateHex( 12, true );
73  $nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); // set multicast bit
74  }
75  file_put_contents( $this->nodeIdFile, $nodeId ); // cache
76  }
77  $this->nodeId32 = Wikimedia\base_convert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 );
78  $this->nodeId48 = Wikimedia\base_convert( $nodeId, 16, 2, 48 );
79  // If different processes run as different users, they may have different temp dirs.
80  // This is dealt with by initializing the clock sequence number and counters randomly.
81  $this->lockFile88 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-88';
82  $this->lockFile128 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-128';
83  $this->lockFileUUID = wfTempDir() . '/mw-' . __CLASS__ . '-UUID-128';
84  }
85 
90  protected static function singleton() {
91  if ( self::$instance === null ) {
92  self::$instance = new self();
93  }
94 
95  return self::$instance;
96  }
97 
113  public static function newTimestampedUID88( $base = 10 ) {
114  Assert::parameterType( 'integer', $base, '$base' );
115  Assert::parameter( $base <= 36, '$base', 'must be <= 36' );
116  Assert::parameter( $base >= 2, '$base', 'must be >= 2' );
117 
118  $gen = self::singleton();
119  $info = $gen->getTimeAndDelay( 'lockFile88', 1, 1024, 1024 );
120  $info['offsetCounter'] = $info['offsetCounter'] % 1024;
121  return Wikimedia\base_convert( $gen->getTimestampedID88( $info ), 2, $base );
122  }
123 
130  protected function getTimestampedID88( array $info ) {
131  if ( isset( $info['time'] ) ) {
132  $time = $info['time'];
133  $counter = $info['offsetCounter'];
134  } else {
135  $time = $info[0];
136  $counter = $info[1];
137  }
138  // Take the 46 LSBs of "milliseconds since epoch"
139  $id_bin = $this->millisecondsSinceEpochBinary( $time );
140  // Add a 10 bit counter resulting in 56 bits total
141  $id_bin .= str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT );
142  // Add the 32 bit node ID resulting in 88 bits total
143  $id_bin .= $this->nodeId32;
144  // Convert to a 1-27 digit integer string
145  if ( strlen( $id_bin ) !== 88 ) {
146  throw new RuntimeException( "Detected overflow for millisecond timestamp." );
147  }
148 
149  return $id_bin;
150  }
151 
166  public static function newTimestampedUID128( $base = 10 ) {
167  Assert::parameterType( 'integer', $base, '$base' );
168  Assert::parameter( $base <= 36, '$base', 'must be <= 36' );
169  Assert::parameter( $base >= 2, '$base', 'must be >= 2' );
170 
171  $gen = self::singleton();
172  $info = $gen->getTimeAndDelay( 'lockFile128', 16384, 1048576, 1048576 );
173  $info['offsetCounter'] = $info['offsetCounter'] % 1048576;
174 
175  return Wikimedia\base_convert( $gen->getTimestampedID128( $info ), 2, $base );
176  }
177 
184  protected function getTimestampedID128( array $info ) {
185  if ( isset( $info['time'] ) ) {
186  $time = $info['time'];
187  $counter = $info['offsetCounter'];
188  $clkSeq = $info['clkSeq'];
189  } else {
190  $time = $info[0];
191  $counter = $info[1];
192  $clkSeq = $info[2];
193  }
194  // Take the 46 LSBs of "milliseconds since epoch"
195  $id_bin = $this->millisecondsSinceEpochBinary( $time );
196  // Add a 20 bit counter resulting in 66 bits total
197  $id_bin .= str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT );
198  // Add a 14 bit clock sequence number resulting in 80 bits total
199  $id_bin .= str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT );
200  // Add the 48 bit node ID resulting in 128 bits total
201  $id_bin .= $this->nodeId48;
202  // Convert to a 1-39 digit integer string
203  if ( strlen( $id_bin ) !== 128 ) {
204  throw new RuntimeException( "Detected overflow for millisecond timestamp." );
205  }
206 
207  return $id_bin;
208  }
209 
217  public static function newUUIDv1() {
218  $gen = self::singleton();
219  // There can be up to 10000 intervals for the same millisecond timestamp.
220  // [0,4999] counter + [0,5000] offset is in [0,9999] for the offset counter.
221  // Add this onto the timestamp to allow making up to 5000 IDs per second.
222  return $gen->getUUIDv1( $gen->getTimeAndDelay( 'lockFileUUID', 16384, 5000, 5001 ) );
223  }
224 
232  public static function newRawUUIDv1() {
233  return str_replace( '-', '', self::newUUIDv1() );
234  }
235 
240  protected function getUUIDv1( array $info ) {
241  $clkSeq_bin = Wikimedia\base_convert( $info['clkSeq'], 10, 2, 14 );
242  $time_bin = $this->intervalsSinceGregorianBinary( $info['time'], $info['offsetCounter'] );
243  // Take the 32 bits of "time low"
244  $id_bin = substr( $time_bin, 28, 32 );
245  // Add 16 bits of "time mid" resulting in 48 bits total
246  $id_bin .= substr( $time_bin, 12, 16 );
247  // Add 4 bit version resulting in 52 bits total
248  $id_bin .= '0001';
249  // Add 12 bits of "time high" resulting in 64 bits total
250  $id_bin .= substr( $time_bin, 0, 12 );
251  // Add 2 bits of "variant" resulting in 66 bits total
252  $id_bin .= '10';
253  // Add 6 bits of "clock seq high" resulting in 72 bits total
254  $id_bin .= substr( $clkSeq_bin, 0, 6 );
255  // Add 8 bits of "clock seq low" resulting in 80 bits total
256  $id_bin .= substr( $clkSeq_bin, 6, 8 );
257  // Add the 48 bit node ID resulting in 128 bits total
258  $id_bin .= $this->nodeId48;
259  // Convert to a 32 char hex string with dashes
260  if ( strlen( $id_bin ) !== 128 ) {
261  throw new RuntimeException( "Detected overflow for millisecond timestamp." );
262  }
263  $hex = Wikimedia\base_convert( $id_bin, 2, 16, 32 );
264  return sprintf( '%s-%s-%s-%s-%s',
265  // "time_low" (32 bits)
266  substr( $hex, 0, 8 ),
267  // "time_mid" (16 bits)
268  substr( $hex, 8, 4 ),
269  // "time_hi_and_version" (16 bits)
270  substr( $hex, 12, 4 ),
271  // "clk_seq_hi_res" (8 bits) and "clk_seq_low" (8 bits)
272  substr( $hex, 16, 4 ),
273  // "node" (48 bits)
274  substr( $hex, 20, 12 )
275  );
276  }
277 
285  public static function newUUIDv4( $flags = 0 ) {
286  $hex = ( $flags & self::QUICK_RAND )
287  ? wfRandomString( 31 )
288  : MWCryptRand::generateHex( 31 );
289 
290  return sprintf( '%s-%s-%s-%s-%s',
291  // "time_low" (32 bits)
292  substr( $hex, 0, 8 ),
293  // "time_mid" (16 bits)
294  substr( $hex, 8, 4 ),
295  // "time_hi_and_version" (16 bits)
296  '4' . substr( $hex, 12, 3 ),
297  // "clk_seq_hi_res" (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits)
298  dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) ) . $hex[16] . substr( $hex, 17, 2 ),
299  // "node" (48 bits)
300  substr( $hex, 19, 12 )
301  );
302  }
303 
311  public static function newRawUUIDv4( $flags = 0 ) {
312  return str_replace( '-', '', self::newUUIDv4( $flags ) );
313  }
314 
327  public static function newSequentialPerNodeID( $bucket, $bits = 48, $flags = 0 ) {
328  return current( self::newSequentialPerNodeIDs( $bucket, $bits, 1, $flags ) );
329  }
330 
342  public static function newSequentialPerNodeIDs( $bucket, $bits, $count, $flags = 0 ) {
343  $gen = self::singleton();
344  return $gen->getSequentialPerNodeIDs( $bucket, $bits, $count, $flags );
345  }
346 
358  protected function getSequentialPerNodeIDs( $bucket, $bits, $count, $flags ) {
359  if ( $count <= 0 ) {
360  return []; // nothing to do
361  } elseif ( $bits < 16 || $bits > 48 ) {
362  throw new RuntimeException( "Requested bit size ($bits) is out of range." );
363  }
364 
365  $counter = null; // post-increment persistent counter value
366 
367  // Use APC/eAccelerator/xcache if requested, available, and not in CLI mode;
368  // Counter values would not survive accross script instances in CLI mode.
369  $cache = null;
370  if ( ( $flags & self::QUICK_VOLATILE ) && PHP_SAPI !== 'cli' ) {
372  }
373  if ( $cache ) {
374  $counter = $cache->incrWithInit( $bucket, $cache::TTL_INDEFINITE, $count, $count );
375  if ( $counter === false ) {
376  throw new RuntimeException( 'Unable to set value to ' . get_class( $cache ) );
377  }
378  }
379 
380  // Note: use of fmod() avoids "division by zero" on 32 bit machines
381  if ( $counter === null ) {
382  $path = wfTempDir() . '/mw-' . __CLASS__ . '-' . rawurlencode( $bucket ) . '-48';
383  // Get the UID lock file handle
384  if ( isset( $this->fileHandles[$path] ) ) {
385  $handle = $this->fileHandles[$path];
386  } else {
387  $handle = fopen( $path, 'cb+' );
388  $this->fileHandles[$path] = $handle ?: null; // cache
389  }
390  // Acquire the UID lock file
391  if ( $handle === false ) {
392  throw new RuntimeException( "Could not open '{$path}'." );
393  } elseif ( !flock( $handle, LOCK_EX ) ) {
394  fclose( $handle );
395  throw new RuntimeException( "Could not acquire '{$path}'." );
396  }
397  // Fetch the counter value and increment it...
398  rewind( $handle );
399  $counter = floor( trim( fgets( $handle ) ) ) + $count; // fetch as float
400  // Write back the new counter value
401  ftruncate( $handle, 0 );
402  rewind( $handle );
403  fwrite( $handle, fmod( $counter, pow( 2, 48 ) ) ); // warp-around as needed
404  fflush( $handle );
405  // Release the UID lock file
406  flock( $handle, LOCK_UN );
407  }
408 
409  $ids = [];
410  $divisor = pow( 2, $bits );
411  $currentId = floor( $counter - $count ); // pre-increment counter value
412  for ( $i = 0; $i < $count; ++$i ) {
413  $ids[] = fmod( ++$currentId, $divisor );
414  }
415 
416  return $ids;
417  }
418 
431  protected function getTimeAndDelay( $lockFile, $clockSeqSize, $counterSize, $offsetSize ) {
432  // Get the UID lock file handle
433  if ( isset( $this->fileHandles[$lockFile] ) ) {
434  $handle = $this->fileHandles[$lockFile];
435  } else {
436  $handle = fopen( $this->$lockFile, 'cb+' );
437  $this->fileHandles[$lockFile] = $handle ?: null; // cache
438  }
439  // Acquire the UID lock file
440  if ( $handle === false ) {
441  throw new RuntimeException( "Could not open '{$this->$lockFile}'." );
442  } elseif ( !flock( $handle, LOCK_EX ) ) {
443  fclose( $handle );
444  throw new RuntimeException( "Could not acquire '{$this->$lockFile}'." );
445  }
446  // Get the current timestamp, clock sequence number, last time, and counter
447  rewind( $handle );
448  $data = explode( ' ', fgets( $handle ) ); // "<clk seq> <sec> <msec> <counter> <offset>"
449  $clockChanged = false; // clock set back significantly?
450  if ( count( $data ) == 5 ) { // last UID info already initialized
451  $clkSeq = (int)$data[0] % $clockSeqSize;
452  $prevTime = [ (int)$data[1], (int)$data[2] ];
453  $offset = (int)$data[4] % $counterSize; // random counter offset
454  $counter = 0; // counter for UIDs with the same timestamp
455  // Delay until the clock reaches the time of the last ID.
456  // This detects any microtime() drift among processes.
457  $time = $this->timeWaitUntil( $prevTime );
458  if ( !$time ) { // too long to delay?
459  $clockChanged = true; // bump clock sequence number
460  $time = self::millitime();
461  } elseif ( $time == $prevTime ) {
462  // Bump the counter if there are timestamp collisions
463  $counter = (int)$data[3] % $counterSize;
464  if ( ++$counter >= $counterSize ) { // sanity (starts at 0)
465  flock( $handle, LOCK_UN ); // abort
466  throw new RuntimeException( "Counter overflow for timestamp value." );
467  }
468  }
469  } else { // last UID info not initialized
470  $clkSeq = mt_rand( 0, $clockSeqSize - 1 );
471  $counter = 0;
472  $offset = mt_rand( 0, $offsetSize - 1 );
473  $time = self::millitime();
474  }
475  // microtime() and gettimeofday() can drift from time() at least on Windows.
476  // The drift is immediate for processes running while the system clock changes.
477  // time() does not have this problem. See https://bugs.php.net/bug.php?id=42659.
478  if ( abs( time() - $time[0] ) >= 2 ) {
479  // We don't want processes using too high or low timestamps to avoid duplicate
480  // UIDs and clock sequence number churn. This process should just be restarted.
481  flock( $handle, LOCK_UN ); // abort
482  throw new RuntimeException( "Process clock is outdated or drifted." );
483  }
484  // If microtime() is synced and a clock change was detected, then the clock went back
485  if ( $clockChanged ) {
486  // Bump the clock sequence number and also randomize the counter offset,
487  // which is useful for UIDs that do not include the clock sequence number.
488  $clkSeq = ( $clkSeq + 1 ) % $clockSeqSize;
489  $offset = mt_rand( 0, $offsetSize - 1 );
490  trigger_error( "Clock was set back; sequence number incremented." );
491  }
492  // Update the (clock sequence number, timestamp, counter)
493  ftruncate( $handle, 0 );
494  rewind( $handle );
495  fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" );
496  fflush( $handle );
497  // Release the UID lock file
498  flock( $handle, LOCK_UN );
499 
500  return [
501  'time' => $time,
502  'counter' => $counter,
503  'clkSeq' => $clkSeq,
504  'offset' => $offset,
505  'offsetCounter' => $counter + $offset
506  ];
507  }
508 
516  protected function timeWaitUntil( array $time ) {
517  do {
518  $ct = self::millitime();
519  if ( $ct >= $time ) { // http://php.net/manual/en/language.operators.comparison.php
520  return $ct; // current timestamp is higher than $time
521  }
522  } while ( ( ( $time[0] - $ct[0] ) * 1000 + ( $time[1] - $ct[1] ) ) <= 10 );
523 
524  return false;
525  }
526 
532  protected function millisecondsSinceEpochBinary( array $time ) {
533  list( $sec, $msec ) = $time;
534  $ts = 1000 * $sec + $msec;
535  if ( $ts > pow( 2, 52 ) ) {
536  throw new RuntimeException( __METHOD__ .
537  ': sorry, this function doesn\'t work after the year 144680' );
538  }
539 
540  return substr( Wikimedia\base_convert( $ts, 10, 2, 46 ), -46 );
541  }
542 
549  protected function intervalsSinceGregorianBinary( array $time, $delta = 0 ) {
550  list( $sec, $msec ) = $time;
551  $offset = '122192928000000000';
552  if ( PHP_INT_SIZE >= 8 ) { // 64 bit integers
553  $ts = ( 1000 * $sec + $msec ) * 10000 + (int)$offset + $delta;
554  $id_bin = str_pad( decbin( $ts % pow( 2, 60 ) ), 60, '0', STR_PAD_LEFT );
555  } elseif ( extension_loaded( 'gmp' ) ) {
556  $ts = gmp_add( gmp_mul( (string) $sec, '1000' ), (string) $msec ); // ms
557  $ts = gmp_add( gmp_mul( $ts, '10000' ), $offset ); // 100ns intervals
558  $ts = gmp_add( $ts, (string) $delta );
559  $ts = gmp_mod( $ts, gmp_pow( '2', '60' ) ); // wrap around
560  $id_bin = str_pad( gmp_strval( $ts, 2 ), 60, '0', STR_PAD_LEFT );
561  } elseif ( extension_loaded( 'bcmath' ) ) {
562  $ts = bcadd( bcmul( $sec, 1000 ), $msec ); // ms
563  $ts = bcadd( bcmul( $ts, 10000 ), $offset ); // 100ns intervals
564  $ts = bcadd( $ts, $delta );
565  $ts = bcmod( $ts, bcpow( 2, 60 ) ); // wrap around
566  $id_bin = Wikimedia\base_convert( $ts, 10, 2, 60 );
567  } else {
568  throw new RuntimeException( 'bcmath or gmp extension required for 32 bit machines.' );
569  }
570  return $id_bin;
571  }
572 
576  protected static function millitime() {
577  list( $msec, $sec ) = explode( ' ', microtime() );
578 
579  return [ (int)$sec, (int)( $msec * 1000 ) ];
580  }
581 
593  protected function deleteCacheFiles() {
594  // Bug: 44850
595  foreach ( $this->fileHandles as $path => $handle ) {
596  if ( $handle !== null ) {
597  fclose( $handle );
598  }
599  if ( is_file( $path ) ) {
600  unlink( $path );
601  }
602  unset( $this->fileHandles[$path] );
603  }
604  if ( is_file( $this->nodeIdFile ) ) {
605  unlink( $this->nodeIdFile );
606  }
607  }
608 
620  public static function unitTestTearDown() {
621  // Bug: 44850
622  $gen = self::singleton();
623  $gen->deleteCacheFiles();
624  }
625 
626  function __destruct() {
627  array_map( 'fclose', array_filter( $this->fileHandles ) );
628  }
629 }
const QUICK_VOLATILE
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
the array() calling protocol came about after MediaWiki 1.4rc1.
static newTimestampedUID128($base=10)
Get a statistically unique 128-bit unsigned integer ID string.
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
static newSequentialPerNodeIDs($bucket, $bits, $count, $flags=0)
Return IDs that are sequential only for this node and bucket.
Class for getting statistically unique IDs.
millisecondsSinceEpochBinary(array $time)
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2588
wfShellExec($cmd, &$retval=null, $environ=[], $limits=[], $options=[])
Execute a shell command, with time and memory limits mirrored from the PHP configuration if supported...
intervalsSinceGregorianBinary(array $time, $delta=0)
wfIsWindows()
Check if the operating system is Windows.
wfRandomString($length=32)
Get a random string containing a number of pseudo-random hex characters.
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition: hooks.txt:1629
static newTimestampedUID88($base=10)
Get a statistically unique 88-bit unsigned integer ID string.
getTimeAndDelay($lockFile, $clockSeqSize, $counterSize, $offsetSize)
Get a (time,counter,clock sequence) where (time,counter) is higher than any previous (time...
wfTempDir()
Tries to get the system directory for temporary files.
static millitime()
static newRawUUIDv4($flags=0)
Return an RFC4122 compliant v4 UUID.
getSequentialPerNodeIDs($bucket, $bits, $count, $flags)
Return IDs that are sequential only for this node and bucket.
static unitTestTearDown()
Cleanup resources when tearing down after a unit test.
static newUUIDv4($flags=0)
Return an RFC4122 compliant v4 UUID.
static newSequentialPerNodeID($bucket, $bits=48, $flags=0)
Return an ID that is sequential only for this node and bucket.
$cache
Definition: mcc.php:33
deleteCacheFiles()
Delete all cache files that have been created.
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
getUUIDv1(array $info)
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
static getLocalServerInstance($fallback=CACHE_NONE)
Factory function for CACHE_ACCEL (referenced from DefaultSettings.php)
static generateHex($chars, $forceStrong=false)
Generate a run of (ideally) cryptographically random data and return it in hexadecimal string format...
$line
Definition: cdb.php:59
timeWaitUntil(array $time)
Wait till the current timestamp reaches $time and return the current timestamp.
static singleton()
static UIDGenerator $instance
$count
array $fileHandles
getTimestampedID128(array $info)
getTimestampedID88(array $info)
const QUICK_RAND
static newRawUUIDv1()
Return an RFC4122 compliant v1 UUID.
static newUUIDv1()
Return an RFC4122 compliant v1 UUID.