[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/utils/ -> UIDGenerator.php (source)

   1  <?php
   2  /**
   3   * This file deals with UID generation.
   4   *
   5   * This program is free software; you can redistribute it and/or modify
   6   * it under the terms of the GNU General Public License as published by
   7   * the Free Software Foundation; either version 2 of the License, or
   8   * (at your option) any later version.
   9   *
  10   * This program is distributed in the hope that it will be useful,
  11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13   * GNU General Public License for more details.
  14   *
  15   * You should have received a copy of the GNU General Public License along
  16   * with this program; if not, write to the Free Software Foundation, Inc.,
  17   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18   * http://www.gnu.org/copyleft/gpl.html
  19   *
  20   * @file
  21   * @author Aaron Schulz
  22   */
  23  
  24  /**
  25   * Class for getting statistically unique IDs
  26   *
  27   * @since 1.21
  28   */
  29  class UIDGenerator {
  30      /** @var UIDGenerator */
  31      protected static $instance = null;
  32  
  33      protected $nodeIdFile; // string; local file path
  34      protected $nodeId32; // string; node ID in binary (32 bits)
  35      protected $nodeId48; // string; node ID in binary (48 bits)
  36  
  37      protected $lockFile88; // string; local file path
  38      protected $lockFile128; // string; local file path
  39  
  40      /** @var array */
  41      protected $fileHandles = array(); // cache file handles
  42  
  43      const QUICK_RAND = 1; // get randomness from fast and insecure sources
  44      const QUICK_VOLATILE = 2; // use an APC like in-memory counter if available
  45  
  46  	protected function __construct() {
  47          $this->nodeIdFile = wfTempDir() . '/mw-' . __CLASS__ . '-UID-nodeid';
  48          $nodeId = '';
  49          if ( is_file( $this->nodeIdFile ) ) {
  50              $nodeId = file_get_contents( $this->nodeIdFile );
  51          }
  52          // Try to get some ID that uniquely identifies this machine (RFC 4122)...
  53          if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
  54              wfSuppressWarnings();
  55              if ( wfIsWindows() ) {
  56                  // http://technet.microsoft.com/en-us/library/bb490913.aspx
  57                  $csv = trim( wfShellExec( 'getmac /NH /FO CSV' ) );
  58                  $line = substr( $csv, 0, strcspn( $csv, "\n" ) );
  59                  $info = str_getcsv( $line );
  60                  $nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : '';
  61              } elseif ( is_executable( '/sbin/ifconfig' ) ) { // Linux/BSD/Solaris/OS X
  62                  // See http://linux.die.net/man/8/ifconfig
  63                  $m = array();
  64                  preg_match( '/\s([0-9a-f]{2}(:[0-9a-f]{2}){5})\s/',
  65                      wfShellExec( '/sbin/ifconfig -a' ), $m );
  66                  $nodeId = isset( $m[1] ) ? str_replace( ':', '', $m[1] ) : '';
  67              }
  68              wfRestoreWarnings();
  69              if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
  70                  $nodeId = MWCryptRand::generateHex( 12, true );
  71                  $nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); // set multicast bit
  72              }
  73              file_put_contents( $this->nodeIdFile, $nodeId ); // cache
  74          }
  75          $this->nodeId32 = wfBaseConvert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 );
  76          $this->nodeId48 = wfBaseConvert( $nodeId, 16, 2, 48 );
  77          // If different processes run as different users, they may have different temp dirs.
  78          // This is dealt with by initializing the clock sequence number and counters randomly.
  79          $this->lockFile88 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-88';
  80          $this->lockFile128 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-128';
  81      }
  82  
  83      /**
  84       * @return UIDGenerator
  85       */
  86  	protected static function singleton() {
  87          if ( self::$instance === null ) {
  88              self::$instance = new self();
  89          }
  90  
  91          return self::$instance;
  92      }
  93  
  94      /**
  95       * Get a statistically unique 88-bit unsigned integer ID string.
  96       * The bits of the UID are prefixed with the time (down to the millisecond).
  97       *
  98       * These IDs are suitable as values for the shard key of distributed data.
  99       * If a column uses these as values, it should be declared UNIQUE to handle collisions.
 100       * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
 101       * They can also be stored "DECIMAL(27) UNSIGNED" or BINARY(11) in MySQL.
 102       *
 103       * UID generation is serialized on each server (as the node ID is for the whole machine).
 104       *
 105       * @param int $base Specifies a base other than 10
 106       * @return string Number
 107       * @throws MWException
 108       */
 109  	public static function newTimestampedUID88( $base = 10 ) {
 110          if ( !is_integer( $base ) || $base > 36 || $base < 2 ) {
 111              throw new MWException( "Base must an integer be between 2 and 36" );
 112          }
 113          $gen = self::singleton();
 114          $time = $gen->getTimestampAndDelay( 'lockFile88', 1, 1024 );
 115  
 116          return wfBaseConvert( $gen->getTimestampedID88( $time ), 2, $base );
 117      }
 118  
 119      /**
 120       * @param array $info (UIDGenerator::millitime(), counter, clock sequence)
 121       * @return string 88 bits
 122       */
 123  	protected function getTimestampedID88( array $info ) {
 124          list( $time, $counter ) = $info;
 125          // Take the 46 MSBs of "milliseconds since epoch"
 126          $id_bin = $this->millisecondsSinceEpochBinary( $time );
 127          // Add a 10 bit counter resulting in 56 bits total
 128          $id_bin .= str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT );
 129          // Add the 32 bit node ID resulting in 88 bits total
 130          $id_bin .= $this->nodeId32;
 131          // Convert to a 1-27 digit integer string
 132          if ( strlen( $id_bin ) !== 88 ) {
 133              throw new MWException( "Detected overflow for millisecond timestamp." );
 134          }
 135  
 136          return $id_bin;
 137      }
 138  
 139      /**
 140       * Get a statistically unique 128-bit unsigned integer ID string.
 141       * The bits of the UID are prefixed with the time (down to the millisecond).
 142       *
 143       * These IDs are suitable as globally unique IDs, without any enforced uniqueness.
 144       * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
 145       * They can also be stored as "DECIMAL(39) UNSIGNED" or BINARY(16) in MySQL.
 146       *
 147       * UID generation is serialized on each server (as the node ID is for the whole machine).
 148       *
 149       * @param int $base Specifies a base other than 10
 150       * @return string Number
 151       * @throws MWException
 152       */
 153  	public static function newTimestampedUID128( $base = 10 ) {
 154          if ( !is_integer( $base ) || $base > 36 || $base < 2 ) {
 155              throw new MWException( "Base must be an integer between 2 and 36" );
 156          }
 157          $gen = self::singleton();
 158          $time = $gen->getTimestampAndDelay( 'lockFile128', 16384, 1048576 );
 159  
 160          return wfBaseConvert( $gen->getTimestampedID128( $time ), 2, $base );
 161      }
 162  
 163      /**
 164       * @param array $info (UIDGenerator::millitime(), counter, clock sequence)
 165       * @return string 128 bits
 166       */
 167  	protected function getTimestampedID128( array $info ) {
 168          list( $time, $counter, $clkSeq ) = $info;
 169          // Take the 46 MSBs of "milliseconds since epoch"
 170          $id_bin = $this->millisecondsSinceEpochBinary( $time );
 171          // Add a 20 bit counter resulting in 66 bits total
 172          $id_bin .= str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT );
 173          // Add a 14 bit clock sequence number resulting in 80 bits total
 174          $id_bin .= str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT );
 175          // Add the 48 bit node ID resulting in 128 bits total
 176          $id_bin .= $this->nodeId48;
 177          // Convert to a 1-39 digit integer string
 178          if ( strlen( $id_bin ) !== 128 ) {
 179              throw new MWException( "Detected overflow for millisecond timestamp." );
 180          }
 181  
 182          return $id_bin;
 183      }
 184  
 185      /**
 186       * Return an RFC4122 compliant v4 UUID
 187       *
 188       * @param int $flags Bitfield (supports UIDGenerator::QUICK_RAND)
 189       * @return string
 190       * @throws MWException
 191       */
 192  	public static function newUUIDv4( $flags = 0 ) {
 193          $hex = ( $flags & self::QUICK_RAND )
 194              ? wfRandomString( 31 )
 195              : MWCryptRand::generateHex( 31 );
 196  
 197          return sprintf( '%s-%s-%s-%s-%s',
 198              // "time_low" (32 bits)
 199              substr( $hex, 0, 8 ),
 200              // "time_mid" (16 bits)
 201              substr( $hex, 8, 4 ),
 202              // "time_hi_and_version" (16 bits)
 203              '4' . substr( $hex, 12, 3 ),
 204              // "clk_seq_hi_res (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits)
 205              dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) ) . $hex[16] . substr( $hex, 17, 2 ),
 206              // "node" (48 bits)
 207              substr( $hex, 19, 12 )
 208          );
 209      }
 210  
 211      /**
 212       * Return an RFC4122 compliant v4 UUID
 213       *
 214       * @param int $flags Bitfield (supports UIDGenerator::QUICK_RAND)
 215       * @return string 32 hex characters with no hyphens
 216       * @throws MWException
 217       */
 218  	public static function newRawUUIDv4( $flags = 0 ) {
 219          return str_replace( '-', '', self::newUUIDv4( $flags ) );
 220      }
 221  
 222      /**
 223       * Return an ID that is sequential *only* for this node and bucket
 224       *
 225       * These IDs are suitable for per-host sequence numbers, e.g. for some packet protocols.
 226       * If UIDGenerator::QUICK_VOLATILE is used the counter might reset on server restart.
 227       *
 228       * @param string $bucket Arbitrary bucket name (should be ASCII)
 229       * @param int $bits Bit size (<=48) of resulting numbers before wrap-around
 230       * @param int $flags (supports UIDGenerator::QUICK_VOLATILE)
 231       * @return float Integer value as float
 232       * @since 1.23
 233       */
 234  	public static function newSequentialPerNodeID( $bucket, $bits = 48, $flags = 0 ) {
 235          return current( self::newSequentialPerNodeIDs( $bucket, $bits, 1, $flags ) );
 236      }
 237  
 238      /**
 239       * Return IDs that are sequential *only* for this node and bucket
 240       *
 241       * @see UIDGenerator::newSequentialPerNodeID()
 242       * @param string $bucket Arbitrary bucket name (should be ASCII)
 243       * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around
 244       * @param int $count Number of IDs to return (1 to 10000)
 245       * @param int $flags (supports UIDGenerator::QUICK_VOLATILE)
 246       * @return array Ordered list of float integer values
 247       * @since 1.23
 248       */
 249  	public static function newSequentialPerNodeIDs( $bucket, $bits, $count, $flags = 0 ) {
 250          $gen = self::singleton();
 251          return $gen->getSequentialPerNodeIDs( $bucket, $bits, $count, $flags );
 252      }
 253  
 254      /**
 255       * Return IDs that are sequential *only* for this node and bucket
 256       *
 257       * @see UIDGenerator::newSequentialPerNodeID()
 258       * @param string $bucket Arbitrary bucket name (should be ASCII)
 259       * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around
 260       * @param int $count Number of IDs to return (1 to 10000)
 261       * @param int $flags (supports UIDGenerator::QUICK_VOLATILE)
 262       * @return array Ordered list of float integer values
 263       */
 264  	protected function getSequentialPerNodeIDs( $bucket, $bits, $count, $flags ) {
 265          if ( $count <= 0 ) {
 266              return array(); // nothing to do
 267          } elseif ( $count > 10000 ) {
 268              throw new MWException( "Number of requested IDs ($count) is too high." );
 269          } elseif ( $bits < 16 || $bits > 48 ) {
 270              throw new MWException( "Requested bit size ($bits) is out of range." );
 271          }
 272  
 273          $counter = null; // post-increment persistent counter value
 274  
 275          // Use APC/eAccelerator/xcache if requested, available, and not in CLI mode;
 276          // Counter values would not survive accross script instances in CLI mode.
 277          $cache = null;
 278          if ( ( $flags & self::QUICK_VOLATILE ) && PHP_SAPI !== 'cli' ) {
 279              try {
 280                  $cache = ObjectCache::newAccelerator( array() );
 281              } catch ( MWException $e ) {
 282                  // not supported
 283              }
 284          }
 285          if ( $cache ) {
 286              $counter = $cache->incr( $bucket, $count );
 287              if ( $counter === false ) {
 288                  if ( !$cache->add( $bucket, (int)$count ) ) {
 289                      throw new MWException( 'Unable to set value to ' . get_class( $cache ) );
 290                  }
 291                  $counter = $count;
 292              }
 293          }
 294  
 295          // Note: use of fmod() avoids "division by zero" on 32 bit machines
 296          if ( $counter === null ) {
 297              $path = wfTempDir() . '/mw-' . __CLASS__ . '-' . rawurlencode( $bucket ) . '-48';
 298              // Get the UID lock file handle
 299              if ( isset( $this->fileHandles[$path] ) ) {
 300                  $handle = $this->fileHandles[$path];
 301              } else {
 302                  $handle = fopen( $path, 'cb+' );
 303                  $this->fileHandles[$path] = $handle ?: null; // cache
 304              }
 305              // Acquire the UID lock file
 306              if ( $handle === false ) {
 307                  throw new MWException( "Could not open '{$path}'." );
 308              } elseif ( !flock( $handle, LOCK_EX ) ) {
 309                  fclose( $handle );
 310                  throw new MWException( "Could not acquire '{$path}'." );
 311              }
 312              // Fetch the counter value and increment it...
 313              rewind( $handle );
 314              $counter = floor( trim( fgets( $handle ) ) ) + $count; // fetch as float
 315              // Write back the new counter value
 316              ftruncate( $handle, 0 );
 317              rewind( $handle );
 318              fwrite( $handle, fmod( $counter, pow( 2, 48 ) ) ); // warp-around as needed
 319              fflush( $handle );
 320              // Release the UID lock file
 321              flock( $handle, LOCK_UN );
 322          }
 323  
 324          $ids = array();
 325          $divisor = pow( 2, $bits );
 326          $currentId = floor( $counter - $count ); // pre-increment counter value
 327          for ( $i = 0; $i < $count; ++$i ) {
 328              $ids[] = fmod( ++$currentId, $divisor );
 329          }
 330  
 331          return $ids;
 332      }
 333  
 334      /**
 335       * Get a (time,counter,clock sequence) where (time,counter) is higher
 336       * than any previous (time,counter) value for the given clock sequence.
 337       * This is useful for making UIDs sequential on a per-node bases.
 338       *
 339       * @param string $lockFile Name of a local lock file
 340       * @param int $clockSeqSize The number of possible clock sequence values
 341       * @param int $counterSize The number of possible counter values
 342       * @return array (result of UIDGenerator::millitime(), counter, clock sequence)
 343       * @throws MWException
 344       */
 345  	protected function getTimestampAndDelay( $lockFile, $clockSeqSize, $counterSize ) {
 346          // Get the UID lock file handle
 347          $path = $this->$lockFile;
 348          if ( isset( $this->fileHandles[$path] ) ) {
 349              $handle = $this->fileHandles[$path];
 350          } else {
 351              $handle = fopen( $path, 'cb+' );
 352              $this->fileHandles[$path] = $handle ?: null; // cache
 353          }
 354          // Acquire the UID lock file
 355          if ( $handle === false ) {
 356              throw new MWException( "Could not open '{$this->$lockFile}'." );
 357          } elseif ( !flock( $handle, LOCK_EX ) ) {
 358              fclose( $handle );
 359              throw new MWException( "Could not acquire '{$this->$lockFile}'." );
 360          }
 361          // Get the current timestamp, clock sequence number, last time, and counter
 362          rewind( $handle );
 363          $data = explode( ' ', fgets( $handle ) ); // "<clk seq> <sec> <msec> <counter> <offset>"
 364          $clockChanged = false; // clock set back significantly?
 365          if ( count( $data ) == 5 ) { // last UID info already initialized
 366              $clkSeq = (int)$data[0] % $clockSeqSize;
 367              $prevTime = array( (int)$data[1], (int)$data[2] );
 368              $offset = (int)$data[4] % $counterSize; // random counter offset
 369              $counter = 0; // counter for UIDs with the same timestamp
 370              // Delay until the clock reaches the time of the last ID.
 371              // This detects any microtime() drift among processes.
 372              $time = $this->timeWaitUntil( $prevTime );
 373              if ( !$time ) { // too long to delay?
 374                  $clockChanged = true; // bump clock sequence number
 375                  $time = self::millitime();
 376              } elseif ( $time == $prevTime ) {
 377                  // Bump the counter if there are timestamp collisions
 378                  $counter = (int)$data[3] % $counterSize;
 379                  if ( ++$counter >= $counterSize ) { // sanity (starts at 0)
 380                      flock( $handle, LOCK_UN ); // abort
 381                      throw new MWException( "Counter overflow for timestamp value." );
 382                  }
 383              }
 384          } else { // last UID info not initialized
 385              $clkSeq = mt_rand( 0, $clockSeqSize - 1 );
 386              $counter = 0;
 387              $offset = mt_rand( 0, $counterSize - 1 );
 388              $time = self::millitime();
 389          }
 390          // microtime() and gettimeofday() can drift from time() at least on Windows.
 391          // The drift is immediate for processes running while the system clock changes.
 392          // time() does not have this problem. See https://bugs.php.net/bug.php?id=42659.
 393          if ( abs( time() - $time[0] ) >= 2 ) {
 394              // We don't want processes using too high or low timestamps to avoid duplicate
 395              // UIDs and clock sequence number churn. This process should just be restarted.
 396              flock( $handle, LOCK_UN ); // abort
 397              throw new MWException( "Process clock is outdated or drifted." );
 398          }
 399          // If microtime() is synced and a clock change was detected, then the clock went back
 400          if ( $clockChanged ) {
 401              // Bump the clock sequence number and also randomize the counter offset,
 402              // which is useful for UIDs that do not include the clock sequence number.
 403              $clkSeq = ( $clkSeq + 1 ) % $clockSeqSize;
 404              $offset = mt_rand( 0, $counterSize - 1 );
 405              trigger_error( "Clock was set back; sequence number incremented." );
 406          }
 407          // Update the (clock sequence number, timestamp, counter)
 408          ftruncate( $handle, 0 );
 409          rewind( $handle );
 410          fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" );
 411          fflush( $handle );
 412          // Release the UID lock file
 413          flock( $handle, LOCK_UN );
 414  
 415          return array( $time, ( $counter + $offset ) % $counterSize, $clkSeq );
 416      }
 417  
 418      /**
 419       * Wait till the current timestamp reaches $time and return the current
 420       * timestamp. This returns false if it would have to wait more than 10ms.
 421       *
 422       * @param array $time Result of UIDGenerator::millitime()
 423       * @return array|bool UIDGenerator::millitime() result or false
 424       */
 425  	protected function timeWaitUntil( array $time ) {
 426          do {
 427              $ct = self::millitime();
 428              if ( $ct >= $time ) { // http://php.net/manual/en/language.operators.comparison.php
 429                  return $ct; // current timestamp is higher than $time
 430              }
 431          } while ( ( ( $time[0] - $ct[0] ) * 1000 + ( $time[1] - $ct[1] ) ) <= 10 );
 432  
 433          return false;
 434      }
 435  
 436      /**
 437       * @param array $time Result of UIDGenerator::millitime()
 438       * @return string 46 MSBs of "milliseconds since epoch" in binary (rolls over in 4201)
 439       */
 440  	protected function millisecondsSinceEpochBinary( array $time ) {
 441          list( $sec, $msec ) = $time;
 442          $ts = 1000 * $sec + $msec;
 443          if ( $ts > pow( 2, 52 ) ) {
 444              throw new MWException( __METHOD__ .
 445                  ': sorry, this function doesn\'t work after the year 144680' );
 446          }
 447  
 448          return substr( wfBaseConvert( $ts, 10, 2, 46 ), -46 );
 449      }
 450  
 451      /**
 452       * @return array (current time in seconds, milliseconds since then)
 453       */
 454  	protected static function millitime() {
 455          list( $msec, $sec ) = explode( ' ', microtime() );
 456  
 457          return array( (int)$sec, (int)( $msec * 1000 ) );
 458      }
 459  
 460      /**
 461       * Delete all cache files that have been created.
 462       *
 463       * This is a cleanup method primarily meant to be used from unit tests to
 464       * avoid poluting the local filesystem. If used outside of a unit test
 465       * environment it should be used with caution as it may destroy state saved
 466       * in the files.
 467       *
 468       * @see unitTestTearDown
 469       * @since 1.23
 470       */
 471  	protected function deleteCacheFiles() {
 472          // Bug: 44850
 473          foreach ( $this->fileHandles as $path => $handle ) {
 474              if ( $handle !== null ) {
 475                  fclose( $handle );
 476              }
 477              if ( is_file( $path ) ) {
 478                  unlink( $path );
 479              }
 480              unset( $this->fileHandles[$path] );
 481          }
 482          if ( is_file( $this->nodeIdFile ) ) {
 483              unlink( $this->nodeIdFile );
 484          }
 485      }
 486  
 487      /**
 488       * Cleanup resources when tearing down after a unit test.
 489       *
 490       * This is a cleanup method primarily meant to be used from unit tests to
 491       * avoid poluting the local filesystem. If used outside of a unit test
 492       * environment it should be used with caution as it may destroy state saved
 493       * in the files.
 494       *
 495       * @see deleteCacheFiles
 496       * @since 1.23
 497       */
 498  	public static function unitTestTearDown() {
 499          // Bug: 44850
 500          $gen = self::singleton();
 501          $gen->deleteCacheFiles();
 502      }
 503  
 504  	function __destruct() {
 505          array_map( 'fclose', array_filter( $this->fileHandles ) );
 506      }
 507  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1