[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/libs/ -> IPSet.php (source)

   1  <?php
   2  /**
   3   * @section LICENSE
   4   * This program is free software; you can redistribute it and/or modify
   5   * it under the terms of the GNU General Public License as published by
   6   * the Free Software Foundation; either version 2 of the License, or
   7   * (at your option) any later version.
   8   *
   9   * This program is distributed in the hope that it will be useful,
  10   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12   * GNU General Public License for more details.
  13   *
  14   * You should have received a copy of the GNU General Public License along
  15   * with this program; if not, write to the Free Software Foundation, Inc.,
  16   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  17   * http://www.gnu.org/copyleft/gpl.html
  18   *
  19   * @file
  20   * @author Brandon Black <[email protected]>
  21   */
  22  
  23  /**
  24   * Matches IP addresses against a set of CIDR specifications
  25   *
  26   * Usage:
  27   *   // At startup, calculate the optimized data structure for the set:
  28   *   $ipset = new IPSet( $wgSquidServersNoPurge );
  29   *   // runtime check against cached set (returns bool):
  30   *   $allowme = $ipset->match( $ip );
  31   *
  32   * In rough benchmarking, this takes about 80% more time than
  33   * in_array() checks on a short (a couple hundred at most) array
  34   * of addresses.  It's fast either way at those levels, though,
  35   * and IPSet would scale better than in_array if the array were
  36   * much larger.
  37   *
  38   * For mixed-family CIDR sets, however, this code gives well over
  39   * 100x speedup vs iterating IP::isInRange() over an array
  40   * of CIDR specs.
  41   *
  42   * The basic implementation is two separate binary trees
  43   * (IPv4 and IPv6) as nested php arrays with keys named 0 and 1.
  44   * The values false and true are terminal match-fail and match-success,
  45   * otherwise the value is a deeper node in the tree.
  46   *
  47   * A simple depth-compression scheme is also implemented: whole-byte
  48   * tree compression at whole-byte boundaries only, where no branching
  49   * occurs during that whole byte of depth.  A compressed node has
  50   * keys 'comp' (the byte to compare) and 'next' (the next node to
  51   * recurse into if 'comp' matched successfully).
  52   *
  53   * For example, given these inputs:
  54   * 25.0.0.0/9
  55   * 25.192.0.0/10
  56   *
  57   * The v4 tree would look like:
  58   * root4 => array(
  59   *     'comp' => 25,
  60   *     'next' => array(
  61   *         0 => true,
  62   *         1 => array(
  63   *             0 => false,
  64   *             1 => true,
  65   *         ),
  66   *     ),
  67   * );
  68   *
  69   * (multi-byte compression nodes were attempted as well, but were
  70   * a net loss in my test scenarios due to additional match complexity)
  71   *
  72   * @since 1.24
  73   */
  74  class IPSet {
  75      /** @var array $root4: the root of the IPv4 matching tree */
  76      private $root4 = array( false, false );
  77  
  78      /** @var array $root6: the root of the IPv6 matching tree */
  79      private $root6 = array( false, false );
  80  
  81      /**
  82       * __construct() instantiate the object from an array of CIDR specs
  83       *
  84       * @param array $cfg array of IPv[46] CIDR specs as strings
  85       * @return IPSet new IPSet object
  86       *
  87       * Invalid input network/mask values in $cfg will result in issuing
  88       * E_WARNING and/or E_USER_WARNING and the bad values being ignored.
  89       */
  90  	public function __construct( array $cfg ) {
  91          foreach ( $cfg as $cidr ) {
  92              $this->addCidr( $cidr );
  93          }
  94  
  95          self::recOptimize( $this->root4 );
  96          self::recCompress( $this->root4, 0, 24 );
  97          self::recOptimize( $this->root6 );
  98          self::recCompress( $this->root6, 0, 120 );
  99      }
 100  
 101      /**
 102       * Add a single CIDR spec to the internal matching trees
 103       *
 104       * @param string $cidr string CIDR spec, IPv[46], optional /mask (def all-1's)
 105       */
 106  	private function addCidr( $cidr ) {
 107          // v4 or v6 check
 108          if ( strpos( $cidr, ':' ) === false ) {
 109              $node =& $this->root4;
 110              $defMask = '32';
 111          } else {
 112              $node =& $this->root6;
 113              $defMask = '128';
 114          }
 115  
 116          // Default to all-1's mask if no netmask in the input
 117          if ( strpos( $cidr, '/' ) === false ) {
 118              $net = $cidr;
 119              $mask = $defMask;
 120          } else {
 121              list( $net, $mask ) = explode( '/', $cidr, 2 );
 122              if ( !ctype_digit( $mask ) || intval( $mask ) > $defMask ) {
 123                  trigger_error( "IPSet: Bad mask '$mask' from '$cidr', ignored", E_USER_WARNING );
 124                  return;
 125              }
 126          }
 127          $mask = intval( $mask ); // explicit integer convert, checked above
 128  
 129          // convert $net to an array of integer bytes, length 4 or 16:
 130          $raw = inet_pton( $net );
 131          if ( $raw === false ) {
 132              return; // inet_pton() sends an E_WARNING for us
 133          }
 134          $rawOrd = array_map( 'ord', str_split( $raw ) );
 135  
 136          // special-case: zero mask overwrites the whole tree with a pair of terminal successes
 137          if ( $mask == 0 ) {
 138              $node = array( true, true );
 139              return;
 140          }
 141  
 142          // iterate the bits of the address while walking the tree structure for inserts
 143          $curBit = 0;
 144          while ( 1 ) {
 145              $maskShift = 7 - ( $curBit & 7 );
 146              $node =& $node[( $rawOrd[$curBit >> 3] & ( 1 << $maskShift ) ) >> $maskShift];
 147              ++$curBit;
 148              if ( $node === true ) {
 149                  // already added a larger supernet, no need to go deeper
 150                  return;
 151              } elseif ( $curBit == $mask ) {
 152                  // this may wipe out deeper subnets from earlier
 153                  $node = true;
 154                  return;
 155              } elseif ( $node === false ) {
 156                  // create new subarray to go deeper
 157                  $node = array( false, false );
 158              }
 159          }
 160      }
 161  
 162      /**
 163       * Match an IP address against the set
 164       *
 165       * @param string $ip string IPv[46] address
 166       * @return boolean true is match success, false is match failure
 167       *
 168       * If $ip is unparseable, inet_pton may issue an E_WARNING to that effect
 169       */
 170  	public function match( $ip ) {
 171          $raw = inet_pton( $ip );
 172          if ( $raw === false ) {
 173              return false; // inet_pton() sends an E_WARNING for us
 174          }
 175  
 176          $rawOrd = array_map( 'ord', str_split( $raw ) );
 177          if ( count( $rawOrd ) == 4 ) {
 178              $node =& $this->root4;
 179          } else {
 180              $node =& $this->root6;
 181          }
 182  
 183          $curBit = 0;
 184          while ( 1 ) {
 185              if ( isset( $node['comp'] ) ) {
 186                  // compressed node, matches 1 whole byte on a byte boundary
 187                  if ( $rawOrd[$curBit >> 3] != $node['comp'] ) {
 188                      return false;
 189                  }
 190                  $curBit += 8;
 191                  $node =& $node['next'];
 192              } else {
 193                  // uncompressed node, walk in the correct direction for the current bit-value
 194                  $maskShift = 7 - ( $curBit & 7 );
 195                  $node =& $node[( $rawOrd[$curBit >> 3] & ( 1 << $maskShift ) ) >> $maskShift];
 196                  ++$curBit;
 197              }
 198  
 199              if ( $node === true || $node === false ) {
 200                  return $node;
 201              }
 202          }
 203      }
 204  
 205      /**
 206       * Recursively merges adjacent nets into larger supernets
 207       *
 208       * @param array &$node Tree node to optimize, by-reference
 209       *
 210       *  e.g.: 8.0.0.0/8 + 9.0.0.0/8 -> 8.0.0.0/7
 211       */
 212  	private static function recOptimize( &$node ) {
 213          if ( $node[0] !== false && $node[0] !== true && self::recOptimize( $node[0] ) ) {
 214              $node[0] = true;
 215          }
 216          if ( $node[1] !== false && $node[1] !== true && self::recOptimize( $node[1] ) ) {
 217              $node[1] = true;
 218          }
 219          if ( $node[0] === true && $node[1] === true ) {
 220              return true;
 221          }
 222          return false;
 223      }
 224  
 225      /**
 226       * Recursively compresses a tree
 227       *
 228       * @param array &$node Tree node to compress, by-reference
 229       * @param integer $curBit current depth in the tree
 230       * @param integer $maxCompStart maximum depth at which compression can start, family-specific
 231       *
 232       * This is a very simplistic compression scheme: if we go through a whole
 233       * byte of address starting at a byte boundary with no real branching
 234       * other than immediate false-vs-(node|true), compress that subtree down to a single
 235       * byte-matching node.
 236       * The $maxCompStart check elides recursing the final 7 levels of depth (family-dependent)
 237       */
 238  	private static function recCompress( &$node, $curBit, $maxCompStart ) {
 239          if ( !( $curBit & 7 ) ) { // byte boundary, check for depth-8 single path(s)
 240              $byte = 0;
 241              $cnode =& $node;
 242              $i = 8;
 243              while ( $i-- ) {
 244                  if ( $cnode[0] === false ) {
 245                      $byte |= 1 << $i;
 246                      $cnode =& $cnode[1];
 247                  } elseif ( $cnode[1] === false ) {
 248                      $cnode =& $cnode[0];
 249                  } else {
 250                      // partial-byte branching, give up
 251                      break;
 252                  }
 253              }
 254              if ( $i == -1 ) { // means we did not exit the while() via break
 255                  $node = array(
 256                      'comp' => $byte,
 257                      'next' => &$cnode,
 258                  );
 259                  $curBit += 8;
 260                  if ( $cnode !== true ) {
 261                      self::recCompress( $cnode, $curBit, $maxCompStart );
 262                  }
 263                  return;
 264              }
 265          }
 266  
 267          ++$curBit;
 268          if ( $curBit <= $maxCompStart ) {
 269              if ( $node[0] !== false && $node[0] !== true ) {
 270                  self::recCompress( $node[0], $curBit, $maxCompStart );
 271              }
 272              if ( $node[1] !== false && $node[1] !== true ) {
 273                  self::recCompress( $node[1], $curBit, $maxCompStart );
 274              }
 275          }
 276      }
 277  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1