MediaWiki  REL1_20
IP.php
Go to the documentation of this file.
00001 <?php
// Regular-expression fragments used to recognise IP addresses and IP blocks.
// The fragments carry no delimiters and no anchors; callers add "/^...$/"
// (or whatever else they need) around them.

// IPv4 address: four dot-separated decimal bytes, each 0 to 255 (x00 to xFF).
define( 'RE_IP_BYTE', '(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|0?[0-9]?[0-9])' );
define( 'RE_IP_ADD', implode( '\.', array( RE_IP_BYTE, RE_IP_BYTE, RE_IP_BYTE, RE_IP_BYTE ) ) );
// IPv4 block: an address followed by "/" and a prefix length (0 to 32).
define( 'RE_IP_PREFIX', '(3[0-2]|[12]?\d)' );
define( 'RE_IP_BLOCK', RE_IP_ADD . '\/' . RE_IP_PREFIX );

// IPv6 address: eight colon-separated words (each x0000 to xFFFF), where one
// run of consecutive x0000 words may be abbreviated as "::".
define( 'RE_IPV6_WORD', '([0-9A-Fa-f]{1,4})' );
// Prefix length of an IPv6 block (0 to 128).
define( 'RE_IPV6_PREFIX', '(12[0-8]|1[01][0-9]|[1-9]?\d)' );
define( 'RE_IPV6_ADD',
    '(?:' . implode( '|', array(
        // starts with "::" (this alternative also covers the bare "::")
        ':(?::|(?::' . RE_IPV6_WORD . '){1,7})',
        // ends with "::" (but is not just "::")
        RE_IPV6_WORD . '(?::' . RE_IPV6_WORD . '){0,6}::',
        // contains one "::" in the middle, ending in "::WORD"
        RE_IPV6_WORD . '(?::' . RE_IPV6_WORD . '){0,5}' . '::' . RE_IPV6_WORD,
        // contains one "::" in the middle, not ending in "::WORD" (regex for PCRE 4.0+).
        // NOTE: (?!:(?P=abn)) fails iff "::" is used twice;
        //       (?P=iabn) passes iff a "::" was found.
        RE_IPV6_WORD . '(?::(?P<abn>:(?P<iabn>))?' . RE_IPV6_WORD . '(?!:(?P=abn))){1,5}' .
            ':' . RE_IPV6_WORD . '(?P=iabn)',
        // contains no "::" at all
        RE_IPV6_WORD . '(?::' . RE_IPV6_WORD . '){7}',
    ) ) . ')'
    // NOTE: With PCRE 7.2+, the two '"::" in the middle' cases could be combined into:
    //              RE_IPV6_WORD . '(?::((?(-1)|:))?' . RE_IPV6_WORD . '){1,6}(?(-2)|^)'
    // That form also concatenates better because it uses relative references.
);
// IPv6 block: an address followed by "/" and a prefix length (0 to 128).
define( 'RE_IPV6_BLOCK', RE_IPV6_ADD . '\/' . RE_IPV6_PREFIX );
// For IPv6 canonicalization only (NOT for strict validation; these are quite lax!)
define( 'RE_IPV6_GAP', ':(?:0+:)*(?::(?:0+:)*)?' );
define( 'RE_IPV6_V4_PREFIX', '0*' . RE_IPV6_GAP . '(?:ffff:)?' );

// Handy for regexps used elsewhere: matches any IPv4 or IPv6 address or network.
define( 'IP_ADDRESS_STRING',
    '(?:'
        . RE_IP_ADD . '(?:\/' . RE_IP_PREFIX . ')?' // IPv4, optional "/prefix"
    . '|'
        . RE_IPV6_ADD . '(?:\/' . RE_IPV6_PREFIX . ')?' // IPv6, optional "/prefix"
    . ')'
);
00069 
00074 class IP {
/**
 * Determine whether a string is an IPv4 or IPv6 address, optionally
 * followed by a "/prefix" CIDR suffix.
 *
 * @param string $ip Candidate address or block
 * @return bool True when the whole string matches IP_ADDRESS_STRING
 */
public static function isIPAddress( $ip ) {
    // The D modifier makes "$" match only at the very end of the subject.
    // Without it, a value with a trailing newline ("1.2.3.4\n") is accepted.
    return (bool)preg_match( '/^' . IP_ADDRESS_STRING . '$/D', $ip );
}
00086 
/**
 * Determine whether a string is an IPv6 address, optionally followed by
 * a "/prefix" CIDR suffix.
 *
 * @param string $ip Candidate address or block
 * @return bool
 */
public static function isIPv6( $ip ) {
    // "D" stops "$" from matching before a trailing newline, so
    // "::1\n" is rejected instead of being treated as a valid address.
    $pattern = '/^' . RE_IPV6_ADD . '(?:\/' . RE_IPV6_PREFIX . ')?$/D';
    return (bool)preg_match( $pattern, $ip );
}
00097 
/**
 * Determine whether a string is a dotted-quad IPv4 address, optionally
 * followed by a "/prefix" CIDR suffix.
 *
 * @param string $ip Candidate address or block
 * @return bool
 */
public static function isIPv4( $ip ) {
    // "D" stops "$" from matching before a trailing newline, so
    // "1.2.3.4\n" is rejected instead of being treated as a valid address.
    $pattern = '/^' . RE_IP_ADD . '(?:\/' . RE_IP_PREFIX . ')?$/D';
    return (bool)preg_match( $pattern, $ip );
}
00108 
/**
 * Validate a single IP address (IPv4 or IPv6).
 * Blocks are NOT accepted here: a "/prefix" suffix makes the input invalid.
 *
 * @param string $ip Candidate address
 * @return bool
 */
public static function isValid( $ip ) {
    // The D modifier anchors "$" to the true end of the string, so an
    // address followed by a newline no longer passes validation.
    return ( preg_match( '/^' . RE_IP_ADD . '$/D', $ip )
        || preg_match( '/^' . RE_IPV6_ADD . '$/D', $ip ) );
}
00121 
/**
 * Validate an IP block: an IPv4 or IPv6 address that is followed by a
 * mandatory "/prefix" suffix. A bare address is not a valid block.
 *
 * @param string $ipblock Candidate block in CIDR notation
 * @return bool
 */
public static function isValidBlock( $ipblock ) {
    // The D modifier anchors "$" to the true end of the string, so a
    // block followed by a newline no longer passes validation.
    return ( preg_match( '/^' . RE_IPV6_BLOCK . '$/D', $ipblock )
        || preg_match( '/^' . RE_IP_BLOCK . '$/D', $ipblock ) );
}
00134 
/**
 * Convert an IPv6 address (or CIDR block) into its verbose, uppercase form:
 * any "::" abbreviation is expanded into explicit zero words and leading
 * zeros are stripped from every word. IPv4 input, and input that is not an
 * IP address at all, is returned trimmed but otherwise unchanged.
 *
 * @param string $ip Address, block, or arbitrary string
 * @return string|null Null when $ip is empty after trimming
 */
public static function sanitizeIP( $ip ) {
    $ip = trim( $ip );
    if ( $ip === '' ) {
        return null;
    }
    if ( self::isIPv4( $ip ) || !self::isIPv6( $ip ) ) {
        return $ip; // nothing else to do for IPv4 addresses or invalid input
    }
    $ip = strtoupper( $ip );
    // Expand the zero abbreviation, if there is one
    $abbrevPos = strpos( $ip, '::' );
    if ( $abbrevPos !== false ) {
        // The input is known to be valid IPv6. Find the index of the last
        // character of the address part, i.e. before any CIDR suffix
        // (e.g. "a:b:c::/24").
        $CIDRStart = strpos( $ip, '/' );
        $addressEnd = ( $CIDRStart !== false )
            ? $CIDRStart - 1
            : strlen( $ip ) - 1;
        // True when the "::" is the last thing in the address part
        $abbrevAtEnd = ( $abbrevPos == ( $addressEnd - 1 ) );
        if ( $abbrevPos == 0 ) {
            // "::" at the beginning
            $repeat = '0:';
            // When the address part is nothing but "::" (either "::" or
            // "::/N"), no word follows the abbreviation, so the final zero
            // word has to be added explicitly. Testing only for the literal
            // string "::" would turn "::/0" into "0:0:0:0:0:0:0:/0".
            $extra = $abbrevAtEnd ? '0' : '';
            $pad = 9; // 7+2 (due to '::')
        } elseif ( $abbrevAtEnd ) {
            // "::" at the end
            $repeat = ':0';
            $extra = '';
            $pad = 9; // 7+2 (due to '::')
        } else {
            // "::" in the middle
            $repeat = ':0';
            $extra = ':';
            $pad = 8; // 6+2 (due to '::')
        }
        $ip = str_replace( '::',
            str_repeat( $repeat, $pad - substr_count( $ip, ':' ) ) . $extra,
            $ip
        );
    }
    // Remove leading zeros from each word as needed
    $ip = preg_replace( '/(^|:)0+(' . RE_IPV6_WORD . ')/', '$1$2', $ip );
    return $ip;
}
00187 
/**
 * Produce a compact, lower-case display form of an IP address or block.
 * For IPv6 the longest run of two or more zero words is collapsed into
 * "::". Anything that is not IPv6 is returned as sanitizeIP() left it.
 *
 * @param string $ip Address or CIDR block
 * @return string|null Whatever sanitizeIP() returned, prettified if IPv6
 */
public static function prettifyIP( $ip ) {
    $ip = self::sanitizeIP( $ip ); // normalize (removes '::')
    if ( self::isIPv6( $ip ) ) {
        // Split the input into the address and an optional CIDR suffix
        $cidr = '';
        if ( strpos( $ip, '/' ) !== false ) {
            list( $ip, $cidr ) = explode( '/', $ip, 2 );
        }
        // Find the longest slice made of several zero words
        $offset = 0;
        $longest = $longestPos = false;
        while ( preg_match(
            '!(?:^|:)0(?::0)+(?:$|:)!', $ip, $m, PREG_OFFSET_CAPTURE, $offset
        ) ) {
            list( $match, $pos ) = $m[0]; // full match and its position
            if ( $longest === false || strlen( $match ) > strlen( $longest ) ) {
                $longest = $match;
                $longestPos = $pos;
            }
            // $pos is measured from the start of $ip, not from $offset, so
            // the next search position is assigned rather than accumulated.
            $offset = $pos + strlen( $match );
        }
        if ( $longest !== false ) {
            // Replace this portion of the string with the '::' abbreviation
            $ip = substr_replace( $ip, '::', $longestPos, strlen( $longest ) );
        }
        // Add any CIDR suffix back on
        if ( $cidr !== '' ) {
            $ip = "{$ip}/{$cidr}";
        }
        // Lower case is easier to read
        $ip = strtolower( $ip );
    }
    return $ip;
}
00230 
/**
 * Split a "host", "host:port", "[ipv6]", "[ipv6]:port" or bare IPv6 string
 * into its host and port parts.
 *
 * @param string $both Host with optional port
 * @return array|bool array( host, port ) where port is an int, or false
 *   when no port was given; plain false when the string is malformed
 */
public static function splitHostAndPort( $both ) {
    // All patterns use the D modifier so that "$" only matches at the very
    // end of the string and a trailing newline is not silently accepted.
    if ( substr( $both, 0, 1 ) === '[' ) {
        if ( preg_match( '/^\[(' . RE_IPV6_ADD . ')\](?::(?P<port>\d+))?$/D', $both, $m ) ) {
            if ( isset( $m['port'] ) ) {
                return array( $m[1], intval( $m['port'] ) );
            } else {
                return array( $m[1], false );
            }
        } else {
            // Square bracket found but no IPv6
            return false;
        }
    }
    $numColons = substr_count( $both, ':' );
    if ( $numColons >= 2 ) {
        // Is it a bare IPv6 address?
        if ( preg_match( '/^' . RE_IPV6_ADD . '$/D', $both ) ) {
            return array( $both, false );
        } else {
            // Not valid IPv6, but too many colons for anything else
            return false;
        }
    }
    if ( $numColons >= 1 ) {
        // Host:port? (exactly one colon at this point)
        $bits = explode( ':', $both );
        // The port must consist of digits only. An unanchored /^\d+/ would
        // accept "80abc" and quietly truncate it to 80.
        if ( preg_match( '/^\d+$/D', $bits[1] ) ) {
            return array( $bits[0], intval( $bits[1] ) );
        } else {
            // Not a valid port
            return false;
        }
    }
    // Plain hostname
    return array( $both, false );
}
00283 
/**
 * Build a "host:port" string. A host that contains a colon (taken to be an
 * IPv6 address) is wrapped in square brackets. The port is left out when a
 * default port is supplied and $port loosely equals it.
 *
 * @param string $host
 * @param int $port
 * @param bool|int $defaultPort Port to omit, or false to always print it
 * @return string
 */
public static function combineHostAndPort( $host, $port, $defaultPort = false ) {
    $needsBrackets = ( strpos( $host, ':' ) !== false );
    $hostPart = $needsBrackets ? "[$host]" : $host;

    $portIsDefault = ( $defaultPort !== false && $port == $defaultPort );
    return $portIsDefault ? $hostPart : "$hostPart:$port";
}
00305 
/**
 * Turn a decimal representation of an IPv6 address into colon-separated
 * words.
 *
 * @param string $ip_int Address as a base-10 number
 * @return string Result of hexToOctet() on the converted value
 */
public static function toOctet( $ip_int ) {
    // wfBaseConvert() is defined outside this file; the arguments are
    // presumably (input, source base, target base, pad length, lowercase)
    // - TODO confirm against its definition.
    $hex = wfBaseConvert( $ip_int, 10, 16, 32, false );
    return self::hexToOctet( $hex );
}
00315 
/**
 * Turn a hexadecimal address, in the format produced by toHex(), back into
 * a printable address. A value carrying the "v6-" prefix is rendered as
 * IPv6 words; everything else is rendered as an IPv4 dotted quad.
 *
 * @param string $hex
 * @return string
 */
public static function formatHex( $hex ) {
    $isIPv6 = ( substr( $hex, 0, 3 ) == 'v6-' );
    return $isIPv6
        ? self::hexToOctet( substr( $hex, 3 ) )
        : self::hexToQuad( $hex );
}
00329 
/**
 * Turn a raw hexadecimal number (no "v6-" prefix) into eight
 * colon-separated, uppercase IPv6 words without leading zeros.
 *
 * @param string $ip_hex Up to 32 hex digits; shorter input is left-padded
 *   with zeros, and only the first 32 digits are used
 * @return string
 */
public static function hexToOctet( $ip_hex ) {
    // Normalise to (at least) 32 uppercase hex characters, i.e. 128 bits
    $padded = str_pad( strtoupper( $ip_hex ), 32, '0', STR_PAD_LEFT );
    // Cut the first 32 characters into 8 words of 4 characters each
    $words = str_split( substr( $padded, 0, 32 ), 4 );
    $ip_oct = implode( ':', $words );
    // Drop the leading zeros of every word, keeping at least one digit
    return preg_replace( '/(^|:)0+(' . RE_IPV6_WORD . ')/', '$1$2', $ip_oct );
}
00348 
/**
 * Turn a hexadecimal number into an IPv4 dotted-quad string.
 *
 * @param string $ip_hex Up to 8 hex digits; shorter input is left-padded
 *   with zeros, and only the first 8 digits are used
 * @return string e.g. "192.168.0.1" for "C0A80001"
 */
public static function hexToQuad( $ip_hex ) {
    // Normalise to (at least) 8 uppercase hex characters, i.e. 32 bits
    $padded = str_pad( strtoupper( $ip_hex ), 8, '0', STR_PAD_LEFT );
    // Each pair of hex digits becomes one decimal byte
    $bytes = array();
    foreach ( str_split( substr( $padded, 0, 8 ), 2 ) as $pair ) {
        $bytes[] = base_convert( $pair, 16, 10 );
    }
    return implode( '.', $bytes );
}
00368 
/**
 * Determine whether an IP address is publicly routable, i.e. a valid
 * address that lies outside the private, "this network" and loopback
 * ranges listed below. IPv6 input is delegated to isPublic6().
 *
 * @param string $ip
 * @return bool False for invalid input and for addresses in a listed range
 */
public static function isPublic( $ip ) {
    if ( self::isIPv6( $ip ) ) {
        return self::isPublic6( $ip );
    }

    // IPv4 ranges that are not publicly routable
    static $privateRanges = array(
        array( '10.0.0.0',    '10.255.255.255' ),   # RFC 1918 (private)
        array( '172.16.0.0',  '172.31.255.255' ),   #     "
        array( '192.168.0.0', '192.168.255.255' ),  #     "
        array( '0.0.0.0',     '0.255.255.255' ),    # this network
        array( '127.0.0.0',   '127.255.255.255' ),  # loopback
    );

    $n = self::toUnsigned( $ip );
    // ip2long() accepts incomplete addresses, as well as some addresses
    // followed by garbage characters, so the value must also survive a
    // round trip through long2ip() to count as really valid.
    if ( !$n || $ip != long2ip( $n ) ) {
        return false;
    }

    foreach ( $privateRanges as $bounds ) {
        list( $low, $high ) = $bounds;
        if ( $n >= self::toUnsigned( $low ) && $n <= self::toUnsigned( $high ) ) {
            return false;
        }
    }
    return true;
}
00412 
/**
 * Determine whether an IPv6 address is publicly routable, i.e. outside
 * the unique-local and loopback ranges listed below. The comparison is
 * done on the "v6-..." hex strings returned by toHex().
 *
 * @param string $ip IPv6 address
 * @return bool
 */
private static function isPublic6( $ip ) {
    // IPv6 ranges that are not publicly routable
    static $privateRanges = array(
        array( 'fc00::', 'fdff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' ), # RFC 4193 (local)
        array( '0:0:0:0:0:0:0:1', '0:0:0:0:0:0:0:1' ), # loopback
    );

    $n = self::toHex( $ip );
    foreach ( $privateRanges as $bounds ) {
        list( $low, $high ) = $bounds;
        if ( $n >= self::toHex( $low ) && $n <= self::toHex( $high ) ) {
            return false;
        }
    }
    return true;
}
00438 
/**
 * Return a hexadecimal string form of an IP address. IPv6 addresses are
 * returned as "v6-" followed by the raw hex digits; IPv4 addresses are
 * converted through wfBaseConvert() with a pad length of 8.
 *
 * @param string $ip Address in dotted-quad or IPv6 notation
 * @return string|bool False when the IPv4 conversion fails
 */
public static function toHex( $ip ) {
    if ( self::isIPv6( $ip ) ) {
        return 'v6-' . self::IPv6ToRawHex( $ip );
    }
    $n = self::toUnsigned( $ip );
    if ( $n === false ) {
        return $n;
    }
    return wfBaseConvert( $n, 10, 16, 8, false );
}
00461 
/**
 * Expand an IPv6 address with sanitizeIP() and concatenate its
 * colon-separated words, each left-padded with zeros to four characters.
 *
 * @param string $ip IPv6 address
 * @return string|null Null when sanitizeIP() yields an empty value
 */
private static function IPv6ToRawHex( $ip ) {
    $ip = self::sanitizeIP( $ip );
    if ( !$ip ) {
        return null;
    }
    $words = array();
    foreach ( explode( ':', $ip ) as $word ) {
        $words[] = str_pad( $word, 4, 0, STR_PAD_LEFT );
    }
    return implode( '', $words );
}
00479 
/**
 * Convert an IP address to an unsigned number. IPv6 input is delegated to
 * toUnsigned6(); everything else goes through ip2long().
 *
 * @param string $ip
 * @return int|float|string|bool Whatever ip2long() or toUnsigned6() gives,
 *   with a negative ip2long() result shifted up by 2^32; false when
 *   ip2long() fails
 */
public static function toUnsigned( $ip ) {
    if ( self::isIPv6( $ip ) ) {
        return self::toUnsigned6( $ip );
    }
    $n = ip2long( $ip );
    // A negative result is a signed 32-bit value; shift it into the
    // unsigned range.
    return ( $n < 0 ) ? $n + pow( 2, 32 ) : $n;
}
00499 
/**
 * Convert an IPv6 address to a base-10 number by way of its raw hex form.
 *
 * @param string $ip IPv6 address
 * @return mixed Result of wfBaseConvert() (defined outside this file)
 */
private static function toUnsigned6( $ip ) {
    $rawHex = self::IPv6ToRawHex( $ip );
    return wfBaseConvert( $rawHex, 16, 10 );
}
00507 
/**
 * Split a CIDR block into its network address and prefix length.
 * IPv6 input is delegated to parseCIDR6().
 *
 * @param string $range Block in "address/bits" notation
 * @return array array( network, bits ): for IPv4 the network is an unsigned
 *   number with the host bits cleared and bits is the suffix as given;
 *   array( false, false ) when the input cannot be parsed
 */
public static function parseCIDR( $range ) {
    if ( self::isIPv6( $range ) ) {
        return self::parseCIDR6( $range );
    }
    $parts = explode( '/', $range, 2 );
    if ( count( $parts ) != 2 ) {
        return array( false, false );
    }
    list( $network, $bits ) = $parts;
    $network = ip2long( $network );
    $usable = ( $network !== false && is_numeric( $bits ) && $bits >= 0 && $bits <= 32 );
    if ( !$usable ) {
        return array( false, false );
    }
    if ( $bits == 0 ) {
        $network = 0;
    } else {
        // Clear the (32 - $bits) host bits
        $network &= ~( ( 1 << ( 32 - $bits ) ) - 1 );
    }
    # Convert to unsigned
    if ( $network < 0 ) {
        $network += pow( 2, 32 );
    }
    return array( $network, $bits );
}
00541 
/**
 * Turn a range description into the hexadecimal addresses of its first
 * and last member. Three notations are understood: CIDR ("a.b.c.d/n"),
 * an explicit range ("start - end") and a single address. IPv6 CIDR
 * blocks and explicit IPv6 ranges are delegated to parseRange6().
 *
 * @param string $range
 * @return array array( start, end ) as hex strings, or
 *   array( false, false ) when the range is invalid
 */
public static function parseRange( $range ) {
    $invalid = array( false, false );

    // CIDR notation
    if ( strpos( $range, '/' ) !== false ) {
        if ( self::isIPv6( $range ) ) {
            return self::parseRange6( $range );
        }
        list( $network, $bits ) = self::parseCIDR( $range );
        if ( $network === false ) {
            return $invalid;
        }
        return array(
            sprintf( '%08X', $network ),
            sprintf( '%08X', $network + pow( 2, ( 32 - $bits ) ) - 1 )
        );
    }

    // Explicit range
    if ( strpos( $range, '-' ) !== false ) {
        list( $first, $last ) = array_map( 'trim', explode( '-', $range, 2 ) );
        if ( self::isIPv6( $first ) && self::isIPv6( $last ) ) {
            return self::parseRange6( $range );
        }
        if ( !self::isIPv4( $first ) || !self::isIPv4( $last ) ) {
            return $invalid;
        }
        $low = self::toUnsigned( $first );
        $high = self::toUnsigned( $last );
        if ( $low > $high ) {
            return $invalid;
        }
        return array( sprintf( '%08X', $low ), sprintf( '%08X', $high ) );
    }

    # Single IP
    $hex = self::toHex( $range );
    if ( $hex === false ) {
        return $invalid;
    }
    return array( $hex, $hex );
}
00598 
/**
 * Split an IPv6 CIDR block into its network address and prefix length.
 *
 * @param string $range Block in "address/bits" notation
 * @return array array( network, bits ) where the network is a base-10
 *   string with the host bits cleared and bits is an int;
 *   array( false, false ) when the input cannot be parsed
 */
private static function parseCIDR6( $range ) {
    # Explode into <expanded IP,range>
    $parts = explode( '/', self::sanitizeIP( $range ), 2 );
    if ( count( $parts ) != 2 ) {
        return array( false, false );
    }
    list( $network, $bits ) = $parts;
    $network = self::IPv6ToRawHex( $network );
    // IPv6ToRawHex() reports failure with null, so test for that as well as
    // false. Failure is returned as array( false, false ) on every path;
    // casting the bits to int here would report the failure as 0 bits.
    if ( $network === null || $network === false
        || !is_numeric( $bits ) || $bits < 0 || $bits > 128
    ) {
        return array( false, false );
    }
    if ( $bits == 0 ) {
        $network = "0";
    } else {
        # Native 32 bit functions WONT work here!!!
        # Convert to a padded binary number
        $network = wfBaseConvert( $network, 16, 2, 128 );
        # Truncate the last (128-$bits) bits and replace them with zeros
        $network = str_pad( substr( $network, 0, $bits ), 128, 0, STR_PAD_RIGHT );
        # Convert back to an integer
        $network = wfBaseConvert( $network, 2, 10 );
    }
    return array( $network, (int)$bits );
}
00633 
/**
 * Turn an IPv6 range description into the hexadecimal addresses of its
 * first and last member, each prefixed with "v6-" like the output of
 * toHex(). Understands CIDR blocks, explicit "start - end" ranges and
 * single addresses.
 *
 * @param string $range
 * @return array array( start, end ), or array( false, false ) when the
 *   range is invalid (including an explicit range whose start is greater
 *   than its end)
 */
private static function parseRange6( $range ) {
    # Expand any IPv6 IP
    $range = self::sanitizeIP( $range );
    if ( strpos( $range, '/' ) !== false ) {
        // CIDR notation...
        list( $network, $bits ) = self::parseCIDR6( $range );
        if ( $network === false ) {
            $start = $end = false;
        } else {
            $start = wfBaseConvert( $network, 10, 16, 32, false );
            # Turn network to binary (again)
            $end = wfBaseConvert( $network, 10, 2, 128 );
            # Truncate the last (128-$bits) bits and replace them with ones
            $end = str_pad( substr( $end, 0, $bits ), 128, 1, STR_PAD_RIGHT );
            # Convert to hex
            $end = wfBaseConvert( $end, 2, 16, 32, false );
            # Same "v6-" marker that toHex() puts on IPv6 values
            $start = "v6-$start";
            $end = "v6-$end";
        }
    } elseif ( strpos( $range, '-' ) !== false ) {
        // Explicit range notation...
        list( $start, $end ) = array_map( 'trim', explode( '-', $range, 2 ) );
        $start = self::toUnsigned6( $start );
        $end = self::toUnsigned6( $end );
        if ( $start > $end ) {
            $start = $end = false;
        } else {
            // The "v6-" marker is added only on this valid path. Adding it
            // after the if/else would turn the false values of a reversed
            // range into the strings "v6-", which the final check below
            // would then accept as a valid result.
            $start = 'v6-' . wfBaseConvert( $start, 10, 16, 32, false );
            $end = 'v6-' . wfBaseConvert( $end, 10, 16, 32, false );
        }
    } else {
        # Single IP
        $start = $end = self::toHex( $range );
    }
    if ( $start === false || $end === false ) {
        return array( false, false );
    } else {
        return array( $start, $end );
    }
}
00691 
/**
 * Determine whether an address lies within a range. The range may be
 * given in any notation parseRange() understands.
 *
 * @param string $addr Address to test
 * @param string $range Range to test against
 * @return bool False when either the address or the range is invalid
 */
public static function isInRange( $addr, $range ) {
    $hexIP = self::toHex( $addr );
    list( $start, $end ) = self::parseRange( $range );
    // strcmp() treats false as an empty string, so an unparsable address
    // would otherwise compare equal to the bounds of an unparsable range
    // and be reported as being inside it.
    if ( $hexIP === false || $start === false || $end === false ) {
        return false;
    }
    return ( strcmp( $hexIP, $start ) >= 0 &&
        strcmp( $hexIP, $end ) <= 0 );
}
00705 
/**
 * Reduce an address to a plain IP address where possible. Zone
 * information ("%...") is removed first. A valid address is returned as
 * is; otherwise IPv4 addresses embedded in IPv6 notation and the IPv6
 * loopback are mapped to their IPv4 form.
 *
 * @param string $addr
 * @return string|null Null when no rule applies
 */
public static function canonicalize( $addr ) {
    $addr = preg_replace( '/\%.*/','', $addr ); // remove zone info (bug 35738)
    if ( self::isValid( $addr ) ) {
        return $addr;
    }

    // Mapped addresses written with a dotted quad, e.g. ::ffff:1.2.3.4,
    // become 1.2.3.4. Note that $addr stays cut down to the part after
    // the last colon for the checks that follow.
    $hasColon = ( strpos( $addr, ':' ) !== false );
    if ( $hasColon && strpos( $addr, '.' ) !== false ) {
        $addr = substr( $addr, strrpos( $addr, ':' ) + 1 );
        if ( self::isIPv4( $addr ) ) {
            return $addr;
        }
    }

    // IPv6 loopback address
    if ( preg_match( '/^0*' . RE_IPV6_GAP . '1$/', $addr ) ) {
        return '127.0.0.1';
    }

    // IPv4-mapped and IPv4-compatible IPv6 addresses, dotted-quad tail
    $matches = array();
    $quadTail = '/^' . RE_IPV6_V4_PREFIX . '(' . RE_IP_ADD . ')$/i';
    if ( preg_match( $quadTail, $addr, $matches ) ) {
        return $matches[1];
    }

    // The same, with the IPv4 address written as two hex words
    $hexTail = '/^' . RE_IPV6_V4_PREFIX . RE_IPV6_WORD . ':' . RE_IPV6_WORD . '$/i';
    if ( preg_match( $hexTail, $addr, $matches ) ) {
        $high = hexdec( $matches[1] );
        $low = hexdec( $matches[2] );
        return long2ip( ( $high << 16 ) + $low );
    }

    return null;  // give up
}
00745 
/**
 * Rewrite a range so that it starts at the first address it covers:
 * the start address from parseRange() is formatted with formatHex() and,
 * when parseCIDR() found a prefix length, "/bits" is appended.
 *
 * @param string $range Address or CIDR block
 * @return string
 */
public static function sanitizeRange( $range ) {
    $cidr = self::parseCIDR( $range );
    $bounds = self::parseRange( $range );

    $bits = $cidr[1];
    $start = self::formatHex( $bounds[0] );
    // Without a prefix length the input was not actually a range
    return ( $bits === false ) ? $start : "$start/$bits";
}
00760         }
00761 }