[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/normal/ -> UtfNormalUtil.php (source)

   1  <?php
   2  /**
   3   * Some of these functions are adapted from places in MediaWiki.
   4   * Should probably merge them for consistency.
   5   *
   6   * Copyright © 2004 Brion Vibber <[email protected]>
   7   * https://www.mediawiki.org/
   8   *
   9   * This program is free software; you can redistribute it and/or modify
  10   * it under the terms of the GNU General Public License as published by
  11   * the Free Software Foundation; either version 2 of the License, or
  12   * (at your option) any later version.
  13   *
  14   * This program is distributed in the hope that it will be useful,
  15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17   * GNU General Public License for more details.
  18   *
  19   * You should have received a copy of the GNU General Public License along
  20   * with this program; if not, write to the Free Software Foundation, Inc.,
  21   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  22   * http://www.gnu.org/copyleft/gpl.html
  23   *
  24   * @file
  25   * @ingroup UtfNormal
  26   */
  27  
  28  /**
  29   * Return UTF-8 sequence for a given Unicode code point.
  30   * May die if fed out of range data.
  31   *
  32   * @param $codepoint Integer:
  33   * @return String
  34   * @public
  35   */
  36  function codepointToUtf8( $codepoint ) {
  37      if ( $codepoint < 0x80 ) {
  38          return chr( $codepoint );
  39      }
  40  
  41      if ( $codepoint < 0x800 ) {
  42          return chr( $codepoint >> 6 & 0x3f | 0xc0 ) .
  43              chr( $codepoint & 0x3f | 0x80 );
  44      }
  45  
  46      if ( $codepoint < 0x10000 ) {
  47          return chr( $codepoint >> 12 & 0x0f | 0xe0 ) .
  48              chr( $codepoint >> 6 & 0x3f | 0x80 ) .
  49              chr( $codepoint & 0x3f | 0x80 );
  50      }
  51  
  52      if ( $codepoint < 0x110000 ) {
  53          return chr( $codepoint >> 18 & 0x07 | 0xf0 ) .
  54              chr( $codepoint >> 12 & 0x3f | 0x80 ) .
  55              chr( $codepoint >> 6 & 0x3f | 0x80 ) .
  56              chr( $codepoint & 0x3f | 0x80 );
  57      }
  58  
  59      echo "Asked for code outside of range ($codepoint)\n";
  60      die( -1 );
  61  }
  62  
  63  /**
  64   * Take a series of space-separated hexadecimal numbers representing
  65   * Unicode code points and return a UTF-8 string composed of those
  66   * characters. Used by UTF-8 data generation and testing routines.
  67   *
  68   * @param $sequence String
  69   * @return String
  70   * @private
  71   */
  72  function hexSequenceToUtf8( $sequence ) {
  73      $utf = '';
  74      foreach ( explode( ' ', $sequence ) as $hex ) {
  75          $n = hexdec( $hex );
  76          $utf .= codepointToUtf8( $n );
  77      }
  78  
  79      return $utf;
  80  }
  81  
  82  /**
  83   * Take a UTF-8 string and return a space-separated series of hex
  84   * numbers representing Unicode code points. For debugging.
  85   *
  86   * @param string $str UTF-8 string.
  87   * @return string
  88   * @private
  89   */
  90  function utf8ToHexSequence( $str ) {
  91      $buf = '';
  92      foreach ( preg_split( '//u', $str, -1, PREG_SPLIT_NO_EMPTY ) as $cp ) {
  93          $buf .= sprintf( '%04x ', utf8ToCodepoint( $cp ) );
  94      }
  95  
  96      return rtrim( $buf );
  97  }
  98  
  99  /**
 100   * Determine the Unicode codepoint of a single-character UTF-8 sequence.
 101   * Does not check for invalid input data.
 102   *
 103   * @param $char String
 104   * @return Integer
 105   * @public
 106   */
 107  function utf8ToCodepoint( $char ) {
 108      # Find the length
 109      $z = ord( $char[0] );
 110      if ( $z & 0x80 ) {
 111          $length = 0;
 112          while ( $z & 0x80 ) {
 113              $length++;
 114              $z <<= 1;
 115          }
 116      } else {
 117          $length = 1;
 118      }
 119  
 120      if ( $length != strlen( $char ) ) {
 121          return false;
 122      }
 123  
 124      if ( $length == 1 ) {
 125          return ord( $char );
 126      }
 127  
 128      # Mask off the length-determining bits and shift back to the original location
 129      $z &= 0xff;
 130      $z >>= $length;
 131  
 132      # Add in the free bits from subsequent bytes
 133      for ( $i = 1; $i < $length; $i++ ) {
 134          $z <<= 6;
 135          $z |= ord( $char[$i] ) & 0x3f;
 136      }
 137  
 138      return $z;
 139  }
 140  
 141  /**
 142   * Escape a string for inclusion in a PHP single-quoted string literal.
 143   *
 144   * @param string $string string to be escaped.
 145   * @return String: escaped string.
 146   * @public
 147   */
 148  function escapeSingleString( $string ) {
 149      return strtr( $string,
 150          array(
 151              '\\' => '\\\\',
 152              '\'' => '\\\''
 153          ) );
 154  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1