[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/normal/ -> UtfNormalTest2.php (source)

   1  #!/usr/bin/env php
   2  <?php
   3  /**
   4   * Other tests for the unicode normalization module.
   5   *
   6   * This program is free software; you can redistribute it and/or modify
   7   * it under the terms of the GNU General Public License as published by
   8   * the Free Software Foundation; either version 2 of the License, or
   9   * (at your option) any later version.
  10   *
  11   * This program is distributed in the hope that it will be useful,
  12   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14   * GNU General Public License for more details.
  15   *
  16   * You should have received a copy of the GNU General Public License along
  17   * with this program; if not, write to the Free Software Foundation, Inc.,
  18   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  19   * http://www.gnu.org/copyleft/gpl.html
  20   *
  21   * @file
  22   * @ingroup UtfNormal
  23   */
  24  
  25  if ( PHP_SAPI != 'cli' ) {
  26      die( "Run me from the command line please.\n" );
  27  }
  28  
  29  // From http://unicode.org/Public/UNIDATA/NormalizationTest.txt
  30  $file = "NormalizationTest.txt";
  31  
  32  // Anything after this character is a comment
  33  define ( 'COMMENT', '#' );
  34  
  35  // Semicolons are used to separate the columns
  36  define ( 'SEPARATOR', ';' );
  37  
  38  $f = fopen( $file, "r" );
  39  
  40  /**
  41   * The following section will be used for testing different normalization methods.
  42   * - Pure PHP
  43   * ~ no assertion errors
  44   * ~ 6.25 minutes
  45   * - php_utfnormal.so or intl extension: both are wrappers around
  46   * libicu so we list the version of libicu when making the
  47   * comparison
  48   * - libicu Ubuntu 3.8.1-3ubuntu1.1 php 5.2.6-3ubuntu4.5
  49   * ~ 2200 assertion errors
  50   * ~ 5 seconds
  51   * ~ output: http://paste2.org/p/921566
  52   * - libicu Ubuntu 4.2.1-3 php 5.3.2-1ubuntu4.2
  53   * ~ 1384 assertion errors
  54   * ~ 15 seconds
  55   * ~ output: http://paste2.org/p/921435
  56   * - libicu Debian 4.4.1-5 php 5.3.2-1ubuntu4.2
  57   * ~ no assertion errors
  58   * ~ 13 seconds
  59   * - Tests comparing pure PHP output with libicu output were added
  60   * later and slow down the runtime.
  61   */
  62  
  63  require_once  './UtfNormal.php';
  64  function normalize_form_c( $c ) {
  65      return UtfNormal::toNFC( $c );
  66  }
  67  
  68  function normalize_form_d( $c ) {
  69      return UtfNormal::toNFD( $c );
  70  }
  71  
  72  function normalize_form_kc( $c ) {
  73      return UtfNormal::toNFKC( $c );
  74  }
  75  
  76  function normalize_form_kd( $c ) {
  77      return UtfNormal::toNFKD( $c );
  78  }
  79  
  80  /**
  81   * This set of functions is only useful if youve added a param to the
  82   * following functions to force pure PHP usage.  I decided not to
  83   * commit that code since might produce a slowdown in the UTF
  84   * normalization code just for the sake of these tests. -- hexmode
  85   * @return string
  86   */
  87  function normalize_form_c_php( $c ) {
  88      return UtfNormal::toNFC( $c, "php" );
  89  }
  90  
  91  function normalize_form_d_php( $c ) {
  92      return UtfNormal::toNFD( $c, "php" );
  93  }
  94  
  95  function normalize_form_kc_php( $c ) {
  96      return UtfNormal::toNFKC( $c, "php" );
  97  }
  98  
  99  function normalize_form_kd_php( $c ) {
 100      return UtfNormal::toNFKD( $c, "php" );
 101  }
 102  
 103  assert_options( ASSERT_ACTIVE, 1 );
 104  assert_options( ASSERT_WARNING, 0 );
 105  assert_options( ASSERT_QUIET_EVAL, 1 );
 106  assert_options( ASSERT_CALLBACK, 'my_assert' );
 107  
 108  function my_assert( $file, $line, $code ) {
 109      // @codingStandardsIgnoreStart MediaWiki.NamingConventions.ValidGlobalName.wgPrefix
 110      global $col, $lineNo;
 111      // @codingStandardsIgnoreEnd
 112  
 113      echo "Assertion that '$code' failed on line $lineNo ($col[5])\n";
 114  }
 115  
 116  $count = 0;
 117  $lineNo = 0;
 118  if ( $f !== false ) {
 119      while ( ( $col = getRow( $f ) ) !== false ) {
 120          $lineNo++;
 121  
 122          if ( count( $col ) == 6 ) {
 123              $count++;
 124              if ( $count % 100 === 0 ) echo "Count: $count\n";
 125          } else {
 126              continue;
 127          }
 128  
 129          # verify that the pure PHP version is correct
 130          $NFCc1 = normalize_form_c( $col[0] );
 131          $NFCc1p = normalize_form_c_php( $col[0] );
 132          assert( '$NFCc1 === $NFCc1p' );
 133          $NFCc2 = normalize_form_c( $col[1] );
 134          $NFCc2p = normalize_form_c_php( $col[1] );
 135          assert( '$NFCc2 === $NFCc2p' );
 136          $NFCc3 = normalize_form_c( $col[2] );
 137          $NFCc3p = normalize_form_c_php( $col[2] );
 138          assert( '$NFCc3 === $NFCc3p' );
 139          $NFCc4 = normalize_form_c( $col[3] );
 140          $NFCc4p = normalize_form_c_php( $col[3] );
 141          assert( '$NFCc4 === $NFCc4p' );
 142          $NFCc5 = normalize_form_c( $col[4] );
 143          $NFCc5p = normalize_form_c_php( $col[4] );
 144          assert( '$NFCc5 === $NFCc5p' );
 145  
 146          $NFDc1 = normalize_form_d( $col[0] );
 147          $NFDc1p = normalize_form_d_php( $col[0] );
 148          assert( '$NFDc1 === $NFDc1p' );
 149          $NFDc2 = normalize_form_d( $col[1] );
 150          $NFDc2p = normalize_form_d_php( $col[1] );
 151          assert( '$NFDc2 === $NFDc2p' );
 152          $NFDc3 = normalize_form_d( $col[2] );
 153          $NFDc3p = normalize_form_d_php( $col[2] );
 154          assert( '$NFDc3 === $NFDc3p' );
 155          $NFDc4 = normalize_form_d( $col[3] );
 156          $NFDc4p = normalize_form_d_php( $col[3] );
 157          assert( '$NFDc4 === $NFDc4p' );
 158          $NFDc5 = normalize_form_d( $col[4] );
 159          $NFDc5p = normalize_form_d_php( $col[4] );
 160          assert( '$NFDc5 === $NFDc5p' );
 161  
 162          $NFKDc1 = normalize_form_kd( $col[0] );
 163          $NFKDc1p = normalize_form_kd_php( $col[0] );
 164          assert( '$NFKDc1 === $NFKDc1p' );
 165          $NFKDc2 = normalize_form_kd( $col[1] );
 166          $NFKDc2p = normalize_form_kd_php( $col[1] );
 167          assert( '$NFKDc2 === $NFKDc2p' );
 168          $NFKDc3 = normalize_form_kd( $col[2] );
 169          $NFKDc3p = normalize_form_kd_php( $col[2] );
 170          assert( '$NFKDc3 === $NFKDc3p' );
 171          $NFKDc4 = normalize_form_kd( $col[3] );
 172          $NFKDc4p = normalize_form_kd_php( $col[3] );
 173          assert( '$NFKDc4 === $NFKDc4p' );
 174          $NFKDc5 = normalize_form_kd( $col[4] );
 175          $NFKDc5p = normalize_form_kd_php( $col[4] );
 176          assert( '$NFKDc5 === $NFKDc5p' );
 177  
 178          $NFKCc1 = normalize_form_kc( $col[0] );
 179          $NFKCc1p = normalize_form_kc_php( $col[0] );
 180          assert( '$NFKCc1 === $NFKCc1p' );
 181          $NFKCc2 = normalize_form_kc( $col[1] );
 182          $NFKCc2p = normalize_form_kc_php( $col[1] );
 183          assert( '$NFKCc2 === $NFKCc2p' );
 184          $NFKCc3 = normalize_form_kc( $col[2] );
 185          $NFKCc3p = normalize_form_kc_php( $col[2] );
 186          assert( '$NFKCc3 === $NFKCc3p' );
 187          $NFKCc4 = normalize_form_kc( $col[3] );
 188          $NFKCc4p = normalize_form_kc_php( $col[3] );
 189          assert( '$NFKCc4 === $NFKCc4p' );
 190          $NFKCc5 = normalize_form_kc( $col[4] );
 191          $NFKCc5p = normalize_form_kc_php( $col[4] );
 192          assert( '$NFKCc5 === $NFKCc5p' );
 193  
 194          # c2 ==     NFC(c1) ==     NFC(c2) ==     NFC(c3)
 195          assert( '$col[1] === $NFCc1' );
 196          assert( '$col[1] === $NFCc2' );
 197          assert( '$col[1] === $NFCc3' );
 198  
 199          # c4 ==     NFC(c4) ==     NFC(c5)
 200          assert( '$col[3] === $NFCc4' );
 201          assert( '$col[3] === $NFCc5' );
 202  
 203          # c3 ==     NFD(c1) ==     NFD(c2) ==     NFD(c3)
 204          assert( '$col[2] === $NFDc1' );
 205          assert( '$col[2] === $NFDc2' );
 206          assert( '$col[2] === $NFDc3' );
 207  
 208          # c5 ==     NFD(c4) ==     NFD(c5)
 209          assert( '$col[4] === $NFDc4' );
 210          assert( '$col[4] === $NFDc5' );
 211  
 212          # c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
 213          assert( '$col[3] === $NFKCc1' );
 214          assert( '$col[3] === $NFKCc2' );
 215          assert( '$col[3] === $NFKCc3' );
 216          assert( '$col[3] === $NFKCc4' );
 217          assert( '$col[3] === $NFKCc5' );
 218  
 219          # c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
 220          assert( '$col[4] === $NFKDc1' );
 221          assert( '$col[4] === $NFKDc2' );
 222          assert( '$col[4] === $NFKDc3' );
 223          assert( '$col[4] === $NFKDc4' );
 224          assert( '$col[4] === $NFKDc5' );
 225      }
 226  }
 227  echo "done.\n";
 228  
 229  // Compare against http://en.wikipedia.org/wiki/UTF-8#Description
 230  function unichr( $c ) {
 231      if ( $c <= 0x7F ) {
 232          return chr( $c );
 233      } elseif ( $c <= 0x7FF ) {
 234          return chr( 0xC0 | $c >> 6 ) . chr( 0x80 | $c & 0x3F );
 235      } elseif ( $c <= 0xFFFF ) {
 236          return chr( 0xE0 | $c >> 12 ) . chr( 0x80 | $c >> 6 & 0x3F )
 237          . chr( 0x80 | $c & 0x3F );
 238      } elseif ( $c <= 0x10FFFF ) {
 239          return chr( 0xF0 | $c >> 18 ) . chr( 0x80 | $c >> 12 & 0x3F )
 240          . chr( 0x80 | $c >> 6 & 0x3F )
 241          . chr( 0x80 | $c & 0x3F );
 242      } else {
 243          return false;
 244      }
 245  }
 246  
 247  function unistr( $c ) {
 248      return implode( "", array_map( "unichr", array_map( "hexdec", explode( " ", $c ) ) ) );
 249  }
 250  
 251  function getRow( $f ) {
 252      $row = fgets( $f );
 253      if ( $row === false ) return false;
 254      $row = rtrim( $row );
 255      $pos = strpos( $row, COMMENT );
 256      $pos2 = strpos( $row, ")" );
 257      if ( $pos === 0 ) return array( $row );
 258      $c = "";
 259  
 260      if ( $pos ) {
 261          if ( $pos2 ) $c = substr( $row, $pos2 + 2 );
 262          else      $c = substr( $row, $pos );
 263          $row = substr( $row, 0, $pos );
 264      }
 265  
 266      $ret = array();
 267      foreach ( explode( SEPARATOR, $row ) as $ent ) {
 268          if ( trim( $ent ) !== "" ) {
 269              $ret[] = unistr( $ent );
 270          }
 271      }
 272      $ret[] = $c;
 273  
 274      return $ret;
 275  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1