[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/normal/ -> UtfNormalTest.php (source)

   1  <?php
   2  /**
   3   * Implements the conformance test at:
   4   * http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt
   5   *
   6   * Copyright © 2004 Brion Vibber <[email protected]>
   7   * https://www.mediawiki.org/
   8   *
   9   * This program is free software; you can redistribute it and/or modify
  10   * it under the terms of the GNU General Public License as published by
  11   * the Free Software Foundation; either version 2 of the License, or
  12   * (at your option) any later version.
  13   *
  14   * This program is distributed in the hope that it will be useful,
  15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17   * GNU General Public License for more details.
  18   *
  19   * You should have received a copy of the GNU General Public License along
  20   * with this program; if not, write to the Free Software Foundation, Inc.,
  21   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  22   * http://www.gnu.org/copyleft/gpl.html
  23   *
  24   * @file
  25   * @ingroup UtfNormal
  26   */
  27  
  28  if ( PHP_SAPI != 'cli' ) {
  29      die( "Run me from the command line please.\n" );
  30  }
  31  
  32  $verbose = true;
  33  #define( 'PRETTY_UTF8', true );
  34  
  35  if ( defined( 'PRETTY_UTF8' ) ) {
  36  	function pretty( $string ) {
  37          return strtoupper( bin2hex( $string ) );
  38      }
  39  } else {
  40      /**
  41       * @ignore
  42       * @param string $string
  43       * @return string
  44       */
  45  	function pretty( $string ) {
  46          return strtoupper( utf8ToHexSequence( $string ) );
  47      }
  48  }
  49  
  50  if ( isset( $_SERVER['argv'] ) && in_array( '--icu', $_SERVER['argv'] ) ) {
  51      dl( 'php_utfnormal.so' );
  52  }
  53  
  54  require_once  'UtfNormalDefines.php';
  55  require_once  'UtfNormalUtil.php';
  56  require_once  'UtfNormal.php';
  57  
  58  $in = fopen( "NormalizationTest.txt", "rt" );
  59  if ( !$in ) {
  60      print "Couldn't open NormalizationTest.txt -- can't run tests.\n";
  61      print "If necessary, manually download this file. It can be obtained at\n";
  62      print "http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt";
  63      exit( -1 );
  64  }
  65  
  66  $normalizer = new UtfNormal;
  67  
  68  $total = 0;
  69  $success = 0;
  70  $failure = 0;
  71  $ok = true;
  72  $testedChars = array();
  73  
  74  while ( false !== ( $line = fgets( $in ) ) ) {
  75      list( $data, $comment ) = explode( '#', $line );
  76      if ( $data === '' ) continue;
  77      $matches = array();
  78      if ( preg_match( '/@Part([\d])/', $data, $matches ) ) {
  79          if ( $matches[1] > 0 ) {
  80              $ok = reportResults( $total, $success, $failure ) && $ok;
  81          }
  82          print "Part {$matches[1]}: $comment";
  83          continue;
  84      }
  85  
  86      $columns = array_map( "hexSequenceToUtf8", explode( ";", $data ) );
  87      array_unshift( $columns, '' );
  88  
  89      $testedChars[$columns[1]] = true;
  90      $total++;
  91      if ( testNormals( $normalizer, $columns, $comment, $verbose ) ) {
  92          $success++;
  93      } else {
  94          $failure++;
  95          # print "FAILED: $comment";
  96      }
  97      if ( $total % 100 == 0 ) print "$total ";
  98  }
  99  fclose( $in );
 100  
 101  $ok = reportResults( $total, $success, $failure ) && $ok;
 102  
 103  $in = fopen( "UnicodeData.txt", "rt" );
 104  if ( !$in ) {
 105      print "Can't open UnicodeData.txt for reading.\n";
 106      print "If necessary, fetch this file from the internet:\n";
 107      print "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
 108      exit( -1 );
 109  }
 110  print "Now testing invariants...\n";
 111  
 112  while ( false !== ( $line = fgets( $in ) ) ) {
 113      $cols = explode( ';', $line );
 114      $char = codepointToUtf8( hexdec( $cols[0] ) );
 115      $desc = $cols[0] . ": " . $cols[1];
 116      if ( $char < "\x20" || $char >= UTF8_SURROGATE_FIRST && $char <= UTF8_SURROGATE_LAST ) {
 117          # Can't check NULL with the ICU plugin, as null bytes fail in C land.
 118          # Skip other control characters, as we strip them for XML safety.
 119          # Surrogates are illegal on their own or in UTF-8, ignore.
 120          continue;
 121      }
 122      if ( empty( $testedChars[$char] ) ) {
 123          $total++;
 124          if ( testInvariant( $normalizer, $char, $desc, $verbose ) ) {
 125              $success++;
 126          } else {
 127              $failure++;
 128          }
 129          if ( $total % 100 == 0 ) print "$total ";
 130      }
 131  }
 132  fclose( $in );
 133  
 134  $ok = reportResults( $total, $success, $failure ) && $ok;
 135  
 136  if ( $ok ) {
 137      print "TEST SUCCEEDED!\n";
 138      exit( 0 );
 139  } else {
 140      print "TEST FAILED!\n";
 141      exit( -1 );
 142  }
 143  
 144  ## ------
 145  
 146  function reportResults( &$total, &$success, &$failure ) {
 147      $percSucc = intval( $success * 100 / $total );
 148      $percFail = intval( $failure * 100 / $total );
 149      print "\n";
 150      print "$success tests successful ($percSucc%)\n";
 151      print "$failure tests failed ($percFail%)\n\n";
 152      $ok = ( $success > 0 && $failure == 0 );
 153      $total = 0;
 154      $success = 0;
 155      $failure = 0;
 156  
 157      return $ok;
 158  }
 159  
 160  function testNormals( &$u, $c, $comment, $verbose, $reportFailure = false ) {
 161      $result = testNFC( $u, $c, $comment, $reportFailure );
 162      $result = testNFD( $u, $c, $comment, $reportFailure ) && $result;
 163      $result = testNFKC( $u, $c, $comment, $reportFailure ) && $result;
 164      $result = testNFKD( $u, $c, $comment, $reportFailure ) && $result;
 165      $result = testCleanUp( $u, $c, $comment, $reportFailure ) && $result;
 166  
 167      if ( $verbose && !$result && !$reportFailure ) {
 168          print $comment;
 169          testNormals( $u, $c, $comment, $verbose, true );
 170      }
 171  
 172      return $result;
 173  }
 174  
 175  function verbosify( $a, $b, $col, $form, $verbose ) {
 176      #$result = ($a === $b);
 177      $result = ( strcmp( $a, $b ) == 0 );
 178      if ( $verbose ) {
 179          $aa = pretty( $a );
 180          $bb = pretty( $b );
 181          $ok = $result ? "succeed" : " failed";
 182          $eq = $result ? "==" : "!=";
 183          print "  $ok $form c$col '$aa' $eq '$bb'\n";
 184      }
 185  
 186      return $result;
 187  }
 188  
 189  function testNFC( &$u, $c, $comment, $verbose ) {
 190      $result = verbosify( $c[2], $u->toNFC( $c[1] ), 1, 'NFC', $verbose );
 191      $result = verbosify( $c[2], $u->toNFC( $c[2] ), 2, 'NFC', $verbose ) && $result;
 192      $result = verbosify( $c[2], $u->toNFC( $c[3] ), 3, 'NFC', $verbose ) && $result;
 193      $result = verbosify( $c[4], $u->toNFC( $c[4] ), 4, 'NFC', $verbose ) && $result;
 194      $result = verbosify( $c[4], $u->toNFC( $c[5] ), 5, 'NFC', $verbose ) && $result;
 195  
 196      return $result;
 197  }
 198  
 199  function testCleanUp( &$u, $c, $comment, $verbose ) {
 200      $x = $c[1];
 201      $result = verbosify( $c[2], $u->cleanUp( $x ), 1, 'cleanUp', $verbose );
 202      $x = $c[2];
 203      $result = verbosify( $c[2], $u->cleanUp( $x ), 2, 'cleanUp', $verbose ) && $result;
 204      $x = $c[3];
 205      $result = verbosify( $c[2], $u->cleanUp( $x ), 3, 'cleanUp', $verbose ) && $result;
 206      $x = $c[4];
 207      $result = verbosify( $c[4], $u->cleanUp( $x ), 4, 'cleanUp', $verbose ) && $result;
 208      $x = $c[5];
 209      $result = verbosify( $c[4], $u->cleanUp( $x ), 5, 'cleanUp', $verbose ) && $result;
 210  
 211      return $result;
 212  }
 213  
 214  function testNFD( &$u, $c, $comment, $verbose ) {
 215      $result = verbosify( $c[3], $u->toNFD( $c[1] ), 1, 'NFD', $verbose );
 216      $result = verbosify( $c[3], $u->toNFD( $c[2] ), 2, 'NFD', $verbose ) && $result;
 217      $result = verbosify( $c[3], $u->toNFD( $c[3] ), 3, 'NFD', $verbose ) && $result;
 218      $result = verbosify( $c[5], $u->toNFD( $c[4] ), 4, 'NFD', $verbose ) && $result;
 219      $result = verbosify( $c[5], $u->toNFD( $c[5] ), 5, 'NFD', $verbose ) && $result;
 220  
 221      return $result;
 222  }
 223  
 224  function testNFKC( &$u, $c, $comment, $verbose ) {
 225      $result = verbosify( $c[4], $u->toNFKC( $c[1] ), 1, 'NFKC', $verbose );
 226      $result = verbosify( $c[4], $u->toNFKC( $c[2] ), 2, 'NFKC', $verbose ) && $result;
 227      $result = verbosify( $c[4], $u->toNFKC( $c[3] ), 3, 'NFKC', $verbose ) && $result;
 228      $result = verbosify( $c[4], $u->toNFKC( $c[4] ), 4, 'NFKC', $verbose ) && $result;
 229      $result = verbosify( $c[4], $u->toNFKC( $c[5] ), 5, 'NFKC', $verbose ) && $result;
 230  
 231      return $result;
 232  }
 233  
 234  function testNFKD( &$u, $c, $comment, $verbose ) {
 235      $result = verbosify( $c[5], $u->toNFKD( $c[1] ), 1, 'NFKD', $verbose );
 236      $result = verbosify( $c[5], $u->toNFKD( $c[2] ), 2, 'NFKD', $verbose ) && $result;
 237      $result = verbosify( $c[5], $u->toNFKD( $c[3] ), 3, 'NFKD', $verbose ) && $result;
 238      $result = verbosify( $c[5], $u->toNFKD( $c[4] ), 4, 'NFKD', $verbose ) && $result;
 239      $result = verbosify( $c[5], $u->toNFKD( $c[5] ), 5, 'NFKD', $verbose ) && $result;
 240  
 241      return $result;
 242  }
 243  
 244  function testInvariant( &$u, $char, $desc, $verbose, $reportFailure = false ) {
 245      $result = verbosify( $char, $u->toNFC( $char ), 1, 'NFC', $reportFailure );
 246      $result = verbosify( $char, $u->toNFD( $char ), 1, 'NFD', $reportFailure ) && $result;
 247      $result = verbosify( $char, $u->toNFKC( $char ), 1, 'NFKC', $reportFailure ) && $result;
 248      $result = verbosify( $char, $u->toNFKD( $char ), 1, 'NFKD', $reportFailure ) && $result;
 249      $result = verbosify( $char, $u->cleanUp( $char ), 1, 'cleanUp', $reportFailure ) && $result;
 250  
 251      if ( $verbose && !$result && !$reportFailure ) {
 252          print $desc;
 253          testInvariant( $u, $char, $desc, $verbose, true );
 254      }
 255  
 256      return $result;
 257  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1