MediaWiki
REL1_24
|
<?php
/**
 * Command-line conformance test for the UtfNormal normalizer.
 *
 * Phase 1 reads NormalizationTest.txt and runs every test line through
 * toNFC(), toNFD(), toNFKC(), toNFKD() and cleanUp(), comparing each result
 * with the expected column of that line.
 *
 * Phase 2 reads UnicodeData.txt and, for every listed character that was not
 * already used as the source column of a phase 1 test, checks that all five
 * operations return it unchanged.
 *
 * Exits with status 0 if every reported part passed, -1 otherwise.
 * Passing --icu on the command line loads php_utfnormal.so before the tests.
 */

if ( PHP_SAPI !== 'cli' ) {
	die( "Run me from the command line please.\n" );
}

$verbose = true;
# Uncomment to print raw UTF-8 bytes as hex instead of using utf8ToHexSequence().
#define( 'PRETTY_UTF8', true );

# These definitions are conditional, so they must be executed before the
# first call to pretty(); keep them at the top of the script.
if ( defined( 'PRETTY_UTF8' ) ) {
	/**
	 * Render a string as the upper-case hex dump of its bytes.
	 *
	 * @param string $string
	 * @return string
	 */
	function pretty( $string ) {
		return strtoupper( bin2hex( $string ) );
	}
} else {
	/**
	 * Render a string via utf8ToHexSequence() (from UtfNormalUtil.php),
	 * upper-cased.
	 *
	 * @param string $string
	 * @return string
	 */
	function pretty( $string ) {
		return strtoupper( utf8ToHexSequence( $string ) );
	}
}

if ( isset( $_SERVER['argv'] ) && in_array( '--icu', $_SERVER['argv'], true ) ) {
	dl( 'php_utfnormal.so' );
}

require_once 'UtfNormalDefines.php';
require_once 'UtfNormalUtil.php';
require_once 'UtfNormal.php';

$in = fopen( "NormalizationTest.txt", "rt" );
if ( !$in ) {
	print "Couldn't open NormalizationTest.txt -- can't run tests.\n";
	print "If necessary, manually download this file. It can be obtained at\n";
	print "http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n";
	exit( -1 );
}

$normalizer = new UtfNormal;

$total = 0;
$success = 0;
$failure = 0;
$ok = true;
# Source sequences already exercised in phase 1, used as array keys.
$testedChars = array();

while ( false !== ( $line = fgets( $in ) ) ) {
	# Pad so that a line without '#' yields an empty comment rather than an
	# undefined offset.
	list( $data, $comment ) = array_pad( explode( '#', $line ), 2, '' );
	if ( trim( $data ) === '' ) {
		# Comment-only or blank line: nothing to test.
		continue;
	}

	$matches = array();
	if ( preg_match( '/@Part([\d])/', $data, $matches ) ) {
		# A new part begins; report the part that just finished. Part 0 has
		# no predecessor, so nothing is reported before it.
		if ( $matches[1] > 0 ) {
			$ok = reportResults( $total, $success, $failure ) && $ok;
		}
		print "Part {$matches[1]}: $comment";
		continue;
	}

	$columns = array_map( "hexSequenceToUtf8", explode( ";", $data ) );
	# Shift the columns to 1-based indexes (c1..c5) as used by the test helpers.
	array_unshift( $columns, '' );

	$testedChars[$columns[1]] = true;
	$total++;
	if ( testNormals( $normalizer, $columns, $comment, $verbose ) ) {
		$success++;
	} else {
		$failure++;
	}
	if ( $total % 100 == 0 ) {
		print "$total ";
	}
}
fclose( $in );

$ok = reportResults( $total, $success, $failure ) && $ok;

$in = fopen( "UnicodeData.txt", "rt" );
if ( !$in ) {
	print "Can't open UnicodeData.txt for reading.\n";
	print "If necessary, fetch this file from the internet:\n";
	print "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
	exit( -1 );
}
print "Now testing invariants...\n";

while ( false !== ( $line = fgets( $in ) ) ) {
	$cols = explode( ';', $line );
	if ( count( $cols ) < 2 ) {
		# Blank or malformed line: no code point / name pair to test.
		continue;
	}
	$char = codepointToUtf8( hexdec( $cols[0] ) );
	$desc = $cols[0] . ": " . $cols[1];
	if ( $char < "\x20"
		|| ( $char >= UTF8_SURROGATE_FIRST && $char <= UTF8_SURROGATE_LAST )
	) {
		# Can't check NULL with the ICU plugin, as null bytes fail in C land.
		# Skip other control characters, as we strip them for XML safety.
		# Surrogates are illegal on their own or in UTF-8, ignore.
		continue;
	}
	if ( empty( $testedChars[$char] ) ) {
		$total++;
		if ( testInvariant( $normalizer, $char, $desc, $verbose ) ) {
			$success++;
		} else {
			$failure++;
		}
		if ( $total % 100 == 0 ) {
			print "$total ";
		}
	}
}
fclose( $in );

$ok = reportResults( $total, $success, $failure ) && $ok;

if ( $ok ) {
	print "TEST SUCCEEDED!\n";
	exit( 0 );
} else {
	print "TEST FAILED!\n";
	exit( -1 );
}

## ------

/**
 * Print the success/failure tally for the current part and reset the counters.
 *
 * All three counters are passed by reference and are set back to 0 before
 * returning, so the caller can start counting the next part immediately.
 *
 * @param int &$total Number of tests run in this part
 * @param int &$success Number of tests that passed
 * @param int &$failure Number of tests that failed
 * @return bool True only if at least one test passed and none failed
 */
function reportResults( &$total, &$success, &$failure ) {
	# Guard against an empty part: dividing by zero raises a warning on older
	# PHP versions and throws DivisionByZeroError on PHP 8.
	if ( $total > 0 ) {
		$percSucc = intval( $success * 100 / $total );
		$percFail = intval( $failure * 100 / $total );
	} else {
		$percSucc = 0;
		$percFail = 0;
	}
	print "\n";
	print "$success tests successful ($percSucc%)\n";
	print "$failure tests failed ($percFail%)\n\n";
	$ok = ( $success > 0 && $failure == 0 );
	$total = 0;
	$success = 0;
	$failure = 0;

	return $ok;
}

/**
 * Run all five normalization checks on one NormalizationTest.txt line.
 *
 * The checks first run silently. If any of them failed and $verbose is set,
 * the line's comment is printed and the checks are run a second time with
 * per-comparison reporting switched on.
 *
 * @param UtfNormal &$u Normalizer under test
 * @param array $c Test columns, indexed 1..5 (index 0 is a placeholder)
 * @param string $comment Comment text of the test line
 * @param bool $verbose Whether to print details for a failing line
 * @param bool $reportFailure Internal: true on the reporting re-run
 * @return bool True if every comparison matched
 */
function testNormals( &$u, $c, $comment, $verbose, $reportFailure = false ) {
	# Each call is placed before "&& $result" so that it always executes,
	# even after an earlier check has failed.
	$result = testNFC( $u, $c, $comment, $reportFailure );
	$result = testNFD( $u, $c, $comment, $reportFailure ) && $result;
	$result = testNFKC( $u, $c, $comment, $reportFailure ) && $result;
	$result = testNFKD( $u, $c, $comment, $reportFailure ) && $result;
	$result = testCleanUp( $u, $c, $comment, $reportFailure ) && $result;

	if ( $verbose && !$result && !$reportFailure ) {
		print $comment;
		testNormals( $u, $c, $comment, $verbose, true );
	}

	return $result;
}

/**
 * Compare an expected and an actual string, optionally printing the outcome.
 *
 * @param string $a Expected value
 * @param string $b Actual value
 * @param int $col Number of the input column, used in the printed line
 * @param string $form Name of the operation, used in the printed line
 * @param bool $verbose Whether to print the comparison
 * @return bool True if strcmp() reports the two strings as equal
 */
function verbosify( $a, $b, $col, $form, $verbose ) {
	$result = ( strcmp( $a, $b ) == 0 );
	if ( $verbose ) {
		$aa = pretty( $a );
		$bb = pretty( $b );
		$ok = $result ? "succeed" : " failed";
		$eq = $result ? "==" : "!=";
		print " $ok $form c$col '$aa' $eq '$bb'\n";
	}

	return $result;
}

/**
 * Check toNFC(): c1, c2 and c3 must yield c2; c4 and c5 must yield c4.
 *
 * @param UtfNormal &$u Normalizer under test
 * @param array $c Test columns, indexed 1..5
 * @param string $comment Unused here; kept for a uniform signature
 * @param bool $verbose Whether to print each comparison
 * @return bool True if all five comparisons matched
 */
function testNFC( &$u, $c, $comment, $verbose ) {
	$result = verbosify( $c[2], $u->toNFC( $c[1] ), 1, 'NFC', $verbose );
	$result = verbosify( $c[2], $u->toNFC( $c[2] ), 2, 'NFC', $verbose ) && $result;
	$result = verbosify( $c[2], $u->toNFC( $c[3] ), 3, 'NFC', $verbose ) && $result;
	$result = verbosify( $c[4], $u->toNFC( $c[4] ), 4, 'NFC', $verbose ) && $result;
	$result = verbosify( $c[4], $u->toNFC( $c[5] ), 5, 'NFC', $verbose ) && $result;

	return $result;
}

/**
 * Check cleanUp() against the same expectations as toNFC():
 * c1, c2 and c3 must yield c2; c4 and c5 must yield c4.
 *
 * Each input is copied into a local variable before the call, so the
 * entries of $c are never handed to cleanUp() directly.
 *
 * @param UtfNormal &$u Normalizer under test
 * @param array $c Test columns, indexed 1..5
 * @param string $comment Unused here; kept for a uniform signature
 * @param bool $verbose Whether to print each comparison
 * @return bool True if all five comparisons matched
 */
function testCleanUp( &$u, $c, $comment, $verbose ) {
	$x = $c[1];
	$result = verbosify( $c[2], $u->cleanUp( $x ), 1, 'cleanUp', $verbose );
	$x = $c[2];
	$result = verbosify( $c[2], $u->cleanUp( $x ), 2, 'cleanUp', $verbose ) && $result;
	$x = $c[3];
	$result = verbosify( $c[2], $u->cleanUp( $x ), 3, 'cleanUp', $verbose ) && $result;
	$x = $c[4];
	$result = verbosify( $c[4], $u->cleanUp( $x ), 4, 'cleanUp', $verbose ) && $result;
	$x = $c[5];
	$result = verbosify( $c[4], $u->cleanUp( $x ), 5, 'cleanUp', $verbose ) && $result;

	return $result;
}

/**
 * Check toNFD(): c1, c2 and c3 must yield c3; c4 and c5 must yield c5.
 *
 * @param UtfNormal &$u Normalizer under test
 * @param array $c Test columns, indexed 1..5
 * @param string $comment Unused here; kept for a uniform signature
 * @param bool $verbose Whether to print each comparison
 * @return bool True if all five comparisons matched
 */
function testNFD( &$u, $c, $comment, $verbose ) {
	$result = verbosify( $c[3], $u->toNFD( $c[1] ), 1, 'NFD', $verbose );
	$result = verbosify( $c[3], $u->toNFD( $c[2] ), 2, 'NFD', $verbose ) && $result;
	$result = verbosify( $c[3], $u->toNFD( $c[3] ), 3, 'NFD', $verbose ) && $result;
	$result = verbosify( $c[5], $u->toNFD( $c[4] ), 4, 'NFD', $verbose ) && $result;
	$result = verbosify( $c[5], $u->toNFD( $c[5] ), 5, 'NFD', $verbose ) && $result;

	return $result;
}

/**
 * Check toNFKC(): every one of c1..c5 must yield c4.
 *
 * @param UtfNormal &$u Normalizer under test
 * @param array $c Test columns, indexed 1..5
 * @param string $comment Unused here; kept for a uniform signature
 * @param bool $verbose Whether to print each comparison
 * @return bool True if all five comparisons matched
 */
function testNFKC( &$u, $c, $comment, $verbose ) {
	$result = verbosify( $c[4], $u->toNFKC( $c[1] ), 1, 'NFKC', $verbose );
	$result = verbosify( $c[4], $u->toNFKC( $c[2] ), 2, 'NFKC', $verbose ) && $result;
	$result = verbosify( $c[4], $u->toNFKC( $c[3] ), 3, 'NFKC', $verbose ) && $result;
	$result = verbosify( $c[4], $u->toNFKC( $c[4] ), 4, 'NFKC', $verbose ) && $result;
	$result = verbosify( $c[4], $u->toNFKC( $c[5] ), 5, 'NFKC', $verbose ) && $result;

	return $result;
}

/**
 * Check toNFKD(): every one of c1..c5 must yield c5.
 *
 * @param UtfNormal &$u Normalizer under test
 * @param array $c Test columns, indexed 1..5
 * @param string $comment Unused here; kept for a uniform signature
 * @param bool $verbose Whether to print each comparison
 * @return bool True if all five comparisons matched
 */
function testNFKD( &$u, $c, $comment, $verbose ) {
	$result = verbosify( $c[5], $u->toNFKD( $c[1] ), 1, 'NFKD', $verbose );
	$result = verbosify( $c[5], $u->toNFKD( $c[2] ), 2, 'NFKD', $verbose ) && $result;
	$result = verbosify( $c[5], $u->toNFKD( $c[3] ), 3, 'NFKD', $verbose ) && $result;
	$result = verbosify( $c[5], $u->toNFKD( $c[4] ), 4, 'NFKD', $verbose ) && $result;
	$result = verbosify( $c[5], $u->toNFKD( $c[5] ), 5, 'NFKD', $verbose ) && $result;

	return $result;
}

/**
 * Check that a character is returned unchanged by toNFC(), toNFD(),
 * toNFKC(), toNFKD() and cleanUp().
 *
 * As in testNormals(), the checks first run silently; on failure with
 * $verbose set, $desc is printed and the checks are repeated with
 * per-comparison reporting switched on.
 *
 * @param UtfNormal &$u Normalizer under test
 * @param string $char The character to test, as a UTF-8 string
 * @param string $desc Description printed when the character fails
 * @param bool $verbose Whether to print details for a failing character
 * @param bool $reportFailure Internal: true on the reporting re-run
 * @return bool True if all five operations returned $char unchanged
 */
function testInvariant( &$u, $char, $desc, $verbose, $reportFailure = false ) {
	$result = verbosify( $char, $u->toNFC( $char ), 1, 'NFC', $reportFailure );
	$result = verbosify( $char, $u->toNFD( $char ), 1, 'NFD', $reportFailure ) && $result;
	$result = verbosify( $char, $u->toNFKC( $char ), 1, 'NFKC', $reportFailure ) && $result;
	$result = verbosify( $char, $u->toNFKD( $char ), 1, 'NFKD', $reportFailure ) && $result;
	$result = verbosify( $char, $u->cleanUp( $char ), 1, 'cleanUp', $reportFailure ) && $result;

	if ( $verbose && !$result && !$reportFailure ) {
		print $desc;
		testInvariant( $u, $char, $desc, $verbose, true );
	}

	return $result;
}