MediaWiki
REL1_24
|
#!/usr/bin/env php
<?php
/**
 * Command-line conformance check for UtfNormal.
 *
 * Reads NormalizationTest.txt from the current directory, and for every data
 * row (five code point columns plus a trailing comment):
 *  - compares each normalize_form_*() result with its normalize_form_*_php()
 *    counterpart, and
 *  - checks the column invariants listed next to $invariants below.
 *
 * Mismatches are reported through my_assert(); the script keeps going after a
 * mismatch so that all failures are listed in one run.
 */

if ( PHP_SAPI !== 'cli' ) {
	die( "Run me from the command line please.\n" );
}

// From http://unicode.org/Public/UNIDATA/NormalizationTest.txt
$file = "NormalizationTest.txt";

// Anything after this character is a comment
define( 'COMMENT', '#' );

// Semicolons are used to separate the columns
define( 'SEPARATOR', ';' );

$f = fopen( $file, "r" );

require_once './UtfNormal.php';

/**
 * @param string $c
 * @return string Result of UtfNormal::toNFC()
 */
function normalize_form_c( $c ) {
	return UtfNormal::toNFC( $c );
}

/**
 * @param string $c
 * @return string Result of UtfNormal::toNFD()
 */
function normalize_form_d( $c ) {
	return UtfNormal::toNFD( $c );
}

/**
 * @param string $c
 * @return string Result of UtfNormal::toNFKC()
 */
function normalize_form_kc( $c ) {
	return UtfNormal::toNFKC( $c );
}

/**
 * @param string $c
 * @return string Result of UtfNormal::toNFKD()
 */
function normalize_form_kd( $c ) {
	return UtfNormal::toNFKD( $c );
}

// The *_php variants pass an extra "php" argument to UtfNormal.
// NOTE(review): presumably this selects the pure PHP implementation; confirm
// against the UtfNormal in use that the second argument is honoured.

/**
 * @param string $c
 * @return string Result of UtfNormal::toNFC() called with the "php" argument
 */
function normalize_form_c_php( $c ) {
	return UtfNormal::toNFC( $c, "php" );
}

/**
 * @param string $c
 * @return string Result of UtfNormal::toNFD() called with the "php" argument
 */
function normalize_form_d_php( $c ) {
	return UtfNormal::toNFD( $c, "php" );
}

/**
 * @param string $c
 * @return string Result of UtfNormal::toNFKC() called with the "php" argument
 */
function normalize_form_kc_php( $c ) {
	return UtfNormal::toNFKC( $c, "php" );
}

/**
 * @param string $c
 * @return string Result of UtfNormal::toNFKD() called with the "php" argument
 */
function normalize_form_kd_php( $c ) {
	return UtfNormal::toNFKD( $c, "php" );
}

/**
 * Report a failed check.
 *
 * Reads the current row and its line number from the globals set by the main
 * loop; $col[5] is the trailing comment column of the row being checked.
 *
 * @param string $file Unused; kept for the assert-callback style signature
 * @param int $line Unused; kept for the assert-callback style signature
 * @param string $code Text describing the comparison that failed
 */
function my_assert( $file, $line, $code ) {
	// @codingStandardsIgnoreStart MediaWiki.NamingConventions.ValidGlobalName.wgPrefix
	global $col, $lineNo;
	// @codingStandardsIgnoreEnd

	echo "Assertion that '$code' failed on line $lineNo ($col[5])\n";
}

/**
 * Compare two values strictly and report a mismatch through my_assert().
 *
 * This replaces string-argument assert() calls: those are evaluated only by
 * PHP < 8 and can be switched off by ini settings, in which case every check
 * would pass silently.
 *
 * @param string $code Label printed when the values differ
 * @param mixed $expected
 * @param mixed $actual
 */
function check_identical( $code, $expected, $actual ) {
	if ( $expected !== $actual ) {
		my_assert( __FILE__, __LINE__, $code );
	}
}

if ( $f === false ) {
	// Without the data file nothing can be verified; do not pretend success.
	fwrite( STDERR, "Could not open $file for reading.\n" );
	exit( 1 );
}

// Form name => native wrapper; the matching "_php" wrapper is derived from it.
$forms = array(
	'NFC' => 'normalize_form_c',
	'NFD' => 'normalize_form_d',
	'NFKD' => 'normalize_form_kd',
	'NFKC' => 'normalize_form_kc',
);

// Each entry: index into $col of the expected value, the form, and the
// 1-based source columns whose normalisation must equal that value.
$invariants = array(
	# c2 == NFC(c1) == NFC(c2) == NFC(c3)
	array( 1, 'NFC', array( 1, 2, 3 ) ),
	# c4 == NFC(c4) == NFC(c5)
	array( 3, 'NFC', array( 4, 5 ) ),
	# c3 == NFD(c1) == NFD(c2) == NFD(c3)
	array( 2, 'NFD', array( 1, 2, 3 ) ),
	# c5 == NFD(c4) == NFD(c5)
	array( 4, 'NFD', array( 4, 5 ) ),
	# c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
	array( 3, 'NFKC', array( 1, 2, 3, 4, 5 ) ),
	# c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
	array( 4, 'NFKD', array( 1, 2, 3, 4, 5 ) ),
);

$count = 0;
$lineNo = 0;
while ( ( $col = getRow( $f ) ) !== false ) {
	$lineNo++;

	// Only rows with five data columns plus the comment are test cases.
	if ( count( $col ) != 6 ) {
		continue;
	}

	$count++;
	if ( $count % 100 === 0 ) {
		echo "Count: $count\n";
	}

	# verify that the pure PHP version is correct
	$norm = array();
	foreach ( $forms as $form => $func ) {
		$funcPhp = $func . '_php';
		for ( $n = 1; $n <= 5; $n++ ) {
			$norm[$form][$n] = $func( $col[$n - 1] );
			$viaPhp = $funcPhp( $col[$n - 1] );
			check_identical(
				'$' . $form . 'c' . $n . ' === $' . $form . 'c' . $n . 'p',
				$norm[$form][$n],
				$viaPhp
			);
		}
	}

	foreach ( $invariants as $rule ) {
		list( $expectedIdx, $form, $sources ) = $rule;
		foreach ( $sources as $n ) {
			check_identical(
				'$col[' . $expectedIdx . '] === $' . $form . 'c' . $n,
				$col[$expectedIdx],
				$norm[$form][$n]
			);
		}
	}
}
fclose( $f );
echo "done.\n";

// Compare against
// http://en.wikipedia.org/wiki/UTF-8#Description

/**
 * Encode one code point as a UTF-8 byte sequence.
 *
 * @param int $c Code point value
 * @return string|bool One to four bytes, or false if $c is above 0x10FFFF
 */
function unichr( $c ) {
	if ( $c <= 0x7F ) {
		return chr( $c );
	} elseif ( $c <= 0x7FF ) {
		return chr( 0xC0 | ( $c >> 6 ) ) . chr( 0x80 | ( $c & 0x3F ) );
	} elseif ( $c <= 0xFFFF ) {
		return chr( 0xE0 | ( $c >> 12 ) ) . chr( 0x80 | ( ( $c >> 6 ) & 0x3F ) )
			. chr( 0x80 | ( $c & 0x3F ) );
	} elseif ( $c <= 0x10FFFF ) {
		return chr( 0xF0 | ( $c >> 18 ) ) . chr( 0x80 | ( ( $c >> 12 ) & 0x3F ) )
			. chr( 0x80 | ( ( $c >> 6 ) & 0x3F ) )
			. chr( 0x80 | ( $c & 0x3F ) );
	} else {
		return false;
	}
}

/**
 * Convert a whitespace-separated list of hexadecimal code points to UTF-8.
 *
 * Leading, trailing and repeated whitespace is ignored, so empty tokens are
 * never turned into NUL bytes by hexdec( "" ).
 *
 * @param string $c For example "0044 0307"
 * @return string Concatenation of unichr() for each listed code point
 */
function unistr( $c ) {
	$codes = preg_split( '/\s+/', trim( $c ), -1, PREG_SPLIT_NO_EMPTY );

	return implode( "", array_map( "unichr", array_map( "hexdec", $codes ) ) );
}

/**
 * Read and parse the next line of the test data file.
 *
 * Uses the COMMENT and SEPARATOR constants defined by the main script.
 *
 * @param resource $f Open file handle
 * @return array|bool false at end of file; array( $line ) for a line that
 *  starts with COMMENT; otherwise the non-blank columns converted with
 *  unistr(), followed by the trailing comment text ("" when there is none)
 */
function getRow( $f ) {
	$row = fgets( $f );
	if ( $row === false ) {
		return false;
	}
	$row = rtrim( $row );
	$pos = strpos( $row, COMMENT );
	if ( $pos === 0 ) {
		return array( $row );
	}
	$c = "";

	if ( $pos ) {
		// The comment looks like "# (samples; ) DESCRIPTION". The samples can
		// contain ")" themselves, so take the last ")" after the comment
		// marker rather than the first one in the row.
		$pos2 = strrpos( $row, ")" );
		if ( $pos2 !== false && $pos2 > $pos ) {
			// Skip the ")" and the space after it; the cast keeps the result
			// a string when nothing follows.
			$c = (string)substr( $row, $pos2 + 2 );
		} else {
			$c = substr( $row, $pos );
		}
		$row = substr( $row, 0, $pos );
	}

	$ret = array();
	foreach ( explode( SEPARATOR, $row ) as $ent ) {
		if ( trim( $ent ) !== "" ) {
			$ret[] = unistr( $ent );
		}
	}
	$ret[] = $c;

	return $ret;
}