[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
#!/usr/bin/env php
<?php
/**
 * Other tests for the unicode normalization module.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup UtfNormal
 */

if ( PHP_SAPI !== 'cli' ) {
	die( "Run me from the command line please.\n" );
}

// From http://unicode.org/Public/UNIDATA/NormalizationTest.txt
$file = "NormalizationTest.txt";

// Anything after this character is a comment
define( 'COMMENT', '#' );

// Semicolons are used to separate the columns
define( 'SEPARATOR', ';' );

$f = fopen( $file, "r" );
if ( $f === false ) {
	// Without the data file nothing can be tested; fail loudly instead of
	// printing "done." after running zero checks.
	fwrite( STDERR, "Unable to open $file for reading.\n" );
	exit( 1 );
}

/**
 * The following section will be used for testing different normalization methods.
 * - Pure PHP
 *    ~ no assertion errors
 *    ~ 6.25 minutes
 * - php_utfnormal.so or intl extension: both are wrappers around
 *   libicu so we list the version of libicu when making the
 *   comparison
 * - libicu Ubuntu 3.8.1-3ubuntu1.1 php 5.2.6-3ubuntu4.5
 *    ~ 2200 assertion errors
 *    ~ 5 seconds
 *    ~ output: http://paste2.org/p/921566
 * - libicu Ubuntu 4.2.1-3 php 5.3.2-1ubuntu4.2
 *    ~ 1384 assertion errors
 *    ~ 15 seconds
 *    ~ output: http://paste2.org/p/921435
 * - libicu Debian 4.4.1-5 php 5.3.2-1ubuntu4.2
 *    ~ no assertion errors
 *    ~ 13 seconds
 * - Tests comparing pure PHP output with libicu output were added
 *   later and slow down the runtime.
 */

require_once './UtfNormal.php';

/**
 * Normalize a string to form C via UtfNormal::toNFC().
 *
 * @param string $c
 * @return string
 */
function normalize_form_c( $c ) {
	return UtfNormal::toNFC( $c );
}

/**
 * Normalize a string to form D via UtfNormal::toNFD().
 *
 * @param string $c
 * @return string
 */
function normalize_form_d( $c ) {
	return UtfNormal::toNFD( $c );
}

/**
 * Normalize a string to form KC via UtfNormal::toNFKC().
 *
 * @param string $c
 * @return string
 */
function normalize_form_kc( $c ) {
	return UtfNormal::toNFKC( $c );
}

/**
 * Normalize a string to form KD via UtfNormal::toNFKD().
 *
 * @param string $c
 * @return string
 */
function normalize_form_kd( $c ) {
	return UtfNormal::toNFKD( $c );
}

/**
 * This set of functions is only useful if you've added a param to the
 * following functions to force pure PHP usage. I decided not to
 * commit that code since it might produce a slowdown in the UTF
 * normalization code just for the sake of these tests. -- hexmode
 *
 * @param string $c
 * @return string
 */
function normalize_form_c_php( $c ) {
	return UtfNormal::toNFC( $c, "php" );
}

/**
 * Form D counterpart of normalize_form_c_php().
 *
 * @param string $c
 * @return string
 */
function normalize_form_d_php( $c ) {
	return UtfNormal::toNFD( $c, "php" );
}

/**
 * Form KC counterpart of normalize_form_c_php().
 *
 * @param string $c
 * @return string
 */
function normalize_form_kc_php( $c ) {
	return UtfNormal::toNFKC( $c, "php" );
}

/**
 * Form KD counterpart of normalize_form_c_php().
 *
 * @param string $c
 * @return string
 */
function normalize_form_kd_php( $c ) {
	return UtfNormal::toNFKD( $c, "php" );
}

/**
 * Report a failed check for the row currently being processed.
 *
 * Reads the globals $col (the current row; index 5 holds the trailing
 * description text) and $lineNo (the current input line number).
 *
 * @param string $file Unused; kept for the assert-callback signature
 * @param int $line Unused; kept for the assert-callback signature
 * @param string $code Text of the expression that did not hold
 */
function my_assert( $file, $line, $code ) {
	// @codingStandardsIgnoreStart MediaWiki.NamingConventions.ValidGlobalName.wgPrefix
	global $col, $lineNo;
	// @codingStandardsIgnoreEnd

	echo "Assertion that '$code' failed on line $lineNo ($col[5])\n";
}

/**
 * Report $code through my_assert() unless $ok is true.
 *
 * This replaces string-form assert(): string assertions are deprecated
 * since PHP 7.2 and are no longer evaluated as code since PHP 8.0, where
 * every such check would pass without testing anything.
 *
 * @param bool $ok Result of the comparison
 * @param string $code Text of the expression, used in the report
 */
function check_assertion( $ok, $code ) {
	if ( !$ok ) {
		my_assert( __FILE__, __LINE__, $code );
	}
}

/**
 * Encode a single code point as a UTF-8 byte sequence.
 * Compare against http://en.wikipedia.org/wiki/UTF-8#Description
 *
 * @param int $c Code point
 * @return string|bool UTF-8 bytes, or false if $c is above 0x10FFFF
 */
function unichr( $c ) {
	if ( $c <= 0x7F ) {
		return chr( $c );
	} elseif ( $c <= 0x7FF ) {
		return chr( 0xC0 | ( $c >> 6 ) ) . chr( 0x80 | ( $c & 0x3F ) );
	} elseif ( $c <= 0xFFFF ) {
		return chr( 0xE0 | ( $c >> 12 ) ) . chr( 0x80 | ( ( $c >> 6 ) & 0x3F ) )
			. chr( 0x80 | ( $c & 0x3F ) );
	} elseif ( $c <= 0x10FFFF ) {
		return chr( 0xF0 | ( $c >> 18 ) ) . chr( 0x80 | ( ( $c >> 12 ) & 0x3F ) )
			. chr( 0x80 | ( ( $c >> 6 ) & 0x3F ) )
			. chr( 0x80 | ( $c & 0x3F ) );
	} else {
		return false;
	}
}

/**
 * Convert a space-separated list of hexadecimal code points into the
 * concatenation of their unichr() encodings.
 *
 * @param string $c For example "0044 0307"
 * @return string
 */
function unistr( $c ) {
	return implode( "", array_map( "unichr", array_map( "hexdec", explode( " ", $c ) ) ) );
}

/**
 * Read and parse the next line of the test data file.
 *
 * @param resource $f Open file handle
 * @return array|bool false at end of file. A one-element array holding the
 *  raw line when the line starts with COMMENT or with "@". Otherwise the
 *  decoded non-empty columns followed by one trailing string: the text
 *  found after the COMMENT character (see below), or "" if there is none.
 */
function getRow( $f ) {
	$row = fgets( $f );
	if ( $row === false ) {
		return false;
	}
	$row = rtrim( $row );
	$pos = strpos( $row, COMMENT );
	$pos2 = strpos( $row, ")" );
	if ( $pos === 0 ) {
		return array( $row );
	}
	// Section markers ("@Part0 # ...") carry no code points. Hand them back
	// verbatim so they are never pushed through hexdec(); the caller skips
	// any row that does not have exactly six entries.
	if ( $row !== "" && $row[0] === '@' ) {
		return array( $row );
	}
	$c = "";

	if ( $pos ) {
		if ( $pos2 ) {
			// Keep what follows the first ")" (skipping it and the next byte)
			$c = substr( $row, $pos2 + 2 );
		} else {
			$c = substr( $row, $pos );
		}
		$row = substr( $row, 0, $pos );
	}

	$ret = array();
	foreach ( explode( SEPARATOR, $row ) as $ent ) {
		if ( trim( $ent ) !== "" ) {
			$ret[] = unistr( $ent );
		}
	}
	$ret[] = $c;

	return $ret;
}

// Conformance rules, using the 1-based column names c1..c5 of the data file.
// Each entry is: normalization form, 0-based index of the column holding the
// expected value, 0-based indexes of the columns whose normalized value must
// equal it.
$expectations = array(
	# c2 == NFC(c1) == NFC(c2) == NFC(c3)
	array( 'C', 1, array( 0, 1, 2 ) ),
	# c4 == NFC(c4) == NFC(c5)
	array( 'C', 3, array( 3, 4 ) ),
	# c3 == NFD(c1) == NFD(c2) == NFD(c3)
	array( 'D', 2, array( 0, 1, 2 ) ),
	# c5 == NFD(c4) == NFD(c5)
	array( 'D', 4, array( 3, 4 ) ),
	# c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
	array( 'KC', 3, array( 0, 1, 2, 3, 4 ) ),
	# c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
	array( 'KD', 4, array( 0, 1, 2, 3, 4 ) ),
);

$count = 0;
$lineNo = 0;
while ( ( $col = getRow( $f ) ) !== false ) {
	$lineNo++;

	// Only rows with five data columns plus the description are test cases
	if ( count( $col ) != 6 ) {
		continue;
	}
	$count++;
	if ( $count % 100 === 0 ) {
		echo "Count: $count\n";
	}

	# verify that the pure PHP version is correct
	$normalized = array();
	foreach ( array( 'C', 'D', 'KD', 'KC' ) as $form ) {
		$viaDefault = 'normalize_form_' . strtolower( $form );
		$viaPhp = $viaDefault . '_php';
		for ( $i = 0; $i < 5; $i++ ) {
			// Same expression text the checks have always reported,
			// e.g. '$NFCc1 === $NFCc1p'
			$name = '$NF' . $form . 'c' . ( $i + 1 );
			$normalized[$form][$i] = $viaDefault( $col[$i] );
			$purePhp = $viaPhp( $col[$i] );
			check_assertion( $normalized[$form][$i] === $purePhp, $name . ' === ' . $name . 'p' );
		}
	}

	# verify the results against the expected columns
	foreach ( $expectations as $rule ) {
		list( $form, $expected, $inputs ) = $rule;
		foreach ( $inputs as $i ) {
			check_assertion(
				$col[$expected] === $normalized[$form][$i],
				'$col[' . $expected . '] === $NF' . $form . 'c' . ( $i + 1 )
			);
		}
	}
}
fclose( $f );
echo "done.\n";
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |