[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * This script generates UtfNormalData.inc and UtfNormalDataK.inc from the
 * Unicode Character Database and supplementary files.
 *
 * The following input files are read from the current working directory:
 *  - DerivedNormalizationProps.txt (only the NFC_QC entries are used)
 *  - CompositionExclusions.txt
 *  - UnicodeData.txt
 *
 * Copyright (C) 2004 Brion Vibber <[email protected]>
 * https://www.mediawiki.org/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup UtfNormal
 */

if ( PHP_SAPI != 'cli' ) {
	die( "Run me from the command line please.\n" );
}

require_once 'UtfNormalDefines.php';
require_once 'UtfNormalUtil.php';

// --- Step 1: NFC quick-check table -----------------------------------------

$in = openUnicodeDataFile( "DerivedNormalizationProps.txt" );
print "Initializing normalization quick check tables...\n";
$checkNFC = array();
while ( false !== ( $line = fgets( $in ) ) ) {
	$matches = array();
	// Matches "XXXX ; NFC_QC ; V" and "XXXX..YYYY ; NFC_QC ; V" where V is M or N.
	// The range separator is a literal "..", so the dots must be escaped.
	if ( !preg_match(
		'/^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*NFC_QC\s*;\s*([MN])/',
		$line,
		$matches )
	) {
		continue;
	}

	$first = $matches[1];
	// The optional range end is reported as '' when the line names a single codepoint.
	$last = ( $matches[2] === '' ) ? $first : $matches[2];
	$value = $matches[3];

	$lastInDecimal = hexdec( $last );
	for ( $i = hexdec( $first ); $i <= $lastInDecimal; $i++ ) {
		$checkNFC[codepointToUtf8( $i )] = $value;
	}
}
fclose( $in );

// --- Step 2: composition exclusions -----------------------------------------

$in = openUnicodeDataFile( "CompositionExclusions.txt" );
$exclude = array();
while ( false !== ( $line = fgets( $in ) ) ) {
	// Only lines that start with a hex codepoint are entries; everything else is skipped.
	if ( preg_match( '/^([0-9A-F]+)/i', $line, $matches ) ) {
		$exclude[codepointToUtf8( hexdec( $matches[1] ) )] = true;
	}
}
fclose( $in );

// --- Step 3: combining classes and decomposition mappings -------------------

$in = openUnicodeDataFile( "UnicodeData.txt" );

$compatibilityDecomp = array();
$canonicalDecomp = array();
$canonicalComp = array();
$combiningClass = array();
$total = 0;
$compat = 0;
$canon = 0;

print "Reading character definitions...\n";
while ( false !== ( $line = fgets( $in ) ) ) {
	$columns = explode( ';', $line );
	if ( count( $columns ) < 6 ) {
		// Blank or truncated line: columns 3 and 5 are not available, and
		// processing it would record a bogus mapping for U+0000.
		continue;
	}

	$codepoint = $columns[0];
	$canonicalCombiningClass = $columns[3];
	$decompositionMapping = $columns[5];

	$source = codepointToUtf8( hexdec( $codepoint ) );

	// Only non-zero combining classes are stored.
	if ( intval( $canonicalCombiningClass ) !== 0 ) {
		$combiningClass[$source] = intval( $canonicalCombiningClass );
	}

	if ( $decompositionMapping === '' ) {
		continue;
	}

	if ( preg_match( '/^<(.+)> (.*)$/', $decompositionMapping, $matches ) ) {
		// A leading "<tag>" marks a compatibility decomposition; keep only the mapping.
		$canonical = false;
		$decompositionMapping = $matches[2];
		$compat++;
	} else {
		$canonical = true;
		$canon++;
	}
	$total++;
	$dest = hexSequenceToUtf8( $decompositionMapping );

	// Every mapping goes into the compatibility table; canonical ones are
	// additionally recorded in the canonical tables.
	$compatibilityDecomp[$source] = $dest;
	if ( $canonical ) {
		$canonicalDecomp[$source] = $dest;
		if ( empty( $exclude[$source] ) ) {
			$canonicalComp[$dest] = $source;
		}
	}
}
fclose( $in );

// --- Step 4: expand mappings until they no longer change --------------------

print "Recursively expanding canonical mappings...\n";
$pass = 1;
do {
	print "pass $pass\n";
	$changed = 0;
	foreach ( $canonicalDecomp as $source => $dest ) {
		// Each multi-byte sequence (lead byte followed by continuation bytes)
		// is replaced by its own decomposition, if it has one.
		$newDest = preg_replace_callback(
			'/([\xc0-\xff][\x80-\xbf]+)/',
			'callbackCanonical',
			$dest );
		if ( $newDest === $dest ) {
			continue;
		}
		$changed++;
		$canonicalDecomp[$source] = $newDest;
	}
	$pass++;
} while ( $changed > 0 );

print "Recursively expanding compatibility mappings...\n";
$pass = 1;
do {
	print "pass $pass\n";
	$changed = 0;
	foreach ( $compatibilityDecomp as $source => $dest ) {
		$newDest = preg_replace_callback(
			'/([\xc0-\xff][\x80-\xbf]+)/',
			'callbackCompat',
			$dest );
		if ( $newDest === $dest ) {
			continue;
		}
		$changed++;
		$compatibilityDecomp[$source] = $newDest;
	}
	$pass++;
} while ( $changed > 0 );

print "$total decomposition mappings ($canon canonical, $compat compatibility)\n";

// --- Step 5: write the generated data files ---------------------------------

$out = fopen( "UtfNormalData.inc", "wt" );
if ( !$out ) {
	print "Can't create file UtfNormalData.inc\n";
	exit( -1 );
}
$serCombining = escapeSingleString( serialize( $combiningClass ) );
$serComp = escapeSingleString( serialize( $canonicalComp ) );
$serCanon = escapeSingleString( serialize( $canonicalDecomp ) );
$serCheckNFC = escapeSingleString( serialize( $checkNFC ) );
$outdata = "<" . "?php
/**
 * This file was automatically generated -- do not edit!
 * Run UtfNormalGenerate.php to create this file again (make clean && make)
 *
 * @file
 */
// @codingStandardsIgnoreFile

UtfNormal::\$utfCombiningClass = unserialize( '$serCombining' );
UtfNormal::\$utfCanonicalComp = unserialize( '$serComp' );
UtfNormal::\$utfCanonicalDecomp = unserialize( '$serCanon' );
UtfNormal::\$utfCheckNFC = unserialize( '$serCheckNFC' );
\n";
// Do not report success if the data could not actually be written.
if ( fputs( $out, $outdata ) === false ) {
	fclose( $out );
	print "Can't write to file UtfNormalData.inc\n";
	exit( -1 );
}
fclose( $out );
print "Wrote out UtfNormalData.inc\n";

$out = fopen( "UtfNormalDataK.inc", "wt" );
if ( !$out ) {
	print "Can't create file UtfNormalDataK.inc\n";
	exit( -1 );
}
$serCompat = escapeSingleString( serialize( $compatibilityDecomp ) );
$outdata = "<" . "?php
/**
 * This file was automatically generated -- do not edit!
 * Run UtfNormalGenerate.php to create this file again (make clean && make)
 *
 * @file
 */
// @codingStandardsIgnoreFile

UtfNormal::\$utfCompatibilityDecomp = unserialize( '$serCompat' );
\n";
if ( fputs( $out, $outdata ) === false ) {
	fclose( $out );
	print "Can't write to file UtfNormalDataK.inc\n";
	exit( -1 );
}
fclose( $out );
print "Wrote out UtfNormalDataK.inc\n";
exit( 0 );

# ---------------

/**
 * Open one of the Unicode data files for reading, or print download
 * instructions and terminate the script if it cannot be opened.
 *
 * @param string $filename File name, relative to the current working directory;
 *   also used to build the download URL shown on failure
 * @return resource Open file handle
 */
function openUnicodeDataFile( $filename ) {
	$handle = fopen( $filename, "rt" );
	if ( !$handle ) {
		print "Can't open $filename for reading.\n";
		print "If necessary, fetch this file from the internet:\n";
		print "http://www.unicode.org/Public/UNIDATA/$filename\n";
		exit( -1 );
	}

	return $handle;
}

/**
 * preg_replace_callback() handler: replace a matched character with its
 * entry in the global canonical decomposition table, if it has one.
 *
 * @param array $matches Regex match; index 1 holds the matched character
 * @return string The decomposition, or the character itself when unmapped
 */
function callbackCanonical( $matches ) {
	// @codingStandardsIgnoreStart MediaWiki.NamingConventions.ValidGlobalName.wgPrefix
	global $canonicalDecomp;
	// @codingStandardsIgnoreEnd

	if ( isset( $canonicalDecomp[$matches[1]] ) ) {
		return $canonicalDecomp[$matches[1]];
	}

	return $matches[1];
}

/**
 * preg_replace_callback() handler: replace a matched character with its
 * entry in the global compatibility decomposition table, if it has one.
 *
 * @param array $matches Regex match; index 1 holds the matched character
 * @return string The decomposition, or the character itself when unmapped
 */
function callbackCompat( $matches ) {
	// @codingStandardsIgnoreStart MediaWiki.NamingConventions.ValidGlobalName.wgPrefix
	global $compatibilityDecomp;
	// @codingStandardsIgnoreEnd

	if ( isset( $compatibilityDecomp[$matches[1]] ) ) {
		return $compatibilityDecomp[$matches[1]];
	}

	return $matches[1];
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |