MediaWiki
REL1_19
|
<?php
/**
 * Command-line generator for Utf8Case.php.
 *
 * Reads UnicodeData.txt (opened by relative path), collects the simple
 * uppercase and simple lowercase mapping of every character that declares
 * one, and writes them to Utf8Case.php as two PHP arrays, $wikiUpperChars and
 * $wikiLowerChars, keyed by the UTF-8 encoded source character.
 */

if( php_sapi_name() != 'cli' ) {
	die( "Run me from the command line please.\n" );
}

// NOTE(review): codepointToUtf8() and escapeSingleString() are not defined in
// this file; presumably they come from UtfNormalUtil.php -- confirm.
require_once 'UtfNormalDefines.php';
require_once 'UtfNormalUtil.php';

$in = fopen("UnicodeData.txt", "rt" );
if( !$in ) {
	print "Can't open UnicodeData.txt for reading.\n";
	print "If necessary, fetch this file from the internet:\n";
	print "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
	exit(-1);
}
$wikiUpperChars = array();
$wikiLowerChars = array();

print "Reading character definitions...\n";
while( false !== ($line = fgets( $in ) ) ) {
	$columns = explode( ';', $line );

	// Blank or truncated lines do not carry the mapping fields; reading
	// offsets 12 and 13 from them would raise undefined-offset notices.
	if( count( $columns ) < 14 ) {
		continue;
	}

	$codepoint = $columns[0];
	// trim() drops the line terminator fgets() leaves on the last field of a
	// line, should a mapping field ever be that last field.
	$simpleUpper = trim( $columns[12] );
	$simpleLower = trim( $columns[13] );

	$source = codepointToUtf8( hexdec( $codepoint ) );
	if( $simpleUpper !== '' ) {
		$wikiUpperChars[$source] = codepointToUtf8( hexdec( $simpleUpper ) );
	}
	if( $simpleLower !== '' ) {
		$wikiLowerChars[$source] = codepointToUtf8( hexdec( $simpleLower ) );
	}
}
fclose( $in );

$out = fopen("Utf8Case.php", "wt");
if( $out ) {
	$outUpperChars = escapeArray( $wikiUpperChars );
	$outLowerChars = escapeArray( $wikiLowerChars );
	// Template of the generated file: the two array literals are interpolated,
	// while the escaped \$ keeps the variable names literal in the output.
	$outdata = "<" . "?php
\$wikiUpperChars = $outUpperChars;

\$wikiLowerChars = $outLowerChars;\n";
	fputs( $out, $outdata );
	fclose( $out );
	print "Wrote out Utf8Case.php\n";
} else {
	print "Can't create file Utf8Case.php\n";
	exit(-1);
}


/**
 * Render an associative array as PHP source for an array() literal.
 *
 * Each key/value pair becomes one line produced by escapeLine(); the lines
 * are joined with ",\n" and wrapped in "array(\n" ... "\n)".
 *
 * @param $arr Array of key => value pairs to render
 * @return String PHP source text of the array literal
 */
function escapeArray( $arr ) {
	return "array(\n" .
		implode( ",\n",
			array_map( "escapeLine",
				array_keys( $arr ),
				array_values( $arr ) ) ) .
		"\n)";
}

/**
 * Render one key/value pair as a tab-indented line of an array() literal.
 *
 * Both sides are passed through escapeSingleString() and then wrapped in
 * single quotes.
 *
 * @param $key String array key
 * @param $val String array value
 * @return String text of the form "\t'key' => 'val'"
 */
function escapeLine( $key, $val ) {
	$encKey = escapeSingleString( $key );
	$encVal = escapeSingleString( $val );
	return "\t'$encKey' => '$encVal'";
}