MediaWiki  REL1_22
Utf8CaseGenerate.php
Go to the documentation of this file.
00001 <?php
// This is a maintenance script: stop immediately with a message unless PHP
// is running under the command-line SAPI.
if ( PHP_SAPI !== 'cli' ) {
    exit( "Run me from the command line please.\n" );
}
00031 
00032 require_once 'UtfNormalDefines.php';
00033 require_once 'UtfNormalUtil.php';
00034 
// Open the Unicode character data file from the current working directory.
// Each line is one record of semicolon-separated fields.
$in = fopen( "UnicodeData.txt", "rt" );
if( !$in ) {
    print "Can't open UnicodeData.txt for reading.\n";
    print "If necessary, fetch this file from the internet:\n";
    print "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
    exit(-1);
}

// Case tables, keyed by the source character as returned by codepointToUtf8()
// and holding the converted simple upper-/lower-case mapping as the value.
$wikiUpperChars = array();
$wikiLowerChars = array();

print "Reading character definitions...\n";
while( false !== ( $line = fgets( $in ) ) ) {
    $columns = explode( ';', $line );

    // A blank or truncated line does not carry the mapping fields; skip it
    // rather than reading undefined offsets 12 and 13 below.
    if( count( $columns ) < 14 ) {
        continue;
    }

    $codepoint = $columns[0];    // hexadecimal code point of the character
    $simpleUpper = $columns[12]; // simple uppercase mapping (hex), may be empty
    $simpleLower = $columns[13]; // simple lowercase mapping (hex), may be empty

    // codepointToUtf8() is defined outside this file; presumably it returns
    // the UTF-8 encoding of the given code point -- confirm in its source.
    $source = codepointToUtf8( hexdec( $codepoint ) );

    // An empty field means the character has no mapping of that kind.
    if( $simpleUpper ) {
        $wikiUpperChars[$source] = codepointToUtf8( hexdec( $simpleUpper ) );
    }
    if( $simpleLower ) {
        $wikiLowerChars[$source] = codepointToUtf8( hexdec( $simpleLower ) );
    }
}
fclose( $in );
00062 
// Write the collected case tables to Utf8Case.php as PHP source that assigns
// them to $wikiUpperChars and $wikiLowerChars. On failure to open the output
// file, report it and exit with status -1.
00063 $out = fopen( "Utf8Case.php", "wt" );
00064 if( $out ) {
// Render each table as the source text of a PHP array( ... ) expression.
00065     $outUpperChars = escapeArray( $wikiUpperChars );
00066     $outLowerChars = escapeArray( $wikiLowerChars );
// The opening tag is assembled from two pieces ("<" . "?php"); presumably so
// the literal tag sequence does not appear inside this script -- confirm.
// NOTE(review): in this listing the template's line counter jumps from 00067
// to 00084 and from 00085 to 00089, so some template lines are not shown here
// (likely header comments for the generated file) -- verify against the
// original script before editing the string.
00067     $outdata = "<" . "?php
00084 \$wikiUpperChars = $outUpperChars;
00085 
00089 \$wikiLowerChars = $outLowerChars;\n";
00090     fputs( $out, $outdata );
00091     fclose( $out );
00092     print "Wrote out Utf8Case.php\n";
00093 } else {
00094     print "Can't create file Utf8Case.php\n";
00095     exit(-1);
00096 }
00097 
00098 
/**
 * Render an associative array as the source text of a PHP array() literal.
 *
 * Every key/value pair becomes one line produced by escapeLine(); the lines
 * are joined with ",\n" and wrapped in "array(\n" ... "\n)".
 *
 * @param array $arr Map to serialise
 * @return string PHP source text for the array
 */
function escapeArray( $arr ) {
    $entries = array();
    foreach ( $arr as $from => $to ) {
        $entries[] = escapeLine( $from, $to );
    }

    $body = implode( ",\n", $entries );

    return "array(\n" . $body . "\n)";
}
00107 
/**
 * Format one key/value pair as a tab-indented `'key' => 'value'` entry.
 *
 * Both sides are passed through escapeSingleString(), which is defined
 * outside this file; presumably it makes the text safe inside a
 * single-quoted PHP string literal -- confirm in its source.
 *
 * @param string $key Array key
 * @param string $val Array value
 * @return string One line of PHP array source, without a trailing comma
 */
function escapeLine( $key, $val ) {
    return sprintf(
        "\t'%s' => '%s'",
        escapeSingleString( $key ),
        escapeSingleString( $val )
    );
}
00112 }