MediaWiki
REL1_24
|
<?php

require_once __DIR__ . '/../Maintenance.php';

/**
 * Maintenance script that builds serialized/Utf8Case.ser from a local copy of
 * the Unicode Character Database file UnicodeData.txt.
 *
 * The generated file contains a serialized array with two maps,
 * 'wikiUpperChars' and 'wikiLowerChars', each keyed by the source character
 * and holding its simple uppercase / lowercase mapping.
 */
class GenerateUtf8Case extends Maintenance {

	public function __construct() {
		parent::__construct();
		$this->mDescription = 'Generate Utf8Case.ser from the Unicode Character Database ' .
			'and supplementary files';
		$this->addOption( 'unicode-data-file', 'The local location of the data file ' .
			'from http://unicode.org/Public/UNIDATA/UnicodeData.txt', false, true );
	}

	/**
	 * @return int Maintenance::DB_NONE (presumably signals that this script
	 *   needs no database connection -- confirm against Maintenance)
	 */
	public function getDbType() {
		return Maintenance::DB_NONE;
	}

	/**
	 * Read the data file, collect the simple upper/lower case mappings and
	 * write them to "$IP/serialized/Utf8Case.ser".
	 *
	 * The data file is taken from --unicode-data-file when given, otherwise
	 * 'UnicodeData.txt' relative to the current working directory. The script
	 * exits with status 1 when the data file cannot be found or opened, or
	 * when the output file cannot be written. Lines with fewer fields than
	 * expected are reported and skipped.
	 */
	public function execute() {
		if ( !$this->hasOption( 'unicode-data-file' ) ) {
			$dataFile = 'UnicodeData.txt';
			if ( !file_exists( $dataFile ) ) {
				$this->error( "Unable to find UnicodeData.txt. Please specify " .
					"its location with --unicode-data-file=<FILE>" );
				exit( 1 );
			}
		} else {
			$dataFile = $this->getOption( 'unicode-data-file' );
			if ( !file_exists( $dataFile ) ) {
				$this->error( 'Unable to find the specified data file.' );
				exit( 1 );
			}
		}

		$file = fopen( $dataFile, 'r' );
		if ( $file === false ) {
			$this->error( 'Unable to open the data file.' );
			exit( 1 );
		}

		// For the file format, see http://www.unicode.org/reports/tr44/
		$fieldNames = array(
			'Code',
			'Name',
			'General_Category',
			'Canonical_Combining_Class',
			'Bidi_Class',
			'Decomposition_Type_Mapping',
			'Numeric_Type_Value_6',
			'Numeric_Type_Value_7',
			'Numeric_Type_Value_8',
			'Bidi_Mirrored',
			'Unicode_1_Name',
			'ISO_Comment',
			'Simple_Uppercase_Mapping',
			'Simple_Lowercase_Mapping',
			'Simple_Titlecase_Mapping'
		);
		$fieldCount = count( $fieldNames );

		$upper = array();
		$lower = array();

		$lineNum = 0;
		while ( ( $line = fgets( $file ) ) !== false ) {
			++$lineNum;

			# Strip comments
			$line = trim( substr( $line, 0, strcspn( $line, '#' ) ) );
			if ( $line === '' ) {
				continue;
			}

			# Split fields
			$numberedData = explode( ';', $line );
			if ( count( $numberedData ) < $fieldCount ) {
				# Indexing a short line would read undefined offsets, so
				# report it and move on instead.
				$this->error( "Skipping malformed line $lineNum of the data file: expected " .
					"$fieldCount fields, found " . count( $numberedData ) . '.' );
				continue;
			}
			# Any fields beyond the known ones are ignored
			$data = array_combine( $fieldNames, array_slice( $numberedData, 0, $fieldCount ) );

			# hexSequenceToUtf8() is not defined in this file; it must be
			# provided by the MediaWiki environment.
			$source = hexSequenceToUtf8( $data['Code'] );
			if ( $data['Simple_Uppercase_Mapping'] !== '' ) {
				$upper[$source] = hexSequenceToUtf8( $data['Simple_Uppercase_Mapping'] );
			}
			if ( $data['Simple_Lowercase_Mapping'] !== '' ) {
				$lower[$source] = hexSequenceToUtf8( $data['Simple_Lowercase_Mapping'] );
			}
		}

		# Done reading; release the handle before writing the output
		fclose( $file );

		global $IP;
		$outputFile = "$IP/serialized/Utf8Case.ser";
		$written = file_put_contents( $outputFile, serialize( array(
			'wikiUpperChars' => $upper,
			'wikiLowerChars' => $lower,
		) ) );
		if ( $written === false ) {
			$this->error( "Unable to write $outputFile." );
			exit( 1 );
		}
	}
}

$maintClass = 'GenerateUtf8Case';
require_once RUN_MAINTENANCE_IF_MAIN;