MediaWiki
REL1_24
|
<?php
/**
 * Maintenance script that generates the normalizer data file for Malayalam.
 */

require_once __DIR__ . '/../Maintenance.php';

/**
 * Generates the serialized normalizer data file for Malayalam.
 *
 * The script builds a map from source character sequences to their
 * replacement characters and writes it, serialized, to
 * $IP/serialized/normalize-ml.ser.
 */
class GenerateNormalizerDataMl extends Maintenance {
	public function __construct() {
		parent::__construct();
		$this->mDescription = 'Generate the normalizer data file for Malayalam';
	}

	/**
	 * This script only writes a file; it declares that it needs no database.
	 *
	 * @return int Maintenance::DB_NONE
	 */
	public function getDbType() {
		return Maintenance::DB_NONE;
	}

	/**
	 * Build the replacement table and write it to the serialized data file.
	 *
	 * On a failed write the error is reported and the success message is
	 * not printed.
	 */
	public function execute() {
		// Keys are space-separated hexadecimal code point sequences, values
		// are the hexadecimal code point that replaces the whole sequence.
		$hexPairs = array(
			# From http://unicode.org/versions/Unicode5.1.0/#Malayalam_Chillu_Characters
			'0D23 0D4D 200D' => '0D7A',
			'0D28 0D4D 200D' => '0D7B',
			'0D30 0D4D 200D' => '0D7C',
			'0D32 0D4D 200D' => '0D7D',
			'0D33 0D4D 200D' => '0D7E',

			# From http://permalink.gmane.org/gmane.science.linguistics.wikipedia.technical/46413
			'0D15 0D4D 200D' => '0D7F',
		);

		// hexSequenceToUtf8() is defined outside this file; presumably it
		// turns a hex code point sequence into the matching UTF-8 string.
		$pairs = array();
		foreach ( $hexPairs as $hexSource => $hexDest ) {
			$source = hexSequenceToUtf8( $hexSource );
			$dest = hexSequenceToUtf8( $hexDest );
			$pairs[$source] = $dest;
		}

		global $IP;
		$path = "$IP/serialized/normalize-ml.ser";

		// file_put_contents() returns false on failure; without this check
		// the script would claim success even though nothing was written.
		if ( file_put_contents( $path, serialize( $pairs ) ) === false ) {
			$this->error( "ml: unable to write $path", 1 );

			// Defensive: never fall through to the success message.
			return;
		}

		echo "ml: " . count( $pairs ) . " pairs written.\n";
	}
}

$maintClass = 'GenerateNormalizerDataMl';
require_once RUN_MAINTENANCE_IF_MAIN;