[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * Generates the normalizer data file for Arabic.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup MaintenanceLanguage
 */

require_once __DIR__ . '/../Maintenance.php';

/**
 * Generates the normalizer data file for Arabic.
 * For NFC see includes/normal.
 *
 * Reads a local copy of UnicodeData.txt, collects the decomposition mapping
 * of every code point in the two Arabic presentation form ranges, and writes
 * the resulting source => destination map, serialized, to
 * $IP/serialized/normalize-ar.ser.
 *
 * @ingroup MaintenanceLanguage
 */
class GenerateNormalizerDataAr extends Maintenance {
	/**
	 * Registers the script description and the optional
	 * --unicode-data-file option (takes an argument, not required).
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = 'Generate the normalizer data file for Arabic';
		$this->addOption( 'unicode-data-file', 'The local location of the data file ' .
			'from http://unicode.org/Public/UNIDATA/UnicodeData.txt', false, true );
	}

	/**
	 * @return int Maintenance::DB_NONE
	 */
	public function getDbType() {
		return Maintenance::DB_NONE;
	}

	/**
	 * Parses the Unicode data file and writes the serialized mapping.
	 *
	 * Exits with status 1 when the data file cannot be found or opened, or
	 * when the output file cannot be written. Lines that are malformed or
	 * whose decomposition field cannot be parsed are reported and skipped.
	 */
	public function execute() {
		if ( !$this->hasOption( 'unicode-data-file' ) ) {
			// Fall back to a copy in the current working directory
			$dataFile = 'UnicodeData.txt';
			if ( !file_exists( $dataFile ) ) {
				$this->error( "Unable to find UnicodeData.txt. Please specify " .
					"its location with --unicode-data-file=<FILE>" );
				exit( 1 );
			}
		} else {
			$dataFile = $this->getOption( 'unicode-data-file' );
			if ( !file_exists( $dataFile ) ) {
				$this->error( 'Unable to find the specified data file.' );
				exit( 1 );
			}
		}

		$file = fopen( $dataFile, 'r' );
		if ( !$file ) {
			$this->error( 'Unable to open the data file.' );
			exit( 1 );
		}

		// For the file format, see http://www.unicode.org/reports/tr44/
		$fieldNames = array(
			'Code',
			'Name',
			'General_Category',
			'Canonical_Combining_Class',
			'Bidi_Class',
			'Decomposition_Type_Mapping',
			'Numeric_Type_Value_6',
			'Numeric_Type_Value_7',
			'Numeric_Type_Value_8',
			'Bidi_Mirrored',
			'Unicode_1_Name',
			'ISO_Comment',
			'Simple_Uppercase_Mapping',
			'Simple_Lowercase_Mapping',
			'Simple_Titlecase_Mapping'
		);
		$fieldCount = count( $fieldNames );

		$pairs = array();

		$lineNum = 0;
		while ( false !== ( $line = fgets( $file ) ) ) {
			++$lineNum;

			# Strip comments
			$line = trim( substr( $line, 0, strcspn( $line, '#' ) ) );
			if ( $line === '' ) {
				continue;
			}

			# Split fields
			$numberedData = explode( ';', $line );
			if ( count( $numberedData ) < $fieldCount ) {
				// Indexing a short line by field number would read undefined
				// offsets; report it and move on instead.
				$this->error( "Expected $fieldCount fields on line $lineNum, found " .
					count( $numberedData ) );
				$this->error( $line );
				continue;
			}
			$data = array();
			foreach ( $fieldNames as $number => $name ) {
				$data[$name] = $numberedData[$number];
			}

			// hexdec() yields a number, so the range checks below compare
			// numbers rather than a numeric string against integers.
			$code = hexdec( $data['Code'] );
			if ( ( $code >= 0xFB50 && $code <= 0xFDFF ) # Arabic presentation forms A
				|| ( $code >= 0xFE70 && $code <= 0xFEFF ) # Arabic presentation forms B
			) {
				if ( $data['Decomposition_Type_Mapping'] === '' ) {
					// No decomposition
					continue;
				}
				// Expect a "<tag>" followed by a space-separated hex sequence;
				// only the hex sequence ($m[2]) is used.
				if ( !preg_match( '/^ *(<\w*>) +([0-9A-F ]*)$/',
					$data['Decomposition_Type_Mapping'], $m )
				) {
					$this->error( "Can't parse Decomposition_Type/Mapping on line $lineNum" );
					$this->error( $line );
					continue;
				}

				$source = hexSequenceToUtf8( $data['Code'] );
				$dest = hexSequenceToUtf8( $m[2] );
				$pairs[$source] = $dest;
			}
		}

		// Release the input handle once the whole file has been read
		fclose( $file );

		global $IP;
		$outputFile = "$IP/serialized/normalize-ar.ser";
		if ( file_put_contents( $outputFile, serialize( $pairs ) ) === false ) {
			// Do not claim success when nothing was written
			$this->error( "Unable to write $outputFile" );
			exit( 1 );
		}
		echo "ar: " . count( $pairs ) . " pairs written.\n";
	}
}

$maintClass = 'GenerateNormalizerDataAr';
require_once RUN_MAINTENANCE_IF_MAIN;
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |