[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/maintenance/language/ -> generateUtf8Case.php (source)

   1  <?php
   2  /**
   3   * Generates Utf8Case.ser from the Unicode Character Database and
   4   * supplementary files.
   5   *
   6   * Copyright © 2004, 2008 Brion Vibber <[email protected]>
   7   * https://www.mediawiki.org/
   8   *
   9   * This program is free software; you can redistribute it and/or modify
  10   * it under the terms of the GNU General Public License as published by
  11   * the Free Software Foundation; either version 2 of the License, or
  12   * (at your option) any later version.
  13   *
  14   * This program is distributed in the hope that it will be useful,
  15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17   * GNU General Public License for more details.
  18   *
  19   * You should have received a copy of the GNU General Public License along
  20   * with this program; if not, write to the Free Software Foundation, Inc.,
  21   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  22   * http://www.gnu.org/copyleft/gpl.html
  23   *
  24   * @file
  25   * @ingroup MaintenanceLanguage
  26   */
  27  
  28  require_once  __DIR__ . '/../Maintenance.php';
  29  
  30  /**
  31   * Generates Utf8Case.ser from the Unicode Character Database and
  32   * supplementary files.
  33   *
  34   * @ingroup MaintenanceLanguage
  35   */
  36  class GenerateUtf8Case extends Maintenance {
  37  
  38  	public function __construct() {
  39          parent::__construct();
  40          $this->mDescription = 'Generate Utf8Case.ser from the Unicode Character Database ' .
  41              'and supplementary files';
  42          $this->addOption( 'unicode-data-file', 'The local location of the data file ' .
  43              'from http://unicode.org/Public/UNIDATA/UnicodeData.txt', false, true );
  44      }
  45  
  46  	public function getDbType() {
  47          return Maintenance::DB_NONE;
  48      }
  49  
  50  	public function execute() {
  51          if ( !$this->hasOption( 'unicode-data-file' ) ) {
  52              $dataFile = 'UnicodeData.txt';
  53              if ( !file_exists( $dataFile ) ) {
  54                  $this->error( "Unable to find UnicodeData.txt. Please specify " .
  55                      "its location with --unicode-data-file=<FILE>" );
  56                  exit( 1 );
  57              }
  58          } else {
  59              $dataFile = $this->getOption( 'unicode-data-file' );
  60              if ( !file_exists( $dataFile ) ) {
  61                  $this->error( 'Unable to find the specified data file.' );
  62                  exit( 1 );
  63              }
  64          }
  65  
  66          $file = fopen( $dataFile, 'r' );
  67          if ( !$file ) {
  68              $this->error( 'Unable to open the data file.' );
  69              exit( 1 );
  70          }
  71  
  72          // For the file format, see http://www.unicode.org/reports/tr44/
  73          $fieldNames = array(
  74              'Code',
  75              'Name',
  76              'General_Category',
  77              'Canonical_Combining_Class',
  78              'Bidi_Class',
  79              'Decomposition_Type_Mapping',
  80              'Numeric_Type_Value_6',
  81              'Numeric_Type_Value_7',
  82              'Numeric_Type_Value_8',
  83              'Bidi_Mirrored',
  84              'Unicode_1_Name',
  85              'ISO_Comment',
  86              'Simple_Uppercase_Mapping',
  87              'Simple_Lowercase_Mapping',
  88              'Simple_Titlecase_Mapping'
  89          );
  90  
  91          $upper = array();
  92          $lower = array();
  93  
  94          $lineNum = 0;
  95          while ( false !== ( $line = fgets( $file ) ) ) {
  96              ++$lineNum;
  97  
  98              # Strip comments
  99              $line = trim( substr( $line, 0, strcspn( $line, '#' ) ) );
 100              if ( $line === '' ) {
 101                  continue;
 102              }
 103  
 104              # Split fields
 105              $numberedData = explode( ';', $line );
 106              $data = array();
 107              foreach ( $fieldNames as $number => $name ) {
 108                  $data[$name] = $numberedData[$number];
 109              }
 110  
 111              $source = hexSequenceToUtf8( $data['Code'] );
 112              if ( $data['Simple_Uppercase_Mapping'] ) {
 113                  $upper[$source] = hexSequenceToUtf8( $data['Simple_Uppercase_Mapping'] );
 114              }
 115              if ( $data['Simple_Lowercase_Mapping'] ) {
 116                  $lower[$source] = hexSequenceToUtf8( $data['Simple_Lowercase_Mapping'] );
 117              }
 118          }
 119  
 120          global $IP;
 121          file_put_contents( "$IP/serialized/Utf8Case.ser", serialize( array(
 122              'wikiUpperChars' => $upper,
 123              'wikiLowerChars' => $lower,
 124          ) ) );
 125      }
 126  }
 127  
 128  $maintClass = 'GenerateUtf8Case';
 129  require_once RUN_MAINTENANCE_IF_MAIN;


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1