MediaWiki  master
LanguageKk.php
Go to the documentation of this file.
1 <?php
// Kazakh Cyrillic alphabet, uppercase letters.
define( 'KK_C_UC', 'АӘБВГҒДЕЁЖЗИЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЪЫІЬЭЮЯ' );

// Kazakh Cyrillic alphabet, lowercase letters.
define( 'KK_C_LC', 'аәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя' );

// Kazakh Latin alphabet, uppercase letters.
define( 'KK_L_UC', 'AÄBCÇDEÉFGĞHIİÏJKLMNÑOÖPQRSŞTUÜVWXYÝZ' );

// Kazakh Latin alphabet, lowercase letters.
define( 'KK_L_LC', 'aäbcçdeéfgğhıiïjklmnñoöpqrsştuüvwxyýz' );

// Kazakh Arabic alphabet (currently not defined):
// define( 'KK_A', 'ٴابپتجحدرزسشعفقكلمنڭەوۇۋۆىيچھ' );

// U+0674 ARABIC LETTER HIGH HAMZA
define( 'H_HAMZA', 'ٴ' );

// U+200C ZERO WIDTH NON-JOINER (currently not defined):
// define( 'ZWNJ', '‌' );
31 
39 
/**
 * Build the converter and immediately load the regex conversion tables.
 *
 * All arguments are forwarded unchanged to the parent constructor.
 *
 * @param mixed $langobj
 * @param mixed $maincode
 * @param array $variants
 * @param array $variantfallbacks
 * @param array $flags
 */
function __construct( $langobj, $maincode, $variants = [], $variantfallbacks = [], $flags = [] ) {
	parent::__construct( $langobj, $maincode, $variants, $variantfallbacks, $flags );

	// The regex tables are hard-coded, so there is nothing to gain from
	// deferring them. If this waited for loadDefaultTables(), they would
	// never be set up when the tables themselves come from cache.
	$this->loadRegs();
}
59 
/**
 * Populate $this->mTables with one ReplacementArray per variant code.
 *
 * The source arrays are currently all empty placeholders; the regional
 * variants (kk-kz, kk-tr, kk-cn) start from their script's array merged
 * with their own.
 */
function loadDefaultTables() {
	// require __DIR__."/../../includes/KkConversion.php";
	// Placeholders for a future implementation. Remove these variable
	// declarations once KkConversion.php is generated.
	$kk2Cyrl = [];
	$kk2Latn = [];
	$kk2Arab = [];
	$kk2KZ = [];
	$kk2TR = [];
	$kk2CN = [];

	$pairsByVariant = [
		'kk-cyrl' => $kk2Cyrl,
		'kk-latn' => $kk2Latn,
		'kk-arab' => $kk2Arab,
		'kk-kz' => array_merge( $kk2Cyrl, $kk2KZ ),
		'kk-tr' => array_merge( $kk2Latn, $kk2TR ),
		'kk-cn' => array_merge( $kk2Arab, $kk2CN ),
	];

	$tables = [];
	foreach ( $pairsByVariant as $code => $pairs ) {
		$tables[$code] = new ReplacementArray( $pairs );
	}
	// The main code gets a table built with no arguments.
	$tables['kk'] = new ReplacementArray();

	$this->mTables = $tables;
}
81 
/**
 * Merge each script table into the table of its regional variant:
 * kk-cyrl into kk-kz, kk-latn into kk-tr, kk-arab into kk-cn.
 */
function postLoadTables() {
	$scriptFor = [
		'kk-kz' => 'kk-cyrl',
		'kk-tr' => 'kk-latn',
		'kk-cn' => 'kk-arab',
	];

	foreach ( $scriptFor as $regional => $script ) {
		$this->mTables[$regional]->merge( $this->mTables[$script] );
	}
}
87 
/**
 * Fill the three regex tables used by regsConverter():
 *  - $this->mCyrl2Latn: Cyrillic => Latin
 *  - $this->mLatn2Cyrl: Latin => Cyrillic
 *  - $this->mCyLa2Arab: Cyrillic (and, once enabled, Latin) => Arabic
 *
 * Each table maps a PCRE pattern to its replacement. regsConverter()
 * applies the entries one after another in array order, so the order
 * of the entries matters: multi-letter and context rules come before
 * the single-letter rules.
 */
function loadRegs() {
	$this->mCyrl2Latn = [
		// Punctuation
		'/№/u' => 'No.',

		// Е after vowels
		'/([АӘЕЁИОӨҰҮЭЮЯЪЬ])Е/u' => '$1YE',
		'/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1ye',

		// Leading Ё, Ю, Я, Щ
		'/^Ё([' . KK_C_UC . ']|$)/u' => 'YO$1',
		'/^Ё([' . KK_C_LC . ']|$)/u' => 'Yo$1',
		'/^Ю([' . KK_C_UC . ']|$)/u' => 'YU$1',
		'/^Ю([' . KK_C_LC . ']|$)/u' => 'Yu$1',
		'/^Я([' . KK_C_UC . ']|$)/u' => 'YA$1',
		'/^Я([' . KK_C_LC . ']|$)/u' => 'Ya$1',
		'/^Щ([' . KK_C_UC . ']|$)/u' => 'ŞÇ$1',
		'/^Щ([' . KK_C_LC . ']|$)/u' => 'Şç$1',

		// Other Ё, Ю, Я, Щ
		'/Ё/u' => 'YO', '/ё/u' => 'yo',
		'/Ю/u' => 'YU', '/ю/u' => 'yu',
		'/Я/u' => 'YA', '/я/u' => 'ya',
		'/Щ/u' => 'ŞÇ', '/щ/u' => 'şç',

		// Soft and hard signs
		'/[ъЪ]/u' => 'ʺ',
		'/[ьЬ]/u' => 'ʹ',

		// Other characters
		'/А/u' => 'A', '/а/u' => 'a', '/Ә/u' => 'Ä', '/ә/u' => 'ä',
		'/Б/u' => 'B', '/б/u' => 'b', '/В/u' => 'V', '/в/u' => 'v',
		'/Г/u' => 'G', '/г/u' => 'g', '/Ғ/u' => 'Ğ', '/ғ/u' => 'ğ',
		'/Д/u' => 'D', '/д/u' => 'd', '/Е/u' => 'E', '/е/u' => 'e',
		'/Ж/u' => 'J', '/ж/u' => 'j', '/З/u' => 'Z', '/з/u' => 'z',
		'/И/u' => 'Ï', '/и/u' => 'ï', '/Й/u' => 'Ý', '/й/u' => 'ý',
		'/К/u' => 'K', '/к/u' => 'k', '/Қ/u' => 'Q', '/қ/u' => 'q',
		'/Л/u' => 'L', '/л/u' => 'l', '/М/u' => 'M', '/м/u' => 'm',
		'/Н/u' => 'N', '/н/u' => 'n', '/Ң/u' => 'Ñ', '/ң/u' => 'ñ',
		'/О/u' => 'O', '/о/u' => 'o', '/Ө/u' => 'Ö', '/ө/u' => 'ö',
		'/П/u' => 'P', '/п/u' => 'p', '/Р/u' => 'R', '/р/u' => 'r',
		'/С/u' => 'S', '/с/u' => 's', '/Т/u' => 'T', '/т/u' => 't',
		'/У/u' => 'W', '/у/u' => 'w', '/Ұ/u' => 'U', '/ұ/u' => 'u',
		'/Ү/u' => 'Ü', '/ү/u' => 'ü', '/Ф/u' => 'F', '/ф/u' => 'f',
		'/Х/u' => 'X', '/х/u' => 'x', '/Һ/u' => 'H', '/һ/u' => 'h',
		'/Ц/u' => 'C', '/ц/u' => 'c', '/Ч/u' => 'Ç', '/ч/u' => 'ç',
		'/Ш/u' => 'Ş', '/ш/u' => 'ş', '/Ы/u' => 'I', '/ы/u' => 'ı',
		'/І/u' => 'İ', '/і/u' => 'i', '/Э/u' => 'É', '/э/u' => 'é',
	];

	$this->mLatn2Cyrl = [
		// Punctuation
		'/#|No\./' => '№',

		// Şç
		'/ŞÇʹ/u' => 'ЩЬ', '/Şçʹ/u' => 'Щь',
		'/Ş[Çç]/u' => 'Щ', '/şç/u' => 'щ',

		// Soft and hard signs
		'/([' . KK_L_UC . '])ʺ([' . KK_L_UC . '])/u' => '$1Ъ$2',
		'/ʺ([' . KK_L_LC . '])/u' => 'ъ$1',
		'/([' . KK_L_UC . '])ʹ([' . KK_L_UC . '])/u' => '$1Ь$2',
		'/ʹ([' . KK_L_LC . '])/u' => 'ь$1',
		'/ʺ/u' => 'ъ',
		'/ʹ/u' => 'ь',

		// Ye, Yo, Yu, Ya
		'/Y[Ee]/u' => 'Е', '/ye/u' => 'е',
		'/Y[Oo]/u' => 'Ё', '/yo/u' => 'ё',
		'/Y[UWuw]/u' => 'Ю', '/y[uw]/u' => 'ю',
		'/Y[Aa]/u' => 'Я', '/ya/u' => 'я',

		// Other characters
		'/A/u' => 'А', '/a/u' => 'а', '/Ä/u' => 'Ә', '/ä/u' => 'ә',
		'/B/u' => 'Б', '/b/u' => 'б', '/C/u' => 'Ц', '/c/u' => 'ц',
		'/Ç/u' => 'Ч', '/ç/u' => 'ч', '/D/u' => 'Д', '/d/u' => 'д',
		'/E/u' => 'Е', '/e/u' => 'е', '/É/u' => 'Э', '/é/u' => 'э',
		'/F/u' => 'Ф', '/f/u' => 'ф', '/G/u' => 'Г', '/g/u' => 'г',
		'/Ğ/u' => 'Ғ', '/ğ/u' => 'ғ', '/H/u' => 'Һ', '/h/u' => 'һ',
		'/I/u' => 'Ы', '/ı/u' => 'ы', '/İ/u' => 'І', '/i/u' => 'і',
		'/Ï/u' => 'И', '/ï/u' => 'и', '/J/u' => 'Ж', '/j/u' => 'ж',
		'/K/u' => 'К', '/k/u' => 'к', '/L/u' => 'Л', '/l/u' => 'л',
		'/M/u' => 'М', '/m/u' => 'м', '/N/u' => 'Н', '/n/u' => 'н',
		'/Ñ/u' => 'Ң', '/ñ/u' => 'ң', '/O/u' => 'О', '/o/u' => 'о',
		'/Ö/u' => 'Ө', '/ö/u' => 'ө', '/P/u' => 'П', '/p/u' => 'п',
		'/Q/u' => 'Қ', '/q/u' => 'қ', '/R/u' => 'Р', '/r/u' => 'р',
		'/S/u' => 'С', '/s/u' => 'с', '/Ş/u' => 'Ш', '/ş/u' => 'ш',
		'/T/u' => 'Т', '/t/u' => 'т', '/U/u' => 'Ұ', '/u/u' => 'ұ',
		'/Ü/u' => 'Ү', '/ü/u' => 'ү', '/V/u' => 'В', '/v/u' => 'в',
		'/W/u' => 'У', '/w/u' => 'у', '/Ý/u' => 'Й', '/ý/u' => 'й',
		'/X/u' => 'Х', '/x/u' => 'х', '/Z/u' => 'З', '/z/u' => 'з',
	];

	$this->mCyLa2Arab = [
		// Punctuation -> Arabic
		'/#|№|No\./u' => '؀', // &#x0600;
		'/\,/' => '،', // &#x060C;
		'/;/' => '؛', // &#x061B;
		'/\?/' => '؟', // &#x061F;
		'/%/' => '٪', // &#x066A;
		'/\*/' => '٭', // &#x066D;

		// Digits -> Arabic
		'/0/' => '۰', // &#x06F0;
		'/1/' => '۱', // &#x06F1;
		'/2/' => '۲', // &#x06F2;
		'/3/' => '۳', // &#x06F3;
		'/4/' => '۴', // &#x06F4;
		'/5/' => '۵', // &#x06F5;
		'/6/' => '۶', // &#x06F6;
		'/7/' => '۷', // &#x06F7;
		'/8/' => '۸', // &#x06F8;
		'/9/' => '۹', // &#x06F9;

		// Cyrillic -> Arabic
		'/Аллаһ/ui' => 'ﷲ',
		'/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1يە',
		'/[еэ]/ui' => 'ە', '/[ъь]/ui' => '',
		'/[аә]/ui' => 'ا', '/[оө]/ui' => 'و', '/[ұү]/ui' => 'ۇ', '/[ыі]/ui' => 'ى',
		'/[и]/ui' => 'ىي', '/ё/ui' => 'يو', '/ю/ui' => 'يۋ', '/я/ui' => 'يا', '/[й]/ui' => 'ي',
		'/ц/ui' => 'تس', '/щ/ui' => 'شش',
		'/һ/ui' => 'ح', '/ч/ui' => 'تش',
		// Alternative mapping, disabled:
		// '/һ/ui' => 'ھ', '/ч/ui' => 'چ',
		'/б/ui' => 'ب', '/в/ui' => 'ۆ', '/г/ui' => 'گ', '/ғ/ui' => 'ع',
		'/д/ui' => 'د', '/ж/ui' => 'ج', '/з/ui' => 'ز', '/к/ui' => 'ك',
		'/қ/ui' => 'ق', '/л/ui' => 'ل', '/м/ui' => 'م', '/н/ui' => 'ن',
		'/ң/ui' => 'ڭ', '/п/ui' => 'پ', '/р/ui' => 'ر', '/с/ui' => 'س',
		'/т/ui' => 'ت', '/у/ui' => 'ۋ', '/ф/ui' => 'ف', '/х/ui' => 'ح',
		'/ш/ui' => 'ش',

		// Latin -> Arabic // commented for now...
		/*'/Allah/ui' => 'ﷲ',
		'/[eé]/ui' => 'ە', '/[yý]/ui' => 'ي', '/[ʺʹ]/ui' => '',
		'/[aä]/ui' => 'ا', '/[oö]/ui' => 'و', '/[uü]/ui' => 'ۇ',
		'/[ï]/ui' => 'ىي', '/[ıIiİ]/u' => 'ى',
		'/c/ui' => 'تس',
		'/ç/ui' => 'تش', '/h/ui' => 'ح',
		#'/ç/ui' => 'چ', '/h/ui' => 'ھ',
		'/b/ui' => 'ب','/d/ui' => 'د',
		'/f/ui' => 'ف', '/g/ui' => 'گ', '/ğ/ui' => 'ع',
		'/j/ui' => 'ج', '/k/ui' => 'ك', '/l/ui' => 'ل', '/m/ui' => 'م',
		'/n/ui' => 'ن', '/ñ/ui' => 'ڭ', '/p/ui' => 'پ', '/q/ui' => 'ق',
		'/r/ui' => 'ر', '/s/ui' => 'س', '/ş/ui' => 'ش', '/t/ui' => 'ت',
		'/v/ui' => 'ۆ', '/w/ui' => 'ۋ', '/x/ui' => 'ح', '/z/ui' => 'ز',*/
	];
}
218 
/**
 * Turn a manual conversion rule into a per-variant array.
 *
 * When the 'T' flag is present the rule is handed to the parent
 * implementation. Otherwise all formatting is ignored and every variant
 * in $this->mVariants receives the rule text unchanged.
 *
 * @param string $rule
 * @param array $flags
 * @return array
 */
function parseManualRule( $rule, $flags = [] ) {
	if ( !in_array( 'T', $flags ) ) {
		// Ignore all formatting: same text for each variant.
		return array_fill_keys( $this->mVariants, $rule );
	}

	return parent::parseManualRule( $rule, $flags );
}
243 
/**
 * Wrapper around the parent's findVariantLink().
 *
 * Does nothing when $nt is an object in the user or user-talk
 * namespace. Otherwise delegates to the parent and then restores the
 * original $link if the preferred variant equals the main language code.
 *
 * @param string &$link
 * @param mixed &$nt
 * @param bool $ignoreOtherCond
 */
function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	// Leave links into the user namespaces alone.
	if ( is_object( $nt ) && in_array( $nt->getNamespace(), [ NS_USER, NS_USER_TALK ] ) ) {
		return;
	}

	$originalLink = $link;
	parent::findVariantLink( $link, $nt, $ignoreOtherCond );

	$preferred = $this->getPreferredVariant();
	if ( $preferred == $this->mMainLanguageCode ) {
		$link = $originalLink;
	}
}
269 
/**
 * Translate text into the given variant.
 *
 * Runs the parent's translate() first. For the six script and regional
 * variants the result is then split on a "$digit" sequence followed by
 * characters outside the source script's character set; each piece goes
 * through regsConverter() and the separators are copied through
 * verbatim. Any other variant returns the parent's result as is.
 *
 * @param string $text
 * @param string $toVariant
 * @return string
 */
function translate( $text, $toVariant ) {
	$text = parent::translate( $text, $toVariant );

	// Character set of the script that has to be converted away from.
	if ( in_array( $toVariant, [ 'kk-cyrl', 'kk-kz' ] ) ) {
		$letters = KK_L_UC . KK_L_LC . 'ʺʹ#0123456789';
	} elseif ( in_array( $toVariant, [ 'kk-latn', 'kk-tr' ] ) ) {
		$letters = KK_C_UC . KK_C_LC . '№0123456789';
	} elseif ( in_array( $toVariant, [ 'kk-arab', 'kk-cn' ] ) ) {
		$letters = KK_C_UC . KK_C_LC . /*KK_L_UC.KK_L_LC.'ʺʹ'.*/',;\?%\*№0123456789';
	} else {
		return $text;
	}

	// Disable conversion variables like $1, $2...
	$splitPattern = '/\$[0-9][^' . $letters . ']+/u';
	$pieces = preg_split( $splitPattern, $text, -1, PREG_SPLIT_OFFSET_CAPTURE );

	$converted = '';
	$cursor = 0;
	foreach ( $pieces as list( $piece, $offset ) ) {
		// Copy the separator lying between the previous piece and this one.
		$converted .= substr( $text, $cursor, $offset - $cursor );
		$converted .= $this->regsConverter( $piece, $toVariant );
		$cursor = $offset + strlen( $piece );
	}

	return $converted;
}
318 
/**
 * Convert text to the given variant by applying the matching regex
 * table (built in loadRegs()) entry by entry, in array order.
 *
 * For the Arabic variants, words are first prefixed with H_HAMZA when
 * they contain one of the front vowels and none of the excluded letters.
 * Empty text and unknown variants are returned unchanged.
 *
 * @param string $text
 * @param string $toVariant
 * @return string
 */
function regsConverter( $text, $toVariant ) {
	if ( $text == '' ) {
		return $text;
	}

	switch ( $toVariant ) {
		case 'kk-arab':
		case 'kk-cn':
			$letters = KK_C_LC . KK_C_UC; /*.KK_L_LC.KK_L_UC*/
			$front = 'әөүіӘӨҮІ'; /*.'äöüiÄÖÜİ'*/
			$excludes = 'еэгғкқЕЭГҒКҚ'; /*.'eégğkqEÉGĞKQ'*/

			// Split the text into words.
			$words = preg_split( '/[\b\s\-\.:]+/', $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
			$cursor = 0;
			$marked = '';
			foreach ( $words as list( $word, $offset ) ) {
				$marked .= substr( $text, $cursor, $offset - $cursor );
				// Does the word contain a front vowel? Words that contain
				// е, э, г, ғ, к or қ are excluded: they are written
				// without hamza.
				$needsHamza = preg_match( '/[' . $front . ']/u', $word )
					&& !preg_match( '/[' . $excludes . ']/u', $word );
				$marked .= $needsHamza
					? preg_replace( '/[' . $letters . ']+/u', H_HAMZA . '$0', $word )
					: $word;
				$cursor = $offset + strlen( $word );
			}
			$text = $marked;
			$table = $this->mCyLa2Arab;
			break;
		case 'kk-latn':
		case 'kk-tr':
			$table = $this->mCyrl2Latn;
			break;
		case 'kk-cyrl':
		case 'kk-kz':
			$table = $this->mLatn2Cyrl;
			break;
		default:
			return $text;
	}

	foreach ( $table as $pattern => $replacement ) {
		$text = preg_replace( $pattern, $replacement, $text );
	}

	return $text;
}
377 
/**
 * Convert a category sort key by running it through autoConvert()
 * with the variant 'kk'.
 *
 * @param string $key
 * @return string
 */
function convertCategoryKey( $key ) {
	$converted = $this->autoConvert( $key, 'kk' );

	return $converted;
}
385 }
386 
/**
 * Set up the Kazakh language object and attach a KkConverter for the
 * variants kk, kk-cyrl, kk-latn, kk-arab, kk-kz, kk-tr and kk-cn.
 */
function __construct() {
	parent::__construct();

	// Variant => fallback. The main code and the script variants fall
	// back on each other; each regional variant falls back to its script.
	$variantfallbacks = [
		'kk' => 'kk-cyrl',
		'kk-cyrl' => 'kk',
		'kk-latn' => 'kk',
		'kk-arab' => 'kk',
		'kk-kz' => 'kk-cyrl',
		'kk-tr' => 'kk-latn',
		'kk-cn' => 'kk-arab',
	];

	// The variant list is exactly the fallback keys, in the same order.
	$variants = array_keys( $variantfallbacks );

	$this->mConverter = new KkConverter( $this, 'kk', $variants, $variantfallbacks );
}
410 
/**
 * It fixes issue with ucfirst for transforming 'i' to 'İ'.
 *
 * When the string starts with 'i' and the preferred variant is one of
 * the Latin variants (kk-latn, kk-tr), the first letter becomes the
 * dotted capital 'İ'. Everything else is left to the parent.
 *
 * @param string $string
 * @return string
 */
public function ucfirst( $string ) {
	// substr() rather than $string[0]: an empty string has no offset 0,
	// and indexing it raises an "Uninitialized string offset" error.
	if ( substr( $string, 0, 1 ) === 'i' ) {
		$variant = $this->getPreferredVariant();
		if ( $variant === 'kk-latn' || $variant === 'kk-tr' ) {
			return 'İ' . substr( $string, 1 );
		}
	}

	return parent::ucfirst( $string );
}
427 
/**
 * It fixes issue with lcfirst for transforming 'I' to 'ı'.
 *
 * When the string starts with 'I' and the preferred variant is one of
 * the Latin variants (kk-latn, kk-tr), the first letter becomes the
 * dotless small 'ı'. Everything else is left to the parent.
 *
 * @param string $string
 * @return string
 */
public function lcfirst( $string ) {
	// substr() rather than $string[0]: an empty string has no offset 0,
	// and indexing it raises an "Uninitialized string offset" error.
	if ( substr( $string, 0, 1 ) === 'I' ) {
		$variant = $this->getPreferredVariant();
		if ( $variant === 'kk-latn' || $variant === 'kk-tr' ) {
			return 'ı' . substr( $string, 1 );
		}
	}

	return parent::lcfirst( $string );
}
444 
/**
 * Convert a word to the given grammatical case, dispatching on the
 * preferred variant to the script-specific parent implementation.
 *
 * @param string $word
 * @param string $case
 * @return string
 */
function convertGrammar( $word, $case ) {
	switch ( $this->getPreferredVariant() ) {
		case 'kk-arab':
		case 'kk-cn':
			return parent::convertGrammarKk_arab( $word, $case );
		case 'kk-latn':
		case 'kk-tr':
			return parent::convertGrammarKk_latn( $word, $case );
		default:
			// 'kk-cyrl', 'kk-kz', 'kk' and any other value use Cyrillic.
			return parent::convertGrammarKk_cyrl( $word, $case );
	}
}
471 }
Wrapper around strtr() that holds replacements.
parseManualRule($rule, $flags=[])
rules should be defined as -{ekavian | iyekavian-} -or- -{code:text | code:text | ...
Definition: LanguageKk.php:230
const KK_L_UC
Definition: LanguageKk.php:26
loadDefaultTables()
Definition: LanguageKk.php:60
Kazakh (Қазақша)
translate($text, $toVariant)
It translates text into variant.
Definition: LanguageKk.php:278
const KK_C_UC
Definition: LanguageKk.php:24
const H_HAMZA
Definition: LanguageKk.php:29
Class that handles Cyrillic, Latin and Arabic scripts for Kazakh. Right now it only distinguishes kk_cyr...
Definition: LanguageKk.php:393
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2588
Base class for language conversion.
convertCategoryKey($key)
Definition: LanguageKk.php:382
lcfirst($string)
It fixes issue with lcfirst for transforming 'I' to 'ı'.
Definition: LanguageKk.php:435
the value to return A Title object or null for latest to be modified or replaced by the hook handler or if authentication is not possible after cache objects are set for highlighting & $link
Definition: hooks.txt:2621
getPreferredVariant()
Definition: Language.php:4040
const KK_C_LC
Definition: LanguageKk.php:25
findVariantLink(&$link, &$nt, $ignoreOtherCond=false)
A function wrapper:
Definition: LanguageKk.php:254
Kazakh (Қазақша) converter routines.
Definition: LanguageKk.php:37
ucfirst($string)
It fixes issue with ucfirst for transforming 'i' to 'İ'.
Definition: LanguageKk.php:418
regsConverter($text, $toVariant)
Definition: LanguageKk.php:324
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1816
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users. It's targeted particularly at maintainers for Linux, since it's been observed that distribution packages of MediaWiki often break. We've consistently had to recommend that users seeking support use official tarballs instead of their distribution's, and this often solves whatever problem the user is having. It would be nice if this could such as
Definition: distributors.txt:9
__construct($langobj, $maincode, $variants=[], $variantfallbacks=[], $flags=[])
Definition: LanguageKk.php:47
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
autoConvert($text, $toVariant=false)
Dictionary-based conversion.
def translate
Definition: Makefile.py:235
const NS_USER_TALK
Definition: Defines.php:72
const KK_L_LC
Definition: LanguageKk.php:27
getPreferredVariant()
Get preferred language variant.
convertGrammar($word, $case)
Definition: LanguageKk.php:450
$matches