[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * Database row sorting.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

/**
 * Base class for collations: objects that turn a string into a binary
 * sortkey and into the "first letter" heading it should be grouped under.
 */
abstract class Collation {
	/** @var Collation|null Lazily created instance returned by singleton() */
	private static $instance;

	/**
	 * Get the collation configured in $wgCategoryCollation, creating it on
	 * first use and reusing the same object afterwards.
	 *
	 * @return Collation
	 */
	public static function singleton() {
		if ( !self::$instance ) {
			global $wgCategoryCollation;
			self::$instance = self::factory( $wgCategoryCollation );
		}
		return self::$instance;
	}

	/**
	 * Create a collation object from its name.
	 *
	 * Built-in names are tried first, then any name of the form
	 * "uca-<locale>", then the 'Collation::factory' hook.
	 *
	 * @throws MWException If the name is not recognised by any of the above
	 * @param string $collationName
	 * @return Collation
	 */
	public static function factory( $collationName ) {
		switch ( $collationName ) {
			case 'uppercase':
				return new UppercaseCollation;
			case 'identity':
				return new IdentityCollation;
			case 'uca-default':
				return new IcuCollation( 'root' );
			case 'xx-uca-ckb':
				return new CollationCkb;
			case 'xx-uca-et':
				return new CollationEt;
			default:
				$match = array();
				if ( preg_match( '/^uca-([a-z@=-]+)$/', $collationName, $match ) ) {
					return new IcuCollation( $match[1] );
				}

				# Provide a mechanism for extensions to hook in.
				$collationObject = null;
				wfRunHooks( 'Collation::factory', array( $collationName, &$collationObject ) );

				if ( $collationObject instanceof Collation ) {
					return $collationObject;
				}

				// If all else fails...
				throw new MWException( __METHOD__ . ": unknown collation type \"$collationName\"" );
		}
	}

	/**
	 * Given a string, convert it to a (hopefully short) key that can be used
	 * for efficient sorting. A binary sort according to the sortkeys
	 * corresponds to a logical sort of the corresponding strings. Current
	 * code expects that a line feed character should sort before all others, but
	 * has no other particular expectations (and that one can be changed if
	 * necessary).
	 *
	 * @param string $string UTF-8 string
	 * @return string Binary sortkey
	 */
	abstract public function getSortKey( $string );

	/**
	 * Given a string, return the logical "first letter" to be used for
	 * grouping on category pages and so on. This has to be coordinated
	 * carefully with getSortKey(), or else the sorted list might jump
	 * back and forth between the same "initial letters" or other pathological
	 * behavior. For instance, if you just return the first character, but "a"
	 * sorts the same as "A" based on getSortKey(), then you might get a
	 * list like
	 *
	 * == A ==
	 * * [[Aardvark]]
	 *
	 * == a ==
	 * * [[antelope]]
	 *
	 * == A ==
	 * * [[Ape]]
	 *
	 * etc., assuming for the sake of argument that $wgCapitalLinks is false.
	 *
	 * @param string $string UTF-8 string
	 * @return string UTF-8 string corresponding to the first letter of input
	 */
	abstract public function getFirstLetter( $string );
}

/**
 * Collation that sorts by the uppercased form of the string, using the
 * uppercase functions of the English Language object.
 */
class UppercaseCollation extends Collation {
	/** @var Language */
	private $lang;

	public function __construct() {
		// Get a language object so that we can use the generic UTF-8 uppercase
		// function there
		$this->lang = Language::factory( 'en' );
	}

	/**
	 * @param string $string UTF-8 string
	 * @return string The uppercased string, used directly as the sortkey
	 */
	public function getSortKey( $string ) {
		return $this->lang->uc( $string );
	}

	/**
	 * @param string $string UTF-8 string
	 * @return string Uppercased first character, or '' for an empty string
	 */
	public function getFirstLetter( $string ) {
		$string = strval( $string );
		if ( $string === '' ) {
			// Nothing to inspect; reading $string[0] here would raise an
			// "Uninitialized string offset" error.
			return '';
		}
		// Skip a leading NUL byte so that the letter after it is used
		if ( $string[0] == "\0" ) {
			$string = substr( $string, 1 );
		}
		return $this->lang->ucfirst( $this->lang->firstChar( $string ) );
	}
}

/**
 * Collation class that's essentially a no-op.
 *
 * Does sorting based on binary value of the string.
 * Like how things were pre 1.17.
 */
class IdentityCollation extends Collation {

	/**
	 * @param string $string UTF-8 string
	 * @return string The input, unchanged
	 */
	public function getSortKey( $string ) {
		return $string;
	}

	/**
	 * @param string $string UTF-8 string
	 * @return string First character as determined by $wgContLang, or ''
	 *  for an empty string
	 */
	public function getFirstLetter( $string ) {
		global $wgContLang;
		$string = strval( $string );
		if ( $string === '' ) {
			// Nothing to inspect; reading $string[0] here would raise an
			// "Uninitialized string offset" error.
			return '';
		}
		// Copied from UppercaseCollation.
		// I'm kind of unclear on when this could happen...
		if ( $string[0] == "\0" ) {
			$string = substr( $string, 1 );
		}
		return $wgContLang->firstChar( $string );
	}
}

/**
 * Collation backed by the Collator class of PHP's intl extension (ICU).
 */
class IcuCollation extends Collation {
	/** Version stamp stored with cached first-letter data; entries with another value are regenerated */
	const FIRST_LETTER_VERSION = 2;

	/** @var Collator Collator set to primary strength, used for first-letter lookup */
	private $primaryCollator;

	/** @var Collator Collator with default settings, used for the actual sortkeys */
	private $mainCollator;

	/** @var string ICU locale name as passed to the constructor */
	private $locale;

	/** @var Language Language whose formatNum() supplies the digit characters */
	protected $digitTransformLanguage;

	/** @var array|null First-letter data, loaded on demand by getFirstLetterData() */
	private $firstLetterData;

	/**
	 * Unified CJK blocks.
	 *
	 * The same definition of a CJK block must be used for both Collation and
	 * generateCollationData.php. These blocks are omitted from the first
	 * letter data, as an optimisation measure and because the default UCA table
	 * is pretty useless for sorting Chinese text anyway. Japanese and Korean
	 * blocks are not included here, because they are smaller and more useful.
	 */
	private static $cjkBlocks = array(
		array( 0x2E80, 0x2EFF ), // CJK Radicals Supplement
		array( 0x2F00, 0x2FDF ), // Kangxi Radicals
		array( 0x2FF0, 0x2FFF ), // Ideographic Description Characters
		array( 0x3000, 0x303F ), // CJK Symbols and Punctuation
		array( 0x31C0, 0x31EF ), // CJK Strokes
		array( 0x3200, 0x32FF ), // Enclosed CJK Letters and Months
		array( 0x3300, 0x33FF ), // CJK Compatibility
		array( 0x3400, 0x4DBF ), // CJK Unified Ideographs Extension A
		array( 0x4E00, 0x9FFF ), // CJK Unified Ideographs
		array( 0xF900, 0xFAFF ), // CJK Compatibility Ideographs
		array( 0xFE30, 0xFE4F ), // CJK Compatibility Forms
		array( 0x20000, 0x2A6DF ), // CJK Unified Ideographs Extension B
		array( 0x2A700, 0x2B73F ), // CJK Unified Ideographs Extension C
		array( 0x2B740, 0x2B81F ), // CJK Unified Ideographs Extension D
		array( 0x2F800, 0x2FA1F ), // CJK Compatibility Ideographs Supplement
	);

	/**
	 * Additional characters (or character groups) to be considered separate
	 * letters for given languages, or to be removed from the list of such
	 * letters (denoted by keys starting with '-').
	 *
	 * These are additions to (or subtractions from) the data stored in the
	 * first-letters-root.ser file (which among others includes full basic latin,
	 * cyrillic and greek alphabets).
	 *
	 * "Separate letter" is a letter that would have a separate heading/section
	 * for it in a dictionary or a phone book in this language. This data isn't
	 * used for sorting (the ICU library handles that), only for deciding which
	 * characters (or character groups) to use as headings.
	 *
	 * Initially generated based on the primary level of Unicode collation
	 * tailorings available at http://developer.mimer.com/charts/tailorings.htm ,
	 * later modified.
	 *
	 * Empty arrays are intended; this signifies that the data for the language is
	 * available and that there are, in fact, no additional letters to consider.
	 */
	private static $tailoringFirstLetters = array(
		// Verified by native speakers
		'be' => array( "Ё" ),
		'be-tarask' => array( "Ё" ),
		'cy' => array( "Ch", "Dd", "Ff", "Ng", "Ll", "Ph", "Rh", "Th" ),
		'en' => array(),
		'fa' => array( "آ", "ء", "ه" ),
		'fi' => array( "Å", "Ä", "Ö" ),
		'fr' => array(),
		'hu' => array( "Cs", "Dz", "Dzs", "Gy", "Ly", "Ny", "Ö", "Sz", "Ty", "Ü", "Zs" ),
		'is' => array( "Á", "Ð", "É", "Í", "Ó", "Ú", "Ý", "Þ", "Æ", "Ö", "Å" ),
		'it' => array(),
		'lv' => array( "Č", "Ģ", "Ķ", "Ļ", "Ņ", "Š", "Ž" ),
		'pl' => array( "Ą", "Ć", "Ę", "Ł", "Ń", "Ó", "Ś", "Ź", "Ż" ),
		'pt' => array(),
		'ru' => array(),
		'sv' => array( "Å", "Ä", "Ö" ),
		'sv@collation=standard' => array( "Å", "Ä", "Ö" ),
		'uk' => array( "Ґ", "Ь" ),
		'vi' => array( "Ă", "Â", "Đ", "Ê", "Ô", "Ơ", "Ư" ),
		// Not verified, but likely correct
		'af' => array(),
		'ast' => array( "Ch", "Ll", "Ñ" ),
		'az' => array( "Ç", "Ə", "Ğ", "İ", "Ö", "Ş", "Ü" ),
		'bg' => array(),
		'br' => array( "Ch", "C'h" ),
		'bs' => array( "Č", "Ć", "Dž", "Đ", "Lj", "Nj", "Š", "Ž" ),
		'ca' => array(),
		'co' => array(),
		'cs' => array( "Č", "Ch", "Ř", "Š", "Ž" ),
		'da' => array( "Æ", "Ø", "Å" ),
		'de' => array(),
		'dsb' => array( "Č", "Ć", "Dź", "Ě", "Ch", "Ł", "Ń", "Ŕ", "Š", "Ś", "Ž", "Ź" ),
		'el' => array(),
		'eo' => array( "Ĉ", "Ĝ", "Ĥ", "Ĵ", "Ŝ", "Ŭ" ),
		'es' => array( "Ñ" ),
		'et' => array( "Š", "Ž", "Õ", "Ä", "Ö", "Ü", "W" ), // added W for CollationEt (xx-uca-et)
		'eu' => array( "Ñ" ),
		'fo' => array( "Á", "Ð", "Í", "Ó", "Ú", "Ý", "Æ", "Ø", "Å" ),
		'fur' => array( "À", "Á", "Â", "È", "Ì", "Ò", "Ù" ),
		'fy' => array(),
		'ga' => array(),
		'gd' => array(),
		'gl' => array( "Ch", "Ll", "Ñ" ),
		'hr' => array( "Č", "Ć", "Dž", "Đ", "Lj", "Nj", "Š", "Ž" ),
		'hsb' => array( "Č", "Dź", "Ě", "Ch", "Ł", "Ń", "Ř", "Š", "Ć", "Ž" ),
		'kk' => array( "Ү", "І" ),
		'kl' => array( "Æ", "Ø", "Å" ),
		'ku' => array( "Ç", "Ê", "Î", "Ş", "Û" ),
		'ky' => array( "Ё" ),
		'la' => array(),
		'lb' => array(),
		'lt' => array( "Č", "Š", "Ž" ),
		'mk' => array(),
		'mo' => array( "Ă", "Â", "Î", "Ş", "Ţ" ),
		'mt' => array( "Ċ", "Ġ", "Għ", "Ħ", "Ż" ),
		'nl' => array(),
		'no' => array( "Æ", "Ø", "Å" ),
		'oc' => array(),
		'rm' => array(),
		'ro' => array( "Ă", "Â", "Î", "Ş", "Ţ" ),
		'rup' => array( "Ă", "Â", "Î", "Ľ", "Ń", "Ş", "Ţ" ),
		'sco' => array(),
		'sk' => array( "Ä", "Č", "Ch", "Ô", "Š", "Ž" ),
		'sl' => array( "Č", "Š", "Ž" ),
		'smn' => array( "Á", "Č", "Đ", "Ŋ", "Š", "Ŧ", "Ž", "Æ", "Ø", "Å", "Ä", "Ö" ),
		'sq' => array( "Ç", "Dh", "Ë", "Gj", "Ll", "Nj", "Rr", "Sh", "Th", "Xh", "Zh" ),
		'sr' => array(),
		'tk' => array( "Ç", "Ä", "Ž", "Ň", "Ö", "Ş", "Ü", "Ý" ),
		'tl' => array( "Ñ", "Ng" ),
		'tr' => array( "Ç", "Ğ", "İ", "Ö", "Ş", "Ü" ),
		'tt' => array( "Ә", "Ө", "Ү", "Җ", "Ң", "Һ" ),
		'uz' => array( "Ch", "G'", "Ng", "O'", "Sh" ),
	);

	const RECORD_LENGTH = 14;

	/**
	 * @param string $locale ICU locale name; 'root' selects the default
	 *  collation and uses English for digit transformation
	 * @throws MWException If the intl extension is not loaded, or if no
	 *  Collator can be created for the locale
	 */
	public function __construct( $locale ) {
		if ( !extension_loaded( 'intl' ) ) {
			throw new MWException( 'An ICU collation was requested, ' .
				'but the intl extension is not available.' );
		}

		$this->locale = $locale;
		// Drop everything after the '@' in locale's name
		$localeParts = explode( '@', $locale );
		$this->digitTransformLanguage = Language::factory( $locale === 'root' ? 'en' : $localeParts[0] );

		$this->mainCollator = Collator::create( $locale );
		if ( !$this->mainCollator ) {
			throw new MWException( "Invalid ICU locale specified for collation: $locale" );
		}

		$this->primaryCollator = Collator::create( $locale );
		$this->primaryCollator->setStrength( Collator::PRIMARY );
	}

	/**
	 * @param string $string UTF-8 string
	 * @return string Binary sortkey produced by the main collator
	 */
	public function getSortKey( $string ) {
		// intl extension produces non null-terminated
		// strings. Appending '' fixes it so that it doesn't generate
		// a warning on each access in debug php.
		wfSuppressWarnings();
		$key = $this->mainCollator->getSortKey( $string ) . '';
		wfRestoreWarnings();
		return $key;
	}

	/**
	 * Like getSortKey(), but uses the primary-strength collator.
	 *
	 * @param string $string UTF-8 string
	 * @return string Binary sortkey produced by the primary collator
	 */
	public function getPrimarySortKey( $string ) {
		wfSuppressWarnings();
		$key = $this->primaryCollator->getSortKey( $string ) . '';
		wfRestoreWarnings();
		return $key;
	}

	/**
	 * @param string $string UTF-8 string
	 * @return string The heading letter for the string: the first character
	 *  itself if it lies in one of the CJK blocks, otherwise the entry of the
	 *  first-letter data found by binary search on the primary sortkey.
	 *  Empty string for empty input or when the string sorts before the
	 *  first known letter.
	 */
	public function getFirstLetter( $string ) {
		$string = strval( $string );
		if ( $string === '' ) {
			return '';
		}

		// Check for CJK
		$firstChar = mb_substr( $string, 0, 1, 'UTF-8' );
		if ( ord( $firstChar ) > 0x7f && self::isCjk( utf8ToCodepoint( $firstChar ) ) ) {
			return $firstChar;
		}

		$sortKey = $this->getPrimarySortKey( $string );

		// Do a binary search to find the correct letter to sort under
		$min = ArrayUtils::findLowerBound(
			array( $this, 'getSortKeyByLetterIndex' ),
			$this->getFirstLetterCount(),
			'strcmp',
			$sortKey );

		if ( $min === false ) {
			// Before the first letter
			return '';
		}
		return $this->getLetterByIndex( $min );
	}

	/**
	 * Load the first-letter data: from this object if already loaded, else
	 * from the cache, else generated from the precompiled letter files and
	 * then stored in the cache for one week.
	 *
	 * @throws MWException If there is neither tailoring data nor a
	 *  precompiled letter file for the locale
	 * @return array Array with keys 'chars' (the letters), 'keys' (their
	 *  primary sortkeys, in the same order) and 'version'
	 */
	public function getFirstLetterData() {
		if ( $this->firstLetterData !== null ) {
			return $this->firstLetterData;
		}

		$cache = wfGetCache( CACHE_ANYTHING );
		$cacheKey = wfMemcKey(
			'first-letters',
			$this->locale,
			$this->digitTransformLanguage->getCode(),
			self::getICUVersion()
		);
		$cacheEntry = $cache->get( $cacheKey );

		if ( $cacheEntry && isset( $cacheEntry['version'] )
			&& $cacheEntry['version'] == self::FIRST_LETTER_VERSION
		) {
			$this->firstLetterData = $cacheEntry;
			return $this->firstLetterData;
		}

		// Generate data from serialized data file

		if ( isset( self::$tailoringFirstLetters[$this->locale] ) ) {
			$letters = wfGetPrecompiledData( "first-letters-root.ser" );
			// Append additional characters
			$letters = array_merge( $letters, self::$tailoringFirstLetters[$this->locale] );
			// Remove unnecessary ones, if any
			if ( isset( self::$tailoringFirstLetters['-' . $this->locale] ) ) {
				$letters = array_diff( $letters, self::$tailoringFirstLetters['-' . $this->locale] );
			}
			// Apply digit transforms
			$digits = array( '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' );
			$letters = array_diff( $letters, $digits );
			foreach ( $digits as $digit ) {
				$letters[] = $this->digitTransformLanguage->formatNum( $digit, true );
			}
		} else {
			$letters = wfGetPrecompiledData( "first-letters-{$this->locale}.ser" );
			if ( $letters === false ) {
				throw new MWException( "MediaWiki does not support ICU locale " .
					"\"{$this->locale}\"" );
			}
		}

		// Sort the letters.
		//
		// It's impossible to have the precompiled data file properly sorted,
		// because the sort order changes depending on ICU version. If the
		// array is not properly sorted, the binary search will return random
		// results.
		//
		// We also take this opportunity to remove primary collisions.
		$letterMap = array();
		foreach ( $letters as $letter ) {
			$key = $this->getPrimarySortKey( $letter );
			if ( isset( $letterMap[$key] ) ) {
				// Primary collision
				// Keep whichever one sorts first in the main collator
				if ( $this->mainCollator->compare( $letter, $letterMap[$key] ) < 0 ) {
					$letterMap[$key] = $letter;
				}
			} else {
				$letterMap[$key] = $letter;
			}
		}
		ksort( $letterMap, SORT_STRING );
		// Remove duplicate prefixes. Basically if something has a sortkey
		// which is a prefix of some other sortkey, then it is an
		// expansion and probably should not be considered a section
		// header.
		//
		// For example 'þ' is sometimes sorted as if it is the letters
		// 'th'. Other times it is its own primary element. Another
		// example is '₨'. Sometimes it's a currency symbol. Sometimes it
		// is an 'R' followed by an 's'.
		//
		// Additionally an expanded element should always sort directly
		// after its first element due to the way sortkeys work.
		//
		// UCA sortkey elements are of variable length but no collation
		// element should be a prefix of some other element, so I think
		// this is safe. See:
		// * https://ssl.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm
		// * http://site.icu-project.org/design/collation/uca-weight-allocation
		//
		// Additionally, there is something called primary compression to
		// worry about. Basically, if you have two primary elements that
		// are more than one byte and both start with the same byte then
		// the first byte is dropped on the second primary. Additionally
		// either \x03 or \xFF may be added to mean that the next primary
		// does not start with the first byte of the first primary.
		//
		// This shouldn't matter much, as the first primary is not
		// changed, and that is what we are comparing against.
		//
		// tl;dr: This makes some assumptions about how icu implements
		// collations. It seems incredibly unlikely these assumptions
		// will change, but nonetheless they are assumptions.

		$prev = false;
		$duplicatePrefixes = array();
		foreach ( $letterMap as $key => $value ) {
			// Remove terminator byte. Otherwise the prefix
			// comparison will get hung up on that.
			$trimmedKey = rtrim( $key, "\0" );
			if ( $prev === false || $prev === '' ) {
				$prev = $trimmedKey;
				// We don't yet have a collation element
				// to compare against, so continue.
				continue;
			}

			// Due to the fact the array is sorted, we only have
			// to compare with the element directly previous
			// to the current element (skipping expansions).
			// An element "X" will always sort directly
			// before "XZ" (Unless we have "XY", but we
			// do not update $prev in that case).
			if ( substr( $trimmedKey, 0, strlen( $prev ) ) === $prev ) {
				$duplicatePrefixes[] = $key;
				// If this is an expansion, we don't want to
				// compare the next element to this element,
				// but to what is currently $prev
				continue;
			}
			$prev = $trimmedKey;
		}
		foreach ( $duplicatePrefixes as $badKey ) {
			wfDebug( "Removing '{$letterMap[$badKey]}' from first letters.\n" );
			unset( $letterMap[$badKey] );
			// This code assumes that unsetting does not change sort order.
		}
		$data = array(
			'chars' => array_values( $letterMap ),
			'keys' => array_keys( $letterMap ),
			'version' => self::FIRST_LETTER_VERSION,
		);

		// Reduce memory usage before caching
		unset( $letterMap );

		// Save to cache
		$this->firstLetterData = $data;
		$cache->set( $cacheKey, $data, 86400 * 7 /* 1 week */ );
		return $data;
	}

	/**
	 * @param int $index Position in the sorted first-letter list
	 * @return string The letter at that position
	 */
	public function getLetterByIndex( $index ) {
		if ( $this->firstLetterData === null ) {
			$this->getFirstLetterData();
		}
		return $this->firstLetterData['chars'][$index];
	}

	/**
	 * @param int $index Position in the sorted first-letter list
	 * @return string The primary sortkey of the letter at that position
	 */
	public function getSortKeyByLetterIndex( $index ) {
		if ( $this->firstLetterData === null ) {
			$this->getFirstLetterData();
		}
		return $this->firstLetterData['keys'][$index];
	}

	/**
	 * @return int Number of entries in the first-letter list
	 */
	public function getFirstLetterCount() {
		if ( $this->firstLetterData === null ) {
			$this->getFirstLetterData();
		}
		return count( $this->firstLetterData['chars'] );
	}

	/**
	 * Test whether a code point lies in one of the blocks of $cjkBlocks.
	 *
	 * @param int $codepoint Unicode code point
	 * @return bool
	 */
	public static function isCjk( $codepoint ) {
		foreach ( self::$cjkBlocks as $block ) {
			if ( $codepoint >= $block[0] && $codepoint <= $block[1] ) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Return the version of ICU library used by PHP's intl extension,
	 * or false when the extension is not installed or the version
	 * can't be determined.
	 *
	 * The constant INTL_ICU_VERSION this function refers to isn't really
	 * documented. It is available since PHP 5.3.7 (see PHP bug 54561).
	 * This function will return false on older PHPs.
	 *
	 * @since 1.21
	 * @return string|bool
	 */
	public static function getICUVersion() {
		return defined( 'INTL_ICU_VERSION' ) ? INTL_ICU_VERSION : false;
	}

	/**
	 * Return the version of Unicode appropriate for the version of ICU library
	 * currently in use, or false when it can't be determined.
	 *
	 * @since 1.21
	 * @return string|bool
	 */
	public static function getUnicodeVersionForICU() {
		$icuVersion = self::getICUVersion();
		if ( !$icuVersion ) {
			return false;
		}

		// The map is keyed by the first three characters of the ICU version
		$versionPrefix = substr( $icuVersion, 0, 3 );
		// Source: http://site.icu-project.org/download
		$map = array(
			'50.' => '6.2',
			'49.' => '6.1',
			'4.8' => '6.0',
			'4.6' => '6.0',
			'4.4' => '5.2',
			'4.2' => '5.1',
			'4.0' => '5.1',
			'3.8' => '5.0',
			'3.6' => '5.0',
			'3.4' => '4.1',
		);

		if ( isset( $map[$versionPrefix] ) ) {
			return $map[$versionPrefix];
		} else {
			return false;
		}
	}
}

/**
 * Workaround for the lack of support of Sorani Kurdish / Central Kurdish language ('ckb') in ICU.
 *
 * Uses the same collation rules as Persian / Farsi ('fa'), but different characters for digits.
 */
class CollationCkb extends IcuCollation {
	public function __construct() {
		// This will set $locale and collators, which affect the actual sorting order
		parent::__construct( 'fa' );
		// Override the 'fa' language set by parent constructor, which affects #getFirstLetterData()
		$this->digitTransformLanguage = Language::factory( 'ckb' );
	}
}

/**
 * Workaround for incorrect collation of Estonian language ('et') in ICU (bug 54168).
 *
 * 'W' and 'V' should not be considered the same letter for the purposes of collation in modern
 * Estonian. We work around this by replacing 'W' and 'w' with 'ᴡ' U+1D21 'LATIN LETTER SMALL
 * CAPITAL W' for sortkey generation, which is collated like 'W' and is not tailored to have the
 * same primary weight as 'V' in Estonian.
 */
class CollationEt extends IcuCollation {
	public function __construct() {
		parent::__construct( 'et' );
	}

	/**
	 * Replace every 'w' and 'W' with U+1D21.
	 *
	 * @param string $string
	 * @return string
	 */
	private static function mangle( $string ) {
		return str_replace(
			array( 'w', 'W' ),
			'ᴡ', // U+1D21 'LATIN LETTER SMALL CAPITAL W'
			$string
		);
	}

	/**
	 * Replace every U+1D21 with an uppercase 'W'.
	 *
	 * @param string $string
	 * @return string
	 */
	private static function unmangle( $string ) {
		// Casing data is lost…
		return str_replace(
			'ᴡ', // U+1D21 'LATIN LETTER SMALL CAPITAL W'
			'W',
			$string
		);
	}

	/**
	 * @param string $string UTF-8 string
	 * @return string Binary sortkey of the mangled string
	 */
	public function getSortKey( $string ) {
		return parent::getSortKey( self::mangle( $string ) );
	}

	/**
	 * @param string $string UTF-8 string
	 * @return string First letter of the mangled string, with U+1D21 turned back into 'W'
	 */
	public function getFirstLetter( $string ) {
		return self::unmangle( parent::getFirstLetter( self::mangle( $string ) ) );
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |