MediaWiki  REL1_21
CollationTest.php
Go to the documentation of this file.
00001 <?php
/**
 * Tests for collation behaviour: binary sort-key prefix properties of the
 * intl Collator, and first-letter extraction by Collation implementations.
 */
class CollationTest extends MediaWikiLangTestCase {
	/**
	 * Skip every test in this class when the intl extension is not loaded.
	 */
	protected function setUp() {
		parent::setUp();
		if ( !extension_loaded( 'intl' ) ) {
			$this->markTestSkipped( 'These tests require intl extension' );
		}
	}

	/**
	 * Build the primary-strength binary sort keys compared by the prefix tests.
	 *
	 * @param string $lang Locale code passed to Collator::create()
	 * @param string $base String whose sort key is used as the candidate prefix
	 * @param string $extended String whose sort key is searched
	 * @return array Two elements: the sort key of $base with trailing NUL
	 *   bytes removed, and the unmodified sort key of $extended
	 */
	private function getPrimarySortKeys( $lang, $base, $extended ) {
		$collator = Collator::create( $lang );
		// Collator::create() returns null on failure; report that as a test
		// failure rather than a fatal error on the next method call.
		$this->assertNotNull( $collator, "Could not create a collator for $lang" );
		$collator->setStrength( Collator::PRIMARY );

		// Remove sortkey terminator
		$baseBin = rtrim( $collator->getSortKey( $base ), "\0" );
		$extendedBin = $collator->getSortKey( $extended );

		return array( $baseBin, $extendedBin );
	}

	/**
	 * Assert that the binary sort key of $base (without its terminator)
	 * is a prefix of the binary sort key of $extended.
	 *
	 * @dataProvider prefixDataProvider
	 *
	 * @param string $lang Locale code for the collator
	 * @param string $base Base string
	 * @param string $extended String whose sort key should start with that of $base
	 */
	public function testIsPrefix( $lang, $base, $extended ) {
		list( $baseBin, $extendedBin ) = $this->getPrimarySortKeys( $lang, $base, $extended );
		$this->assertStringStartsWith( $baseBin, $extendedBin, "$base is not a prefix of $extended" );
	}

	/**
	 * Data sets for testIsPrefix().
	 *
	 * @return array List of array( locale, base, extended ) triples
	 */
	public function prefixDataProvider() {
		return array(
			array( 'en', 'A', 'AA' ),
			array( 'en', 'A', 'AAA' ),
			array( 'en', 'Д', 'ДЂ' ),
			array( 'en', 'Д', 'ДA' ),
			// 'Ʒ' should expand to 'Z ' (note space).
			array( 'fi', 'Z', 'Ʒ' ),
			// 'Þ' should expand to 'th'
			array( 'sv', 't', 'Þ' ),
			// Javanese is a limited use alphabet, so should have 3 bytes
			// per character, so do some tests with it.
			array( 'en', 'ꦲ', 'ꦲꦤ' ),
			array( 'en', 'ꦲ', 'ꦲД' ),
			array( 'en', 'A', 'Aꦲ' ),
		);
	}

	/**
	 * Assert that the binary sort key of $base (without its terminator)
	 * is NOT a prefix of the binary sort key of $extended.
	 *
	 * @dataProvider notPrefixDataProvider
	 *
	 * @param string $lang Locale code for the collator
	 * @param string $base Base string
	 * @param string $extended String whose sort key must not start with that of $base
	 */
	public function testNotIsPrefix( $lang, $base, $extended ) {
		list( $baseBin, $extendedBin ) = $this->getPrimarySortKeys( $lang, $base, $extended );
		$this->assertStringStartsNotWith( $baseBin, $extendedBin, "$base is a prefix of $extended" );
	}

	/**
	 * Data sets for testNotIsPrefix().
	 *
	 * @return array List of array( locale, base, extended ) triples
	 */
	public function notPrefixDataProvider() {
		return array(
			array( 'en', 'A', 'B' ),
			array( 'en', 'AC', 'ABC' ),
			array( 'en', 'Z', 'Ʒ' ),
			array( 'en', 'A', 'ꦲ' ),
		);
	}

	/**
	 * Assert that the collation built by Collation::factory() for $collation
	 * returns $firstLetter from getFirstLetter( $string ).
	 *
	 * @dataProvider firstLetterProvider
	 *
	 * @param string $collation Collation name passed to Collation::factory()
	 * @param string $string String to extract the first letter from
	 * @param string $firstLetter Expected first letter
	 */
	public function testGetFirstLetter( $collation, $string, $firstLetter ) {
		$col = Collation::factory( $collation );
		$this->assertEquals(
			$firstLetter,
			$col->getFirstLetter( $string ),
			"First letter of '$string' under collation '$collation'"
		);
	}

	/**
	 * Data sets for testGetFirstLetter().
	 *
	 * @return array List of array( collation, string, expected first letter ) triples
	 */
	public function firstLetterProvider() {
		return array(
			array( 'uppercase', 'Abc', 'A' ),
			array( 'uppercase', 'abc', 'A' ),
			array( 'identity', 'abc', 'a' ),
			array( 'uca-en', 'abc', 'A' ),
			array( 'uca-en', ' ', ' ' ),
			array( 'uca-en', 'Êveryone', 'E' ),
			array( 'uca-vi', 'Êveryone', 'Ê' ),
			// Make sure thorn is not a first letter.
			array( 'uca-sv', 'The', 'T' ),
			array( 'uca-sv', 'Å', 'Å' ),
			array( 'uca-hu', 'dzsdo', 'Dzs' ),
			array( 'uca-hu', 'dzdso', 'Dz' ),
			array( 'uca-hu', 'CSD', 'Cs' ),
			array( 'uca-root', 'CSD', 'C' ),
			array( 'uca-fi', 'Ǥ', 'G' ),
			array( 'uca-fi', 'Ŧ', 'T' ),
			array( 'uca-fi', 'Ʒ', 'Z' ),
			array( 'uca-fi', 'Ŋ', 'N' ),
		);
	}
}