MediaWiki
REL1_21
|
<?php
/**
 * Command-line front ends for the localisation checks.
 *
 * CheckLanguageCLI runs the checks against the core messages through a
 * `languages` object; CheckExtensionsCLI runs a reduced set of checks against
 * extension message groups through `extensionLanguages` objects.
 */

/**
 * Runs the configured localisation checks for one language (or all of them)
 * and prints the findings as plain text or as wikitext.
 */
class CheckLanguageCLI {
	/** Language code to check. */
	protected $code = null;
	/** Display level, 0-3 (see help()); always stored as an integer. */
	protected $level = 2;
	/** Whether message keys are rendered as wiki links. */
	protected $doLinks = false;
	/** Prefix put in front of the link target in formatKey(). */
	protected $linksPrefix = '';
	/** Language code for which links carry no "/code" suffix. */
	protected $wikiCode = 'en';
	/** Whether every language returned by $L->getLanguages() is checked. */
	protected $checkAll = false;
	/** Output format: 'plain' or 'wiki'. */
	protected $output = 'plain';
	/** List of check codes to run. */
	protected $checks = array();
	/** Object the check methods are called on. */
	protected $L = null;

	/** Results, as array( language code => array( check code => findings ) ). */
	protected $results = array();

	/** Passed to the `languages` constructor; false when --noexif is given. */
	private $includeExif = false;

	/**
	 * Reads the command line options and prepares the checker.
	 *
	 * Prints the help text and terminates the script when 'help' is set.
	 *
	 * @param array $options Options as option name => value.
	 */
	public function __construct( array $options ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit( 1 );
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			# Command line values are strings; checkLanguage() compares the
			# level strictly against the integer 0, so normalise it here.
			$this->level = (int)$options['level'];
		}

		$this->doLinks = isset( $options['links'] );
		$this->includeExif = !isset( $options['noexif'] );
		$this->checkAll = isset( $options['all'] );

		if ( isset( $options['prefix'] ) ) {
			$this->linksPrefix = $options['prefix'];
		}

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		$this->checks = $this->resolveChecks( $options );

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		$this->L = new languages( $this->includeExif );
	}

	/**
	 * Works out which checks to run from the whitelist/blacklist/easy options.
	 *
	 * A whitelist wins over everything else. Otherwise the base list is the
	 * easy checks (when 'easy' is set) or the default checks, minus any
	 * blacklisted codes.
	 *
	 * @param array $options Options as option name => value.
	 * @return array List of check codes.
	 */
	protected function resolveChecks( array $options ) {
		if ( isset( $options['whitelist'] ) ) {
			return explode( ',', $options['whitelist'] );
		}

		$base = isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks();
		if ( isset( $options['blacklist'] ) ) {
			return array_diff( $base, explode( ',', $options['blacklist'] ) );
		}

		return $base;
	}

	/**
	 * Get the checks that are run when no selection option is given.
	 *
	 * @return array List of check codes.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced', 'namespace',
			'projecttalk', 'magic', 'magic-old', 'magic-over', 'magic-case',
			'special', 'special-old',
		);
	}

	/**
	 * Get the checks whose findings are not message keys. Their keys are not
	 * passed through formatKey() and they are left out of the wiki table.
	 *
	 * @return array List of check codes.
	 */
	protected function nonMessageChecks() {
		return array(
			'namespace', 'projecttalk', 'magic', 'magic-old', 'magic-over',
			'magic-case', 'special', 'special-old',
		);
	}

	/**
	 * Get the checks used when the 'easy' option is given.
	 *
	 * @return array List of check codes.
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars', 'magic-old',
			'magic-over', 'magic-case', 'special-old',
		);
	}

	/**
	 * Get the map from check code to the method called on $this->L.
	 *
	 * @return array Check code => method name.
	 */
	protected function getChecks() {
		return array(
			'untranslated' => 'getUntranslatedMessages',
			'duplicate'    => 'getDuplicateMessages',
			'obsolete'     => 'getObsoleteMessages',
			'variables'    => 'getMessagesWithMismatchVariables',
			'plural'       => 'getMessagesWithoutPlural',
			'empty'        => 'getEmptyMessages',
			'whitespace'   => 'getMessagesWithWhitespace',
			'xhtml'        => 'getNonXHTMLMessages',
			'chars'        => 'getMessagesWithWrongChars',
			'links'        => 'getMessagesWithDubiousLinks',
			'unbalanced'   => 'getMessagesWithUnbalanced',
			'namespace'    => 'getUntranslatedNamespaces',
			'projecttalk'  => 'getProblematicProjectTalks',
			'magic'        => 'getUntranslatedMagicWords',
			'magic-old'    => 'getObsoleteMagicWords',
			'magic-over'   => 'getOverridingMagicWords',
			'magic-case'   => 'getCaseMismatchMagicWords',
			# The spelling below is the method name as referenced by this file.
			'special'      => 'getUntraslatedSpecialPages',
			'special-old'  => 'getObsoleteSpecialPages',
		);
	}

	/**
	 * Get how the total ($2 in the descriptions) is computed for each
	 * non-message check.
	 *
	 * Each entry is either null (no total is computed) or
	 * array( method name on $this->L, language code ), where a null language
	 * code means "the language being checked".
	 *
	 * @return array Check code => array( method, code|null ) or null.
	 */
	protected function getTotalCount() {
		return array(
			'namespace'   => array( 'getNamespaceNames', 'en' ),
			'projecttalk' => null,
			'magic'       => array( 'getMagicWords', 'en' ),
			'magic-old'   => array( 'getMagicWords', null ),
			'magic-over'  => array( 'getMagicWords', null ),
			'magic-case'  => array( 'getMagicWords', null ),
			'special'     => array( 'getSpecialPageAliases', 'en' ),
			'special-old' => array( 'getSpecialPageAliases', null ),
		);
	}

	/**
	 * Get the header line for each check. $1 is replaced by the number of
	 * findings, $2 by the total and $3 by the language code.
	 *
	 * @return array Check code => description template.
	 */
	protected function getDescriptions() {
		return array(
			'untranslated' => '$1 message(s) of $2 are not translated to $3, but exist in en:',
			'duplicate'    => '$1 message(s) of $2 are translated the same in en and $3:',
			'obsolete'     => '$1 message(s) of $2 do not exist in en or are in the ignore list, but exist in $3:',
			'variables'    => '$1 message(s) of $2 in $3 don\'t match the variables used in en:',
			'plural'       => '$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:',
			'empty'        => '$1 message(s) of $2 in $3 are empty or -:',
			'whitespace'   => '$1 message(s) of $2 in $3 have trailing whitespace:',
			'xhtml'        => '$1 message(s) of $2 in $3 contain illegal XHTML:',
			'chars'        => '$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:',
			'links'        => '$1 message(s) of $2 in $3 have problematic link(s):',
			'unbalanced'   => '$1 message(s) of $2 in $3 have unbalanced {[]}:',
			'namespace'    => '$1 namespace name(s) of $2 are not translated to $3, but exist in en:',
			'projecttalk'  => '$1 namespace name(s) and alias(es) in $3 are project talk namespaces without the parameter:',
			'magic'        => '$1 magic word(s) of $2 are not translated to $3, but exist in en:',
			'magic-old'    => '$1 magic word(s) of $2 do not exist in en, but exist in $3:',
			'magic-over'   => '$1 magic word(s) of $2 in $3 do not contain the original en word(s):',
			'magic-case'   => '$1 magic word(s) of $2 in $3 change the case-sensitivity of the original en word:',
			'special'      => '$1 special page alias(es) of $2 are not translated to $3, but exist in en:',
			'special-old'  => '$1 special page alias(es) of $2 do not exist in en, but exist in $3:',
		);
	}

	/**
	 * Get the help text.
	 *
	 * NOTE(review): the indentation inside this heredoc was reconstructed;
	 * confirm it against the upstream file.
	 *
	 * @return string The help text.
	 */
	protected function help() {
		return <<<ENDS
Run this script to check a specific language file, or all of them.
Command line settings are in form --parameter[=value].
Parameters:
	--help: Show this help.
	--lang: Language code (default: the installation default language).
	--all: Check all customized languages.
	--level: Show the following display level (default: 2):
		* 0: Skip the checks (useful for checking syntax).
		* 1: Show only the stub headers and number of wrong messages, without list of messages.
		* 2: Show only the headers and the message keys, without the message values.
		* 3: Show both the headers and the complete messages, with both keys and values.
	--links: Link the message values (default off).
	--prefix: prefix to add to links.
	--wikilang: For the links, what is the content language of the wiki to display the output in (default en).
	--noexif: Do not check for EXIF messages (a bit hard and boring to translate), if you know
		that they are currently not translated and want to focus on other problems (default off).
	--whitelist: Do only the following checks (form: code,code).
	--blacklist: Do not do the following checks (form: code,code).
	--easy: Do only the easy checks, which can be treated by non-speakers of the language.

Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
	* untranslated: Messages which are required to translate, but are not translated.
	* duplicate: Messages which translation equal to fallback
	* obsolete: Messages which are untranslatable or do not exist, but are translated.
	* variables: Messages without variables which should be used, or with variables which should not be used.
	* empty: Empty messages and messages that contain only -.
	* whitespace: Messages which have trailing whitespace.
	* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
	* chars: Messages with hidden characters.
	* links: Messages which contains broken links to pages (does not find all).
	* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
	* namespace: Namespace names that were not translated.
	* projecttalk: Namespace names and aliases where the project talk does not contain $1.
	* magic: Magic words that were not translated.
	* magic-old: Magic words which do not exist.
	* magic-over: Magic words that override the original English word.
	* magic-case: Magic words whose translation changes the case-sensitivity of the original English word.
	* special: Special page names that were not translated.
	* special-old: Special page names which do not exist.

ENDS;
	}

	/**
	 * Run the checks and, unless the level is 0, print the results.
	 *
	 * @throws MWException On an invalid output type or an uncheckable code.
	 */
	public function execute() {
		$this->doChecks();
		if ( $this->level > 0 ) {
			$this->outputResults();
		}
	}

	/**
	 * Print $this->results in the format selected by $this->output.
	 *
	 * @throws MWException When the output type is neither 'plain' nor 'wiki'.
	 */
	protected function outputResults() {
		switch ( $this->output ) {
			case 'plain':
				$this->outputText();
				break;
			case 'wiki':
				$this->outputWiki();
				break;
			default:
				throw new MWException( "Invalid output type $this->output" );
		}
	}

	/**
	 * Fill $this->results for the selected language, or for every language
	 * except the ignored codes when 'all' was requested.
	 *
	 * @throws MWException When a single, ignored code was requested.
	 */
	protected function doChecks() {
		$ignoredCodes = array( 'en', 'enRTL' );

		$this->results = array();
		# Check the language
		if ( $this->checkAll ) {
			foreach ( $this->L->getLanguages() as $language ) {
				if ( !in_array( $language, $ignoredCodes ) ) {
					$this->results[$language] = $this->checkLanguage( $language );
				}
			}
			return;
		}

		if ( in_array( $this->code, $ignoredCodes ) ) {
			throw new MWException( "Cannot check code $this->code." );
		}
		$this->results[$this->code] = $this->checkLanguage( $this->code );
	}

	/**
	 * Get the per-language check blacklist defined at the end of this file.
	 *
	 * @return array Language code => list of check codes to skip; empty when
	 *  the global is not available as an array.
	 */
	protected function getCheckBlacklist() {
		global $checkBlacklist;
		return is_array( $checkBlacklist ) ? $checkBlacklist : array();
	}

	/**
	 * Run every configured check for one language.
	 *
	 * At level 0 only the messages are loaded and no check is run. A check
	 * blacklisted for the language is reported with an empty result.
	 *
	 * @param string $code Language code.
	 * @return array Check code => findings.
	 * @throws MWException When a check code has no callable implementation.
	 */
	protected function checkLanguage( $code ) {
		$results = array();
		# Syntax check only
		if ( $this->level === 0 ) {
			$this->L->getMessages( $code );
			return $results;
		}

		$checkFunctions = $this->getChecks();
		$checkBlacklist = $this->getCheckBlacklist();
		foreach ( $this->checks as $check ) {
			if ( isset( $checkBlacklist[$code] ) &&
				in_array( $check, $checkBlacklist[$code] ) ) {
				$results[$check] = array();
				continue;
			}

			# Validate the code before indexing the map, so a mistyped
			# whitelist entry gives a clean error instead of a notice.
			if ( !isset( $checkFunctions[$check] ) ) {
				throw new MWException( "Unknown check $check." );
			}
			$callback = array( $this->L, $checkFunctions[$check] );
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Unknown check $check." );
			}
			$results[$check] = call_user_func( $callback, $code );
		}

		return $results;
	}

	/**
	 * Format a message key for output.
	 *
	 * @param string $key Message key.
	 * @param string $code Language code.
	 * @return string The key itself, or a wiki link to the MediaWiki: page
	 *  (with a "/code" suffix unless $code is the wiki language) when links
	 *  are enabled.
	 */
	protected function formatKey( $key, $code ) {
		if ( !$this->doLinks ) {
			return $key;
		}

		$displayKey = ucfirst( $key );
		if ( $code == $this->wikiCode ) {
			return "[[{$this->linksPrefix}MediaWiki:$displayKey|$key]]";
		}
		return "[[{$this->linksPrefix}MediaWiki:$displayKey/$code|$key]]";
	}

	/**
	 * Print the results as plain text.
	 */
	protected function outputText() {
		$descriptions = $this->getDescriptions();
		$nonMessageChecks = $this->nonMessageChecks();
		$search = array( '$1', '$2', '$3' );

		foreach ( $this->results as $code => $results ) {
			$translated = $this->L->getMessages( $code );
			$translated = count( $translated['translated'] );
			foreach ( $results as $check => $messages ) {
				$count = count( $messages );
				if ( !$count ) {
					continue;
				}

				$isNonMessage = in_array( $check, $nonMessageChecks );
				if ( $check == 'untranslated' ) {
					$translatable = $this->L->getGeneralMessages();
					$total = count( $translatable['translatable'] );
				} elseif ( $isNonMessage ) {
					$totalCount = $this->getTotalCount();
					$totalSpec = isset( $totalCount[$check] ) ? $totalCount[$check] : null;
					if ( $totalSpec === null ) {
						# No total is defined for this check (e.g. 'projecttalk',
						# whose description has no $2); do not build a callback
						# from a null method name.
						$total = 0;
					} else {
						$callback = array( $this->L, $totalSpec[0] );
						$callCode = $totalSpec[1] ? $totalSpec[1] : $code;
						$total = count( call_user_func( $callback, $callCode ) );
					}
				} else {
					$total = $translated;
				}

				$replace = array( $count, $total, $code );
				echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";
				if ( $this->level == 1 ) {
					echo "[messages are hidden]\n";
					continue;
				}

				foreach ( $messages as $key => $value ) {
					if ( !$isNonMessage ) {
						$key = $this->formatKey( $key, $code );
					}
					if ( $this->level == 2 || empty( $value ) ) {
						echo "* $key\n";
					} else {
						echo "* $key: '$value'\n";
					}
				}
			}
		}
	}

	/**
	 * Print the results as wikitext: a summary table with one row per
	 * language that has problems, followed by the per-check key lists.
	 * Non-message checks are not included.
	 */
	public function outputWiki() {
		$detailText = '';
		$nonMessageChecks = $this->nonMessageChecks();
		$rows = array();
		$rows[] = '! Language !! Code !! Total !! ' . implode( ' !! ', array_diff( $this->checks, $nonMessageChecks ) );
		foreach ( $this->results as $code => $results ) {
			$detailTextForLang = "==$code==\n";
			$numbers = array();
			$problems = 0;
			$detailTextForLangChecks = array();
			foreach ( $results as $check => $messages ) {
				if ( in_array( $check, $nonMessageChecks ) ) {
					continue;
				}
				$count = count( $messages );
				if ( $count ) {
					$problems += $count;
					$messageDetails = array();
					foreach ( array_keys( $messages ) as $key ) {
						$messageDetails[] = $this->formatKey( $key, $code );
					}
					$detailTextForLangChecks[] = "=== $code-$check ===\n* " . implode( ', ', $messageDetails );
					$numbers[] = "'''[[#$code-$check|$count]]'''";
				} else {
					$numbers[] = $count;
				}
			}

			if ( count( $detailTextForLangChecks ) ) {
				$detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
			}

			if ( !$problems ) {
				# Don't list languages without problems
				continue;
			}
			$language = Language::fetchLanguageName( $code );
			$rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
		}

		$tableRows = implode( "\n|-\n", $rows );

		$version = SpecialVersion::getVersion( 'nodb' );
		echo <<<EOL
'''Check results are for:''' <code>$version</code>


{| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear: both;"
$tableRows
|}

$detailText

EOL;
	}

	/**
	 * Check whether $this->results contains no findings at all.
	 *
	 * @return bool True when every check of every language is empty.
	 */
	protected function isEmpty() {
		foreach ( $this->results as $results ) {
			foreach ( $results as $messages ) {
				if ( !empty( $messages ) ) {
					return false;
				}
			}
		}
		return true;
	}
}

/**
 * Runs the message checks against extension message groups, printing the
 * findings extension by extension.
 */
class CheckExtensionsCLI extends CheckLanguageCLI {
	/** List of extensionLanguages objects to check. */
	private $extensions;

	/**
	 * Reads the command line options and collects the extensions to check.
	 *
	 * The parent constructor is deliberately not called: it would create a
	 * core `languages` object, while here $this->L is set per extension in
	 * checkLanguage().
	 *
	 * @param array $options Options as option name => value.
	 * @param string $extension Extension name, comma-separated names, or one
	 *  of 'all', 'wikimedia', 'flaggedrevs'.
	 */
	public function __construct( array $options, $extension ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit( 1 );
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			# See CheckLanguageCLI::__construct(): the level must be an integer
			# for the strict "level 0" comparison in checkLanguage().
			$this->level = (int)$options['level'];
		}

		$this->doLinks = isset( $options['links'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		$this->checks = $this->resolveChecks( $options );

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		# Some additional checks not enabled by default
		if ( isset( $options['duplicate'] ) ) {
			$this->checks[] = 'duplicate';
		}

		$this->extensions = array();
		$extensions = new PremadeMediawikiExtensionGroups();
		$extensions->addAll();
		if ( $extension == 'all' ) {
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				if ( strpos( $group->getId(), 'ext-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new extensionLanguages( $group );
				}
			}
		} elseif ( $extension == 'wikimedia' ) {
			$wikimedia = MessageGroups::getGroup( 'ext-0-wikimedia' );
			foreach ( $wikimedia->wmfextensions() as $extension ) {
				$group = MessageGroups::getGroup( $extension );
				$this->extensions[] = new extensionLanguages( $group );
			}
		} elseif ( $extension == 'flaggedrevs' ) {
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				if ( strpos( $group->getId(), 'ext-flaggedrevs-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new extensionLanguages( $group );
				}
			}
		} else {
			$extensions = explode( ',', $extension );
			foreach ( $extensions as $extension ) {
				$group = MessageGroups::getGroup( 'ext-' . $extension );
				if ( $group ) {
					$extension = new extensionLanguages( $group );
					$this->extensions[] = $extension;
				} else {
					print "No such extension $extension.\n";
				}
			}
		}
	}

	/**
	 * Get the checks that are run when no selection option is given.
	 *
	 * @return array List of check codes.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced',
		);
	}

	/**
	 * Get the non-message checks; there are none for extensions.
	 *
	 * @return array Empty list.
	 */
	protected function nonMessageChecks() {
		return array();
	}

	/**
	 * Get the checks used when the 'easy' option is given.
	 *
	 * @return array List of check codes.
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars',
		);
	}

	/**
	 * Get the help text.
	 *
	 * NOTE(review): the indentation inside this heredoc was reconstructed;
	 * confirm it against the upstream file.
	 *
	 * @return string The help text.
	 */
	protected function help() {
		return <<<ENDS
Run this script to check the status of a specific language in extensions, or all of them.
Command line settings are in form --parameter[=value], except for the first one.
Parameters:
	* First parameter (mandatory): Extension name, multiple extension names (separated by commas), "all" for all the extensions, "wikimedia" for extensions used by Wikimedia or "flaggedrevs" for all FLaggedRevs extension messages.
	* lang: Language code (default: the installation default language).
	* help: Show this help.
	* level: Show the following display level (default: 2).
	* links: Link the message values (default off).
	* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
	* whitelist: Do only the following checks (form: code,code).
	* blacklist: Do not perform the following checks (form: code,code).
	* easy: Do only the easy checks, which can be treated by non-speakers of the language.
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
	* untranslated: Messages which are required to translate, but are not translated.
	* duplicate: Messages which translation equal to fallback
	* obsolete: Messages which are untranslatable, but translated.
	* variables: Messages without variables which should be used, or with variables which should not be used.
	* empty: Empty messages.
	* whitespace: Messages which have trailing whitespace.
	* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
	* chars: Messages with hidden characters.
	* links: Messages which contains broken links to pages (does not find all).
	* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
	* 0: Skip the checks (useful for checking syntax).
	* 1: Show only the stub headers and number of wrong messages, without list of messages.
	* 2: Show only the headers and the message keys, without the message values.
	* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Run the checks. Output is produced per extension from checkLanguage().
	 */
	public function execute() {
		$this->doChecks();
	}

	/**
	 * Check one language in every collected extension and print the findings
	 * of each extension that has any.
	 *
	 * Unlike the parent method this prints directly and returns nothing;
	 * $this->results only ever holds the extension currently being processed.
	 *
	 * @param string $code Language code.
	 * @throws MWException On an invalid output type or unknown check.
	 */
	protected function checkLanguage( $code ) {
		foreach ( $this->extensions as $extension ) {
			$this->L = $extension;
			$this->results = array();
			$this->results[$code] = parent::checkLanguage( $code );

			if ( $this->isEmpty() ) {
				continue;
			}

			echo $extension->name() . ":\n";

			if ( $this->level > 0 ) {
				$this->outputResults();
			}

			echo "\n";
		}
	}
}

# Blacklist some checks for some languages
$checkBlacklist = array(
#'code'        => array( 'check1', 'check2' ... )
'az'           => array( 'plural' ),
'bo'           => array( 'plural' ),
'dz'           => array( 'plural' ),
'id'           => array( 'plural' ),
'fa'           => array( 'plural' ),
'gan'          => array( 'plural' ),
'gan-hans'     => array( 'plural' ),
'gan-hant'     => array( 'plural' ),
'gn'           => array( 'plural' ),
'hak'          => array( 'plural' ),
'hu'           => array( 'plural' ),
'ja'           => array( 'plural' ), // Does not use plural
'jv'           => array( 'plural' ),
'ka'           => array( 'plural' ),
'kk-arab'      => array( 'plural' ),
'kk-cyrl'      => array( 'plural' ),
'kk-latn'      => array( 'plural' ),
'km'           => array( 'plural' ),
'kn'           => array( 'plural' ),
'ko'           => array( 'plural' ),
'lzh'          => array( 'plural' ),
'mn'           => array( 'plural' ),
'ms'           => array( 'plural' ),
'my'           => array( 'plural', 'chars' ),  // Uses a lot zwnj
'sah'          => array( 'plural' ),
'sq'           => array( 'plural' ),
'tet'          => array( 'plural' ),
'th'           => array( 'plural' ),
'to'           => array( 'plural' ),
'tr'           => array( 'plural' ),
'vi'           => array( 'plural' ),
'wuu'          => array( 'plural' ),
'xmf'          => array( 'plural' ),
'yo'           => array( 'plural' ),
'yue'          => array( 'plural' ),
'zh'           => array( 'plural' ),
'zh-classical' => array( 'plural' ),
'zh-cn'        => array( 'plural' ),
'zh-hans'      => array( 'plural' ),
'zh-hant'      => array( 'plural' ),
'zh-hk'        => array( 'plural' ),
'zh-sg'        => array( 'plural' ),
'zh-tw'        => array( 'plural' ),
'zh-yue'       => array( 'plural' ),
);