MediaWiki
REL1_22
|
<?php
/**
 * Command-line front ends for the localisation checks, plus the per-language
 * list of checks that must be skipped.
 *
 * The actual checks are implemented by the object stored in
 * CheckLanguageCLI::$L; the classes in this file only parse options, pick the
 * checks to run, call them, and print the results.
 */

/**
 * Runs localisation checks for one language (or all of them) and prints the
 * results as plain text or wikitext.
 */
class CheckLanguageCLI {
	/** Language code to check (ignored when $checkAll is set). */
	protected $code = null;

	/** Display level, 0-3; see help() for the meaning of each value. */
	protected $level = 2;

	/** Whether message keys are rendered as wiki links. */
	protected $doLinks = false;

	/** Prefix put in front of the "MediaWiki:" link target. */
	protected $linksPrefix = '';

	/** Language code for which links get no "/code" suffix. */
	protected $wikiCode = 'en';

	/** Whether to check every language reported by $L->getLanguages(). */
	protected $checkAll = false;

	/** Output format: 'plain' or 'wiki'. */
	protected $output = 'plain';

	/** Names of the checks to run (keys of getChecks()). */
	protected $checks = array();

	/** Object whose methods implement the checks. */
	protected $L = null;

	/** Check results, as array( language code => array( check => problems ) ). */
	protected $results = array();

	/** Passed to the `languages` constructor; false when --noexif is given. */
	private $includeExif = false;

	/**
	 * Reads the command line options and prepares the checker object.
	 *
	 * Prints the help text and terminates the script when 'help' is set.
	 *
	 * @param array $options Options parsed from the command line
	 */
	public function __construct( array $options ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit( 1 );
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			# Command line values are strings; checkLanguage() compares the
			# level with the integer 0, so normalise it here.
			$this->level = (int)$options['level'];
		}

		$this->doLinks = isset( $options['links'] );
		$this->includeExif = !isset( $options['noexif'] );
		$this->checkAll = isset( $options['all'] );

		if ( isset( $options['prefix'] ) ) {
			$this->linksPrefix = $options['prefix'];
		}

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		$this->checks = $this->resolveChecks( $options );

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		$this->L = new languages( $this->includeExif );
	}

	/**
	 * Works out which checks to run from the whitelist/blacklist/easy options.
	 *
	 * A whitelist wins over everything else; a blacklist is subtracted from
	 * the easy checks when 'easy' is also set and from the default checks
	 * otherwise.
	 *
	 * @param array $options Options parsed from the command line
	 * @return array Check names
	 */
	protected function resolveChecks( array $options ) {
		if ( isset( $options['whitelist'] ) ) {
			return explode( ',', $options['whitelist'] );
		}

		$base = isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks();
		if ( isset( $options['blacklist'] ) ) {
			return array_diff( $base, explode( ',', $options['blacklist'] ) );
		}

		return $base;
	}

	/**
	 * Checks run when no whitelist, blacklist or 'easy' option is given.
	 *
	 * @return array Check names
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced', 'namespace',
			'projecttalk', 'magic', 'magic-old', 'magic-over', 'magic-case',
			'special', 'special-old',
		);
	}

	/**
	 * Checks whose results are not message keys. Their keys are printed
	 * without link formatting and they are left out of the wiki table.
	 *
	 * @return array Check names
	 */
	protected function nonMessageChecks() {
		return array(
			'namespace', 'projecttalk', 'magic', 'magic-old', 'magic-over',
			'magic-case', 'special', 'special-old',
		);
	}

	/**
	 * Checks selected by the 'easy' option.
	 *
	 * @return array Check names
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars', 'magic-old',
			'magic-over', 'magic-case', 'special-old',
		);
	}

	/**
	 * Maps every known check name to the method of $this->L implementing it.
	 *
	 * @return array array( check name => method name )
	 */
	protected function getChecks() {
		return array(
			'untranslated' => 'getUntranslatedMessages',
			'duplicate' => 'getDuplicateMessages',
			'obsolete' => 'getObsoleteMessages',
			'variables' => 'getMessagesWithMismatchVariables',
			'plural' => 'getMessagesWithoutPlural',
			'empty' => 'getEmptyMessages',
			'whitespace' => 'getMessagesWithWhitespace',
			'xhtml' => 'getNonXHTMLMessages',
			'chars' => 'getMessagesWithWrongChars',
			'links' => 'getMessagesWithDubiousLinks',
			'unbalanced' => 'getMessagesWithUnbalanced',
			'namespace' => 'getUntranslatedNamespaces',
			'projecttalk' => 'getProblematicProjectTalks',
			'magic' => 'getUntranslatedMagicWords',
			'magic-old' => 'getObsoleteMagicWords',
			'magic-over' => 'getOverridingMagicWords',
			'magic-case' => 'getCaseMismatchMagicWords',
			# The spelling below is the callee's name; do not "fix" it here.
			'special' => 'getUntraslatedSpecialPages',
			'special-old' => 'getObsoleteSpecialPages',
		);
	}

	/**
	 * Describes how outputText() obtains the total ($2) for each non-message
	 * check: array( method of $this->L, language code ), where a null code
	 * stands for the language being checked. A null entry means the check has
	 * no total.
	 *
	 * @return array array( check name => array( method, code|null )|null )
	 */
	protected function getTotalCount() {
		return array(
			'namespace' => array( 'getNamespaceNames', 'en' ),
			'projecttalk' => null,
			'magic' => array( 'getMagicWords', 'en' ),
			'magic-old' => array( 'getMagicWords', null ),
			'magic-over' => array( 'getMagicWords', null ),
			'magic-case' => array( 'getMagicWords', null ),
			'special' => array( 'getSpecialPageAliases', 'en' ),
			'special-old' => array( 'getSpecialPageAliases', null ),
		);
	}

	/**
	 * Header lines printed by outputText() for each check. $1 is the number
	 * of problems, $2 the total and $3 the language code.
	 *
	 * @return array array( check name => description )
	 */
	protected function getDescriptions() {
		return array(
			'untranslated' => '$1 message(s) of $2 are not translated to $3, but exist in en:',
			'duplicate' => '$1 message(s) of $2 are translated the same in en and $3:',
			'obsolete' => '$1 message(s) of $2 do not exist in en or are in the ignore list, but exist in $3:',
			'variables' => '$1 message(s) of $2 in $3 don\'t match the variables used in en:',
			'plural' => '$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:',
			'empty' => '$1 message(s) of $2 in $3 are empty or -:',
			'whitespace' => '$1 message(s) of $2 in $3 have trailing whitespace:',
			'xhtml' => '$1 message(s) of $2 in $3 contain illegal XHTML:',
			'chars' => '$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:',
			'links' => '$1 message(s) of $2 in $3 have problematic link(s):',
			'unbalanced' => '$1 message(s) of $2 in $3 have unbalanced {[]}:',
			'namespace' => '$1 namespace name(s) of $2 are not translated to $3, but exist in en:',
			'projecttalk' => '$1 namespace name(s) and alias(es) in $3 are project talk namespaces without the parameter:',
			'magic' => '$1 magic word(s) of $2 are not translated to $3, but exist in en:',
			'magic-old' => '$1 magic word(s) of $2 do not exist in en, but exist in $3:',
			'magic-over' => '$1 magic word(s) of $2 in $3 do not contain the original en word(s):',
			'magic-case' => '$1 magic word(s) of $2 in $3 change the case-sensitivity of the original en word:',
			'special' => '$1 special page alias(es) of $2 are not translated to $3, but exist in en:',
			'special-old' => '$1 special page alias(es) of $2 do not exist in en, but exist in $3:',
		);
	}

	/**
	 * Usage text printed for the 'help' option.
	 *
	 * @return string
	 */
	protected function help() {
		return <<<ENDS
Run this script to check a specific language file, or all of them.
Command line settings are in form --parameter[=value].
Parameters:
	--help: Show this help.
	--lang: Language code (default: the installation default language).
	--all: Check all customized languages.
	--level: Show the following display level (default: 2):
		* 0: Skip the checks (useful for checking syntax).
		* 1: Show only the stub headers and number of wrong messages, without list of messages.
		* 2: Show only the headers and the message keys, without the message values.
		* 3: Show both the headers and the complete messages, with both keys and values.
	--links: Link the message values (default off).
	--prefix: prefix to add to links.
	--wikilang: For the links, what is the content language of the wiki to display the output in (default en).
	--noexif: Do not check for Exif messages (a bit hard and boring to translate), if you know
		that they are currently not translated and want to focus on other problems (default off).
	--whitelist: Do only the following checks (form: code,code).
	--blacklist: Do not do the following checks (form: code,code).
	--easy: Do only the easy checks, which can be treated by non-speakers of the language.

Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
	* untranslated: Messages which are required to translate, but are not translated.
	* duplicate: Messages which translation equal to fallback
	* obsolete: Messages which are untranslatable or do not exist, but are translated.
	* variables: Messages without variables which should be used, or with variables which should not be used.
	* empty: Empty messages and messages that contain only -.
	* whitespace: Messages which have trailing whitespace.
	* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
	* chars: Messages with hidden characters.
	* links: Messages which contains broken links to pages (does not find all).
	* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
	* namespace: Namespace names that were not translated.
	* projecttalk: Namespace names and aliases where the project talk does not contain $1.
	* magic: Magic words that were not translated.
	* magic-old: Magic words which do not exist.
	* magic-over: Magic words that override the original English word.
	* magic-case: Magic words whose translation changes the case-sensitivity of the original English word.
	* special: Special page names that were not translated.
	* special-old: Special page names which do not exist.

ENDS;
	}

	/**
	 * Runs the checks and, unless the level is 0, prints the results.
	 *
	 * @throws MWException If the output format is neither 'plain' nor 'wiki'
	 */
	public function execute() {
		$this->doChecks();
		if ( $this->level > 0 ) {
			$this->outputResults();
		}
	}

	/**
	 * Prints $this->results in the format selected by $this->output.
	 *
	 * @throws MWException If the output format is neither 'plain' nor 'wiki'
	 */
	protected function outputResults() {
		switch ( $this->output ) {
			case 'plain':
				$this->outputText();
				break;
			case 'wiki':
				$this->outputWiki();
				break;
			default:
				throw new MWException( "Invalid output type $this->output" );
		}
	}

	/**
	 * Fills $this->results for the requested language, or for every language
	 * except 'en' and 'enRTL' when all languages were requested.
	 *
	 * @throws MWException If a single, uncheckable language was requested
	 */
	protected function doChecks() {
		$ignoredCodes = array( 'en', 'enRTL' );

		$this->results = array();
		# Check the language
		if ( $this->checkAll ) {
			foreach ( $this->L->getLanguages() as $language ) {
				if ( !in_array( $language, $ignoredCodes ) ) {
					$this->results[$language] = $this->checkLanguage( $language );
				}
			}

			return;
		}

		if ( in_array( $this->code, $ignoredCodes ) ) {
			throw new MWException( "Cannot check code $this->code." );
		}
		$this->results[$this->code] = $this->checkLanguage( $this->code );
	}

	/**
	 * Returns the global per-language list of checks to skip.
	 *
	 * @return array array( language code => array( check names ) ); empty when
	 *  the global is not set
	 */
	protected function getCheckBlacklist() {
		global $checkBlacklist;

		return is_array( $checkBlacklist ) ? $checkBlacklist : array();
	}

	/**
	 * Runs every selected check for one language.
	 *
	 * At level 0 the messages are only loaded and no check is run. Checks
	 * blacklisted for the language yield an empty result.
	 *
	 * @param string $code Language code
	 * @return array array( check name => problems found )
	 * @throws MWException If a selected check is not known
	 */
	protected function checkLanguage( $code ) {
		$results = array();

		# Syntax check only. The cast keeps this working when the level was
		# assigned as a string.
		if ( (int)$this->level === 0 ) {
			$this->L->getMessages( $code );

			return $results;
		}

		$checkFunctions = $this->getChecks();
		$checkBlacklist = $this->getCheckBlacklist();
		$skipped = isset( $checkBlacklist[$code] ) ? $checkBlacklist[$code] : array();

		foreach ( $this->checks as $check ) {
			if ( in_array( $check, $skipped ) ) {
				$results[$check] = array();
				continue;
			}

			# Validate the name before using it as an array key.
			if ( !isset( $checkFunctions[$check] ) ) {
				throw new MWException( "Unknown check $check." );
			}

			$callback = array( $this->L, $checkFunctions[$check] );
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Unknown check $check." );
			}
			$results[$check] = call_user_func( $callback, $code );
		}

		return $results;
	}

	/**
	 * Formats a message key for output, as a wiki link when links are enabled.
	 *
	 * @param string $key Message key
	 * @param string $code Language code of the message
	 * @return string The key itself, or a link to its MediaWiki: page
	 */
	protected function formatKey( $key, $code ) {
		if ( !$this->doLinks ) {
			return $key;
		}

		$displayKey = ucfirst( $key );
		# Messages in the wiki's own language live on the base page.
		$suffix = ( $code == $this->wikiCode ) ? '' : "/$code";

		return "[[{$this->linksPrefix}MediaWiki:$displayKey$suffix|$key]]";
	}

	/**
	 * Prints $this->results as plain text: one header per check that found
	 * problems, followed (from level 2 on) by the affected keys and (at
	 * level 3) their values.
	 */
	protected function outputText() {
		$descriptions = $this->getDescriptions();
		$nonMessageChecks = $this->nonMessageChecks();
		$totalCounts = $this->getTotalCount();
		$search = array( '$1', '$2', '$3' );

		foreach ( $this->results as $code => $results ) {
			$languageMessages = $this->L->getMessages( $code );
			$translated = count( $languageMessages['translated'] );

			foreach ( $results as $check => $messages ) {
				$count = count( $messages );
				if ( !$count ) {
					continue;
				}

				$isNonMessageCheck = in_array( $check, $nonMessageChecks );
				if ( $check === 'untranslated' ) {
					$translatable = $this->L->getGeneralMessages();
					$total = count( $translatable['translatable'] );
				} elseif ( $isNonMessageCheck ) {
					$source = isset( $totalCounts[$check] ) ? $totalCounts[$check] : null;
					if ( $source === null ) {
						# No total is defined for this check (its description
						# has no $2), so there is nothing to call.
						$total = 0;
					} else {
						$callCode = $source[1] ? $source[1] : $code;
						$total = count( call_user_func( array( $this->L, $source[0] ), $callCode ) );
					}
				} else {
					$total = $translated;
				}

				$replace = array( $count, $total, $code );
				echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";

				if ( $this->level == 1 ) {
					echo "[messages are hidden]\n";
					continue;
				}

				foreach ( $messages as $key => $value ) {
					if ( !$isNonMessageCheck ) {
						$key = $this->formatKey( $key, $code );
					}
					if ( $this->level == 2 || empty( $value ) ) {
						echo "* $key\n";
					} else {
						echo "* $key: '$value'\n";
					}
				}
			}
		}
	}

	/**
	 * Prints $this->results as wikitext: a sortable table with one row per
	 * language that has problems, followed by a section per language listing
	 * the affected keys. Non-message checks are left out.
	 */
	public function outputWiki() {
		$nonMessageChecks = $this->nonMessageChecks();
		$detailText = '';
		$rows = array(
			'! Language !! Code !! Total !! ' .
				implode( ' !! ', array_diff( $this->checks, $nonMessageChecks ) ),
		);

		foreach ( $this->results as $code => $results ) {
			$detailTextForLang = "==$code==\n";
			$numbers = array();
			$problems = 0;
			$detailTextForLangChecks = array();

			foreach ( $results as $check => $messages ) {
				if ( in_array( $check, $nonMessageChecks ) ) {
					continue;
				}

				$count = count( $messages );
				if ( !$count ) {
					$numbers[] = $count;
					continue;
				}

				$problems += $count;
				$messageDetails = array();
				foreach ( array_keys( $messages ) as $key ) {
					$messageDetails[] = $this->formatKey( $key, $code );
				}
				$detailTextForLangChecks[] = "=== $code-$check ===\n* " . implode( ', ', $messageDetails );
				$numbers[] = "'''[[#$code-$check|$count]]'''";
			}

			if ( count( $detailTextForLangChecks ) ) {
				$detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
			}

			if ( !$problems ) {
				# Don't list languages without problems
				continue;
			}
			$language = Language::fetchLanguageName( $code );
			$rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
		}

		$tableRows = implode( "\n|-\n", $rows );

		$version = SpecialVersion::getVersion( 'nodb' );
		echo <<<EOL
'''Check results are for:''' <code>$version</code>


{| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear: both;"
$tableRows
|}

$detailText

EOL;
	}

	/**
	 * Tells whether no check found any problem.
	 *
	 * @return bool True when every result list in $this->results is empty
	 */
	protected function isEmpty() {
		foreach ( $this->results as $results ) {
			foreach ( $results as $messages ) {
				if ( !empty( $messages ) ) {
					return false;
				}
			}
		}

		return true;
	}
}

/**
 * Runs the message checks for one language against a set of extension
 * message groups, printing the results extension by extension.
 */
class CheckExtensionsCLI extends CheckLanguageCLI {
	/** Checker objects, one per selected extension message group. */
	private $extensions;

	/**
	 * Reads the command line options and builds the list of extensions.
	 *
	 * Prints the help text and terminates the script when 'help' is set.
	 *
	 * @param array $options Options parsed from the command line
	 * @param string $extension 'all', 'wikimedia', 'flaggedrevs', or a
	 *  comma-separated list of extension names
	 */
	public function __construct( array $options, $extension ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit( 1 );
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			# Command line values are strings; the parent's checkLanguage()
			# compares the level with the integer 0.
			$this->level = (int)$options['level'];
		}

		$this->doLinks = isset( $options['links'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		$this->checks = $this->resolveChecks( $options );

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		# Some additional checks not enabled by default. Skip the append when
		# the check is already selected, so it neither runs twice nor adds a
		# second column to the wiki table header.
		if ( isset( $options['duplicate'] ) && !in_array( 'duplicate', $this->checks ) ) {
			$this->checks[] = 'duplicate';
		}

		$this->extensions = array();
		$premadeGroups = new PremadeMediawikiExtensionGroups();
		$premadeGroups->addAll();

		if ( $extension == 'all' ) {
			$this->addGroupsWithPrefix( 'ext-' );
		} elseif ( $extension == 'wikimedia' ) {
			$wikimedia = MessageGroups::getGroup( 'ext-0-wikimedia' );
			foreach ( $wikimedia->wmfextensions() as $groupId ) {
				$group = MessageGroups::getGroup( $groupId );
				$this->extensions[] = new extensionLanguages( $group );
			}
		} elseif ( $extension == 'flaggedrevs' ) {
			$this->addGroupsWithPrefix( 'ext-flaggedrevs-' );
		} else {
			foreach ( explode( ',', $extension ) as $name ) {
				$group = MessageGroups::getGroup( 'ext-' . $name );
				if ( $group ) {
					$this->extensions[] = new extensionLanguages( $group );
				} else {
					print "No such extension $name.\n";
				}
			}
		}
	}

	/**
	 * Adds a checker for every non-meta message group whose id starts with
	 * the given prefix.
	 *
	 * @param string $prefix Group id prefix
	 */
	private function addGroupsWithPrefix( $prefix ) {
		foreach ( MessageGroups::singleton()->getGroups() as $group ) {
			if ( strpos( $group->getId(), $prefix ) === 0 && !$group->isMeta() ) {
				$this->extensions[] = new extensionLanguages( $group );
			}
		}
	}

	/**
	 * Checks run when no whitelist, blacklist or 'easy' option is given.
	 *
	 * @return array Check names
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced',
		);
	}

	/**
	 * Extensions have no non-message checks.
	 *
	 * @return array Always empty
	 */
	protected function nonMessageChecks() {
		return array();
	}

	/**
	 * Checks selected by the 'easy' option.
	 *
	 * @return array Check names
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars',
		);
	}

	/**
	 * Usage text printed for the 'help' option.
	 *
	 * @return string
	 */
	protected function help() {
		return <<<ENDS
Run this script to check the status of a specific language in extensions, or all of them.
Command line settings are in form --parameter[=value], except for the first one.
Parameters:
	* First parameter (mandatory): Extension name, multiple extension names (separated by commas), "all" for all the extensions, "wikimedia" for extensions used by Wikimedia or "flaggedrevs" for all FLaggedRevs extension messages.
	* lang: Language code (default: the installation default language).
	* help: Show this help.
	* level: Show the following display level (default: 2).
	* links: Link the message values (default off).
	* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
	* whitelist: Do only the following checks (form: code,code).
	* blacklist: Do not perform the following checks (form: code,code).
	* easy: Do only the easy checks, which can be treated by non-speakers of the language.
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
	* untranslated: Messages which are required to translate, but are not translated.
	* duplicate: Messages which translation equal to fallback
	* obsolete: Messages which are untranslatable, but translated.
	* variables: Messages without variables which should be used, or with variables which should not be used.
	* empty: Empty messages.
	* whitespace: Messages which have trailing whitespace.
	* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
	* chars: Messages with hidden characters.
	* links: Messages which contains broken links to pages (does not find all).
	* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
	* 0: Skip the checks (useful for checking syntax).
	* 1: Show only the stub headers and number of wrong messages, without list of messages.
	* 2: Show only the headers and the message keys, without the message values.
	* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Runs the checks; the output is printed from checkLanguage(), once per
	 * extension.
	 */
	public function execute() {
		$this->doChecks();
	}

	/**
	 * Checks one language against every selected extension and prints the
	 * results of each extension that has problems, under its name.
	 *
	 * @param string $code Language code
	 * @return array Results of the last extension checked, or an empty array
	 *  when no extension is selected
	 * @throws MWException If the output format is neither 'plain' nor 'wiki'
	 */
	protected function checkLanguage( $code ) {
		$lastResults = array();

		foreach ( $this->extensions as $extension ) {
			# The parent's methods work on $this->L and $this->results, so
			# point them at the current extension.
			$this->L = $extension;
			$lastResults = parent::checkLanguage( $code );
			$this->results = array( $code => $lastResults );

			if ( $this->isEmpty() ) {
				continue;
			}

			echo $extension->name() . ":\n";
			if ( $this->level > 0 ) {
				$this->outputResults();
			}
			echo "\n";
		}

		return $lastResults;
	}
}

# Blacklist some checks for some languages
$checkBlacklist = array(
#'code' => array( 'check1', 'check2' ... )
'az' => array( 'plural' ),
'bo' => array( 'plural' ),
'cdo' => array( 'plural' ),
'dz' => array( 'plural' ),
'id' => array( 'plural' ),
'fa' => array( 'plural' ),
'gan' => array( 'plural' ),
'gan-hans' => array( 'plural' ),
'gan-hant' => array( 'plural' ),
'gn' => array( 'plural' ),
'hak' => array( 'plural' ),
'hu' => array( 'plural' ),
'ja' => array( 'plural' ), // Does not use plural
'jv' => array( 'plural' ),
'ka' => array( 'plural' ),
'kk-arab' => array( 'plural' ),
'kk-cyrl' => array( 'plural' ),
'kk-latn' => array( 'plural' ),
'km' => array( 'plural' ),
'kn' => array( 'plural' ),
'ko' => array( 'plural' ),
'lzh' => array( 'plural' ),
'mn' => array( 'plural' ),
'ms' => array( 'plural' ),
'my' => array( 'plural', 'chars' ), // Uses a lot zwnj
'sah' => array( 'plural' ),
'sq' => array( 'plural' ),
'tet' => array( 'plural' ),
'th' => array( 'plural' ),
'to' => array( 'plural' ),
'tr' => array( 'plural' ),
'vi' => array( 'plural' ),
'wuu' => array( 'plural' ),
'xmf' => array( 'plural' ),
'yo' => array( 'plural' ),
'yue' => array( 'plural' ),
'zh' => array( 'plural' ),
'zh-classical' => array( 'plural' ),
'zh-cn' => array( 'plural' ),
'zh-hans' => array( 'plural' ),
'zh-hant' => array( 'plural' ),
'zh-hk' => array( 'plural' ),
'zh-sg' => array( 'plural' ),
'zh-tw' => array( 'plural' ),
'zh-yue' => array( 'plural' ),
);