MediaWiki
REL1_20
|
<?php
/**
 * Command line helpers that run translation checks on language files
 * and on extension message groups, and print the results.
 *
 * @file
 */

/**
 * Runs a configurable list of checks against one language (or all of them)
 * and prints the problems found as plain text or wikitext.
 */
class CheckLanguageCLI {
	/** Language code to check; ignored when $checkAll is set. */
	protected $code = null;
	/** Display level (0-3), see help(). Always stored as an integer. */
	protected $level = 2;
	/** Whether message keys are printed as wiki links. */
	protected $doLinks = false;
	/** Prefix put in front of the page name of generated links. */
	protected $linksPrefix = '';
	/** Code for which links are generated without a "/code" subpage suffix. */
	protected $wikiCode = 'en';
	/** Whether every language returned by $this->L->getLanguages() is checked. */
	protected $checkAll = false;
	/** Output format: 'plain' or 'wiki'. */
	protected $output = 'plain';
	/** List of check codes to run. */
	protected $checks = array();
	/** Object the check callbacks are invoked on. */
	protected $L = null;

	/** Check results, indexed by language code and then by check code. */
	protected $results = array();

	/** Passed to the languages constructor; false when --noexif is given. */
	private $includeExif = false;

	/**
	 * Read the command line options and set up the checker.
	 * Prints the help text and terminates the script when 'help' is set.
	 *
	 * @param $options Array: command line options, indexed by option name.
	 */
	public function __construct( array $options ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit( 1 );
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			// Normalise to an integer: a string '0' would never satisfy the
			// strict "=== 0" comparison that selects syntax-check-only mode.
			$this->level = (int)$options['level'];
		}

		$this->doLinks = isset( $options['links'] );
		$this->includeExif = !isset( $options['noexif'] );
		$this->checkAll = isset( $options['all'] );

		if ( isset( $options['prefix'] ) ) {
			$this->linksPrefix = $options['prefix'];
		}

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		$this->checks = $this->resolveChecks( $options );

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		$this->L = new languages( $this->includeExif );
	}

	/**
	 * Work out which checks to run from the whitelist, blacklist and easy
	 * options. A whitelist wins over everything else; otherwise the easy or
	 * default list is used, minus any blacklisted codes.
	 *
	 * @param $options Array: command line options, indexed by option name.
	 * @return Array: list of check codes.
	 */
	protected function resolveChecks( array $options ) {
		if ( isset( $options['whitelist'] ) ) {
			return explode( ',', $options['whitelist'] );
		}

		$base = isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks();
		if ( isset( $options['blacklist'] ) ) {
			return array_diff( $base, explode( ',', $options['blacklist'] ) );
		}

		return $base;
	}

	/**
	 * Get the checks that are run when no whitelist or easy option is given.
	 * @return Array: list of check codes.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced', 'namespace',
			'projecttalk', 'magic', 'magic-old', 'magic-over', 'magic-case',
			'special', 'special-old',
		);
	}

	/**
	 * Get the checks whose results are not message keys. Their keys are not
	 * turned into links and they are left out of the wiki output.
	 * @return Array: list of check codes.
	 */
	protected function nonMessageChecks() {
		return array(
			'namespace', 'projecttalk', 'magic', 'magic-old', 'magic-over',
			'magic-case', 'special', 'special-old',
		);
	}

	/**
	 * Get the checks selected by the 'easy' option.
	 * @return Array: list of check codes.
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars', 'magic-old',
			'magic-over', 'magic-case', 'special-old',
		);
	}

	/**
	 * Get the map from check code to the name of the method that is called
	 * on $this->L to run the check.
	 * @return Array: check code => method name.
	 */
	protected function getChecks() {
		return array(
			'untranslated' => 'getUntranslatedMessages',
			'duplicate'    => 'getDuplicateMessages',
			'obsolete'     => 'getObsoleteMessages',
			'variables'    => 'getMessagesWithMismatchVariables',
			'plural'       => 'getMessagesWithoutPlural',
			'empty'        => 'getEmptyMessages',
			'whitespace'   => 'getMessagesWithWhitespace',
			'xhtml'        => 'getNonXHTMLMessages',
			'chars'        => 'getMessagesWithWrongChars',
			'links'        => 'getMessagesWithDubiousLinks',
			'unbalanced'   => 'getMessagesWithUnbalanced',
			'namespace'    => 'getUntranslatedNamespaces',
			'projecttalk'  => 'getProblematicProjectTalks',
			'magic'        => 'getUntranslatedMagicWords',
			'magic-old'    => 'getObsoleteMagicWords',
			'magic-over'   => 'getOverridingMagicWords',
			'magic-case'   => 'getCaseMismatchMagicWords',
			// NOTE(review): "Untraslated" is kept as-is because the name is
			// resolved at run time on $this->L; confirm against that class
			// before correcting the spelling.
			'special'      => 'getUntraslatedSpecialPages',
			'special-old'  => 'getObsoleteSpecialPages',
		);
	}

	/**
	 * Get, for each non-message check, how its total item count is obtained
	 * in outputText(): the method to call on $this->L and the language code
	 * to pass to it. A null code means "use the language being checked";
	 * a null entry means no total is available for that check.
	 * @return Array: check code => array( method name, code or null ) or null.
	 */
	protected function getTotalCount() {
		return array(
			'namespace'   => array( 'getNamespaceNames', 'en' ),
			'projecttalk' => null,
			'magic'       => array( 'getMagicWords', 'en' ),
			'magic-old'   => array( 'getMagicWords', null ),
			'magic-over'  => array( 'getMagicWords', null ),
			'magic-case'  => array( 'getMagicWords', null ),
			'special'     => array( 'getSpecialPageAliases', 'en' ),
			'special-old' => array( 'getSpecialPageAliases', null ),
		);
	}

	/**
	 * Get the header line printed for each check. $1 is replaced by the
	 * number of problems, $2 by the total number of items and $3 by the
	 * language code.
	 * @return Array: check code => description template.
	 */
	protected function getDescriptions() {
		return array(
			'untranslated' => '$1 message(s) of $2 are not translated to $3, but exist in en:',
			'duplicate'    => '$1 message(s) of $2 are translated the same in en and $3:',
			'obsolete'     => '$1 message(s) of $2 do not exist in en or are in the ignore list, but exist in $3:',
			'variables'    => '$1 message(s) of $2 in $3 don\'t match the variables used in en:',
			'plural'       => '$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:',
			'empty'        => '$1 message(s) of $2 in $3 are empty or -:',
			'whitespace'   => '$1 message(s) of $2 in $3 have trailing whitespace:',
			'xhtml'        => '$1 message(s) of $2 in $3 contain illegal XHTML:',
			'chars'        => '$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:',
			'links'        => '$1 message(s) of $2 in $3 have problematic link(s):',
			'unbalanced'   => '$1 message(s) of $2 in $3 have unbalanced {[]}:',
			'namespace'    => '$1 namespace name(s) of $2 are not translated to $3, but exist in en:',
			'projecttalk'  => '$1 namespace name(s) and alias(es) in $3 are project talk namespaces without the parameter:',
			'magic'        => '$1 magic word(s) of $2 are not translated to $3, but exist in en:',
			'magic-old'    => '$1 magic word(s) of $2 do not exist in en, but exist in $3:',
			'magic-over'   => '$1 magic word(s) of $2 in $3 do not contain the original en word(s):',
			'magic-case'   => '$1 magic word(s) of $2 in $3 change the case-sensitivity of the original en word:',
			'special'      => '$1 special page alias(es) of $2 are not translated to $3, but exist in en:',
			'special-old'  => '$1 special page alias(es) of $2 do not exist in en, but exist in $3:',
		);
	}

	/**
	 * Get the help text printed for the 'help' option.
	 * @return String: the help text.
	 */
	protected function help() {
		return <<<ENDS
Run this script to check a specific language file, or all of them.
Command line settings are in form --parameter[=value].
Parameters:
	--help: Show this help.
	--lang: Language code (default: the installation default language).
	--all: Check all customized languages.
	--level: Show the following display level (default: 2):
		* 0: Skip the checks (useful for checking syntax).
		* 1: Show only the stub headers and number of wrong messages, without list of messages.
		* 2: Show only the headers and the message keys, without the message values.
		* 3: Show both the headers and the complete messages, with both keys and values.
	--links: Link the message values (default off).
	--prefix: prefix to add to links.
	--wikilang: For the links, what is the content language of the wiki to display the output in (default en).
	--noexif: Do not check for EXIF messages (a bit hard and boring to translate), if you know
		that they are currently not translated and want to focus on other problems (default off).
	--whitelist: Do only the following checks (form: code,code).
	--blacklist: Do not do the following checks (form: code,code).
	--easy: Do only the easy checks, which can be treated by non-speakers of the language.

Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
	* untranslated: Messages which are required to translate, but are not translated.
	* duplicate: Messages which translation equal to fallback
	* obsolete: Messages which are untranslatable or do not exist, but are translated.
	* variables: Messages without variables which should be used, or with variables which should not be used.
	* empty: Empty messages and messages that contain only -.
	* whitespace: Messages which have trailing whitespace.
	* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
	* chars: Messages with hidden characters.
	* links: Messages which contains broken links to pages (does not find all).
	* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
	* namespace: Namespace names that were not translated.
	* projecttalk: Namespace names and aliases where the project talk does not contain $1.
	* magic: Magic words that were not translated.
	* magic-old: Magic words which do not exist.
	* magic-over: Magic words that override the original English word.
	* magic-case: Magic words whose translation changes the case-sensitivity of the original English word.
	* special: Special page names that were not translated.
	* special-old: Special page names which do not exist.

ENDS;
	}

	/**
	 * Run the checks and, unless the display level is 0, print the results.
	 *
	 * @throws MWException when the output type or the language code is invalid.
	 */
	public function execute() {
		$this->doChecks();
		if ( $this->level > 0 ) {
			$this->outputResults();
		}
	}

	/**
	 * Print the collected results in the format selected by $this->output.
	 *
	 * @throws MWException when the output type is neither 'plain' nor 'wiki'.
	 */
	protected function outputResults() {
		switch ( $this->output ) {
			case 'plain':
				$this->outputText();
				break;
			case 'wiki':
				$this->outputWiki();
				break;
			default:
				throw new MWException( "Invalid output type $this->output" );
		}
	}

	/**
	 * Run checkLanguage() for the selected language, or for every language
	 * when 'all' was requested, and store the results in $this->results.
	 * The codes 'en' and 'enRTL' are never checked.
	 *
	 * @throws MWException when a single, ignored code was requested.
	 */
	protected function doChecks() {
		$ignoredCodes = array( 'en', 'enRTL' );

		$this->results = array();
		# Check the language
		if ( $this->checkAll ) {
			foreach ( $this->L->getLanguages() as $language ) {
				if ( !in_array( $language, $ignoredCodes ) ) {
					$this->results[$language] = $this->checkLanguage( $language );
				}
			}
			return;
		}

		if ( in_array( $this->code, $ignoredCodes ) ) {
			throw new MWException( "Cannot check code $this->code." );
		}
		$this->results[$this->code] = $this->checkLanguage( $this->code );
	}

	/**
	 * Get the per-language list of checks that must be skipped, as defined
	 * by the global $checkBlacklist.
	 * @return Array: language code => list of check codes.
	 */
	protected function getCheckBlacklist() {
		global $checkBlacklist;
		return $checkBlacklist;
	}

	/**
	 * Run the selected checks on one language.
	 *
	 * @param $code String: the language code.
	 * @return Array: check code => result of the check; blacklisted checks
	 *   map to an empty array. Empty when the display level is 0.
	 * @throws MWException when a selected check code is not known.
	 */
	protected function checkLanguage( $code ) {
		$results = array();

		# Syntax check only
		if ( (int)$this->level === 0 ) {
			$this->L->getMessages( $code );
			return $results;
		}

		$checkFunctions = $this->getChecks();
		$checkBlacklist = $this->getCheckBlacklist();
		$skipped = isset( $checkBlacklist[$code] ) ? $checkBlacklist[$code] : array();

		foreach ( $this->checks as $check ) {
			if ( in_array( $check, $skipped ) ) {
				$results[$check] = array();
				continue;
			}

			// Reject unknown codes before indexing the map, so a bad
			// whitelist entry yields the exception and not a PHP notice.
			if ( !isset( $checkFunctions[$check] ) ) {
				throw new MWException( "Unknown check $check." );
			}

			$callback = array( $this->L, $checkFunctions[$check] );
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Unknown check $check." );
			}
			$results[$check] = call_user_func( $callback, $code );
		}

		return $results;
	}

	/**
	 * Format a message key for output, as a wiki link when links are enabled.
	 *
	 * @param $key String: the message key.
	 * @param $code String: the language code the key belongs to.
	 * @return String: the key itself, or a link to its MediaWiki: page.
	 */
	protected function formatKey( $key, $code ) {
		if ( !$this->doLinks ) {
			return $key;
		}

		$displayKey = ucfirst( $key );
		// Only messages in another language than the wiki's get a subpage.
		$subpage = ( $code == $this->wikiCode ) ? '' : "/$code";
		return "[[{$this->linksPrefix}MediaWiki:{$displayKey}{$subpage}|$key]]";
	}

	/**
	 * Print the results as plain text: one header per check that found
	 * problems, followed by the affected keys (and values at level 3).
	 */
	protected function outputText() {
		$descriptions = $this->getDescriptions();
		$nonMessageChecks = $this->nonMessageChecks();
		$totalCounts = $this->getTotalCount();
		$search = array( '$1', '$2', '$3' );

		foreach ( $this->results as $code => $results ) {
			$translated = $this->L->getMessages( $code );
			$translated = count( $translated['translated'] );
			foreach ( $results as $check => $messages ) {
				$count = count( $messages );
				if ( !$count ) {
					continue;
				}

				$isNonMessage = in_array( $check, $nonMessageChecks );
				if ( $check == 'untranslated' ) {
					$translatable = $this->L->getGeneralMessages();
					$total = count( $translatable['translatable'] );
				} elseif ( $isNonMessage ) {
					// Some checks (projecttalk) declare no counter; their
					// description does not use $2, so 0 is a safe total.
					$total = 0;
					if ( isset( $totalCounts[$check] ) ) {
						list( $method, $fixedCode ) = $totalCounts[$check];
						$callCode = $fixedCode ? $fixedCode : $code;
						$total = count( call_user_func( array( $this->L, $method ), $callCode ) );
					}
				} else {
					$total = $translated;
				}

				$replace = array( $count, $total, $code );
				echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";

				if ( $this->level == 1 ) {
					echo "[messages are hidden]\n";
					continue;
				}

				foreach ( $messages as $key => $value ) {
					if ( !$isNonMessage ) {
						$key = $this->formatKey( $key, $code );
					}
					if ( $this->level == 2 || empty( $value ) ) {
						echo "* $key\n";
					} else {
						echo "* $key: '$value'\n";
					}
				}
			}
		}
	}

	/**
	 * Print the results as wikitext: a sortable table with one row per
	 * language that has problems, followed by per-language detail sections.
	 * Non-message checks are left out.
	 */
	public function outputWiki() {
		$nonMessageChecks = $this->nonMessageChecks();
		$detailText = '';
		$rows = array();
		$rows[] = '! Language !! Code !! Total !! ' .
			implode( ' !! ', array_diff( $this->checks, $nonMessageChecks ) );

		foreach ( $this->results as $code => $results ) {
			$detailTextForLang = "==$code==\n";
			$numbers = array();
			$problems = 0;
			$detailTextForLangChecks = array();

			foreach ( $results as $check => $messages ) {
				if ( in_array( $check, $nonMessageChecks ) ) {
					continue;
				}

				$count = count( $messages );
				if ( !$count ) {
					$numbers[] = $count;
					continue;
				}

				$problems += $count;
				$messageDetails = array();
				foreach ( $messages as $key => $details ) {
					$messageDetails[] = $this->formatKey( $key, $code );
				}
				$detailTextForLangChecks[] = "=== $code-$check ===\n* " . implode( ', ', $messageDetails );
				$numbers[] = "'''[[#$code-$check|$count]]'''";
			}

			if ( count( $detailTextForLangChecks ) ) {
				$detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
			}

			if ( !$problems ) {
				# Don't list languages without problems
				continue;
			}
			$language = Language::fetchLanguageName( $code );
			$rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
		}

		$tableRows = implode( "\n|-\n", $rows );

		$version = SpecialVersion::getVersion( 'nodb' );
		echo <<<EOL
'''Check results are for:''' <code>$version</code>


{| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear: both;"
$tableRows
|}

$detailText

EOL;
	}

	/**
	 * Check whether none of the stored results contains a problem.
	 * @return Boolean: true when every check of every language is empty.
	 */
	protected function isEmpty() {
		foreach ( $this->results as $results ) {
			foreach ( $results as $messages ) {
				if ( !empty( $messages ) ) {
					return false;
				}
			}
		}
		return true;
	}
}

/**
 * Runs the message checks against extension message groups and prints the
 * results of each extension as soon as it has been checked.
 */
class CheckExtensionsCLI extends CheckLanguageCLI {
	/** List of extensionLanguages objects to check. */
	private $extensions;

	/**
	 * Read the command line options and collect the extensions to check.
	 * Prints the help text and terminates the script when 'help' is set.
	 *
	 * @param $options Array: command line options, indexed by option name.
	 * @param $extension String: 'all', 'wikimedia', 'flaggedrevs', or a
	 *   comma-separated list of extension names.
	 */
	public function __construct( array $options, $extension ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit( 1 );
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			// Integer, for the same reason as in the parent constructor.
			$this->level = (int)$options['level'];
		}

		$this->doLinks = isset( $options['links'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		$this->checks = $this->resolveChecks( $options );

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		# Some additional checks not enabled by default
		if ( isset( $options['duplicate'] ) ) {
			$this->checks[] = 'duplicate';
		}

		$this->extensions = array();
		$premadeGroups = new PremadeMediawikiExtensionGroups();
		$premadeGroups->addAll();

		if ( $extension == 'all' ) {
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				if ( strpos( $group->getId(), 'ext-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new extensionLanguages( $group );
				}
			}
		} elseif ( $extension == 'wikimedia' ) {
			$wikimedia = MessageGroups::getGroup( 'ext-0-wikimedia' );
			foreach ( $wikimedia->wmfextensions() as $groupId ) {
				$group = MessageGroups::getGroup( $groupId );
				$this->extensions[] = new extensionLanguages( $group );
			}
		} elseif ( $extension == 'flaggedrevs' ) {
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				if ( strpos( $group->getId(), 'ext-flaggedrevs-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new extensionLanguages( $group );
				}
			}
		} else {
			foreach ( explode( ',', $extension ) as $name ) {
				$group = MessageGroups::getGroup( 'ext-' . $name );
				if ( $group ) {
					$this->extensions[] = new extensionLanguages( $group );
				} else {
					print "No such extension $name.\n";
				}
			}
		}
	}

	/**
	 * Get the checks that are run when no whitelist or easy option is given.
	 * @return Array: list of check codes.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced',
		);
	}

	/**
	 * Get the checks whose results are not message keys; there are none
	 * for extensions.
	 * @return Array: empty list.
	 */
	protected function nonMessageChecks() {
		return array();
	}

	/**
	 * Get the checks selected by the 'easy' option.
	 * @return Array: list of check codes.
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars',
		);
	}

	/**
	 * Get the help text printed for the 'help' option.
	 * @return String: the help text.
	 */
	protected function help() {
		return <<<ENDS
Run this script to check the status of a specific language in extensions, or all of them.
Command line settings are in form --parameter[=value], except for the first one.
Parameters:
	* First parameter (mandatory): Extension name, multiple extension names (separated by commas), "all" for all the extensions, "wikimedia" for extensions used by Wikimedia or "flaggedrevs" for all FLaggedRevs extension messages.
	* lang: Language code (default: the installation default language).
	* help: Show this help.
	* level: Show the following display level (default: 2).
	* links: Link the message values (default off).
	* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
	* whitelist: Do only the following checks (form: code,code).
	* blacklist: Do not perform the following checks (form: code,code).
	* easy: Do only the easy checks, which can be treated by non-speakers of the language.
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
	* untranslated: Messages which are required to translate, but are not translated.
	* duplicate: Messages which translation equal to fallback
	* obsolete: Messages which are untranslatable, but translated.
	* variables: Messages without variables which should be used, or with variables which should not be used.
	* empty: Empty messages.
	* whitespace: Messages which have trailing whitespace.
	* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
	* chars: Messages with hidden characters.
	* links: Messages which contains broken links to pages (does not find all).
	* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
	* 0: Skip the checks (useful for checking syntax).
	* 1: Show only the stub headers and number of wrong messages, without list of messages.
	* 2: Show only the headers and the message keys, without the message values.
	* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Run the checks. Output is produced by checkLanguage(), once per
	 * extension, so nothing is printed here.
	 */
	public function execute() {
		$this->doChecks();
	}

	/**
	 * Check one language in every collected extension and print the
	 * results of each extension that has problems. $this->L and
	 * $this->results are overwritten for every extension.
	 *
	 * @param $code String: the language code.
	 * @return void: unlike the parent method, no results are returned.
	 * @throws MWException when the output type or a check code is invalid.
	 */
	protected function checkLanguage( $code ) {
		foreach ( $this->extensions as $extension ) {
			$this->L = $extension;
			$this->results = array();
			$this->results[$code] = parent::checkLanguage( $code );

			if ( $this->isEmpty() ) {
				continue;
			}

			echo $extension->name() . ":\n";

			if ( $this->level > 0 ) {
				$this->outputResults();
			}

			echo "\n";
		}
	}
}

# Blacklist some checks for some languages
$checkBlacklist = array(
#'code'        => array( 'check1', 'check2' ... )
'az'           => array( 'plural' ),
'bo'           => array( 'plural' ),
'dz'           => array( 'plural' ),
'id'           => array( 'plural' ),
'fa'           => array( 'plural' ),
'gan'          => array( 'plural' ),
'gan-hans'     => array( 'plural' ),
'gan-hant'     => array( 'plural' ),
'gn'           => array( 'plural' ),
'hak'          => array( 'plural' ),
'hu'           => array( 'plural' ),
'ja'           => array( 'plural' ), // Does not use plural
'jv'           => array( 'plural' ),
'ka'           => array( 'plural' ),
'kk-arab'      => array( 'plural' ),
'kk-cyrl'      => array( 'plural' ),
'kk-latn'      => array( 'plural' ),
'km'           => array( 'plural' ),
'kn'           => array( 'plural' ),
'ko'           => array( 'plural' ),
'lzh'          => array( 'plural' ),
'mn'           => array( 'plural' ),
'ms'           => array( 'plural' ),
'my'           => array( 'plural', 'chars' ),  // Uses a lot zwnj
'sah'          => array( 'plural' ),
'sq'           => array( 'plural' ),
'tet'          => array( 'plural' ),
'th'           => array( 'plural' ),
'to'           => array( 'plural' ),
'tr'           => array( 'plural' ),
'vi'           => array( 'plural' ),
'wuu'          => array( 'plural' ),
'xmf'          => array( 'plural' ),
'yo'           => array( 'plural' ),
'yue'          => array( 'plural' ),
'zh'           => array( 'plural' ),
'zh-classical' => array( 'plural' ),
'zh-cn'        => array( 'plural' ),
'zh-hans'      => array( 'plural' ),
'zh-hant'      => array( 'plural' ),
'zh-hk'        => array( 'plural' ),
'zh-sg'        => array( 'plural' ),
'zh-tw'        => array( 'plural' ),
'zh-yue'       => array( 'plural' ),
);