MediaWiki
REL1_24
|
00001 <?php 00067 class Parser { 00073 const VERSION = '1.6.4'; 00074 00079 const HALF_PARSED_VERSION = 2; 00080 00081 # Flags for Parser::setFunctionHook 00082 # Also available as global constants from Defines.php 00083 const SFH_NO_HASH = 1; 00084 const SFH_OBJECT_ARGS = 2; 00085 00086 # Constants needed for external link processing 00087 # Everything except bracket, space, or control characters 00088 # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20 00089 # as well as U+3000 is IDEOGRAPHIC SPACE for bug 19052 00090 const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]'; 00091 const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+) 00092 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu'; 00093 00094 # State constants for the definition list colon extraction 00095 const COLON_STATE_TEXT = 0; 00096 const COLON_STATE_TAG = 1; 00097 const COLON_STATE_TAGSTART = 2; 00098 const COLON_STATE_CLOSETAG = 3; 00099 const COLON_STATE_TAGSLASH = 4; 00100 const COLON_STATE_COMMENT = 5; 00101 const COLON_STATE_COMMENTDASH = 6; 00102 const COLON_STATE_COMMENTDASHDASH = 7; 00103 00104 # Flags for preprocessToDom 00105 const PTD_FOR_INCLUSION = 1; 00106 00107 # Allowed values for $this->mOutputType 00108 # Parameter to startExternalParse(). 00109 const OT_HTML = 1; # like parse() 00110 const OT_WIKI = 2; # like preSaveTransform() 00111 const OT_PREPROCESS = 3; # like preprocess() 00112 const OT_MSG = 3; 00113 const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged. 00114 00115 # Marker Suffix needs to be accessible staticly. 00116 const MARKER_SUFFIX = "-QINU\x7f"; 00117 00118 # Markers used for wrapping the table of contents 00119 const TOC_START = '<mw:toc>'; 00120 const TOC_END = '</mw:toc>'; 00121 00122 # Persistent: 00123 public $mTagHooks = array(); 00124 public $mTransparentTagHooks = array(); 00125 public $mFunctionHooks = array(); 00126 public $mFunctionSynonyms = array( 0 => array(), 1 => array() ); 00127 public $mFunctionTagHooks = array(); 00128 public $mStripList = array(); 00129 public $mDefaultStripList = array(); 00130 public $mVarCache = array(); 00131 public $mImageParams = array(); 00132 public $mImageParamsMagicArray = array(); 00133 public $mMarkerIndex = 0; 00134 public $mFirstCall = true; 00135 00136 # Initialised by initialiseVariables() 00137 00141 public $mVariables; 00142 00146 public $mSubstWords; 00147 public $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor 00148 00149 # Cleared with clearState(): 00150 00153 public $mOutput; 00154 public $mAutonumber, $mDTopen; 00155 00159 public $mStripState; 00160 00161 public $mIncludeCount, $mArgStack, $mLastSection, $mInPre; 00165 public $mLinkHolders; 00166 00167 public $mLinkID; 00168 public $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth; 00169 public $mDefaultSort; 00170 public $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores; 00171 public $mExpensiveFunctionCount; # number of expensive parser function calls 00172 public $mShowToc, $mForceTocPosition; 00173 00177 public $mUser; # User object; only used when doing pre-save transform 00178 00179 # Temporary 00180 # These are variables reset at least once per parse regardless of $clearState 00181 00185 public $mOptions; 00186 00190 public $mTitle; # Title context, used for self-link rendering and similar things 00191 public $mOutputType; # Output type, one of the OT_xxx constants 00192 public $ot; # Shortcut alias, see setOutputType() 00193 public $mRevisionObject; # The revision object of the specified revision ID 00194 public $mRevisionId; # ID to display in {{REVISIONID}} tags 00195 public $mRevisionTimestamp; # The timestamp of the specified revision ID 00196 public $mRevisionUser; # User to display in {{REVISIONUSER}} tag 00197 public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable 00198 public $mRevIdForTs; # The revision ID which was used to fetch the timestamp 00199 public $mInputSize = false; # For {{PAGESIZE}} on current page. 00200 00204 public $mUniqPrefix; 00205 00211 public $mLangLinkLanguages; 00212 00217 public $mInParse = false; 00218 00222 public function __construct( $conf = array() ) { 00223 $this->mConf = $conf; 00224 $this->mUrlProtocols = wfUrlProtocols(); 00225 $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' . 00226 self::EXT_LINK_URL_CLASS . '+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su'; 00227 if ( isset( $conf['preprocessorClass'] ) ) { 00228 $this->mPreprocessorClass = $conf['preprocessorClass']; 00229 } elseif ( defined( 'HPHP_VERSION' ) ) { 00230 # Preprocessor_Hash is much faster than Preprocessor_DOM under HipHop 00231 $this->mPreprocessorClass = 'Preprocessor_Hash'; 00232 } elseif ( extension_loaded( 'domxml' ) ) { 00233 # PECL extension that conflicts with the core DOM extension (bug 13770) 00234 wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" ); 00235 $this->mPreprocessorClass = 'Preprocessor_Hash'; 00236 } elseif ( extension_loaded( 'dom' ) ) { 00237 $this->mPreprocessorClass = 'Preprocessor_DOM'; 00238 } else { 00239 $this->mPreprocessorClass = 'Preprocessor_Hash'; 00240 } 00241 wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" ); 00242 } 00243 00247 public function __destruct() { 00248 if ( isset( $this->mLinkHolders ) ) { 00249 unset( $this->mLinkHolders ); 00250 } 00251 foreach ( $this as $name => $value ) { 00252 unset( $this->$name ); 00253 } 00254 } 00255 00259 public function __clone() { 00260 $this->mInParse = false; 00261 wfRunHooks( 'ParserCloned', array( $this ) ); 00262 } 00263 00267 public function firstCallInit() { 00268 if ( !$this->mFirstCall ) { 00269 return; 00270 } 00271 $this->mFirstCall = false; 00272 00273 wfProfileIn( __METHOD__ ); 00274 00275 CoreParserFunctions::register( $this ); 00276 CoreTagHooks::register( $this ); 00277 $this->initialiseVariables(); 00278 00279 wfRunHooks( 'ParserFirstCallInit', array( &$this ) ); 00280 wfProfileOut( __METHOD__ ); 00281 } 00282 00288 public function clearState() { 00289 wfProfileIn( __METHOD__ ); 00290 if ( $this->mFirstCall ) { 00291 $this->firstCallInit(); 00292 } 00293 $this->mOutput = new ParserOutput; 00294 $this->mOptions->registerWatcher( array( $this->mOutput, 'recordOption' ) ); 00295 $this->mAutonumber = 0; 00296 $this->mLastSection = ''; 00297 $this->mDTopen = false; 00298 $this->mIncludeCount = array(); 00299 $this->mArgStack = false; 00300 $this->mInPre = false; 00301 $this->mLinkHolders = new LinkHolderArray( $this ); 00302 $this->mLinkID = 0; 00303 $this->mRevisionObject = $this->mRevisionTimestamp = 00304 $this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null; 00305 $this->mVarCache = array(); 00306 $this->mUser = null; 00307 $this->mLangLinkLanguages = array(); 00308 00319 $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString(); 00320 $this->mStripState = new StripState( $this->mUniqPrefix ); 00321 00322 # Clear these on every parse, bug 4549 00323 $this->mTplRedirCache = $this->mTplDomCache = array(); 00324 00325 $this->mShowToc = true; 00326 $this->mForceTocPosition = false; 00327 $this->mIncludeSizes = array( 00328 'post-expand' => 0, 00329 'arg' => 0, 00330 ); 00331 $this->mPPNodeCount = 0; 00332 $this->mGeneratedPPNodeCount = 0; 00333 $this->mHighestExpansionDepth = 0; 00334 $this->mDefaultSort = false; 00335 $this->mHeadings = array(); 00336 $this->mDoubleUnderscores = array(); 00337 $this->mExpensiveFunctionCount = 0; 00338 00339 # Fix cloning 00340 if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) { 00341 $this->mPreprocessor = null; 00342 } 00343 00344 wfRunHooks( 'ParserClearState', array( &$this ) ); 00345 wfProfileOut( __METHOD__ ); 00346 } 00347 00360 public function parse( $text, Title $title, ParserOptions $options, 00361 $linestart = true, $clearState = true, $revid = null 00362 ) { 00368 global $wgUseTidy, $wgAlwaysUseTidy, $wgShowHostnames; 00369 $fname = __METHOD__ . '-' . wfGetCaller(); 00370 wfProfileIn( __METHOD__ ); 00371 wfProfileIn( $fname ); 00372 00373 if ( $clearState ) { 00374 $magicScopeVariable = $this->lock(); 00375 } 00376 00377 $this->startParse( $title, $options, self::OT_HTML, $clearState ); 00378 00379 $this->mInputSize = strlen( $text ); 00380 if ( $this->mOptions->getEnableLimitReport() ) { 00381 $this->mOutput->resetParseStartTime(); 00382 } 00383 00384 # Remove the strip marker tag prefix from the input, if present. 00385 if ( $clearState ) { 00386 $text = str_replace( $this->mUniqPrefix, '', $text ); 00387 } 00388 00389 $oldRevisionId = $this->mRevisionId; 00390 $oldRevisionObject = $this->mRevisionObject; 00391 $oldRevisionTimestamp = $this->mRevisionTimestamp; 00392 $oldRevisionUser = $this->mRevisionUser; 00393 $oldRevisionSize = $this->mRevisionSize; 00394 if ( $revid !== null ) { 00395 $this->mRevisionId = $revid; 00396 $this->mRevisionObject = null; 00397 $this->mRevisionTimestamp = null; 00398 $this->mRevisionUser = null; 00399 $this->mRevisionSize = null; 00400 } 00401 00402 wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); 00403 # No more strip! 00404 wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); 00405 $text = $this->internalParse( $text ); 00406 wfRunHooks( 'ParserAfterParse', array( &$this, &$text, &$this->mStripState ) ); 00407 00408 $text = $this->mStripState->unstripGeneral( $text ); 00409 00410 # Clean up special characters, only run once, next-to-last before doBlockLevels 00411 $fixtags = array( 00412 # french spaces, last one Guillemet-left 00413 # only if there is something before the space 00414 '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1 ', 00415 # french spaces, Guillemet-right 00416 '/(\\302\\253) /' => '\\1 ', 00417 '/ (!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874. 00418 ); 00419 $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text ); 00420 00421 $text = $this->doBlockLevels( $text, $linestart ); 00422 00423 $this->replaceLinkHolders( $text ); 00424 00432 if ( !( $options->getDisableContentConversion() 00433 || isset( $this->mDoubleUnderscores['nocontentconvert'] ) ) 00434 ) { 00435 if ( !$this->mOptions->getInterfaceMessage() ) { 00436 # The position of the convert() call should not be changed. it 00437 # assumes that the links are all replaced and the only thing left 00438 # is the <nowiki> mark. 00439 $text = $this->getConverterLanguage()->convert( $text ); 00440 } 00441 } 00442 00450 if ( !( $options->getDisableTitleConversion() 00451 || isset( $this->mDoubleUnderscores['nocontentconvert'] ) 00452 || isset( $this->mDoubleUnderscores['notitleconvert'] ) 00453 || $this->mOutput->getDisplayTitle() !== false ) 00454 ) { 00455 $convruletitle = $this->getConverterLanguage()->getConvRuleTitle(); 00456 if ( $convruletitle ) { 00457 $this->mOutput->setTitleText( $convruletitle ); 00458 } else { 00459 $titleText = $this->getConverterLanguage()->convertTitle( $title ); 00460 $this->mOutput->setTitleText( $titleText ); 00461 } 00462 } 00463 00464 $text = $this->mStripState->unstripNoWiki( $text ); 00465 00466 wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) ); 00467 00468 $text = $this->replaceTransparentTags( $text ); 00469 $text = $this->mStripState->unstripGeneral( $text ); 00470 00471 $text = Sanitizer::normalizeCharReferences( $text ); 00472 00473 if ( ( $wgUseTidy && $this->mOptions->getTidy() ) || $wgAlwaysUseTidy ) { 00474 $text = MWTidy::tidy( $text ); 00475 } else { 00476 # attempt to sanitize at least some nesting problems 00477 # (bug #2702 and quite a few others) 00478 $tidyregs = array( 00479 # ''Something [http://www.cool.com cool''] --> 00480 # <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a> 00481 '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' => 00482 '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9', 00483 # fix up an anchor inside another anchor, only 00484 # at least for a single single nested link (bug 3695) 00485 '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' => 00486 '\\1\\2</a>\\3</a>\\1\\4</a>', 00487 # fix div inside inline elements- doBlockLevels won't wrap a line which 00488 # contains a div, so fix it up here; replace 00489 # div with escaped text 00490 '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' => 00491 '\\1\\3<div\\5>\\6</div>\\8\\9', 00492 # remove empty italic or bold tag pairs, some 00493 # introduced by rules above 00494 '/<([bi])><\/\\1>/' => '', 00495 ); 00496 00497 $text = preg_replace( 00498 array_keys( $tidyregs ), 00499 array_values( $tidyregs ), 00500 $text ); 00501 } 00502 00503 if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) { 00504 $this->limitationWarn( 'expensive-parserfunction', 00505 $this->mExpensiveFunctionCount, 00506 $this->mOptions->getExpensiveParserFunctionLimit() 00507 ); 00508 } 00509 00510 wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) ); 00511 00512 # Information on include size limits, for the benefit of users who try to skirt them 00513 if ( $this->mOptions->getEnableLimitReport() ) { 00514 $max = $this->mOptions->getMaxIncludeSize(); 00515 00516 $cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' ); 00517 if ( $cpuTime !== null ) { 00518 $this->mOutput->setLimitReportData( 'limitreport-cputime', 00519 sprintf( "%.3f", $cpuTime ) 00520 ); 00521 } 00522 00523 $wallTime = $this->mOutput->getTimeSinceStart( 'wall' ); 00524 $this->mOutput->setLimitReportData( 'limitreport-walltime', 00525 sprintf( "%.3f", $wallTime ) 00526 ); 00527 00528 $this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes', 00529 array( $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ) 00530 ); 00531 $this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes', 00532 array( $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ) 00533 ); 00534 $this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize', 00535 array( $this->mIncludeSizes['post-expand'], $max ) 00536 ); 00537 $this->mOutput->setLimitReportData( 'limitreport-templateargumentsize', 00538 array( $this->mIncludeSizes['arg'], $max ) 00539 ); 00540 $this->mOutput->setLimitReportData( 'limitreport-expansiondepth', 00541 array( $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ) 00542 ); 00543 $this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount', 00544 array( $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ) 00545 ); 00546 wfRunHooks( 'ParserLimitReportPrepare', array( $this, $this->mOutput ) ); 00547 00548 $limitReport = "NewPP limit report\n"; 00549 if ( $wgShowHostnames ) { 00550 $limitReport .= 'Parsed by ' . wfHostname() . "\n"; 00551 } 00552 foreach ( $this->mOutput->getLimitReportData() as $key => $value ) { 00553 if ( wfRunHooks( 'ParserLimitReportFormat', 00554 array( $key, &$value, &$limitReport, false, false ) 00555 ) ) { 00556 $keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false ); 00557 $valueMsg = wfMessage( array( "$key-value-text", "$key-value" ) ) 00558 ->inLanguage( 'en' )->useDatabase( false ); 00559 if ( !$valueMsg->exists() ) { 00560 $valueMsg = new RawMessage( '$1' ); 00561 } 00562 if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) { 00563 $valueMsg->params( $value ); 00564 $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n"; 00565 } 00566 } 00567 } 00568 // Since we're not really outputting HTML, decode the entities and 00569 // then re-encode the things that need hiding inside HTML comments. 00570 $limitReport = htmlspecialchars_decode( $limitReport ); 00571 wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) ); 00572 00573 // Sanitize for comment. Note '‐' in the replacement is U+2010, 00574 // which looks much like the problematic '-'. 00575 $limitReport = str_replace( array( '-', '&' ), array( '‐', '&' ), $limitReport ); 00576 $text .= "\n<!-- \n$limitReport-->\n"; 00577 00578 if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) { 00579 wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' . 00580 $this->mTitle->getPrefixedDBkey() ); 00581 } 00582 } 00583 $this->mOutput->setText( $text ); 00584 00585 $this->mRevisionId = $oldRevisionId; 00586 $this->mRevisionObject = $oldRevisionObject; 00587 $this->mRevisionTimestamp = $oldRevisionTimestamp; 00588 $this->mRevisionUser = $oldRevisionUser; 00589 $this->mRevisionSize = $oldRevisionSize; 00590 $this->mInputSize = false; 00591 wfProfileOut( $fname ); 00592 wfProfileOut( __METHOD__ ); 00593 00594 return $this->mOutput; 00595 } 00596 00608 public function recursiveTagParse( $text, $frame = false ) { 00609 wfProfileIn( __METHOD__ ); 00610 wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); 00611 wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); 00612 $text = $this->internalParse( $text, false, $frame ); 00613 wfProfileOut( __METHOD__ ); 00614 return $text; 00615 } 00616 00628 public function preprocess( $text, Title $title = null, ParserOptions $options, $revid = null, $frame = false ) { 00629 wfProfileIn( __METHOD__ ); 00630 $magicScopeVariable = $this->lock(); 00631 $this->startParse( $title, $options, self::OT_PREPROCESS, true ); 00632 if ( $revid !== null ) { 00633 $this->mRevisionId = $revid; 00634 } 00635 wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) ); 00636 wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) ); 00637 $text = $this->replaceVariables( $text, $frame ); 00638 $text = $this->mStripState->unstripBoth( $text ); 00639 wfProfileOut( __METHOD__ ); 00640 return $text; 00641 } 00642 00652 public function recursivePreprocess( $text, $frame = false ) { 00653 wfProfileIn( __METHOD__ ); 00654 $text = $this->replaceVariables( $text, $frame ); 00655 $text = $this->mStripState->unstripBoth( $text ); 00656 wfProfileOut( __METHOD__ ); 00657 return $text; 00658 } 00659 00673 public function getPreloadText( $text, Title $title, ParserOptions $options, $params = array() ) { 00674 $msg = new RawMessage( $text ); 00675 $text = $msg->params( $params )->plain(); 00676 00677 # Parser (re)initialisation 00678 $magicScopeVariable = $this->lock(); 00679 $this->startParse( $title, $options, self::OT_PLAIN, true ); 00680 00681 $flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES; 00682 $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION ); 00683 $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags ); 00684 $text = $this->mStripState->unstripBoth( $text ); 00685 return $text; 00686 } 00687 00693 public static function getRandomString() { 00694 return wfRandomString( 16 ); 00695 } 00696 00703 public function setUser( $user ) { 00704 $this->mUser = $user; 00705 } 00706 00712 public function uniqPrefix() { 00713 if ( !isset( $this->mUniqPrefix ) ) { 00714 # @todo FIXME: This is probably *horribly wrong* 00715 # LanguageConverter seems to want $wgParser's uniqPrefix, however 00716 # if this is called for a parser cache hit, the parser may not 00717 # have ever been initialized in the first place. 00718 # Not really sure what the heck is supposed to be going on here. 00719 return ''; 00720 # throw new MWException( "Accessing uninitialized mUniqPrefix" ); 00721 } 00722 return $this->mUniqPrefix; 00723 } 00724 00730 public function setTitle( $t ) { 00731 if ( !$t ) { 00732 $t = Title::newFromText( 'NO TITLE' ); 00733 } 00734 00735 if ( $t->hasFragment() ) { 00736 # Strip the fragment to avoid various odd effects 00737 $this->mTitle = clone $t; 00738 $this->mTitle->setFragment( '' ); 00739 } else { 00740 $this->mTitle = $t; 00741 } 00742 } 00743 00749 public function getTitle() { 00750 return $this->mTitle; 00751 } 00752 00759 public function Title( $x = null ) { 00760 return wfSetVar( $this->mTitle, $x ); 00761 } 00762 00768 public function setOutputType( $ot ) { 00769 $this->mOutputType = $ot; 00770 # Shortcut alias 00771 $this->ot = array( 00772 'html' => $ot == self::OT_HTML, 00773 'wiki' => $ot == self::OT_WIKI, 00774 'pre' => $ot == self::OT_PREPROCESS, 00775 'plain' => $ot == self::OT_PLAIN, 00776 ); 00777 } 00778 00785 public function OutputType( $x = null ) { 00786 return wfSetVar( $this->mOutputType, $x ); 00787 } 00788 00794 public function getOutput() { 00795 return $this->mOutput; 00796 } 00797 00803 public function getOptions() { 00804 return $this->mOptions; 00805 } 00806 00813 public function Options( $x = null ) { 00814 return wfSetVar( $this->mOptions, $x ); 00815 } 00816 00820 public function nextLinkID() { 00821 return $this->mLinkID++; 00822 } 00823 00827 public function setLinkID( $id ) { 00828 $this->mLinkID = $id; 00829 } 00830 00835 public function getFunctionLang() { 00836 return $this->getTargetLanguage(); 00837 } 00838 00848 public function getTargetLanguage() { 00849 $target = $this->mOptions->getTargetLanguage(); 00850 00851 if ( $target !== null ) { 00852 return $target; 00853 } elseif ( $this->mOptions->getInterfaceMessage() ) { 00854 return $this->mOptions->getUserLangObj(); 00855 } elseif ( is_null( $this->mTitle ) ) { 00856 throw new MWException( __METHOD__ . ': $this->mTitle is null' ); 00857 } 00858 00859 return $this->mTitle->getPageLanguage(); 00860 } 00861 00866 public function getConverterLanguage() { 00867 return $this->getTargetLanguage(); 00868 } 00869 00876 public function getUser() { 00877 if ( !is_null( $this->mUser ) ) { 00878 return $this->mUser; 00879 } 00880 return $this->mOptions->getUser(); 00881 } 00882 00888 public function getPreprocessor() { 00889 if ( !isset( $this->mPreprocessor ) ) { 00890 $class = $this->mPreprocessorClass; 00891 $this->mPreprocessor = new $class( $this ); 00892 } 00893 return $this->mPreprocessor; 00894 } 00895 00916 public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) { 00917 static $n = 1; 00918 $stripped = ''; 00919 $matches = array(); 00920 00921 $taglist = implode( '|', $elements ); 00922 $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i"; 00923 00924 while ( $text != '' ) { 00925 $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE ); 00926 $stripped .= $p[0]; 00927 if ( count( $p ) < 5 ) { 00928 break; 00929 } 00930 if ( count( $p ) > 5 ) { 00931 # comment 00932 $element = $p[4]; 00933 $attributes = ''; 00934 $close = ''; 00935 $inside = $p[5]; 00936 } else { 00937 # tag 00938 $element = $p[1]; 00939 $attributes = $p[2]; 00940 $close = $p[3]; 00941 $inside = $p[4]; 00942 } 00943 00944 $marker = "$uniq_prefix-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX; 00945 $stripped .= $marker; 00946 00947 if ( $close === '/>' ) { 00948 # Empty element tag, <tag /> 00949 $content = null; 00950 $text = $inside; 00951 $tail = null; 00952 } else { 00953 if ( $element === '!--' ) { 00954 $end = '/(-->)/'; 00955 } else { 00956 $end = "/(<\\/$element\\s*>)/i"; 00957 } 00958 $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE ); 00959 $content = $q[0]; 00960 if ( count( $q ) < 3 ) { 00961 # No end tag -- let it run out to the end of the text. 00962 $tail = ''; 00963 $text = ''; 00964 } else { 00965 $tail = $q[1]; 00966 $text = $q[2]; 00967 } 00968 } 00969 00970 $matches[$marker] = array( $element, 00971 $content, 00972 Sanitizer::decodeTagAttributes( $attributes ), 00973 "<$element$attributes$close$content$tail" ); 00974 } 00975 return $stripped; 00976 } 00977 00983 public function getStripList() { 00984 return $this->mStripList; 00985 } 00986 00996 public function insertStripItem( $text ) { 00997 $rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX; 00998 $this->mMarkerIndex++; 00999 $this->mStripState->addGeneral( $rnd, $text ); 01000 return $rnd; 01001 } 01002 01010 public function doTableStuff( $text ) { 01011 wfProfileIn( __METHOD__ ); 01012 01013 $lines = StringUtils::explode( "\n", $text ); 01014 $out = ''; 01015 $td_history = array(); # Is currently a td tag open? 01016 $last_tag_history = array(); # Save history of last lag activated (td, th or caption) 01017 $tr_history = array(); # Is currently a tr tag open? 01018 $tr_attributes = array(); # history of tr attributes 01019 $has_opened_tr = array(); # Did this table open a <tr> element? 01020 $indent_level = 0; # indent level of the table 01021 01022 foreach ( $lines as $outLine ) { 01023 $line = trim( $outLine ); 01024 01025 if ( $line === '' ) { # empty line, go to next line 01026 $out .= $outLine . "\n"; 01027 continue; 01028 } 01029 01030 $first_character = $line[0]; 01031 $matches = array(); 01032 01033 if ( preg_match( '/^(:*)\{\|(.*)$/', $line, $matches ) ) { 01034 # First check if we are starting a new table 01035 $indent_level = strlen( $matches[1] ); 01036 01037 $attributes = $this->mStripState->unstripBoth( $matches[2] ); 01038 $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' ); 01039 01040 $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>"; 01041 array_push( $td_history, false ); 01042 array_push( $last_tag_history, '' ); 01043 array_push( $tr_history, false ); 01044 array_push( $tr_attributes, '' ); 01045 array_push( $has_opened_tr, false ); 01046 } elseif ( count( $td_history ) == 0 ) { 01047 # Don't do any of the following 01048 $out .= $outLine . "\n"; 01049 continue; 01050 } elseif ( substr( $line, 0, 2 ) === '|}' ) { 01051 # We are ending a table 01052 $line = '</table>' . substr( $line, 2 ); 01053 $last_tag = array_pop( $last_tag_history ); 01054 01055 if ( !array_pop( $has_opened_tr ) ) { 01056 $line = "<tr><td></td></tr>{$line}"; 01057 } 01058 01059 if ( array_pop( $tr_history ) ) { 01060 $line = "</tr>{$line}"; 01061 } 01062 01063 if ( array_pop( $td_history ) ) { 01064 $line = "</{$last_tag}>{$line}"; 01065 } 01066 array_pop( $tr_attributes ); 01067 $outLine = $line . str_repeat( '</dd></dl>', $indent_level ); 01068 } elseif ( substr( $line, 0, 2 ) === '|-' ) { 01069 # Now we have a table row 01070 $line = preg_replace( '#^\|-+#', '', $line ); 01071 01072 # Whats after the tag is now only attributes 01073 $attributes = $this->mStripState->unstripBoth( $line ); 01074 $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' ); 01075 array_pop( $tr_attributes ); 01076 array_push( $tr_attributes, $attributes ); 01077 01078 $line = ''; 01079 $last_tag = array_pop( $last_tag_history ); 01080 array_pop( $has_opened_tr ); 01081 array_push( $has_opened_tr, true ); 01082 01083 if ( array_pop( $tr_history ) ) { 01084 $line = '</tr>'; 01085 } 01086 01087 if ( array_pop( $td_history ) ) { 01088 $line = "</{$last_tag}>{$line}"; 01089 } 01090 01091 $outLine = $line; 01092 array_push( $tr_history, false ); 01093 array_push( $td_history, false ); 01094 array_push( $last_tag_history, '' ); 01095 } elseif ( $first_character === '|' 01096 || $first_character === '!' 01097 || substr( $line, 0, 2 ) === '|+' 01098 ) { 01099 # This might be cell elements, td, th or captions 01100 if ( substr( $line, 0, 2 ) === '|+' ) { 01101 $first_character = '+'; 01102 $line = substr( $line, 1 ); 01103 } 01104 01105 $line = substr( $line, 1 ); 01106 01107 if ( $first_character === '!' ) { 01108 $line = str_replace( '!!', '||', $line ); 01109 } 01110 01111 # Split up multiple cells on the same line. 01112 # FIXME : This can result in improper nesting of tags processed 01113 # by earlier parser steps, but should avoid splitting up eg 01114 # attribute values containing literal "||". 01115 $cells = StringUtils::explodeMarkup( '||', $line ); 01116 01117 $outLine = ''; 01118 01119 # Loop through each table cell 01120 foreach ( $cells as $cell ) { 01121 $previous = ''; 01122 if ( $first_character !== '+' ) { 01123 $tr_after = array_pop( $tr_attributes ); 01124 if ( !array_pop( $tr_history ) ) { 01125 $previous = "<tr{$tr_after}>\n"; 01126 } 01127 array_push( $tr_history, true ); 01128 array_push( $tr_attributes, '' ); 01129 array_pop( $has_opened_tr ); 01130 array_push( $has_opened_tr, true ); 01131 } 01132 01133 $last_tag = array_pop( $last_tag_history ); 01134 01135 if ( array_pop( $td_history ) ) { 01136 $previous = "</{$last_tag}>\n{$previous}"; 01137 } 01138 01139 if ( $first_character === '|' ) { 01140 $last_tag = 'td'; 01141 } elseif ( $first_character === '!' ) { 01142 $last_tag = 'th'; 01143 } elseif ( $first_character === '+' ) { 01144 $last_tag = 'caption'; 01145 } else { 01146 $last_tag = ''; 01147 } 01148 01149 array_push( $last_tag_history, $last_tag ); 01150 01151 # A cell could contain both parameters and data 01152 $cell_data = explode( '|', $cell, 2 ); 01153 01154 # Bug 553: Note that a '|' inside an invalid link should not 01155 # be mistaken as delimiting cell parameters 01156 if ( strpos( $cell_data[0], '[[' ) !== false ) { 01157 $cell = "{$previous}<{$last_tag}>{$cell}"; 01158 } elseif ( count( $cell_data ) == 1 ) { 01159 $cell = "{$previous}<{$last_tag}>{$cell_data[0]}"; 01160 } else { 01161 $attributes = $this->mStripState->unstripBoth( $cell_data[0] ); 01162 $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag ); 01163 $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}"; 01164 } 01165 01166 $outLine .= $cell; 01167 array_push( $td_history, true ); 01168 } 01169 } 01170 $out .= $outLine . "\n"; 01171 } 01172 01173 # Closing open td, tr && table 01174 while ( count( $td_history ) > 0 ) { 01175 if ( array_pop( $td_history ) ) { 01176 $out .= "</td>\n"; 01177 } 01178 if ( array_pop( $tr_history ) ) { 01179 $out .= "</tr>\n"; 01180 } 01181 if ( !array_pop( $has_opened_tr ) ) { 01182 $out .= "<tr><td></td></tr>\n"; 01183 } 01184 01185 $out .= "</table>\n"; 01186 } 01187 01188 # Remove trailing line-ending (b/c) 01189 if ( substr( $out, -1 ) === "\n" ) { 01190 $out = substr( $out, 0, -1 ); 01191 } 01192 01193 # special case: don't return empty table 01194 if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) { 01195 $out = ''; 01196 } 01197 01198 wfProfileOut( __METHOD__ ); 01199 01200 return $out; 01201 } 01202 01215 public function internalParse( $text, $isMain = true, $frame = false ) { 01216 wfProfileIn( __METHOD__ ); 01217 01218 $origText = $text; 01219 01220 # Hook to suspend the parser in this state 01221 if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) { 01222 wfProfileOut( __METHOD__ ); 01223 return $text; 01224 } 01225 01226 # if $frame is provided, then use $frame for replacing any variables 01227 if ( $frame ) { 01228 # use frame depth to infer how include/noinclude tags should be handled 01229 # depth=0 means this is the top-level document; otherwise it's an included document 01230 if ( !$frame->depth ) { 01231 $flag = 0; 01232 } else { 01233 $flag = Parser::PTD_FOR_INCLUSION; 01234 } 01235 $dom = $this->preprocessToDom( $text, $flag ); 01236 $text = $frame->expand( $dom ); 01237 } else { 01238 # if $frame is not provided, then use old-style replaceVariables 01239 $text = $this->replaceVariables( $text ); 01240 } 01241 01242 wfRunHooks( 'InternalParseBeforeSanitize', array( &$this, &$text, &$this->mStripState ) ); 01243 $text = Sanitizer::removeHTMLtags( 01244 $text, 01245 array( &$this, 'attributeStripCallback' ), 01246 false, 01247 array_keys( $this->mTransparentTagHooks ) 01248 ); 01249 wfRunHooks( 'InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ) ); 01250 01251 # Tables need to come after variable replacement for things to work 01252 # properly; putting them before other transformations should keep 01253 # exciting things like link expansions from showing up in surprising 01254 # places. 01255 $text = $this->doTableStuff( $text ); 01256 01257 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text ); 01258 01259 $text = $this->doDoubleUnderscore( $text ); 01260 01261 $text = $this->doHeadings( $text ); 01262 $text = $this->replaceInternalLinks( $text ); 01263 $text = $this->doAllQuotes( $text ); 01264 $text = $this->replaceExternalLinks( $text ); 01265 01266 # replaceInternalLinks may sometimes leave behind 01267 # absolute URLs, which have to be masked to hide them from replaceExternalLinks 01268 $text = str_replace( $this->mUniqPrefix . 'NOPARSE', '', $text ); 01269 01270 $text = $this->doMagicLinks( $text ); 01271 $text = $this->formatHeadings( $text, $origText, $isMain ); 01272 01273 wfProfileOut( __METHOD__ ); 01274 return $text; 01275 } 01276 01288 public function doMagicLinks( $text ) { 01289 wfProfileIn( __METHOD__ ); 01290 $prots = wfUrlProtocolsWithoutProtRel(); 01291 $urlChar = self::EXT_LINK_URL_CLASS; 01292 $text = preg_replace_callback( 01293 '!(?: # Start cases 01294 (<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text 01295 (<.*?>) | # m[2]: Skip stuff inside HTML elements' . " 01296 (\\b(?i:$prots)$urlChar+) | # m[3]: Free external links" . ' 01297 (?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number 01298 ISBN\s+(\b # m[5]: ISBN, capture number 01299 (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix 01300 (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters 01301 [0-9Xx] # check digit 01302 \b) 01303 )!xu', array( &$this, 'magicLinkCallback' ), $text ); 01304 wfProfileOut( __METHOD__ ); 01305 return $text; 01306 } 01307 01313 public function magicLinkCallback( $m ) { 01314 if ( isset( $m[1] ) && $m[1] !== '' ) { 01315 # Skip anchor 01316 return $m[0]; 01317 } elseif ( isset( $m[2] ) && $m[2] !== '' ) { 01318 # Skip HTML element 01319 return $m[0]; 01320 } elseif ( isset( $m[3] ) && $m[3] !== '' ) { 01321 # Free external link 01322 return $this->makeFreeExternalLink( $m[0] ); 01323 } elseif ( isset( $m[4] ) && $m[4] !== '' ) { 01324 # RFC or PMID 01325 if ( substr( $m[0], 0, 3 ) === 'RFC' ) { 01326 $keyword = 'RFC'; 01327 $urlmsg = 'rfcurl'; 01328 $cssClass = 'mw-magiclink-rfc'; 01329 $id = $m[4]; 01330 } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) { 01331 $keyword = 'PMID'; 01332 $urlmsg = 'pubmedurl'; 01333 $cssClass = 'mw-magiclink-pmid'; 01334 $id = $m[4]; 01335 } else { 01336 throw new MWException( __METHOD__ . ': unrecognised match type "' . 01337 substr( $m[0], 0, 20 ) . '"' ); 01338 } 01339 $url = wfMessage( $urlmsg, $id )->inContentLanguage()->text(); 01340 return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass ); 01341 } elseif ( isset( $m[5] ) && $m[5] !== '' ) { 01342 # ISBN 01343 $isbn = $m[5]; 01344 $num = strtr( $isbn, array( 01345 '-' => '', 01346 ' ' => '', 01347 'x' => 'X', 01348 )); 01349 $titleObj = SpecialPage::getTitleFor( 'Booksources', $num ); 01350 return '<a href="' . 01351 htmlspecialchars( $titleObj->getLocalURL() ) . 01352 "\" class=\"internal mw-magiclink-isbn\">ISBN $isbn</a>"; 01353 } else { 01354 return $m[0]; 01355 } 01356 } 01357 01366 public function makeFreeExternalLink( $url ) { 01367 wfProfileIn( __METHOD__ ); 01368 01369 $trail = ''; 01370 01371 # The characters '<' and '>' (which were escaped by 01372 # removeHTMLtags()) should not be included in 01373 # URLs, per RFC 2396. 01374 $m2 = array(); 01375 if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) { 01376 $trail = substr( $url, $m2[0][1] ) . $trail; 01377 $url = substr( $url, 0, $m2[0][1] ); 01378 } 01379 01380 # Move trailing punctuation to $trail 01381 $sep = ',;\.:!?'; 01382 # If there is no left bracket, then consider right brackets fair game too 01383 if ( strpos( $url, '(' ) === false ) { 01384 $sep .= ')'; 01385 } 01386 01387 $numSepChars = strspn( strrev( $url ), $sep ); 01388 if ( $numSepChars ) { 01389 $trail = substr( $url, -$numSepChars ) . $trail; 01390 $url = substr( $url, 0, -$numSepChars ); 01391 } 01392 01393 $url = Sanitizer::cleanUrl( $url ); 01394 01395 # Is this an external image? 01396 $text = $this->maybeMakeExternalImage( $url ); 01397 if ( $text === false ) { 01398 # Not an image, make a link 01399 $text = Linker::makeExternalLink( $url, 01400 $this->getConverterLanguage()->markNoConversion( $url, true ), 01401 true, 'free', 01402 $this->getExternalLinkAttribs( $url ) ); 01403 # Register it in the output object... 01404 # Replace unnecessary URL escape codes with their equivalent characters 01405 $pasteurized = self::normalizeLinkUrl( $url ); 01406 $this->mOutput->addExternalLink( $pasteurized ); 01407 } 01408 wfProfileOut( __METHOD__ ); 01409 return $text . $trail; 01410 } 01411 01421 public function doHeadings( $text ) { 01422 wfProfileIn( __METHOD__ ); 01423 for ( $i = 6; $i >= 1; --$i ) { 01424 $h = str_repeat( '=', $i ); 01425 $text = preg_replace( "/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text ); 01426 } 01427 wfProfileOut( __METHOD__ ); 01428 return $text; 01429 } 01430 01439 public function doAllQuotes( $text ) { 01440 wfProfileIn( __METHOD__ ); 01441 $outtext = ''; 01442 $lines = StringUtils::explode( "\n", $text ); 01443 foreach ( $lines as $line ) { 01444 $outtext .= $this->doQuotes( $line ) . "\n"; 01445 } 01446 $outtext = substr( $outtext, 0, -1 ); 01447 wfProfileOut( __METHOD__ ); 01448 return $outtext; 01449 } 01450 01458 public function doQuotes( $text ) { 01459 $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE ); 01460 $countarr = count( $arr ); 01461 if ( $countarr == 1 ) { 01462 return $text; 01463 } 01464 01465 // First, do some preliminary work. This may shift some apostrophes from 01466 // being mark-up to being text. It also counts the number of occurrences 01467 // of bold and italics mark-ups. 01468 $numbold = 0; 01469 $numitalics = 0; 01470 for ( $i = 1; $i < $countarr; $i += 2 ) { 01471 $thislen = strlen( $arr[$i] ); 01472 // If there are ever four apostrophes, assume the first is supposed to 01473 // be text, and the remaining three constitute mark-up for bold text. 01474 // (bug 13227: ''''foo'''' turns into ' ''' foo ' ''') 01475 if ( $thislen == 4 ) { 01476 $arr[$i - 1] .= "'"; 01477 $arr[$i] = "'''"; 01478 $thislen = 3; 01479 } elseif ( $thislen > 5 ) { 01480 // If there are more than 5 apostrophes in a row, assume they're all 01481 // text except for the last 5. 01482 // (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''') 01483 $arr[$i - 1] .= str_repeat( "'", $thislen - 5 ); 01484 $arr[$i] = "'''''"; 01485 $thislen = 5; 01486 } 01487 // Count the number of occurrences of bold and italics mark-ups. 01488 if ( $thislen == 2 ) { 01489 $numitalics++; 01490 } elseif ( $thislen == 3 ) { 01491 $numbold++; 01492 } elseif ( $thislen == 5 ) { 01493 $numitalics++; 01494 $numbold++; 01495 } 01496 } 01497 01498 // If there is an odd number of both bold and italics, it is likely 01499 // that one of the bold ones was meant to be an apostrophe followed 01500 // by italics. Which one we cannot know for certain, but it is more 01501 // likely to be one that has a single-letter word before it. 01502 if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) { 01503 $firstsingleletterword = -1; 01504 $firstmultiletterword = -1; 01505 $firstspace = -1; 01506 for ( $i = 1; $i < $countarr; $i += 2 ) { 01507 if ( strlen( $arr[$i] ) == 3 ) { 01508 $x1 = substr( $arr[$i - 1], -1 ); 01509 $x2 = substr( $arr[$i - 1], -2, 1 ); 01510 if ( $x1 === ' ' ) { 01511 if ( $firstspace == -1 ) { 01512 $firstspace = $i; 01513 } 01514 } elseif ( $x2 === ' ' ) { 01515 if ( $firstsingleletterword == -1 ) { 01516 $firstsingleletterword = $i; 01517 // if $firstsingleletterword is set, we don't 01518 // look at the other options, so we can bail early. 01519 break; 01520 } 01521 } else { 01522 if ( $firstmultiletterword == -1 ) { 01523 $firstmultiletterword = $i; 01524 } 01525 } 01526 } 01527 } 01528 01529 // If there is a single-letter word, use it! 01530 if ( $firstsingleletterword > -1 ) { 01531 $arr[$firstsingleletterword] = "''"; 01532 $arr[$firstsingleletterword - 1] .= "'"; 01533 } elseif ( $firstmultiletterword > -1 ) { 01534 // If not, but there's a multi-letter word, use that one. 01535 $arr[$firstmultiletterword] = "''"; 01536 $arr[$firstmultiletterword - 1] .= "'"; 01537 } elseif ( $firstspace > -1 ) { 01538 // ... otherwise use the first one that has neither. 01539 // (notice that it is possible for all three to be -1 if, for example, 01540 // there is only one pentuple-apostrophe in the line) 01541 $arr[$firstspace] = "''"; 01542 $arr[$firstspace - 1] .= "'"; 01543 } 01544 } 01545 01546 // Now let's actually convert our apostrophic mush to HTML! 01547 $output = ''; 01548 $buffer = ''; 01549 $state = ''; 01550 $i = 0; 01551 foreach ( $arr as $r ) { 01552 if ( ( $i % 2 ) == 0 ) { 01553 if ( $state === 'both' ) { 01554 $buffer .= $r; 01555 } else { 01556 $output .= $r; 01557 } 01558 } else { 01559 $thislen = strlen( $r ); 01560 if ( $thislen == 2 ) { 01561 if ( $state === 'i' ) { 01562 $output .= '</i>'; 01563 $state = ''; 01564 } elseif ( $state === 'bi' ) { 01565 $output .= '</i>'; 01566 $state = 'b'; 01567 } elseif ( $state === 'ib' ) { 01568 $output .= '</b></i><b>'; 01569 $state = 'b'; 01570 } elseif ( $state === 'both' ) { 01571 $output .= '<b><i>' . $buffer . '</i>'; 01572 $state = 'b'; 01573 } else { // $state can be 'b' or '' 01574 $output .= '<i>'; 01575 $state .= 'i'; 01576 } 01577 } elseif ( $thislen == 3 ) { 01578 if ( $state === 'b' ) { 01579 $output .= '</b>'; 01580 $state = ''; 01581 } elseif ( $state === 'bi' ) { 01582 $output .= '</i></b><i>'; 01583 $state = 'i'; 01584 } elseif ( $state === 'ib' ) { 01585 $output .= '</b>'; 01586 $state = 'i'; 01587 } elseif ( $state === 'both' ) { 01588 $output .= '<i><b>' . $buffer . '</b>'; 01589 $state = 'i'; 01590 } else { // $state can be 'i' or '' 01591 $output .= '<b>'; 01592 $state .= 'b'; 01593 } 01594 } elseif ( $thislen == 5 ) { 01595 if ( $state === 'b' ) { 01596 $output .= '</b><i>'; 01597 $state = 'i'; 01598 } elseif ( $state === 'i' ) { 01599 $output .= '</i><b>'; 01600 $state = 'b'; 01601 } elseif ( $state === 'bi' ) { 01602 $output .= '</i></b>'; 01603 $state = ''; 01604 } elseif ( $state === 'ib' ) { 01605 $output .= '</b></i>'; 01606 $state = ''; 01607 } elseif ( $state === 'both' ) { 01608 $output .= '<i><b>' . $buffer . '</b></i>'; 01609 $state = ''; 01610 } else { // ($state == '') 01611 $buffer = ''; 01612 $state = 'both'; 01613 } 01614 } 01615 } 01616 $i++; 01617 } 01618 // Now close all remaining tags. Notice that the order is important. 01619 if ( $state === 'b' || $state === 'ib' ) { 01620 $output .= '</b>'; 01621 } 01622 if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) { 01623 $output .= '</i>'; 01624 } 01625 if ( $state === 'bi' ) { 01626 $output .= '</b>'; 01627 } 01628 // There might be lonely ''''', so make sure we have a buffer 01629 if ( $state === 'both' && $buffer ) { 01630 $output .= '<b><i>' . $buffer . '</i></b>'; 01631 } 01632 return $output; 01633 } 01634 01648 public function replaceExternalLinks( $text ) { 01649 wfProfileIn( __METHOD__ ); 01650 01651 $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE ); 01652 if ( $bits === false ) { 01653 wfProfileOut( __METHOD__ ); 01654 throw new MWException( "PCRE needs to be compiled with " 01655 . "--enable-unicode-properties in order for MediaWiki to function" ); 01656 } 01657 $s = array_shift( $bits ); 01658 01659 $i = 0; 01660 while ( $i < count( $bits ) ) { 01661 $url = $bits[$i++]; 01662 $i++; // protocol 01663 $text = $bits[$i++]; 01664 $trail = $bits[$i++]; 01665 01666 # The characters '<' and '>' (which were escaped by 01667 # removeHTMLtags()) should not be included in 01668 # URLs, per RFC 2396. 01669 $m2 = array(); 01670 if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) { 01671 $text = substr( $url, $m2[0][1] ) . ' ' . $text; 01672 $url = substr( $url, 0, $m2[0][1] ); 01673 } 01674 01675 # If the link text is an image URL, replace it with an <img> tag 01676 # This happened by accident in the original parser, but some people used it extensively 01677 $img = $this->maybeMakeExternalImage( $text ); 01678 if ( $img !== false ) { 01679 $text = $img; 01680 } 01681 01682 $dtrail = ''; 01683 01684 # Set linktype for CSS - if URL==text, link is essentially free 01685 $linktype = ( $text === $url ) ? 'free' : 'text'; 01686 01687 # No link text, e.g. [http://domain.tld/some.link] 01688 if ( $text == '' ) { 01689 # Autonumber 01690 $langObj = $this->getTargetLanguage(); 01691 $text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']'; 01692 $linktype = 'autonumber'; 01693 } else { 01694 # Have link text, e.g. [http://domain.tld/some.link text]s 01695 # Check for trail 01696 list( $dtrail, $trail ) = Linker::splitTrail( $trail ); 01697 } 01698 01699 $text = $this->getConverterLanguage()->markNoConversion( $text ); 01700 01701 $url = Sanitizer::cleanUrl( $url ); 01702 01703 # Use the encoded URL 01704 # This means that users can paste URLs directly into the text 01705 # Funny characters like ö aren't valid in URLs anyway 01706 # This was changed in August 2004 01707 $s .= Linker::makeExternalLink( $url, $text, false, $linktype, 01708 $this->getExternalLinkAttribs( $url ) ) . $dtrail . $trail; 01709 01710 # Register link in the output object. 01711 # Replace unnecessary URL escape codes with the referenced character 01712 # This prevents spammers from hiding links from the filters 01713 $pasteurized = self::normalizeLinkUrl( $url ); 01714 $this->mOutput->addExternalLink( $pasteurized ); 01715 } 01716 01717 wfProfileOut( __METHOD__ ); 01718 return $s; 01719 } 01720 01730 public static function getExternalLinkRel( $url = false, $title = null ) { 01731 global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions; 01732 $ns = $title ? $title->getNamespace() : false; 01733 if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions ) 01734 && !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) 01735 ) { 01736 return 'nofollow'; 01737 } 01738 return null; 01739 } 01740 01751 public function getExternalLinkAttribs( $url = false ) { 01752 $attribs = array(); 01753 $attribs['rel'] = self::getExternalLinkRel( $url, $this->mTitle ); 01754 01755 if ( $this->mOptions->getExternalLinkTarget() ) { 01756 $attribs['target'] = $this->mOptions->getExternalLinkTarget(); 01757 } 01758 return $attribs; 01759 } 01760 01768 public static function replaceUnusualEscapes( $url ) { 01769 wfDeprecated( __METHOD__, '1.24' ); 01770 return self::normalizeLinkUrl( $url ); 01771 } 01772 01782 public static function normalizeLinkUrl( $url ) { 01783 # First, make sure unsafe characters are encoded 01784 $url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/', 01785 function ( $m ) { 01786 return rawurlencode( $m[0] ); 01787 }, 01788 $url 01789 ); 01790 01791 $ret = ''; 01792 $end = strlen( $url ); 01793 01794 # Fragment part - 'fragment' 01795 $start = strpos( $url, '#' ); 01796 if ( $start !== false && $start < $end ) { 01797 $ret = self::normalizeUrlComponent( 01798 substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret; 01799 $end = $start; 01800 } 01801 01802 # Query part - 'query' minus &=+; 01803 $start = strpos( $url, '?' ); 01804 if ( $start !== false && $start < $end ) { 01805 $ret = self::normalizeUrlComponent( 01806 substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret; 01807 $end = $start; 01808 } 01809 01810 # Scheme and path part - 'pchar' 01811 # (we assume no userinfo or encoded colons in the host) 01812 $ret = self::normalizeUrlComponent( 01813 substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret; 01814 01815 return $ret; 01816 } 01817 01818 private static function normalizeUrlComponent( $component, $unsafe ) { 01819 $callback = function ( $matches ) use ( $unsafe ) { 01820 $char = urldecode( $matches[0] ); 01821 $ord = ord( $char ); 01822 if ( $ord > 32 && $ord < 127 && strpos( $unsafe, $char ) === false ) { 01823 # Unescape it 01824 return $char; 01825 } else { 01826 # Leave it escaped, but use uppercase for a-f 01827 return strtoupper( $matches[0] ); 01828 } 01829 }; 01830 return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $callback, $component ); 01831 } 01832 01841 private function maybeMakeExternalImage( $url ) { 01842 $imagesfrom = $this->mOptions->getAllowExternalImagesFrom(); 01843 $imagesexception = !empty( $imagesfrom ); 01844 $text = false; 01845 # $imagesfrom could be either a single string or an array of strings, parse out the latter 01846 if ( $imagesexception && is_array( $imagesfrom ) ) { 01847 $imagematch = false; 01848 foreach ( $imagesfrom as $match ) { 01849 if ( strpos( $url, $match ) === 0 ) { 01850 $imagematch = true; 01851 break; 01852 } 01853 } 01854 } elseif ( $imagesexception ) { 01855 $imagematch = ( strpos( $url, $imagesfrom ) === 0 ); 01856 } else { 01857 $imagematch = false; 01858 } 01859 01860 if ( $this->mOptions->getAllowExternalImages() 01861 || ( $imagesexception && $imagematch ) 01862 ) { 01863 if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) { 01864 # Image found 01865 $text = Linker::makeExternalImage( $url ); 01866 } 01867 } 01868 if ( !$text && $this->mOptions->getEnableImageWhitelist() 01869 && preg_match( self::EXT_IMAGE_REGEX, $url ) 01870 ) { 01871 $whitelist = explode( 01872 "\n", 01873 wfMessage( 'external_image_whitelist' )->inContentLanguage()->text() 01874 ); 01875 01876 foreach ( $whitelist as $entry ) { 01877 # Sanitize the regex fragment, make it case-insensitive, ignore blank entries/comments 01878 if ( strpos( $entry, '#' ) === 0 || $entry === '' ) { 01879 continue; 01880 } 01881 if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) { 01882 # Image matches a whitelist entry 01883 $text = Linker::makeExternalImage( $url ); 01884 break; 01885 } 01886 } 01887 } 01888 return $text; 01889 } 01890 01900 public function replaceInternalLinks( $s ) { 01901 $this->mLinkHolders->merge( $this->replaceInternalLinks2( $s ) ); 01902 return $s; 01903 } 01904 01913 public function replaceInternalLinks2( &$s ) { 01914 global $wgExtraInterlanguageLinkPrefixes; 01915 wfProfileIn( __METHOD__ ); 01916 01917 wfProfileIn( __METHOD__ . '-setup' ); 01918 static $tc = false, $e1, $e1_img; 01919 # the % is needed to support urlencoded titles as well 01920 if ( !$tc ) { 01921 $tc = Title::legalChars() . '#%'; 01922 # Match a link having the form [[namespace:link|alternate]]trail 01923 $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; 01924 # Match cases where there is no "]]", which might still be images 01925 $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; 01926 } 01927 01928 $holders = new LinkHolderArray( $this ); 01929 01930 # split the entire text string on occurrences of [[ 01931 $a = StringUtils::explode( '[[', ' ' . $s ); 01932 # get the first element (all text up to first [[), and remove the space we added 01933 $s = $a->current(); 01934 $a->next(); 01935 $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void" 01936 $s = substr( $s, 1 ); 01937 01938 $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension(); 01939 $e2 = null; 01940 if ( $useLinkPrefixExtension ) { 01941 # Match the end of a line for a word that's not followed by whitespace, 01942 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched 01943 global $wgContLang; 01944 $charset = $wgContLang->linkPrefixCharset(); 01945 $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu"; 01946 } 01947 01948 if ( is_null( $this->mTitle ) ) { 01949 wfProfileOut( __METHOD__ . '-setup' ); 01950 wfProfileOut( __METHOD__ ); 01951 throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" ); 01952 } 01953 $nottalk = !$this->mTitle->isTalkPage(); 01954 01955 if ( $useLinkPrefixExtension ) { 01956 $m = array(); 01957 if ( preg_match( $e2, $s, $m ) ) { 01958 $first_prefix = $m[2]; 01959 } else { 01960 $first_prefix = false; 01961 } 01962 } else { 01963 $prefix = ''; 01964 } 01965 01966 $useSubpages = $this->areSubpagesAllowed(); 01967 wfProfileOut( __METHOD__ . '-setup' ); 01968 01969 // @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect 01970 # Loop for each link 01971 for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) { 01972 // @codingStandardsIgnoreStart 01973 01974 # Check for excessive memory usage 01975 if ( $holders->isBig() ) { 01976 # Too big 01977 # Do the existence check, replace the link holders and clear the array 01978 $holders->replace( $s ); 01979 $holders->clear(); 01980 } 01981 01982 if ( $useLinkPrefixExtension ) { 01983 wfProfileIn( __METHOD__ . '-prefixhandling' ); 01984 if ( preg_match( $e2, $s, $m ) ) { 01985 $prefix = $m[2]; 01986 $s = $m[1]; 01987 } else { 01988 $prefix = ''; 01989 } 01990 # first link 01991 if ( $first_prefix ) { 01992 $prefix = $first_prefix; 01993 $first_prefix = false; 01994 } 01995 wfProfileOut( __METHOD__ . '-prefixhandling' ); 01996 } 01997 01998 $might_be_img = false; 01999 02000 wfProfileIn( __METHOD__ . "-e1" ); 02001 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt 02002 $text = $m[2]; 02003 # If we get a ] at the beginning of $m[3] that means we have a link that's something like: 02004 # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up, 02005 # the real problem is with the $e1 regex 02006 # See bug 1300. 02007 # 02008 # Still some problems for cases where the ] is meant to be outside punctuation, 02009 # and no image is in sight. See bug 2095. 02010 # 02011 if ( $text !== '' 02012 && substr( $m[3], 0, 1 ) === ']' 02013 && strpos( $text, '[' ) !== false 02014 ) { 02015 $text .= ']'; # so that replaceExternalLinks($text) works later 02016 $m[3] = substr( $m[3], 1 ); 02017 } 02018 # fix up urlencoded title texts 02019 if ( strpos( $m[1], '%' ) !== false ) { 02020 # Should anchors '#' also be rejected? 02021 $m[1] = str_replace( array( '<', '>' ), array( '<', '>' ), rawurldecode( $m[1] ) ); 02022 } 02023 $trail = $m[3]; 02024 } elseif ( preg_match( $e1_img, $line, $m ) ) { 02025 # Invalid, but might be an image with a link in its caption 02026 $might_be_img = true; 02027 $text = $m[2]; 02028 if ( strpos( $m[1], '%' ) !== false ) { 02029 $m[1] = rawurldecode( $m[1] ); 02030 } 02031 $trail = ""; 02032 } else { # Invalid form; output directly 02033 $s .= $prefix . '[[' . $line; 02034 wfProfileOut( __METHOD__ . "-e1" ); 02035 continue; 02036 } 02037 wfProfileOut( __METHOD__ . "-e1" ); 02038 wfProfileIn( __METHOD__ . "-misc" ); 02039 02040 $origLink = $m[1]; 02041 02042 # Don't allow internal links to pages containing 02043 # PROTO: where PROTO is a valid URL protocol; these 02044 # should be external links. 02045 if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) { 02046 $s .= $prefix . '[[' . $line; 02047 wfProfileOut( __METHOD__ . "-misc" ); 02048 continue; 02049 } 02050 02051 # Make subpage if necessary 02052 if ( $useSubpages ) { 02053 $link = $this->maybeDoSubpageLink( $origLink, $text ); 02054 } else { 02055 $link = $origLink; 02056 } 02057 02058 $noforce = ( substr( $origLink, 0, 1 ) !== ':' ); 02059 if ( !$noforce ) { 02060 # Strip off leading ':' 02061 $link = substr( $link, 1 ); 02062 } 02063 02064 wfProfileOut( __METHOD__ . "-misc" ); 02065 wfProfileIn( __METHOD__ . "-title" ); 02066 $nt = Title::newFromText( $this->mStripState->unstripNoWiki( $link ) ); 02067 if ( $nt === null ) { 02068 $s .= $prefix . '[[' . $line; 02069 wfProfileOut( __METHOD__ . "-title" ); 02070 continue; 02071 } 02072 02073 $ns = $nt->getNamespace(); 02074 $iw = $nt->getInterwiki(); 02075 wfProfileOut( __METHOD__ . "-title" ); 02076 02077 if ( $might_be_img ) { # if this is actually an invalid link 02078 wfProfileIn( __METHOD__ . "-might_be_img" ); 02079 if ( $ns == NS_FILE && $noforce ) { # but might be an image 02080 $found = false; 02081 while ( true ) { 02082 # look at the next 'line' to see if we can close it there 02083 $a->next(); 02084 $next_line = $a->current(); 02085 if ( $next_line === false || $next_line === null ) { 02086 break; 02087 } 02088 $m = explode( ']]', $next_line, 3 ); 02089 if ( count( $m ) == 3 ) { 02090 # the first ]] closes the inner link, the second the image 02091 $found = true; 02092 $text .= "[[{$m[0]}]]{$m[1]}"; 02093 $trail = $m[2]; 02094 break; 02095 } elseif ( count( $m ) == 2 ) { 02096 # if there's exactly one ]] that's fine, we'll keep looking 02097 $text .= "[[{$m[0]}]]{$m[1]}"; 02098 } else { 02099 # if $next_line is invalid too, we need look no further 02100 $text .= '[[' . $next_line; 02101 break; 02102 } 02103 } 02104 if ( !$found ) { 02105 # we couldn't find the end of this imageLink, so output it raw 02106 # but don't ignore what might be perfectly normal links in the text we've examined 02107 $holders->merge( $this->replaceInternalLinks2( $text ) ); 02108 $s .= "{$prefix}[[$link|$text"; 02109 # note: no $trail, because without an end, there *is* no trail 02110 wfProfileOut( __METHOD__ . "-might_be_img" ); 02111 continue; 02112 } 02113 } else { # it's not an image, so output it raw 02114 $s .= "{$prefix}[[$link|$text"; 02115 # note: no $trail, because without an end, there *is* no trail 02116 wfProfileOut( __METHOD__ . "-might_be_img" ); 02117 continue; 02118 } 02119 wfProfileOut( __METHOD__ . "-might_be_img" ); 02120 } 02121 02122 $wasblank = ( $text == '' ); 02123 if ( $wasblank ) { 02124 $text = $link; 02125 } else { 02126 # Bug 4598 madness. Handle the quotes only if they come from the alternate part 02127 # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a> 02128 # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']] 02129 # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a> 02130 $text = $this->doQuotes( $text ); 02131 } 02132 02133 # Link not escaped by : , create the various objects 02134 if ( $noforce && !$nt->wasLocalInterwiki() ) { 02135 # Interwikis 02136 wfProfileIn( __METHOD__ . "-interwiki" ); 02137 if ( 02138 $iw && $this->mOptions->getInterwikiMagic() && $nottalk && ( 02139 Language::fetchLanguageName( $iw, null, 'mw' ) || 02140 in_array( $iw, $wgExtraInterlanguageLinkPrefixes ) 02141 ) 02142 ) { 02143 # Bug 24502: filter duplicates 02144 if ( !isset( $this->mLangLinkLanguages[$iw] ) ) { 02145 $this->mLangLinkLanguages[$iw] = true; 02146 $this->mOutput->addLanguageLink( $nt->getFullText() ); 02147 } 02148 02149 $s = rtrim( $s . $prefix ); 02150 $s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail; 02151 wfProfileOut( __METHOD__ . "-interwiki" ); 02152 continue; 02153 } 02154 wfProfileOut( __METHOD__ . "-interwiki" ); 02155 02156 if ( $ns == NS_FILE ) { 02157 wfProfileIn( __METHOD__ . "-image" ); 02158 if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) { 02159 if ( $wasblank ) { 02160 # if no parameters were passed, $text 02161 # becomes something like "File:Foo.png", 02162 # which we don't want to pass on to the 02163 # image generator 02164 $text = ''; 02165 } else { 02166 # recursively parse links inside the image caption 02167 # actually, this will parse them in any other parameters, too, 02168 # but it might be hard to fix that, and it doesn't matter ATM 02169 $text = $this->replaceExternalLinks( $text ); 02170 $holders->merge( $this->replaceInternalLinks2( $text ) ); 02171 } 02172 # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them 02173 $s .= $prefix . $this->armorLinks( 02174 $this->makeImage( $nt, $text, $holders ) ) . $trail; 02175 } else { 02176 $s .= $prefix . $trail; 02177 } 02178 wfProfileOut( __METHOD__ . "-image" ); 02179 continue; 02180 } 02181 02182 if ( $ns == NS_CATEGORY ) { 02183 wfProfileIn( __METHOD__ . "-category" ); 02184 $s = rtrim( $s . "\n" ); # bug 87 02185 02186 if ( $wasblank ) { 02187 $sortkey = $this->getDefaultSort(); 02188 } else { 02189 $sortkey = $text; 02190 } 02191 $sortkey = Sanitizer::decodeCharReferences( $sortkey ); 02192 $sortkey = str_replace( "\n", '', $sortkey ); 02193 $sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey ); 02194 $this->mOutput->addCategory( $nt->getDBkey(), $sortkey ); 02195 02199 $s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail; 02200 02201 wfProfileOut( __METHOD__ . "-category" ); 02202 continue; 02203 } 02204 } 02205 02206 # Self-link checking. For some languages, variants of the title are checked in 02207 # LinkHolderArray::doVariants() to allow batching the existence checks necessary 02208 # for linking to a different variant. 02209 if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) { 02210 $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail ); 02211 continue; 02212 } 02213 02214 # NS_MEDIA is a pseudo-namespace for linking directly to a file 02215 # @todo FIXME: Should do batch file existence checks, see comment below 02216 if ( $ns == NS_MEDIA ) { 02217 wfProfileIn( __METHOD__ . "-media" ); 02218 # Give extensions a chance to select the file revision for us 02219 $options = array(); 02220 $descQuery = false; 02221 wfRunHooks( 'BeforeParserFetchFileAndTitle', 02222 array( $this, $nt, &$options, &$descQuery ) ); 02223 # Fetch and register the file (file title may be different via hooks) 02224 list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options ); 02225 # Cloak with NOPARSE to avoid replacement in replaceExternalLinks 02226 $s .= $prefix . $this->armorLinks( 02227 Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail; 02228 wfProfileOut( __METHOD__ . "-media" ); 02229 continue; 02230 } 02231 02232 wfProfileIn( __METHOD__ . "-always_known" ); 02233 # Some titles, such as valid special pages or files in foreign repos, should 02234 # be shown as bluelinks even though they're not included in the page table 02235 # 02236 # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do 02237 # batch file existence checks for NS_FILE and NS_MEDIA 02238 if ( $iw == '' && $nt->isAlwaysKnown() ) { 02239 $this->mOutput->addLink( $nt ); 02240 $s .= $this->makeKnownLinkHolder( $nt, $text, array(), $trail, $prefix ); 02241 } else { 02242 # Links will be added to the output link list after checking 02243 $s .= $holders->makeHolder( $nt, $text, array(), $trail, $prefix ); 02244 } 02245 wfProfileOut( __METHOD__ . "-always_known" ); 02246 } 02247 wfProfileOut( __METHOD__ ); 02248 return $holders; 02249 } 02250 02265 public function makeKnownLinkHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) { 02266 list( $inside, $trail ) = Linker::splitTrail( $trail ); 02267 02268 if ( is_string( $query ) ) { 02269 $query = wfCgiToArray( $query ); 02270 } 02271 if ( $text == '' ) { 02272 $text = htmlspecialchars( $nt->getPrefixedText() ); 02273 } 02274 02275 $link = Linker::linkKnown( $nt, "$prefix$text$inside", array(), $query ); 02276 02277 return $this->armorLinks( $link ) . $trail; 02278 } 02279 02290 public function armorLinks( $text ) { 02291 return preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/', 02292 "{$this->mUniqPrefix}NOPARSE$1", $text ); 02293 } 02294 02299 public function areSubpagesAllowed() { 02300 # Some namespaces don't allow subpages 02301 return MWNamespace::hasSubpages( $this->mTitle->getNamespace() ); 02302 } 02303 02312 public function maybeDoSubpageLink( $target, &$text ) { 02313 return Linker::normalizeSubpageLink( $this->mTitle, $target, $text ); 02314 } 02315 02322 public function closeParagraph() { 02323 $result = ''; 02324 if ( $this->mLastSection != '' ) { 02325 $result = '</' . $this->mLastSection . ">\n"; 02326 } 02327 $this->mInPre = false; 02328 $this->mLastSection = ''; 02329 return $result; 02330 } 02331 02342 public function getCommon( $st1, $st2 ) { 02343 $fl = strlen( $st1 ); 02344 $shorter = strlen( $st2 ); 02345 if ( $fl < $shorter ) { 02346 $shorter = $fl; 02347 } 02348 02349 for ( $i = 0; $i < $shorter; ++$i ) { 02350 if ( $st1[$i] != $st2[$i] ) { 02351 break; 02352 } 02353 } 02354 return $i; 02355 } 02356 02366 public function openList( $char ) { 02367 $result = $this->closeParagraph(); 02368 02369 if ( '*' === $char ) { 02370 $result .= "<ul><li>"; 02371 } elseif ( '#' === $char ) { 02372 $result .= "<ol><li>"; 02373 } elseif ( ':' === $char ) { 02374 $result .= "<dl><dd>"; 02375 } elseif ( ';' === $char ) { 02376 $result .= "<dl><dt>"; 02377 $this->mDTopen = true; 02378 } else { 02379 $result = '<!-- ERR 1 -->'; 02380 } 02381 02382 return $result; 02383 } 02384 02392 public function nextItem( $char ) { 02393 if ( '*' === $char || '#' === $char ) { 02394 return "</li>\n<li>"; 02395 } elseif ( ':' === $char || ';' === $char ) { 02396 $close = "</dd>\n"; 02397 if ( $this->mDTopen ) { 02398 $close = "</dt>\n"; 02399 } 02400 if ( ';' === $char ) { 02401 $this->mDTopen = true; 02402 return $close . '<dt>'; 02403 } else { 02404 $this->mDTopen = false; 02405 return $close . '<dd>'; 02406 } 02407 } 02408 return '<!-- ERR 2 -->'; 02409 } 02410 02418 public function closeList( $char ) { 02419 if ( '*' === $char ) { 02420 $text = "</li></ul>"; 02421 } elseif ( '#' === $char ) { 02422 $text = "</li></ol>"; 02423 } elseif ( ':' === $char ) { 02424 if ( $this->mDTopen ) { 02425 $this->mDTopen = false; 02426 $text = "</dt></dl>"; 02427 } else { 02428 $text = "</dd></dl>"; 02429 } 02430 } else { 02431 return '<!-- ERR 3 -->'; 02432 } 02433 return $text; 02434 } 02445 public function doBlockLevels( $text, $linestart ) { 02446 wfProfileIn( __METHOD__ ); 02447 02448 # Parsing through the text line by line. The main thing 02449 # happening here is handling of block-level elements p, pre, 02450 # and making lists from lines starting with * # : etc. 02451 # 02452 $textLines = StringUtils::explode( "\n", $text ); 02453 02454 $lastPrefix = $output = ''; 02455 $this->mDTopen = $inBlockElem = false; 02456 $prefixLength = 0; 02457 $paragraphStack = false; 02458 $inBlockquote = false; 02459 02460 foreach ( $textLines as $oLine ) { 02461 # Fix up $linestart 02462 if ( !$linestart ) { 02463 $output .= $oLine; 02464 $linestart = true; 02465 continue; 02466 } 02467 # * = ul 02468 # # = ol 02469 # ; = dt 02470 # : = dd 02471 02472 $lastPrefixLength = strlen( $lastPrefix ); 02473 $preCloseMatch = preg_match( '/<\\/pre/i', $oLine ); 02474 $preOpenMatch = preg_match( '/<pre/i', $oLine ); 02475 # If not in a <pre> element, scan for and figure out what prefixes are there. 02476 if ( !$this->mInPre ) { 02477 # Multiple prefixes may abut each other for nested lists. 02478 $prefixLength = strspn( $oLine, '*#:;' ); 02479 $prefix = substr( $oLine, 0, $prefixLength ); 02480 02481 # eh? 02482 # ; and : are both from definition-lists, so they're equivalent 02483 # for the purposes of determining whether or not we need to open/close 02484 # elements. 02485 $prefix2 = str_replace( ';', ':', $prefix ); 02486 $t = substr( $oLine, $prefixLength ); 02487 $this->mInPre = (bool)$preOpenMatch; 02488 } else { 02489 # Don't interpret any other prefixes in preformatted text 02490 $prefixLength = 0; 02491 $prefix = $prefix2 = ''; 02492 $t = $oLine; 02493 } 02494 02495 # List generation 02496 if ( $prefixLength && $lastPrefix === $prefix2 ) { 02497 # Same as the last item, so no need to deal with nesting or opening stuff 02498 $output .= $this->nextItem( substr( $prefix, -1 ) ); 02499 $paragraphStack = false; 02500 02501 if ( substr( $prefix, -1 ) === ';' ) { 02502 # The one nasty exception: definition lists work like this: 02503 # ; title : definition text 02504 # So we check for : in the remainder text to split up the 02505 # title and definition, without b0rking links. 02506 $term = $t2 = ''; 02507 if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) { 02508 $t = $t2; 02509 $output .= $term . $this->nextItem( ':' ); 02510 } 02511 } 02512 } elseif ( $prefixLength || $lastPrefixLength ) { 02513 # We need to open or close prefixes, or both. 02514 02515 # Either open or close a level... 02516 $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix ); 02517 $paragraphStack = false; 02518 02519 # Close all the prefixes which aren't shared. 02520 while ( $commonPrefixLength < $lastPrefixLength ) { 02521 $output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] ); 02522 --$lastPrefixLength; 02523 } 02524 02525 # Continue the current prefix if appropriate. 02526 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) { 02527 $output .= $this->nextItem( $prefix[$commonPrefixLength - 1] ); 02528 } 02529 02530 # Open prefixes where appropriate. 02531 if ( $lastPrefix && $prefixLength > $commonPrefixLength ) { 02532 $output .= "\n"; 02533 } 02534 while ( $prefixLength > $commonPrefixLength ) { 02535 $char = substr( $prefix, $commonPrefixLength, 1 ); 02536 $output .= $this->openList( $char ); 02537 02538 if ( ';' === $char ) { 02539 # @todo FIXME: This is dupe of code above 02540 if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) { 02541 $t = $t2; 02542 $output .= $term . $this->nextItem( ':' ); 02543 } 02544 } 02545 ++$commonPrefixLength; 02546 } 02547 if ( !$prefixLength && $lastPrefix ) { 02548 $output .= "\n"; 02549 } 02550 $lastPrefix = $prefix2; 02551 } 02552 02553 # If we have no prefixes, go to paragraph mode. 02554 if ( 0 == $prefixLength ) { 02555 wfProfileIn( __METHOD__ . "-paragraph" ); 02556 # No prefix (not in list)--go to paragraph mode 02557 # XXX: use a stack for nestable elements like span, table and div 02558 $openmatch = preg_match( 02559 '/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|' 02560 . '<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS', 02561 $t 02562 ); 02563 $closematch = preg_match( 02564 '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|' 02565 . '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|' 02566 . $this->mUniqPrefix 02567 . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS', 02568 $t 02569 ); 02570 02571 if ( $openmatch or $closematch ) { 02572 $paragraphStack = false; 02573 # @todo bug 5718: paragraph closed 02574 $output .= $this->closeParagraph(); 02575 if ( $preOpenMatch and !$preCloseMatch ) { 02576 $this->mInPre = true; 02577 } 02578 $bqOffset = 0; 02579 while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t, $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset ) ) { 02580 $inBlockquote = !$bqMatch[1][0]; // is this a close tag? 02581 $bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] ); 02582 } 02583 $inBlockElem = !$closematch; 02584 } elseif ( !$inBlockElem && !$this->mInPre ) { 02585 if ( ' ' == substr( $t, 0, 1 ) 02586 && ( $this->mLastSection === 'pre' || trim( $t ) != '' ) 02587 && !$inBlockquote 02588 ) { 02589 # pre 02590 if ( $this->mLastSection !== 'pre' ) { 02591 $paragraphStack = false; 02592 $output .= $this->closeParagraph() . '<pre>'; 02593 $this->mLastSection = 'pre'; 02594 } 02595 $t = substr( $t, 1 ); 02596 } else { 02597 # paragraph 02598 if ( trim( $t ) === '' ) { 02599 if ( $paragraphStack ) { 02600 $output .= $paragraphStack . '<br />'; 02601 $paragraphStack = false; 02602 $this->mLastSection = 'p'; 02603 } else { 02604 if ( $this->mLastSection !== 'p' ) { 02605 $output .= $this->closeParagraph(); 02606 $this->mLastSection = ''; 02607 $paragraphStack = '<p>'; 02608 } else { 02609 $paragraphStack = '</p><p>'; 02610 } 02611 } 02612 } else { 02613 if ( $paragraphStack ) { 02614 $output .= $paragraphStack; 02615 $paragraphStack = false; 02616 $this->mLastSection = 'p'; 02617 } elseif ( $this->mLastSection !== 'p' ) { 02618 $output .= $this->closeParagraph() . '<p>'; 02619 $this->mLastSection = 'p'; 02620 } 02621 } 02622 } 02623 } 02624 wfProfileOut( __METHOD__ . "-paragraph" ); 02625 } 02626 # somewhere above we forget to get out of pre block (bug 785) 02627 if ( $preCloseMatch && $this->mInPre ) { 02628 $this->mInPre = false; 02629 } 02630 if ( $paragraphStack === false ) { 02631 $output .= $t; 02632 if ( $prefixLength === 0 ) { 02633 $output .= "\n"; 02634 } 02635 } 02636 } 02637 while ( $prefixLength ) { 02638 $output .= $this->closeList( $prefix2[$prefixLength - 1] ); 02639 --$prefixLength; 02640 if ( !$prefixLength ) { 02641 $output .= "\n"; 02642 } 02643 } 02644 if ( $this->mLastSection != '' ) { 02645 $output .= '</' . $this->mLastSection . '>'; 02646 $this->mLastSection = ''; 02647 } 02648 02649 wfProfileOut( __METHOD__ ); 02650 return $output; 02651 } 02652 02663 public function findColonNoLinks( $str, &$before, &$after ) { 02664 wfProfileIn( __METHOD__ ); 02665 02666 $pos = strpos( $str, ':' ); 02667 if ( $pos === false ) { 02668 # Nothing to find! 02669 wfProfileOut( __METHOD__ ); 02670 return false; 02671 } 02672 02673 $lt = strpos( $str, '<' ); 02674 if ( $lt === false || $lt > $pos ) { 02675 # Easy; no tag nesting to worry about 02676 $before = substr( $str, 0, $pos ); 02677 $after = substr( $str, $pos + 1 ); 02678 wfProfileOut( __METHOD__ ); 02679 return $pos; 02680 } 02681 02682 # Ugly state machine to walk through avoiding tags. 02683 $state = self::COLON_STATE_TEXT; 02684 $stack = 0; 02685 $len = strlen( $str ); 02686 for ( $i = 0; $i < $len; $i++ ) { 02687 $c = $str[$i]; 02688 02689 switch ( $state ) { 02690 # (Using the number is a performance hack for common cases) 02691 case 0: # self::COLON_STATE_TEXT: 02692 switch ( $c ) { 02693 case "<": 02694 # Could be either a <start> tag or an </end> tag 02695 $state = self::COLON_STATE_TAGSTART; 02696 break; 02697 case ":": 02698 if ( $stack == 0 ) { 02699 # We found it! 02700 $before = substr( $str, 0, $i ); 02701 $after = substr( $str, $i + 1 ); 02702 wfProfileOut( __METHOD__ ); 02703 return $i; 02704 } 02705 # Embedded in a tag; don't break it. 02706 break; 02707 default: 02708 # Skip ahead looking for something interesting 02709 $colon = strpos( $str, ':', $i ); 02710 if ( $colon === false ) { 02711 # Nothing else interesting 02712 wfProfileOut( __METHOD__ ); 02713 return false; 02714 } 02715 $lt = strpos( $str, '<', $i ); 02716 if ( $stack === 0 ) { 02717 if ( $lt === false || $colon < $lt ) { 02718 # We found it! 02719 $before = substr( $str, 0, $colon ); 02720 $after = substr( $str, $colon + 1 ); 02721 wfProfileOut( __METHOD__ ); 02722 return $i; 02723 } 02724 } 02725 if ( $lt === false ) { 02726 # Nothing else interesting to find; abort! 02727 # We're nested, but there's no close tags left. Abort! 02728 break 2; 02729 } 02730 # Skip ahead to next tag start 02731 $i = $lt; 02732 $state = self::COLON_STATE_TAGSTART; 02733 } 02734 break; 02735 case 1: # self::COLON_STATE_TAG: 02736 # In a <tag> 02737 switch ( $c ) { 02738 case ">": 02739 $stack++; 02740 $state = self::COLON_STATE_TEXT; 02741 break; 02742 case "/": 02743 # Slash may be followed by >? 02744 $state = self::COLON_STATE_TAGSLASH; 02745 break; 02746 default: 02747 # ignore 02748 } 02749 break; 02750 case 2: # self::COLON_STATE_TAGSTART: 02751 switch ( $c ) { 02752 case "/": 02753 $state = self::COLON_STATE_CLOSETAG; 02754 break; 02755 case "!": 02756 $state = self::COLON_STATE_COMMENT; 02757 break; 02758 case ">": 02759 # Illegal early close? This shouldn't happen D: 02760 $state = self::COLON_STATE_TEXT; 02761 break; 02762 default: 02763 $state = self::COLON_STATE_TAG; 02764 } 02765 break; 02766 case 3: # self::COLON_STATE_CLOSETAG: 02767 # In a </tag> 02768 if ( $c === ">" ) { 02769 $stack--; 02770 if ( $stack < 0 ) { 02771 wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" ); 02772 wfProfileOut( __METHOD__ ); 02773 return false; 02774 } 02775 $state = self::COLON_STATE_TEXT; 02776 } 02777 break; 02778 case self::COLON_STATE_TAGSLASH: 02779 if ( $c === ">" ) { 02780 # Yes, a self-closed tag <blah/> 02781 $state = self::COLON_STATE_TEXT; 02782 } else { 02783 # Probably we're jumping the gun, and this is an attribute 02784 $state = self::COLON_STATE_TAG; 02785 } 02786 break; 02787 case 5: # self::COLON_STATE_COMMENT: 02788 if ( $c === "-" ) { 02789 $state = self::COLON_STATE_COMMENTDASH; 02790 } 02791 break; 02792 case self::COLON_STATE_COMMENTDASH: 02793 if ( $c === "-" ) { 02794 $state = self::COLON_STATE_COMMENTDASHDASH; 02795 } else { 02796 $state = self::COLON_STATE_COMMENT; 02797 } 02798 break; 02799 case self::COLON_STATE_COMMENTDASHDASH: 02800 if ( $c === ">" ) { 02801 $state = self::COLON_STATE_TEXT; 02802 } else { 02803 $state = self::COLON_STATE_COMMENT; 02804 } 02805 break; 02806 default: 02807 wfProfileOut( __METHOD__ ); 02808 throw new MWException( "State machine error in " . __METHOD__ ); 02809 } 02810 } 02811 if ( $stack > 0 ) { 02812 wfDebug( __METHOD__ . ": Invalid input; not enough close tags (stack $stack, state $state)\n" ); 02813 wfProfileOut( __METHOD__ ); 02814 return false; 02815 } 02816 wfProfileOut( __METHOD__ ); 02817 return false; 02818 } 02819 02831 public function getVariableValue( $index, $frame = false ) { 02832 global $wgContLang, $wgSitename, $wgServer, $wgServerName; 02833 global $wgArticlePath, $wgScriptPath, $wgStylePath; 02834 02835 if ( is_null( $this->mTitle ) ) { 02836 // If no title set, bad things are going to happen 02837 // later. Title should always be set since this 02838 // should only be called in the middle of a parse 02839 // operation (but the unit-tests do funky stuff) 02840 throw new MWException( __METHOD__ . ' Should only be ' 02841 . ' called while parsing (no title set)' ); 02842 } 02843 02848 if ( wfRunHooks( 'ParserGetVariableValueVarCache', array( &$this, &$this->mVarCache ) ) ) { 02849 if ( isset( $this->mVarCache[$index] ) ) { 02850 return $this->mVarCache[$index]; 02851 } 02852 } 02853 02854 $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() ); 02855 wfRunHooks( 'ParserGetVariableValueTs', array( &$this, &$ts ) ); 02856 02857 $pageLang = $this->getFunctionLang(); 02858 02859 switch ( $index ) { 02860 case '!': 02861 $value = '|'; 02862 break; 02863 case 'currentmonth': 02864 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) ); 02865 break; 02866 case 'currentmonth1': 02867 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) ); 02868 break; 02869 case 'currentmonthname': 02870 $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) ); 02871 break; 02872 case 'currentmonthnamegen': 02873 $value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) ); 02874 break; 02875 case 'currentmonthabbrev': 02876 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) ); 02877 break; 02878 case 'currentday': 02879 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) ); 02880 break; 02881 case 'currentday2': 02882 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) ); 02883 break; 02884 case 'localmonth': 02885 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) ); 02886 break; 02887 case 'localmonth1': 02888 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) ); 02889 break; 02890 case 'localmonthname': 02891 $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) ); 02892 break; 02893 case 'localmonthnamegen': 02894 $value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) ); 02895 break; 02896 case 'localmonthabbrev': 02897 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) ); 02898 break; 02899 case 'localday': 02900 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) ); 02901 break; 02902 case 'localday2': 02903 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) ); 02904 break; 02905 case 'pagename': 02906 $value = wfEscapeWikiText( $this->mTitle->getText() ); 02907 break; 02908 case 'pagenamee': 02909 $value = wfEscapeWikiText( $this->mTitle->getPartialURL() ); 02910 break; 02911 case 'fullpagename': 02912 $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() ); 02913 break; 02914 case 'fullpagenamee': 02915 $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() ); 02916 break; 02917 case 'subpagename': 02918 $value = wfEscapeWikiText( $this->mTitle->getSubpageText() ); 02919 break; 02920 case 'subpagenamee': 02921 $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() ); 02922 break; 02923 case 'rootpagename': 02924 $value = wfEscapeWikiText( $this->mTitle->getRootText() ); 02925 break; 02926 case 'rootpagenamee': 02927 $value = wfEscapeWikiText( wfUrlEncode( str_replace( 02928 ' ', 02929 '_', 02930 $this->mTitle->getRootText() 02931 ) ) ); 02932 break; 02933 case 'basepagename': 02934 $value = wfEscapeWikiText( $this->mTitle->getBaseText() ); 02935 break; 02936 case 'basepagenamee': 02937 $value = wfEscapeWikiText( wfUrlEncode( str_replace( 02938 ' ', 02939 '_', 02940 $this->mTitle->getBaseText() 02941 ) ) ); 02942 break; 02943 case 'talkpagename': 02944 if ( $this->mTitle->canTalk() ) { 02945 $talkPage = $this->mTitle->getTalkPage(); 02946 $value = wfEscapeWikiText( $talkPage->getPrefixedText() ); 02947 } else { 02948 $value = ''; 02949 } 02950 break; 02951 case 'talkpagenamee': 02952 if ( $this->mTitle->canTalk() ) { 02953 $talkPage = $this->mTitle->getTalkPage(); 02954 $value = wfEscapeWikiText( $talkPage->getPrefixedURL() ); 02955 } else { 02956 $value = ''; 02957 } 02958 break; 02959 case 'subjectpagename': 02960 $subjPage = $this->mTitle->getSubjectPage(); 02961 $value = wfEscapeWikiText( $subjPage->getPrefixedText() ); 02962 break; 02963 case 'subjectpagenamee': 02964 $subjPage = $this->mTitle->getSubjectPage(); 02965 $value = wfEscapeWikiText( $subjPage->getPrefixedURL() ); 02966 break; 02967 case 'pageid': // requested in bug 23427 02968 $pageid = $this->getTitle()->getArticleID(); 02969 if ( $pageid == 0 ) { 02970 # 0 means the page doesn't exist in the database, 02971 # which means the user is previewing a new page. 02972 # The vary-revision flag must be set, because the magic word 02973 # will have a different value once the page is saved. 02974 $this->mOutput->setFlag( 'vary-revision' ); 02975 wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" ); 02976 } 02977 $value = $pageid ? $pageid : null; 02978 break; 02979 case 'revisionid': 02980 # Let the edit saving system know we should parse the page 02981 # *after* a revision ID has been assigned. 02982 $this->mOutput->setFlag( 'vary-revision' ); 02983 wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision...\n" ); 02984 $value = $this->mRevisionId; 02985 break; 02986 case 'revisionday': 02987 # Let the edit saving system know we should parse the page 02988 # *after* a revision ID has been assigned. This is for null edits. 02989 $this->mOutput->setFlag( 'vary-revision' ); 02990 wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" ); 02991 $value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) ); 02992 break; 02993 case 'revisionday2': 02994 # Let the edit saving system know we should parse the page 02995 # *after* a revision ID has been assigned. This is for null edits. 02996 $this->mOutput->setFlag( 'vary-revision' ); 02997 wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" ); 02998 $value = substr( $this->getRevisionTimestamp(), 6, 2 ); 02999 break; 03000 case 'revisionmonth': 03001 # Let the edit saving system know we should parse the page 03002 # *after* a revision ID has been assigned. This is for null edits. 03003 $this->mOutput->setFlag( 'vary-revision' ); 03004 wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" ); 03005 $value = substr( $this->getRevisionTimestamp(), 4, 2 ); 03006 break; 03007 case 'revisionmonth1': 03008 # Let the edit saving system know we should parse the page 03009 # *after* a revision ID has been assigned. This is for null edits. 03010 $this->mOutput->setFlag( 'vary-revision' ); 03011 wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" ); 03012 $value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) ); 03013 break; 03014 case 'revisionyear': 03015 # Let the edit saving system know we should parse the page 03016 # *after* a revision ID has been assigned. This is for null edits. 03017 $this->mOutput->setFlag( 'vary-revision' ); 03018 wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" ); 03019 $value = substr( $this->getRevisionTimestamp(), 0, 4 ); 03020 break; 03021 case 'revisiontimestamp': 03022 # Let the edit saving system know we should parse the page 03023 # *after* a revision ID has been assigned. This is for null edits. 03024 $this->mOutput->setFlag( 'vary-revision' ); 03025 wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" ); 03026 $value = $this->getRevisionTimestamp(); 03027 break; 03028 case 'revisionuser': 03029 # Let the edit saving system know we should parse the page 03030 # *after* a revision ID has been assigned. This is for null edits. 03031 $this->mOutput->setFlag( 'vary-revision' ); 03032 wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" ); 03033 $value = $this->getRevisionUser(); 03034 break; 03035 case 'revisionsize': 03036 # Let the edit saving system know we should parse the page 03037 # *after* a revision ID has been assigned. This is for null edits. 03038 $this->mOutput->setFlag( 'vary-revision' ); 03039 wfDebug( __METHOD__ . ": {{REVISIONSIZE}} used, setting vary-revision...\n" ); 03040 $value = $this->getRevisionSize(); 03041 break; 03042 case 'namespace': 03043 $value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) ); 03044 break; 03045 case 'namespacee': 03046 $value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) ); 03047 break; 03048 case 'namespacenumber': 03049 $value = $this->mTitle->getNamespace(); 03050 break; 03051 case 'talkspace': 03052 $value = $this->mTitle->canTalk() 03053 ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() ) 03054 : ''; 03055 break; 03056 case 'talkspacee': 03057 $value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : ''; 03058 break; 03059 case 'subjectspace': 03060 $value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() ); 03061 break; 03062 case 'subjectspacee': 03063 $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) ); 03064 break; 03065 case 'currentdayname': 03066 $value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 ); 03067 break; 03068 case 'currentyear': 03069 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true ); 03070 break; 03071 case 'currenttime': 03072 $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false ); 03073 break; 03074 case 'currenthour': 03075 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true ); 03076 break; 03077 case 'currentweek': 03078 # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to 03079 # int to remove the padding 03080 $value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) ); 03081 break; 03082 case 'currentdow': 03083 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) ); 03084 break; 03085 case 'localdayname': 03086 $value = $pageLang->getWeekdayName( 03087 (int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1 03088 ); 03089 break; 03090 case 'localyear': 03091 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true ); 03092 break; 03093 case 'localtime': 03094 $value = $pageLang->time( 03095 MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ), 03096 false, 03097 false 03098 ); 03099 break; 03100 case 'localhour': 03101 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true ); 03102 break; 03103 case 'localweek': 03104 # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to 03105 # int to remove the padding 03106 $value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) ); 03107 break; 03108 case 'localdow': 03109 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) ); 03110 break; 03111 case 'numberofarticles': 03112 $value = $pageLang->formatNum( SiteStats::articles() ); 03113 break; 03114 case 'numberoffiles': 03115 $value = $pageLang->formatNum( SiteStats::images() ); 03116 break; 03117 case 'numberofusers': 03118 $value = $pageLang->formatNum( SiteStats::users() ); 03119 break; 03120 case 'numberofactiveusers': 03121 $value = $pageLang->formatNum( SiteStats::activeUsers() ); 03122 break; 03123 case 'numberofpages': 03124 $value = $pageLang->formatNum( SiteStats::pages() ); 03125 break; 03126 case 'numberofadmins': 03127 $value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) ); 03128 break; 03129 case 'numberofedits': 03130 $value = $pageLang->formatNum( SiteStats::edits() ); 03131 break; 03132 case 'numberofviews': 03133 global $wgDisableCounters; 03134 $value = !$wgDisableCounters ? $pageLang->formatNum( SiteStats::views() ) : ''; 03135 break; 03136 case 'currenttimestamp': 03137 $value = wfTimestamp( TS_MW, $ts ); 03138 break; 03139 case 'localtimestamp': 03140 $value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ); 03141 break; 03142 case 'currentversion': 03143 $value = SpecialVersion::getVersion(); 03144 break; 03145 case 'articlepath': 03146 return $wgArticlePath; 03147 case 'sitename': 03148 return $wgSitename; 03149 case 'server': 03150 return $wgServer; 03151 case 'servername': 03152 return $wgServerName; 03153 case 'scriptpath': 03154 return $wgScriptPath; 03155 case 'stylepath': 03156 return $wgStylePath; 03157 case 'directionmark': 03158 return $pageLang->getDirMark(); 03159 case 'contentlanguage': 03160 global $wgLanguageCode; 03161 return $wgLanguageCode; 03162 case 'cascadingsources': 03163 $value = CoreParserFunctions::cascadingsources( $this ); 03164 break; 03165 default: 03166 $ret = null; 03167 wfRunHooks( 03168 'ParserGetVariableValueSwitch', 03169 array( &$this, &$this->mVarCache, &$index, &$ret, &$frame ) 03170 ); 03171 03172 return $ret; 03173 } 03174 03175 if ( $index ) { 03176 $this->mVarCache[$index] = $value; 03177 } 03178 03179 return $value; 03180 } 03181 03187 public function initialiseVariables() { 03188 wfProfileIn( __METHOD__ ); 03189 $variableIDs = MagicWord::getVariableIDs(); 03190 $substIDs = MagicWord::getSubstIDs(); 03191 03192 $this->mVariables = new MagicWordArray( $variableIDs ); 03193 $this->mSubstWords = new MagicWordArray( $substIDs ); 03194 wfProfileOut( __METHOD__ ); 03195 } 03196 03219 public function preprocessToDom( $text, $flags = 0 ) { 03220 $dom = $this->getPreprocessor()->preprocessToObj( $text, $flags ); 03221 return $dom; 03222 } 03223 03231 public static function splitWhitespace( $s ) { 03232 $ltrimmed = ltrim( $s ); 03233 $w1 = substr( $s, 0, strlen( $s ) - strlen( $ltrimmed ) ); 03234 $trimmed = rtrim( $ltrimmed ); 03235 $diff = strlen( $ltrimmed ) - strlen( $trimmed ); 03236 if ( $diff > 0 ) { 03237 $w2 = substr( $ltrimmed, -$diff ); 03238 } else { 03239 $w2 = ''; 03240 } 03241 return array( $w1, $trimmed, $w2 ); 03242 } 03243 03264 public function replaceVariables( $text, $frame = false, $argsOnly = false ) { 03265 # Is there any text? Also, Prevent too big inclusions! 03266 if ( strlen( $text ) < 1 || strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) { 03267 return $text; 03268 } 03269 wfProfileIn( __METHOD__ ); 03270 03271 if ( $frame === false ) { 03272 $frame = $this->getPreprocessor()->newFrame(); 03273 } elseif ( !( $frame instanceof PPFrame ) ) { 03274 wfDebug( __METHOD__ . " called using plain parameters instead of " 03275 . "a PPFrame instance. Creating custom frame.\n" ); 03276 $frame = $this->getPreprocessor()->newCustomFrame( $frame ); 03277 } 03278 03279 $dom = $this->preprocessToDom( $text ); 03280 $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0; 03281 $text = $frame->expand( $dom, $flags ); 03282 03283 wfProfileOut( __METHOD__ ); 03284 return $text; 03285 } 03286 03294 public static function createAssocArgs( $args ) { 03295 $assocArgs = array(); 03296 $index = 1; 03297 foreach ( $args as $arg ) { 03298 $eqpos = strpos( $arg, '=' ); 03299 if ( $eqpos === false ) { 03300 $assocArgs[$index++] = $arg; 03301 } else { 03302 $name = trim( substr( $arg, 0, $eqpos ) ); 03303 $value = trim( substr( $arg, $eqpos + 1 ) ); 03304 if ( $value === false ) { 03305 $value = ''; 03306 } 03307 if ( $name !== false ) { 03308 $assocArgs[$name] = $value; 03309 } 03310 } 03311 } 03312 03313 return $assocArgs; 03314 } 03315 03340 public function limitationWarn( $limitationType, $current = '', $max = '' ) { 03341 # does no harm if $current and $max are present but are unnecessary for the message 03342 $warning = wfMessage( "$limitationType-warning" )->numParams( $current, $max ) 03343 ->inLanguage( $this->mOptions->getUserLangObj() )->text(); 03344 $this->mOutput->addWarning( $warning ); 03345 $this->addTrackingCategory( "$limitationType-category" ); 03346 } 03347 03360 public function braceSubstitution( $piece, $frame ) { 03361 wfProfileIn( __METHOD__ ); 03362 wfProfileIn( __METHOD__ . '-setup' ); 03363 03364 // Flags 03365 03366 // $text has been filled 03367 $found = false; 03368 // wiki markup in $text should be escaped 03369 $nowiki = false; 03370 // $text is HTML, armour it against wikitext transformation 03371 $isHTML = false; 03372 // Force interwiki transclusion to be done in raw mode not rendered 03373 $forceRawInterwiki = false; 03374 // $text is a DOM node needing expansion in a child frame 03375 $isChildObj = false; 03376 // $text is a DOM node needing expansion in the current frame 03377 $isLocalObj = false; 03378 03379 # Title object, where $text came from 03380 $title = false; 03381 03382 # $part1 is the bit before the first |, and must contain only title characters. 03383 # Various prefixes will be stripped from it later. 03384 $titleWithSpaces = $frame->expand( $piece['title'] ); 03385 $part1 = trim( $titleWithSpaces ); 03386 $titleText = false; 03387 03388 # Original title text preserved for various purposes 03389 $originalTitle = $part1; 03390 03391 # $args is a list of argument nodes, starting from index 0, not including $part1 03392 # @todo FIXME: If piece['parts'] is null then the call to getLength() 03393 # below won't work b/c this $args isn't an object 03394 $args = ( null == $piece['parts'] ) ? array() : $piece['parts']; 03395 wfProfileOut( __METHOD__ . '-setup' ); 03396 03397 $titleProfileIn = null; // profile templates 03398 03399 # SUBST 03400 wfProfileIn( __METHOD__ . '-modifiers' ); 03401 if ( !$found ) { 03402 03403 $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 ); 03404 03405 # Possibilities for substMatch: "subst", "safesubst" or FALSE 03406 # Decide whether to expand template or keep wikitext as-is. 03407 if ( $this->ot['wiki'] ) { 03408 if ( $substMatch === false ) { 03409 $literal = true; # literal when in PST with no prefix 03410 } else { 03411 $literal = false; # expand when in PST with subst: or safesubst: 03412 } 03413 } else { 03414 if ( $substMatch == 'subst' ) { 03415 $literal = true; # literal when not in PST with plain subst: 03416 } else { 03417 $literal = false; # expand when not in PST with safesubst: or no prefix 03418 } 03419 } 03420 if ( $literal ) { 03421 $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); 03422 $isLocalObj = true; 03423 $found = true; 03424 } 03425 } 03426 03427 # Variables 03428 if ( !$found && $args->getLength() == 0 ) { 03429 $id = $this->mVariables->matchStartToEnd( $part1 ); 03430 if ( $id !== false ) { 03431 $text = $this->getVariableValue( $id, $frame ); 03432 if ( MagicWord::getCacheTTL( $id ) > -1 ) { 03433 $this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) ); 03434 } 03435 $found = true; 03436 } 03437 } 03438 03439 # MSG, MSGNW and RAW 03440 if ( !$found ) { 03441 # Check for MSGNW: 03442 $mwMsgnw = MagicWord::get( 'msgnw' ); 03443 if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) { 03444 $nowiki = true; 03445 } else { 03446 # Remove obsolete MSG: 03447 $mwMsg = MagicWord::get( 'msg' ); 03448 $mwMsg->matchStartAndRemove( $part1 ); 03449 } 03450 03451 # Check for RAW: 03452 $mwRaw = MagicWord::get( 'raw' ); 03453 if ( $mwRaw->matchStartAndRemove( $part1 ) ) { 03454 $forceRawInterwiki = true; 03455 } 03456 } 03457 wfProfileOut( __METHOD__ . '-modifiers' ); 03458 03459 # Parser functions 03460 if ( !$found ) { 03461 wfProfileIn( __METHOD__ . '-pfunc' ); 03462 03463 $colonPos = strpos( $part1, ':' ); 03464 if ( $colonPos !== false ) { 03465 $func = substr( $part1, 0, $colonPos ); 03466 $funcArgs = array( trim( substr( $part1, $colonPos + 1 ) ) ); 03467 for ( $i = 0; $i < $args->getLength(); $i++ ) { 03468 $funcArgs[] = $args->item( $i ); 03469 } 03470 try { 03471 $result = $this->callParserFunction( $frame, $func, $funcArgs ); 03472 } catch ( Exception $ex ) { 03473 wfProfileOut( __METHOD__ . '-pfunc' ); 03474 wfProfileOut( __METHOD__ ); 03475 throw $ex; 03476 } 03477 03478 # The interface for parser functions allows for extracting 03479 # flags into the local scope. Extract any forwarded flags 03480 # here. 03481 extract( $result ); 03482 } 03483 wfProfileOut( __METHOD__ . '-pfunc' ); 03484 } 03485 03486 # Finish mangling title and then check for loops. 03487 # Set $title to a Title object and $titleText to the PDBK 03488 if ( !$found ) { 03489 $ns = NS_TEMPLATE; 03490 # Split the title into page and subpage 03491 $subpage = ''; 03492 $relative = $this->maybeDoSubpageLink( $part1, $subpage ); 03493 if ( $part1 !== $relative ) { 03494 $part1 = $relative; 03495 $ns = $this->mTitle->getNamespace(); 03496 } 03497 $title = Title::newFromText( $part1, $ns ); 03498 if ( $title ) { 03499 $titleText = $title->getPrefixedText(); 03500 # Check for language variants if the template is not found 03501 if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) { 03502 $this->getConverterLanguage()->findVariantLink( $part1, $title, true ); 03503 } 03504 # Do recursion depth check 03505 $limit = $this->mOptions->getMaxTemplateDepth(); 03506 if ( $frame->depth >= $limit ) { 03507 $found = true; 03508 $text = '<span class="error">' 03509 . wfMessage( 'parser-template-recursion-depth-warning' ) 03510 ->numParams( $limit )->inContentLanguage()->text() 03511 . '</span>'; 03512 } 03513 } 03514 } 03515 03516 # Load from database 03517 if ( !$found && $title ) { 03518 if ( !Profiler::instance()->isPersistent() ) { 03519 # Too many unique items can kill profiling DBs/collectors 03520 $titleProfileIn = __METHOD__ . "-title-" . $title->getPrefixedDBkey(); 03521 wfProfileIn( $titleProfileIn ); // template in 03522 } 03523 wfProfileIn( __METHOD__ . '-loadtpl' ); 03524 if ( !$title->isExternal() ) { 03525 if ( $title->isSpecialPage() 03526 && $this->mOptions->getAllowSpecialInclusion() 03527 && $this->ot['html'] 03528 ) { 03529 // Pass the template arguments as URL parameters. 03530 // "uselang" will have no effect since the Language object 03531 // is forced to the one defined in ParserOptions. 03532 $pageArgs = array(); 03533 $argsLength = $args->getLength(); 03534 for ( $i = 0; $i < $argsLength; $i++ ) { 03535 $bits = $args->item( $i )->splitArg(); 03536 if ( strval( $bits['index'] ) === '' ) { 03537 $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) ); 03538 $value = trim( $frame->expand( $bits['value'] ) ); 03539 $pageArgs[$name] = $value; 03540 } 03541 } 03542 03543 // Create a new context to execute the special page 03544 $context = new RequestContext; 03545 $context->setTitle( $title ); 03546 $context->setRequest( new FauxRequest( $pageArgs ) ); 03547 $context->setUser( $this->getUser() ); 03548 $context->setLanguage( $this->mOptions->getUserLangObj() ); 03549 $ret = SpecialPageFactory::capturePath( $title, $context ); 03550 if ( $ret ) { 03551 $text = $context->getOutput()->getHTML(); 03552 $this->mOutput->addOutputPageMetadata( $context->getOutput() ); 03553 $found = true; 03554 $isHTML = true; 03555 $this->disableCache(); 03556 } 03557 } elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) { 03558 $found = false; # access denied 03559 wfDebug( __METHOD__ . ": template inclusion denied for " . 03560 $title->getPrefixedDBkey() . "\n" ); 03561 } else { 03562 list( $text, $title ) = $this->getTemplateDom( $title ); 03563 if ( $text !== false ) { 03564 $found = true; 03565 $isChildObj = true; 03566 } 03567 } 03568 03569 # If the title is valid but undisplayable, make a link to it 03570 if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) { 03571 $text = "[[:$titleText]]"; 03572 $found = true; 03573 } 03574 } elseif ( $title->isTrans() ) { 03575 # Interwiki transclusion 03576 if ( $this->ot['html'] && !$forceRawInterwiki ) { 03577 $text = $this->interwikiTransclude( $title, 'render' ); 03578 $isHTML = true; 03579 } else { 03580 $text = $this->interwikiTransclude( $title, 'raw' ); 03581 # Preprocess it like a template 03582 $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION ); 03583 $isChildObj = true; 03584 } 03585 $found = true; 03586 } 03587 03588 # Do infinite loop check 03589 # This has to be done after redirect resolution to avoid infinite loops via redirects 03590 if ( !$frame->loopCheck( $title ) ) { 03591 $found = true; 03592 $text = '<span class="error">' 03593 . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text() 03594 . '</span>'; 03595 wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" ); 03596 } 03597 wfProfileOut( __METHOD__ . '-loadtpl' ); 03598 } 03599 03600 # If we haven't found text to substitute by now, we're done 03601 # Recover the source wikitext and return it 03602 if ( !$found ) { 03603 $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); 03604 if ( $titleProfileIn ) { 03605 wfProfileOut( $titleProfileIn ); // template out 03606 } 03607 wfProfileOut( __METHOD__ ); 03608 return array( 'object' => $text ); 03609 } 03610 03611 # Expand DOM-style return values in a child frame 03612 if ( $isChildObj ) { 03613 # Clean up argument array 03614 $newFrame = $frame->newChild( $args, $title ); 03615 03616 if ( $nowiki ) { 03617 $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG ); 03618 } elseif ( $titleText !== false && $newFrame->isEmpty() ) { 03619 # Expansion is eligible for the empty-frame cache 03620 $text = $newFrame->cachedExpand( $titleText, $text ); 03621 } else { 03622 # Uncached expansion 03623 $text = $newFrame->expand( $text ); 03624 } 03625 } 03626 if ( $isLocalObj && $nowiki ) { 03627 $text = $frame->expand( $text, PPFrame::RECOVER_ORIG ); 03628 $isLocalObj = false; 03629 } 03630 03631 if ( $titleProfileIn ) { 03632 wfProfileOut( $titleProfileIn ); // template out 03633 } 03634 03635 # Replace raw HTML by a placeholder 03636 if ( $isHTML ) { 03637 $text = $this->insertStripItem( $text ); 03638 } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) { 03639 # Escape nowiki-style return values 03640 $text = wfEscapeWikiText( $text ); 03641 } elseif ( is_string( $text ) 03642 && !$piece['lineStart'] 03643 && preg_match( '/^(?:{\\||:|;|#|\*)/', $text ) 03644 ) { 03645 # Bug 529: if the template begins with a table or block-level 03646 # element, it should be treated as beginning a new line. 03647 # This behavior is somewhat controversial. 03648 $text = "\n" . $text; 03649 } 03650 03651 if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) { 03652 # Error, oversize inclusion 03653 if ( $titleText !== false ) { 03654 # Make a working, properly escaped link if possible (bug 23588) 03655 $text = "[[:$titleText]]"; 03656 } else { 03657 # This will probably not be a working link, but at least it may 03658 # provide some hint of where the problem is 03659 preg_replace( '/^:/', '', $originalTitle ); 03660 $text = "[[:$originalTitle]]"; 03661 } 03662 $text .= $this->insertStripItem( '<!-- WARNING: template omitted, ' 03663 . 'post-expand include size too large -->' ); 03664 $this->limitationWarn( 'post-expand-template-inclusion' ); 03665 } 03666 03667 if ( $isLocalObj ) { 03668 $ret = array( 'object' => $text ); 03669 } else { 03670 $ret = array( 'text' => $text ); 03671 } 03672 03673 wfProfileOut( __METHOD__ ); 03674 return $ret; 03675 } 03676 03696 public function callParserFunction( $frame, $function, array $args = array() ) { 03697 global $wgContLang; 03698 03699 wfProfileIn( __METHOD__ ); 03700 03701 # Case sensitive functions 03702 if ( isset( $this->mFunctionSynonyms[1][$function] ) ) { 03703 $function = $this->mFunctionSynonyms[1][$function]; 03704 } else { 03705 # Case insensitive functions 03706 $function = $wgContLang->lc( $function ); 03707 if ( isset( $this->mFunctionSynonyms[0][$function] ) ) { 03708 $function = $this->mFunctionSynonyms[0][$function]; 03709 } else { 03710 wfProfileOut( __METHOD__ ); 03711 return array( 'found' => false ); 03712 } 03713 } 03714 03715 wfProfileIn( __METHOD__ . '-pfunc-' . $function ); 03716 list( $callback, $flags ) = $this->mFunctionHooks[$function]; 03717 03718 # Workaround for PHP bug 35229 and similar 03719 if ( !is_callable( $callback ) ) { 03720 wfProfileOut( __METHOD__ . '-pfunc-' . $function ); 03721 wfProfileOut( __METHOD__ ); 03722 throw new MWException( "Tag hook for $function is not callable\n" ); 03723 } 03724 03725 $allArgs = array( &$this ); 03726 if ( $flags & SFH_OBJECT_ARGS ) { 03727 # Convert arguments to PPNodes and collect for appending to $allArgs 03728 $funcArgs = array(); 03729 foreach ( $args as $k => $v ) { 03730 if ( $v instanceof PPNode || $k === 0 ) { 03731 $funcArgs[] = $v; 03732 } else { 03733 $funcArgs[] = $this->mPreprocessor->newPartNodeArray( array( $k => $v ) )->item( 0 ); 03734 } 03735 } 03736 03737 # Add a frame parameter, and pass the arguments as an array 03738 $allArgs[] = $frame; 03739 $allArgs[] = $funcArgs; 03740 } else { 03741 # Convert arguments to plain text and append to $allArgs 03742 foreach ( $args as $k => $v ) { 03743 if ( $v instanceof PPNode ) { 03744 $allArgs[] = trim( $frame->expand( $v ) ); 03745 } elseif ( is_int( $k ) && $k >= 0 ) { 03746 $allArgs[] = trim( $v ); 03747 } else { 03748 $allArgs[] = trim( "$k=$v" ); 03749 } 03750 } 03751 } 03752 03753 $result = call_user_func_array( $callback, $allArgs ); 03754 03755 # The interface for function hooks allows them to return a wikitext 03756 # string or an array containing the string and any flags. This mungs 03757 # things around to match what this method should return. 03758 if ( !is_array( $result ) ) { 03759 $result = array( 03760 'found' => true, 03761 'text' => $result, 03762 ); 03763 } else { 03764 if ( isset( $result[0] ) && !isset( $result['text'] ) ) { 03765 $result['text'] = $result[0]; 03766 } 03767 unset( $result[0] ); 03768 $result += array( 03769 'found' => true, 03770 ); 03771 } 03772 03773 $noparse = true; 03774 $preprocessFlags = 0; 03775 if ( isset( $result['noparse'] ) ) { 03776 $noparse = $result['noparse']; 03777 } 03778 if ( isset( $result['preprocessFlags'] ) ) { 03779 $preprocessFlags = $result['preprocessFlags']; 03780 } 03781 03782 if ( !$noparse ) { 03783 $result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags ); 03784 $result['isChildObj'] = true; 03785 } 03786 wfProfileOut( __METHOD__ . '-pfunc-' . $function ); 03787 wfProfileOut( __METHOD__ ); 03788 03789 return $result; 03790 } 03791 03800 public function getTemplateDom( $title ) { 03801 $cacheTitle = $title; 03802 $titleText = $title->getPrefixedDBkey(); 03803 03804 if ( isset( $this->mTplRedirCache[$titleText] ) ) { 03805 list( $ns, $dbk ) = $this->mTplRedirCache[$titleText]; 03806 $title = Title::makeTitle( $ns, $dbk ); 03807 $titleText = $title->getPrefixedDBkey(); 03808 } 03809 if ( isset( $this->mTplDomCache[$titleText] ) ) { 03810 return array( $this->mTplDomCache[$titleText], $title ); 03811 } 03812 03813 # Cache miss, go to the database 03814 list( $text, $title ) = $this->fetchTemplateAndTitle( $title ); 03815 03816 if ( $text === false ) { 03817 $this->mTplDomCache[$titleText] = false; 03818 return array( false, $title ); 03819 } 03820 03821 $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION ); 03822 $this->mTplDomCache[$titleText] = $dom; 03823 03824 if ( !$title->equals( $cacheTitle ) ) { 03825 $this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] = 03826 array( $title->getNamespace(), $cdb = $title->getDBkey() ); 03827 } 03828 03829 return array( $dom, $title ); 03830 } 03831 03837 public function fetchTemplateAndTitle( $title ) { 03838 // Defaults to Parser::statelessFetchTemplate() 03839 $templateCb = $this->mOptions->getTemplateCallback(); 03840 $stuff = call_user_func( $templateCb, $title, $this ); 03841 $text = $stuff['text']; 03842 $finalTitle = isset( $stuff['finalTitle'] ) ? $stuff['finalTitle'] : $title; 03843 if ( isset( $stuff['deps'] ) ) { 03844 foreach ( $stuff['deps'] as $dep ) { 03845 $this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] ); 03846 if ( $dep['title']->equals( $this->getTitle() ) ) { 03847 // If we transclude ourselves, the final result 03848 // will change based on the new version of the page 03849 $this->mOutput->setFlag( 'vary-revision' ); 03850 } 03851 } 03852 } 03853 return array( $text, $finalTitle ); 03854 } 03855 03861 public function fetchTemplate( $title ) { 03862 $rv = $this->fetchTemplateAndTitle( $title ); 03863 return $rv[0]; 03864 } 03865 03875 public static function statelessFetchTemplate( $title, $parser = false ) { 03876 $text = $skip = false; 03877 $finalTitle = $title; 03878 $deps = array(); 03879 03880 # Loop to fetch the article, with up to 1 redirect 03881 for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) { 03882 # Give extensions a chance to select the revision instead 03883 $id = false; # Assume current 03884 wfRunHooks( 'BeforeParserFetchTemplateAndtitle', 03885 array( $parser, $title, &$skip, &$id ) ); 03886 03887 if ( $skip ) { 03888 $text = false; 03889 $deps[] = array( 03890 'title' => $title, 03891 'page_id' => $title->getArticleID(), 03892 'rev_id' => null 03893 ); 03894 break; 03895 } 03896 # Get the revision 03897 $rev = $id 03898 ? Revision::newFromId( $id ) 03899 : Revision::newFromTitle( $title, false, Revision::READ_NORMAL ); 03900 $rev_id = $rev ? $rev->getId() : 0; 03901 # If there is no current revision, there is no page 03902 if ( $id === false && !$rev ) { 03903 $linkCache = LinkCache::singleton(); 03904 $linkCache->addBadLinkObj( $title ); 03905 } 03906 03907 $deps[] = array( 03908 'title' => $title, 03909 'page_id' => $title->getArticleID(), 03910 'rev_id' => $rev_id ); 03911 if ( $rev && !$title->equals( $rev->getTitle() ) ) { 03912 # We fetched a rev from a different title; register it too... 03913 $deps[] = array( 03914 'title' => $rev->getTitle(), 03915 'page_id' => $rev->getPage(), 03916 'rev_id' => $rev_id ); 03917 } 03918 03919 if ( $rev ) { 03920 $content = $rev->getContent(); 03921 $text = $content ? $content->getWikitextForTransclusion() : null; 03922 03923 if ( $text === false || $text === null ) { 03924 $text = false; 03925 break; 03926 } 03927 } elseif ( $title->getNamespace() == NS_MEDIAWIKI ) { 03928 global $wgContLang; 03929 $message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage(); 03930 if ( !$message->exists() ) { 03931 $text = false; 03932 break; 03933 } 03934 $content = $message->content(); 03935 $text = $message->plain(); 03936 } else { 03937 break; 03938 } 03939 if ( !$content ) { 03940 break; 03941 } 03942 # Redirect? 03943 $finalTitle = $title; 03944 $title = $content->getRedirectTarget(); 03945 } 03946 return array( 03947 'text' => $text, 03948 'finalTitle' => $finalTitle, 03949 'deps' => $deps ); 03950 } 03951 03959 public function fetchFile( $title, $options = array() ) { 03960 $res = $this->fetchFileAndTitle( $title, $options ); 03961 return $res[0]; 03962 } 03963 03971 public function fetchFileAndTitle( $title, $options = array() ) { 03972 $file = $this->fetchFileNoRegister( $title, $options ); 03973 03974 $time = $file ? $file->getTimestamp() : false; 03975 $sha1 = $file ? $file->getSha1() : false; 03976 # Register the file as a dependency... 03977 $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 ); 03978 if ( $file && !$title->equals( $file->getTitle() ) ) { 03979 # Update fetched file title 03980 $title = $file->getTitle(); 03981 $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 ); 03982 } 03983 return array( $file, $title ); 03984 } 03985 03996 protected function fetchFileNoRegister( $title, $options = array() ) { 03997 if ( isset( $options['broken'] ) ) { 03998 $file = false; // broken thumbnail forced by hook 03999 } elseif ( isset( $options['sha1'] ) ) { // get by (sha1,timestamp) 04000 $file = RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options ); 04001 } else { // get by (name,timestamp) 04002 $file = wfFindFile( $title, $options ); 04003 } 04004 return $file; 04005 } 04006 04015 public function interwikiTransclude( $title, $action ) { 04016 global $wgEnableScaryTranscluding; 04017 04018 if ( !$wgEnableScaryTranscluding ) { 04019 return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text(); 04020 } 04021 04022 $url = $title->getFullURL( array( 'action' => $action ) ); 04023 04024 if ( strlen( $url ) > 255 ) { 04025 return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text(); 04026 } 04027 return $this->fetchScaryTemplateMaybeFromCache( $url ); 04028 } 04029 04034 public function fetchScaryTemplateMaybeFromCache( $url ) { 04035 global $wgTranscludeCacheExpiry; 04036 $dbr = wfGetDB( DB_SLAVE ); 04037 $tsCond = $dbr->timestamp( time() - $wgTranscludeCacheExpiry ); 04038 $obj = $dbr->selectRow( 'transcache', array( 'tc_time', 'tc_contents' ), 04039 array( 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $tsCond ) ) ); 04040 if ( $obj ) { 04041 return $obj->tc_contents; 04042 } 04043 04044 $req = MWHttpRequest::factory( $url ); 04045 $status = $req->execute(); // Status object 04046 if ( $status->isOK() ) { 04047 $text = $req->getContent(); 04048 } elseif ( $req->getStatus() != 200 ) { 04049 // Though we failed to fetch the content, this status is useless. 04050 return wfMessage( 'scarytranscludefailed-httpstatus' ) 04051 ->params( $url, $req->getStatus() /* HTTP status */ )->inContentLanguage()->text(); 04052 } else { 04053 return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text(); 04054 } 04055 04056 $dbw = wfGetDB( DB_MASTER ); 04057 $dbw->replace( 'transcache', array( 'tc_url' ), array( 04058 'tc_url' => $url, 04059 'tc_time' => $dbw->timestamp( time() ), 04060 'tc_contents' => $text 04061 ) ); 04062 return $text; 04063 } 04064 04074 public function argSubstitution( $piece, $frame ) { 04075 wfProfileIn( __METHOD__ ); 04076 04077 $error = false; 04078 $parts = $piece['parts']; 04079 $nameWithSpaces = $frame->expand( $piece['title'] ); 04080 $argName = trim( $nameWithSpaces ); 04081 $object = false; 04082 $text = $frame->getArgument( $argName ); 04083 if ( $text === false && $parts->getLength() > 0 04084 && ( $this->ot['html'] 04085 || $this->ot['pre'] 04086 || ( $this->ot['wiki'] && $frame->isTemplate() ) 04087 ) 04088 ) { 04089 # No match in frame, use the supplied default 04090 $object = $parts->item( 0 )->getChildren(); 04091 } 04092 if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) { 04093 $error = '<!-- WARNING: argument omitted, expansion size too large -->'; 04094 $this->limitationWarn( 'post-expand-template-argument' ); 04095 } 04096 04097 if ( $text === false && $object === false ) { 04098 # No match anywhere 04099 $object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts ); 04100 } 04101 if ( $error !== false ) { 04102 $text .= $error; 04103 } 04104 if ( $object !== false ) { 04105 $ret = array( 'object' => $object ); 04106 } else { 04107 $ret = array( 'text' => $text ); 04108 } 04109 04110 wfProfileOut( __METHOD__ ); 04111 return $ret; 04112 } 04113 04129 public function extensionSubstitution( $params, $frame ) { 04130 $name = $frame->expand( $params['name'] ); 04131 $attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] ); 04132 $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] ); 04133 $marker = "{$this->mUniqPrefix}-$name-" 04134 . sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX; 04135 04136 $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) && 04137 ( $this->ot['html'] || $this->ot['pre'] ); 04138 if ( $isFunctionTag ) { 04139 $markerType = 'none'; 04140 } else { 04141 $markerType = 'general'; 04142 } 04143 if ( $this->ot['html'] || $isFunctionTag ) { 04144 $name = strtolower( $name ); 04145 $attributes = Sanitizer::decodeTagAttributes( $attrText ); 04146 if ( isset( $params['attributes'] ) ) { 04147 $attributes = $attributes + $params['attributes']; 04148 } 04149 04150 if ( isset( $this->mTagHooks[$name] ) ) { 04151 # Workaround for PHP bug 35229 and similar 04152 if ( !is_callable( $this->mTagHooks[$name] ) ) { 04153 throw new MWException( "Tag hook for $name is not callable\n" ); 04154 } 04155 $output = call_user_func_array( $this->mTagHooks[$name], 04156 array( $content, $attributes, $this, $frame ) ); 04157 } elseif ( isset( $this->mFunctionTagHooks[$name] ) ) { 04158 list( $callback, ) = $this->mFunctionTagHooks[$name]; 04159 if ( !is_callable( $callback ) ) { 04160 throw new MWException( "Tag hook for $name is not callable\n" ); 04161 } 04162 04163 $output = call_user_func_array( $callback, array( &$this, $frame, $content, $attributes ) ); 04164 } else { 04165 $output = '<span class="error">Invalid tag extension name: ' . 04166 htmlspecialchars( $name ) . '</span>'; 04167 } 04168 04169 if ( is_array( $output ) ) { 04170 # Extract flags to local scope (to override $markerType) 04171 $flags = $output; 04172 $output = $flags[0]; 04173 unset( $flags[0] ); 04174 extract( $flags ); 04175 } 04176 } else { 04177 if ( is_null( $attrText ) ) { 04178 $attrText = ''; 04179 } 04180 if ( isset( $params['attributes'] ) ) { 04181 foreach ( $params['attributes'] as $attrName => $attrValue ) { 04182 $attrText .= ' ' . htmlspecialchars( $attrName ) . '="' . 04183 htmlspecialchars( $attrValue ) . '"'; 04184 } 04185 } 04186 if ( $content === null ) { 04187 $output = "<$name$attrText/>"; 04188 } else { 04189 $close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] ); 04190 $output = "<$name$attrText>$content$close"; 04191 } 04192 } 04193 04194 if ( $markerType === 'none' ) { 04195 return $output; 04196 } elseif ( $markerType === 'nowiki' ) { 04197 $this->mStripState->addNoWiki( $marker, $output ); 04198 } elseif ( $markerType === 'general' ) { 04199 $this->mStripState->addGeneral( $marker, $output ); 04200 } else { 04201 throw new MWException( __METHOD__ . ': invalid marker type' ); 04202 } 04203 return $marker; 04204 } 04205 04213 public function incrementIncludeSize( $type, $size ) { 04214 if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize() ) { 04215 return false; 04216 } else { 04217 $this->mIncludeSizes[$type] += $size; 04218 return true; 04219 } 04220 } 04221 04227 public function incrementExpensiveFunctionCount() { 04228 $this->mExpensiveFunctionCount++; 04229 return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit(); 04230 } 04231 04240 public function doDoubleUnderscore( $text ) { 04241 wfProfileIn( __METHOD__ ); 04242 04243 # The position of __TOC__ needs to be recorded 04244 $mw = MagicWord::get( 'toc' ); 04245 if ( $mw->match( $text ) ) { 04246 $this->mShowToc = true; 04247 $this->mForceTocPosition = true; 04248 04249 # Set a placeholder. At the end we'll fill it in with the TOC. 04250 $text = $mw->replace( '<!--MWTOC-->', $text, 1 ); 04251 04252 # Only keep the first one. 04253 $text = $mw->replace( '', $text ); 04254 } 04255 04256 # Now match and remove the rest of them 04257 $mwa = MagicWord::getDoubleUnderscoreArray(); 04258 $this->mDoubleUnderscores = $mwa->matchAndRemove( $text ); 04259 04260 if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) { 04261 $this->mOutput->mNoGallery = true; 04262 } 04263 if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) { 04264 $this->mShowToc = false; 04265 } 04266 if ( isset( $this->mDoubleUnderscores['hiddencat'] ) 04267 && $this->mTitle->getNamespace() == NS_CATEGORY 04268 ) { 04269 $this->addTrackingCategory( 'hidden-category-category' ); 04270 } 04271 # (bug 8068) Allow control over whether robots index a page. 04272 # 04273 # @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! This 04274 # is not desirable, the last one on the page should win. 04275 if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) { 04276 $this->mOutput->setIndexPolicy( 'noindex' ); 04277 $this->addTrackingCategory( 'noindex-category' ); 04278 } 04279 if ( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ) { 04280 $this->mOutput->setIndexPolicy( 'index' ); 04281 $this->addTrackingCategory( 'index-category' ); 04282 } 04283 04284 # Cache all double underscores in the database 04285 foreach ( $this->mDoubleUnderscores as $key => $val ) { 04286 $this->mOutput->setProperty( $key, '' ); 04287 } 04288 04289 wfProfileOut( __METHOD__ ); 04290 return $text; 04291 } 04292 04304 public function addTrackingCategory( $msg ) { 04305 if ( $this->mTitle->getNamespace() === NS_SPECIAL ) { 04306 wfDebug( __METHOD__ . ": Not adding tracking category $msg to special page!\n" ); 04307 return false; 04308 } 04309 // Important to parse with correct title (bug 31469) 04310 $cat = wfMessage( $msg ) 04311 ->title( $this->getTitle() ) 04312 ->inContentLanguage() 04313 ->text(); 04314 04315 # Allow tracking categories to be disabled by setting them to "-" 04316 if ( $cat === '-' ) { 04317 return false; 04318 } 04319 04320 $containerCategory = Title::makeTitleSafe( NS_CATEGORY, $cat ); 04321 if ( $containerCategory ) { 04322 $this->mOutput->addCategory( $containerCategory->getDBkey(), $this->getDefaultSort() ); 04323 return true; 04324 } else { 04325 wfDebug( __METHOD__ . ": [[MediaWiki:$msg]] is not a valid title!\n" ); 04326 return false; 04327 } 04328 } 04329 04346 public function formatHeadings( $text, $origText, $isMain = true ) { 04347 global $wgMaxTocLevel, $wgExperimentalHtmlIds; 04348 04349 # Inhibit editsection links if requested in the page 04350 if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) { 04351 $maybeShowEditLink = $showEditLink = false; 04352 } else { 04353 $maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */ 04354 $showEditLink = $this->mOptions->getEditSection(); 04355 } 04356 if ( $showEditLink ) { 04357 $this->mOutput->setEditSectionTokens( true ); 04358 } 04359 04360 # Get all headlines for numbering them and adding funky stuff like [edit] 04361 # links - this is for later, but we need the number of headlines right now 04362 $matches = array(); 04363 $numMatches = preg_match_all( 04364 '/<H(?P<level>[1-6])(?P<attrib>.*?' . '>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i', 04365 $text, 04366 $matches 04367 ); 04368 04369 # if there are fewer than 4 headlines in the article, do not show TOC 04370 # unless it's been explicitly enabled. 04371 $enoughToc = $this->mShowToc && 04372 ( ( $numMatches >= 4 ) || $this->mForceTocPosition ); 04373 04374 # Allow user to stipulate that a page should have a "new section" 04375 # link added via __NEWSECTIONLINK__ 04376 if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) { 04377 $this->mOutput->setNewSection( true ); 04378 } 04379 04380 # Allow user to remove the "new section" 04381 # link via __NONEWSECTIONLINK__ 04382 if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) { 04383 $this->mOutput->hideNewSection( true ); 04384 } 04385 04386 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML, 04387 # override above conditions and always show TOC above first header 04388 if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) { 04389 $this->mShowToc = true; 04390 $enoughToc = true; 04391 } 04392 04393 # headline counter 04394 $headlineCount = 0; 04395 $numVisible = 0; 04396 04397 # Ugh .. the TOC should have neat indentation levels which can be 04398 # passed to the skin functions. These are determined here 04399 $toc = ''; 04400 $full = ''; 04401 $head = array(); 04402 $sublevelCount = array(); 04403 $levelCount = array(); 04404 $level = 0; 04405 $prevlevel = 0; 04406 $toclevel = 0; 04407 $prevtoclevel = 0; 04408 $markerRegex = "{$this->mUniqPrefix}-h-(\d+)-" . self::MARKER_SUFFIX; 04409 $baseTitleText = $this->mTitle->getPrefixedDBkey(); 04410 $oldType = $this->mOutputType; 04411 $this->setOutputType( self::OT_WIKI ); 04412 $frame = $this->getPreprocessor()->newFrame(); 04413 $root = $this->preprocessToDom( $origText ); 04414 $node = $root->getFirstChild(); 04415 $byteOffset = 0; 04416 $tocraw = array(); 04417 $refers = array(); 04418 04419 foreach ( $matches[3] as $headline ) { 04420 $isTemplate = false; 04421 $titleText = false; 04422 $sectionIndex = false; 04423 $numbering = ''; 04424 $markerMatches = array(); 04425 if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) { 04426 $serial = $markerMatches[1]; 04427 list( $titleText, $sectionIndex ) = $this->mHeadings[$serial]; 04428 $isTemplate = ( $titleText != $baseTitleText ); 04429 $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline ); 04430 } 04431 04432 if ( $toclevel ) { 04433 $prevlevel = $level; 04434 } 04435 $level = $matches[1][$headlineCount]; 04436 04437 if ( $level > $prevlevel ) { 04438 # Increase TOC level 04439 $toclevel++; 04440 $sublevelCount[$toclevel] = 0; 04441 if ( $toclevel < $wgMaxTocLevel ) { 04442 $prevtoclevel = $toclevel; 04443 $toc .= Linker::tocIndent(); 04444 $numVisible++; 04445 } 04446 } elseif ( $level < $prevlevel && $toclevel > 1 ) { 04447 # Decrease TOC level, find level to jump to 04448 04449 for ( $i = $toclevel; $i > 0; $i-- ) { 04450 if ( $levelCount[$i] == $level ) { 04451 # Found last matching level 04452 $toclevel = $i; 04453 break; 04454 } elseif ( $levelCount[$i] < $level ) { 04455 # Found first matching level below current level 04456 $toclevel = $i + 1; 04457 break; 04458 } 04459 } 04460 if ( $i == 0 ) { 04461 $toclevel = 1; 04462 } 04463 if ( $toclevel < $wgMaxTocLevel ) { 04464 if ( $prevtoclevel < $wgMaxTocLevel ) { 04465 # Unindent only if the previous toc level was shown :p 04466 $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel ); 04467 $prevtoclevel = $toclevel; 04468 } else { 04469 $toc .= Linker::tocLineEnd(); 04470 } 04471 } 04472 } else { 04473 # No change in level, end TOC line 04474 if ( $toclevel < $wgMaxTocLevel ) { 04475 $toc .= Linker::tocLineEnd(); 04476 } 04477 } 04478 04479 $levelCount[$toclevel] = $level; 04480 04481 # count number of headlines for each level 04482 $sublevelCount[$toclevel]++; 04483 $dot = 0; 04484 for ( $i = 1; $i <= $toclevel; $i++ ) { 04485 if ( !empty( $sublevelCount[$i] ) ) { 04486 if ( $dot ) { 04487 $numbering .= '.'; 04488 } 04489 $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] ); 04490 $dot = 1; 04491 } 04492 } 04493 04494 # The safe header is a version of the header text safe to use for links 04495 04496 # Remove link placeholders by the link text. 04497 # <!--LINK number--> 04498 # turns into 04499 # link text with suffix 04500 # Do this before unstrip since link text can contain strip markers 04501 $safeHeadline = $this->replaceLinkHoldersText( $headline ); 04502 04503 # Avoid insertion of weird stuff like <math> by expanding the relevant sections 04504 $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline ); 04505 04506 # Strip out HTML (first regex removes any tag not allowed) 04507 # Allowed tags are: 04508 # * <sup> and <sub> (bug 8393) 04509 # * <i> (bug 26375) 04510 # * <b> (r105284) 04511 # * <span dir="rtl"> and <span dir="ltr"> (bug 35167) 04512 # 04513 # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>, 04514 # to allow setting directionality in toc items. 04515 $tocline = preg_replace( 04516 array( 04517 '#<(?!/?(span|sup|sub|i|b)(?: [^>]*)?>).*?' . '>#', 04518 '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|i|b))(?: .*?)?' . '>#' 04519 ), 04520 array( '', '<$1>' ), 04521 $safeHeadline 04522 ); 04523 $tocline = trim( $tocline ); 04524 04525 # For the anchor, strip out HTML-y stuff period 04526 $safeHeadline = preg_replace( '/<.*?' . '>/', '', $safeHeadline ); 04527 $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline ); 04528 04529 # Save headline for section edit hint before it's escaped 04530 $headlineHint = $safeHeadline; 04531 04532 if ( $wgExperimentalHtmlIds ) { 04533 # For reverse compatibility, provide an id that's 04534 # HTML4-compatible, like we used to. 04535 # 04536 # It may be worth noting, academically, that it's possible for 04537 # the legacy anchor to conflict with a non-legacy headline 04538 # anchor on the page. In this case likely the "correct" thing 04539 # would be to either drop the legacy anchors or make sure 04540 # they're numbered first. However, this would require people 04541 # to type in section names like "abc_.D7.93.D7.90.D7.A4" 04542 # manually, so let's not bother worrying about it. 04543 $legacyHeadline = Sanitizer::escapeId( $safeHeadline, 04544 array( 'noninitial', 'legacy' ) ); 04545 $safeHeadline = Sanitizer::escapeId( $safeHeadline ); 04546 04547 if ( $legacyHeadline == $safeHeadline ) { 04548 # No reason to have both (in fact, we can't) 04549 $legacyHeadline = false; 04550 } 04551 } else { 04552 $legacyHeadline = false; 04553 $safeHeadline = Sanitizer::escapeId( $safeHeadline, 04554 'noninitial' ); 04555 } 04556 04557 # HTML names must be case-insensitively unique (bug 10721). 04558 # This does not apply to Unicode characters per 04559 # http://dev.w3.org/html5/spec/infrastructure.html#case-sensitivity-and-string-comparison 04560 # @todo FIXME: We may be changing them depending on the current locale. 04561 $arrayKey = strtolower( $safeHeadline ); 04562 if ( $legacyHeadline === false ) { 04563 $legacyArrayKey = false; 04564 } else { 04565 $legacyArrayKey = strtolower( $legacyHeadline ); 04566 } 04567 04568 # count how many in assoc. array so we can track dupes in anchors 04569 if ( isset( $refers[$arrayKey] ) ) { 04570 $refers[$arrayKey]++; 04571 } else { 04572 $refers[$arrayKey] = 1; 04573 } 04574 if ( isset( $refers[$legacyArrayKey] ) ) { 04575 $refers[$legacyArrayKey]++; 04576 } else { 04577 $refers[$legacyArrayKey] = 1; 04578 } 04579 04580 # Don't number the heading if it is the only one (looks silly) 04581 if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) { 04582 # the two are different if the line contains a link 04583 $headline = Html::element( 04584 'span', 04585 array( 'class' => 'mw-headline-number' ), 04586 $numbering 04587 ) . ' ' . $headline; 04588 } 04589 04590 # Create the anchor for linking from the TOC to the section 04591 $anchor = $safeHeadline; 04592 $legacyAnchor = $legacyHeadline; 04593 if ( $refers[$arrayKey] > 1 ) { 04594 $anchor .= '_' . $refers[$arrayKey]; 04595 } 04596 if ( $legacyHeadline !== false && $refers[$legacyArrayKey] > 1 ) { 04597 $legacyAnchor .= '_' . $refers[$legacyArrayKey]; 04598 } 04599 if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) { 04600 $toc .= Linker::tocLine( $anchor, $tocline, 04601 $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) ); 04602 } 04603 04604 # Add the section to the section tree 04605 # Find the DOM node for this header 04606 $noOffset = ( $isTemplate || $sectionIndex === false ); 04607 while ( $node && !$noOffset ) { 04608 if ( $node->getName() === 'h' ) { 04609 $bits = $node->splitHeading(); 04610 if ( $bits['i'] == $sectionIndex ) { 04611 break; 04612 } 04613 } 04614 $byteOffset += mb_strlen( $this->mStripState->unstripBoth( 04615 $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) ); 04616 $node = $node->getNextSibling(); 04617 } 04618 $tocraw[] = array( 04619 'toclevel' => $toclevel, 04620 'level' => $level, 04621 'line' => $tocline, 04622 'number' => $numbering, 04623 'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex, 04624 'fromtitle' => $titleText, 04625 'byteoffset' => ( $noOffset ? null : $byteOffset ), 04626 'anchor' => $anchor, 04627 ); 04628 04629 # give headline the correct <h#> tag 04630 if ( $maybeShowEditLink && $sectionIndex !== false ) { 04631 // Output edit section links as markers with styles that can be customized by skins 04632 if ( $isTemplate ) { 04633 # Put a T flag in the section identifier, to indicate to extractSections() 04634 # that sections inside <includeonly> should be counted. 04635 $editsectionPage = $titleText; 04636 $editsectionSection = "T-$sectionIndex"; 04637 $editsectionContent = null; 04638 } else { 04639 $editsectionPage = $this->mTitle->getPrefixedText(); 04640 $editsectionSection = $sectionIndex; 04641 $editsectionContent = $headlineHint; 04642 } 04643 // We use a bit of pesudo-xml for editsection markers. The 04644 // language converter is run later on. Using a UNIQ style marker 04645 // leads to the converter screwing up the tokens when it 04646 // converts stuff. And trying to insert strip tags fails too. At 04647 // this point all real inputted tags have already been escaped, 04648 // so we don't have to worry about a user trying to input one of 04649 // these markers directly. We use a page and section attribute 04650 // to stop the language converter from converting these 04651 // important bits of data, but put the headline hint inside a 04652 // content block because the language converter is supposed to 04653 // be able to convert that piece of data. 04654 // Gets replaced with html in ParserOutput::getText 04655 $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage ); 04656 $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"'; 04657 if ( $editsectionContent !== null ) { 04658 $editlink .= '>' . $editsectionContent . '</mw:editsection>'; 04659 } else { 04660 $editlink .= '/>'; 04661 } 04662 } else { 04663 $editlink = ''; 04664 } 04665 $head[$headlineCount] = Linker::makeHeadline( $level, 04666 $matches['attrib'][$headlineCount], $anchor, $headline, 04667 $editlink, $legacyAnchor ); 04668 04669 $headlineCount++; 04670 } 04671 04672 $this->setOutputType( $oldType ); 04673 04674 # Never ever show TOC if no headers 04675 if ( $numVisible < 1 ) { 04676 $enoughToc = false; 04677 } 04678 04679 if ( $enoughToc ) { 04680 if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) { 04681 $toc .= Linker::tocUnindent( $prevtoclevel - 1 ); 04682 } 04683 $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() ); 04684 $this->mOutput->setTOCHTML( $toc ); 04685 $toc = self::TOC_START . $toc . self::TOC_END; 04686 $this->mOutput->addModules( 'mediawiki.toc' ); 04687 } 04688 04689 if ( $isMain ) { 04690 $this->mOutput->setSections( $tocraw ); 04691 } 04692 04693 # split up and insert constructed headlines 04694 $blocks = preg_split( '/<H[1-6].*?' . '>[\s\S]*?<\/H[1-6]>/i', $text ); 04695 $i = 0; 04696 04697 // build an array of document sections 04698 $sections = array(); 04699 foreach ( $blocks as $block ) { 04700 // $head is zero-based, sections aren't. 04701 if ( empty( $head[$i - 1] ) ) { 04702 $sections[$i] = $block; 04703 } else { 04704 $sections[$i] = $head[$i - 1] . $block; 04705 } 04706 04717 wfRunHooks( 'ParserSectionCreate', array( $this, $i, &$sections[$i], $showEditLink ) ); 04718 04719 $i++; 04720 } 04721 04722 if ( $enoughToc && $isMain && !$this->mForceTocPosition ) { 04723 // append the TOC at the beginning 04724 // Top anchor now in skin 04725 $sections[0] = $sections[0] . $toc . "\n"; 04726 } 04727 04728 $full .= join( '', $sections ); 04729 04730 if ( $this->mForceTocPosition ) { 04731 return str_replace( '<!--MWTOC-->', $toc, $full ); 04732 } else { 04733 return $full; 04734 } 04735 } 04736 04748 public function preSaveTransform( $text, Title $title, User $user, 04749 ParserOptions $options, $clearState = true 04750 ) { 04751 if ( $clearState ) { 04752 $magicScopeVariable = $this->lock(); 04753 } 04754 $this->startParse( $title, $options, self::OT_WIKI, $clearState ); 04755 $this->setUser( $user ); 04756 04757 $pairs = array( 04758 "\r\n" => "\n", 04759 ); 04760 $text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text ); 04761 if ( $options->getPreSaveTransform() ) { 04762 $text = $this->pstPass2( $text, $user ); 04763 } 04764 $text = $this->mStripState->unstripBoth( $text ); 04765 04766 $this->setUser( null ); #Reset 04767 04768 return $text; 04769 } 04770 04779 private function pstPass2( $text, $user ) { 04780 global $wgContLang; 04781 04782 # Note: This is the timestamp saved as hardcoded wikitext to 04783 # the database, we use $wgContLang here in order to give 04784 # everyone the same signature and use the default one rather 04785 # than the one selected in each user's preferences. 04786 # (see also bug 12815) 04787 $ts = $this->mOptions->getTimestamp(); 04788 $timestamp = MWTimestamp::getLocalInstance( $ts ); 04789 $ts = $timestamp->format( 'YmdHis' ); 04790 $tzMsg = $timestamp->format( 'T' ); # might vary on DST changeover! 04791 04792 # Allow translation of timezones through wiki. format() can return 04793 # whatever crap the system uses, localised or not, so we cannot 04794 # ship premade translations. 04795 $key = 'timezone-' . strtolower( trim( $tzMsg ) ); 04796 $msg = wfMessage( $key )->inContentLanguage(); 04797 if ( $msg->exists() ) { 04798 $tzMsg = $msg->text(); 04799 } 04800 04801 $d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)"; 04802 04803 # Variable replacement 04804 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags 04805 $text = $this->replaceVariables( $text ); 04806 04807 # This works almost by chance, as the replaceVariables are done before the getUserSig(), 04808 # which may corrupt this parser instance via its wfMessage()->text() call- 04809 04810 # Signatures 04811 $sigText = $this->getUserSig( $user ); 04812 $text = strtr( $text, array( 04813 '~~~~~' => $d, 04814 '~~~~' => "$sigText $d", 04815 '~~~' => $sigText 04816 ) ); 04817 04818 # Context links ("pipe tricks"): [[|name]] and [[name (context)|]] 04819 $tc = '[' . Title::legalChars() . ']'; 04820 $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii! 04821 04822 // [[ns:page (context)|]] 04823 $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/"; 04824 // [[ns:page(context)|]] (double-width brackets, added in r40257) 04825 $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?($tc+))\\|]]/"; 04826 // [[ns:page (context), context|]] (using either single or double-width comma) 04827 $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |,)$tc+|)\\|]]/"; 04828 // [[|page]] (reverse pipe trick: add context from page title) 04829 $p2 = "/\[\[\\|($tc+)]]/"; 04830 04831 # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]" 04832 $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text ); 04833 $text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text ); 04834 $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text ); 04835 04836 $t = $this->mTitle->getText(); 04837 $m = array(); 04838 if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) { 04839 $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text ); 04840 } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) { 04841 $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text ); 04842 } else { 04843 # if there's no context, don't bother duplicating the title 04844 $text = preg_replace( $p2, '[[\\1]]', $text ); 04845 } 04846 04847 # Trim trailing whitespace 04848 $text = rtrim( $text ); 04849 04850 return $text; 04851 } 04852 04867 public function getUserSig( &$user, $nickname = false, $fancySig = null ) { 04868 global $wgMaxSigChars; 04869 04870 $username = $user->getName(); 04871 04872 # If not given, retrieve from the user object. 04873 if ( $nickname === false ) { 04874 $nickname = $user->getOption( 'nickname' ); 04875 } 04876 04877 if ( is_null( $fancySig ) ) { 04878 $fancySig = $user->getBoolOption( 'fancysig' ); 04879 } 04880 04881 $nickname = $nickname == null ? $username : $nickname; 04882 04883 if ( mb_strlen( $nickname ) > $wgMaxSigChars ) { 04884 $nickname = $username; 04885 wfDebug( __METHOD__ . ": $username has overlong signature.\n" ); 04886 } elseif ( $fancySig !== false ) { 04887 # Sig. might contain markup; validate this 04888 if ( $this->validateSig( $nickname ) !== false ) { 04889 # Validated; clean up (if needed) and return it 04890 return $this->cleanSig( $nickname, true ); 04891 } else { 04892 # Failed to validate; fall back to the default 04893 $nickname = $username; 04894 wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" ); 04895 } 04896 } 04897 04898 # Make sure nickname doesnt get a sig in a sig 04899 $nickname = self::cleanSigInSig( $nickname ); 04900 04901 # If we're still here, make it a link to the user page 04902 $userText = wfEscapeWikiText( $username ); 04903 $nickText = wfEscapeWikiText( $nickname ); 04904 $msgName = $user->isAnon() ? 'signature-anon' : 'signature'; 04905 04906 return wfMessage( $msgName, $userText, $nickText )->inContentLanguage() 04907 ->title( $this->getTitle() )->text(); 04908 } 04909 04916 public function validateSig( $text ) { 04917 return Xml::isWellFormedXmlFragment( $text ) ? $text : false; 04918 } 04919 04930 public function cleanSig( $text, $parsing = false ) { 04931 if ( !$parsing ) { 04932 global $wgTitle; 04933 $magicScopeVariable = $this->lock(); 04934 $this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true ); 04935 } 04936 04937 # Option to disable this feature 04938 if ( !$this->mOptions->getCleanSignatures() ) { 04939 return $text; 04940 } 04941 04942 # @todo FIXME: Regex doesn't respect extension tags or nowiki 04943 # => Move this logic to braceSubstitution() 04944 $substWord = MagicWord::get( 'subst' ); 04945 $substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase(); 04946 $substText = '{{' . $substWord->getSynonym( 0 ); 04947 04948 $text = preg_replace( $substRegex, $substText, $text ); 04949 $text = self::cleanSigInSig( $text ); 04950 $dom = $this->preprocessToDom( $text ); 04951 $frame = $this->getPreprocessor()->newFrame(); 04952 $text = $frame->expand( $dom ); 04953 04954 if ( !$parsing ) { 04955 $text = $this->mStripState->unstripBoth( $text ); 04956 } 04957 04958 return $text; 04959 } 04960 04967 public static function cleanSigInSig( $text ) { 04968 $text = preg_replace( '/~{3,5}/', '', $text ); 04969 return $text; 04970 } 04971 04981 public function startExternalParse( Title $title = null, ParserOptions $options, 04982 $outputType, $clearState = true 04983 ) { 04984 $this->startParse( $title, $options, $outputType, $clearState ); 04985 } 04986 04993 private function startParse( Title $title = null, ParserOptions $options, 04994 $outputType, $clearState = true 04995 ) { 04996 $this->setTitle( $title ); 04997 $this->mOptions = $options; 04998 $this->setOutputType( $outputType ); 04999 if ( $clearState ) { 05000 $this->clearState(); 05001 } 05002 } 05003 05012 public function transformMsg( $text, $options, $title = null ) { 05013 static $executing = false; 05014 05015 # Guard against infinite recursion 05016 if ( $executing ) { 05017 return $text; 05018 } 05019 $executing = true; 05020 05021 wfProfileIn( __METHOD__ ); 05022 if ( !$title ) { 05023 global $wgTitle; 05024 $title = $wgTitle; 05025 } 05026 05027 $text = $this->preprocess( $text, $title, $options ); 05028 05029 $executing = false; 05030 wfProfileOut( __METHOD__ ); 05031 return $text; 05032 } 05033 05058 public function setHook( $tag, $callback ) { 05059 $tag = strtolower( $tag ); 05060 if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) { 05061 throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" ); 05062 } 05063 $oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null; 05064 $this->mTagHooks[$tag] = $callback; 05065 if ( !in_array( $tag, $this->mStripList ) ) { 05066 $this->mStripList[] = $tag; 05067 } 05068 05069 return $oldVal; 05070 } 05071 05089 public function setTransparentTagHook( $tag, $callback ) { 05090 $tag = strtolower( $tag ); 05091 if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) { 05092 throw new MWException( "Invalid character {$m[0]} in setTransparentHook('$tag', ...) call" ); 05093 } 05094 $oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null; 05095 $this->mTransparentTagHooks[$tag] = $callback; 05096 05097 return $oldVal; 05098 } 05099 05103 public function clearTagHooks() { 05104 $this->mTagHooks = array(); 05105 $this->mFunctionTagHooks = array(); 05106 $this->mStripList = $this->mDefaultStripList; 05107 } 05108 05152 public function setFunctionHook( $id, $callback, $flags = 0 ) { 05153 global $wgContLang; 05154 05155 $oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null; 05156 $this->mFunctionHooks[$id] = array( $callback, $flags ); 05157 05158 # Add to function cache 05159 $mw = MagicWord::get( $id ); 05160 if ( !$mw ) { 05161 throw new MWException( __METHOD__ . '() expecting a magic word identifier.' ); 05162 } 05163 05164 $synonyms = $mw->getSynonyms(); 05165 $sensitive = intval( $mw->isCaseSensitive() ); 05166 05167 foreach ( $synonyms as $syn ) { 05168 # Case 05169 if ( !$sensitive ) { 05170 $syn = $wgContLang->lc( $syn ); 05171 } 05172 # Add leading hash 05173 if ( !( $flags & SFH_NO_HASH ) ) { 05174 $syn = '#' . $syn; 05175 } 05176 # Remove trailing colon 05177 if ( substr( $syn, -1, 1 ) === ':' ) { 05178 $syn = substr( $syn, 0, -1 ); 05179 } 05180 $this->mFunctionSynonyms[$sensitive][$syn] = $id; 05181 } 05182 return $oldVal; 05183 } 05184 05190 public function getFunctionHooks() { 05191 return array_keys( $this->mFunctionHooks ); 05192 } 05193 05204 public function setFunctionTagHook( $tag, $callback, $flags ) { 05205 $tag = strtolower( $tag ); 05206 if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) { 05207 throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" ); 05208 } 05209 $old = isset( $this->mFunctionTagHooks[$tag] ) ? 05210 $this->mFunctionTagHooks[$tag] : null; 05211 $this->mFunctionTagHooks[$tag] = array( $callback, $flags ); 05212 05213 if ( !in_array( $tag, $this->mStripList ) ) { 05214 $this->mStripList[] = $tag; 05215 } 05216 05217 return $old; 05218 } 05219 05230 public function replaceLinkHolders( &$text, $options = 0 ) { 05231 return $this->mLinkHolders->replace( $text ); 05232 } 05233 05241 public function replaceLinkHoldersText( $text ) { 05242 return $this->mLinkHolders->replaceText( $text ); 05243 } 05244 05258 public function renderImageGallery( $text, $params ) { 05259 wfProfileIn( __METHOD__ ); 05260 05261 $mode = false; 05262 if ( isset( $params['mode'] ) ) { 05263 $mode = $params['mode']; 05264 } 05265 05266 try { 05267 $ig = ImageGalleryBase::factory( $mode ); 05268 } catch ( MWException $e ) { 05269 // If invalid type set, fallback to default. 05270 $ig = ImageGalleryBase::factory( false ); 05271 } 05272 05273 $ig->setContextTitle( $this->mTitle ); 05274 $ig->setShowBytes( false ); 05275 $ig->setShowFilename( false ); 05276 $ig->setParser( $this ); 05277 $ig->setHideBadImages(); 05278 $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) ); 05279 05280 if ( isset( $params['showfilename'] ) ) { 05281 $ig->setShowFilename( true ); 05282 } else { 05283 $ig->setShowFilename( false ); 05284 } 05285 if ( isset( $params['caption'] ) ) { 05286 $caption = $params['caption']; 05287 $caption = htmlspecialchars( $caption ); 05288 $caption = $this->replaceInternalLinks( $caption ); 05289 $ig->setCaptionHtml( $caption ); 05290 } 05291 if ( isset( $params['perrow'] ) ) { 05292 $ig->setPerRow( $params['perrow'] ); 05293 } 05294 if ( isset( $params['widths'] ) ) { 05295 $ig->setWidths( $params['widths'] ); 05296 } 05297 if ( isset( $params['heights'] ) ) { 05298 $ig->setHeights( $params['heights'] ); 05299 } 05300 $ig->setAdditionalOptions( $params ); 05301 05302 wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) ); 05303 05304 $lines = StringUtils::explode( "\n", $text ); 05305 foreach ( $lines as $line ) { 05306 # match lines like these: 05307 # Image:someimage.jpg|This is some image 05308 $matches = array(); 05309 preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches ); 05310 # Skip empty lines 05311 if ( count( $matches ) == 0 ) { 05312 continue; 05313 } 05314 05315 if ( strpos( $matches[0], '%' ) !== false ) { 05316 $matches[1] = rawurldecode( $matches[1] ); 05317 } 05318 $title = Title::newFromText( $matches[1], NS_FILE ); 05319 if ( is_null( $title ) ) { 05320 # Bogus title. Ignore these so we don't bomb out later. 05321 continue; 05322 } 05323 05324 # We need to get what handler the file uses, to figure out parameters. 05325 # Note, a hook can overide the file name, and chose an entirely different 05326 # file (which potentially could be of a different type and have different handler). 05327 $options = array(); 05328 $descQuery = false; 05329 wfRunHooks( 'BeforeParserFetchFileAndTitle', 05330 array( $this, $title, &$options, &$descQuery ) ); 05331 # Don't register it now, as ImageGallery does that later. 05332 $file = $this->fetchFileNoRegister( $title, $options ); 05333 $handler = $file ? $file->getHandler() : false; 05334 05335 wfProfileIn( __METHOD__ . '-getMagicWord' ); 05336 $paramMap = array( 05337 'img_alt' => 'gallery-internal-alt', 05338 'img_link' => 'gallery-internal-link', 05339 ); 05340 if ( $handler ) { 05341 $paramMap = $paramMap + $handler->getParamMap(); 05342 // We don't want people to specify per-image widths. 05343 // Additionally the width parameter would need special casing anyhow. 05344 unset( $paramMap['img_width'] ); 05345 } 05346 05347 $mwArray = new MagicWordArray( array_keys( $paramMap ) ); 05348 wfProfileOut( __METHOD__ . '-getMagicWord' ); 05349 05350 $label = ''; 05351 $alt = ''; 05352 $link = ''; 05353 $handlerOptions = array(); 05354 if ( isset( $matches[3] ) ) { 05355 // look for an |alt= definition while trying not to break existing 05356 // captions with multiple pipes (|) in it, until a more sensible grammar 05357 // is defined for images in galleries 05358 05359 // FIXME: Doing recursiveTagParse at this stage, and the trim before 05360 // splitting on '|' is a bit odd, and different from makeImage. 05361 $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) ); 05362 $parameterMatches = StringUtils::explode( '|', $matches[3] ); 05363 05364 foreach ( $parameterMatches as $parameterMatch ) { 05365 list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch ); 05366 if ( $magicName ) { 05367 $paramName = $paramMap[$magicName]; 05368 05369 switch ( $paramName ) { 05370 case 'gallery-internal-alt': 05371 $alt = $this->stripAltText( $match, false ); 05372 break; 05373 case 'gallery-internal-link': 05374 $linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) ); 05375 $chars = self::EXT_LINK_URL_CLASS; 05376 $prots = $this->mUrlProtocols; 05377 //check to see if link matches an absolute url, if not then it must be a wiki link. 05378 if ( preg_match( "/^($prots)$chars+$/u", $linkValue ) ) { 05379 $link = $linkValue; 05380 } else { 05381 $localLinkTitle = Title::newFromText( $linkValue ); 05382 if ( $localLinkTitle !== null ) { 05383 $link = $localLinkTitle->getLinkURL(); 05384 } 05385 } 05386 break; 05387 default: 05388 // Must be a handler specific parameter. 05389 if ( $handler->validateParam( $paramName, $match ) ) { 05390 $handlerOptions[$paramName] = $match; 05391 } else { 05392 // Guess not. Append it to the caption. 05393 wfDebug( "$parameterMatch failed parameter validation\n" ); 05394 $label .= '|' . $parameterMatch; 05395 } 05396 } 05397 05398 } else { 05399 // concatenate all other pipes 05400 $label .= '|' . $parameterMatch; 05401 } 05402 } 05403 // remove the first pipe 05404 $label = substr( $label, 1 ); 05405 } 05406 05407 $ig->add( $title, $label, $alt, $link, $handlerOptions ); 05408 } 05409 $html = $ig->toHTML(); 05410 wfRunHooks( 'AfterParserFetchFileAndTitle', array( $this, $ig, &$html ) ); 05411 wfProfileOut( __METHOD__ ); 05412 return $html; 05413 } 05414 05419 public function getImageParams( $handler ) { 05420 if ( $handler ) { 05421 $handlerClass = get_class( $handler ); 05422 } else { 05423 $handlerClass = ''; 05424 } 05425 if ( !isset( $this->mImageParams[$handlerClass] ) ) { 05426 # Initialise static lists 05427 static $internalParamNames = array( 05428 'horizAlign' => array( 'left', 'right', 'center', 'none' ), 05429 'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle', 05430 'bottom', 'text-bottom' ), 05431 'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless', 05432 'upright', 'border', 'link', 'alt', 'class' ), 05433 ); 05434 static $internalParamMap; 05435 if ( !$internalParamMap ) { 05436 $internalParamMap = array(); 05437 foreach ( $internalParamNames as $type => $names ) { 05438 foreach ( $names as $name ) { 05439 $magicName = str_replace( '-', '_', "img_$name" ); 05440 $internalParamMap[$magicName] = array( $type, $name ); 05441 } 05442 } 05443 } 05444 05445 # Add handler params 05446 $paramMap = $internalParamMap; 05447 if ( $handler ) { 05448 $handlerParamMap = $handler->getParamMap(); 05449 foreach ( $handlerParamMap as $magic => $paramName ) { 05450 $paramMap[$magic] = array( 'handler', $paramName ); 05451 } 05452 } 05453 $this->mImageParams[$handlerClass] = $paramMap; 05454 $this->mImageParamsMagicArray[$handlerClass] = new MagicWordArray( array_keys( $paramMap ) ); 05455 } 05456 return array( $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ); 05457 } 05458 05467 public function makeImage( $title, $options, $holders = false ) { 05468 # Check if the options text is of the form "options|alt text" 05469 # Options are: 05470 # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang 05471 # * left no resizing, just left align. label is used for alt= only 05472 # * right same, but right aligned 05473 # * none same, but not aligned 05474 # * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox 05475 # * center center the image 05476 # * frame Keep original image size, no magnify-button. 05477 # * framed Same as "frame" 05478 # * frameless like 'thumb' but without a frame. Keeps user preferences for width 05479 # * upright reduce width for upright images, rounded to full __0 px 05480 # * border draw a 1px border around the image 05481 # * alt Text for HTML alt attribute (defaults to empty) 05482 # * class Set a class for img node 05483 # * link Set the target of the image link. Can be external, interwiki, or local 05484 # vertical-align values (no % or length right now): 05485 # * baseline 05486 # * sub 05487 # * super 05488 # * top 05489 # * text-top 05490 # * middle 05491 # * bottom 05492 # * text-bottom 05493 05494 $parts = StringUtils::explode( "|", $options ); 05495 05496 # Give extensions a chance to select the file revision for us 05497 $options = array(); 05498 $descQuery = false; 05499 wfRunHooks( 'BeforeParserFetchFileAndTitle', 05500 array( $this, $title, &$options, &$descQuery ) ); 05501 # Fetch and register the file (file title may be different via hooks) 05502 list( $file, $title ) = $this->fetchFileAndTitle( $title, $options ); 05503 05504 # Get parameter map 05505 $handler = $file ? $file->getHandler() : false; 05506 05507 list( $paramMap, $mwArray ) = $this->getImageParams( $handler ); 05508 05509 if ( !$file ) { 05510 $this->addTrackingCategory( 'broken-file-category' ); 05511 } 05512 05513 # Process the input parameters 05514 $caption = ''; 05515 $params = array( 'frame' => array(), 'handler' => array(), 05516 'horizAlign' => array(), 'vertAlign' => array() ); 05517 $seenformat = false; 05518 foreach ( $parts as $part ) { 05519 $part = trim( $part ); 05520 list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part ); 05521 $validated = false; 05522 if ( isset( $paramMap[$magicName] ) ) { 05523 list( $type, $paramName ) = $paramMap[$magicName]; 05524 05525 # Special case; width and height come in one variable together 05526 if ( $type === 'handler' && $paramName === 'width' ) { 05527 $parsedWidthParam = $this->parseWidthParam( $value ); 05528 if ( isset( $parsedWidthParam['width'] ) ) { 05529 $width = $parsedWidthParam['width']; 05530 if ( $handler->validateParam( 'width', $width ) ) { 05531 $params[$type]['width'] = $width; 05532 $validated = true; 05533 } 05534 } 05535 if ( isset( $parsedWidthParam['height'] ) ) { 05536 $height = $parsedWidthParam['height']; 05537 if ( $handler->validateParam( 'height', $height ) ) { 05538 $params[$type]['height'] = $height; 05539 $validated = true; 05540 } 05541 } 05542 # else no validation -- bug 13436 05543 } else { 05544 if ( $type === 'handler' ) { 05545 # Validate handler parameter 05546 $validated = $handler->validateParam( $paramName, $value ); 05547 } else { 05548 # Validate internal parameters 05549 switch ( $paramName ) { 05550 case 'manualthumb': 05551 case 'alt': 05552 case 'class': 05553 # @todo FIXME: Possibly check validity here for 05554 # manualthumb? downstream behavior seems odd with 05555 # missing manual thumbs. 05556 $validated = true; 05557 $value = $this->stripAltText( $value, $holders ); 05558 break; 05559 case 'link': 05560 $chars = self::EXT_LINK_URL_CLASS; 05561 $prots = $this->mUrlProtocols; 05562 if ( $value === '' ) { 05563 $paramName = 'no-link'; 05564 $value = true; 05565 $validated = true; 05566 } elseif ( preg_match( "/^((?i)$prots)/", $value ) ) { 05567 if ( preg_match( "/^((?i)$prots)$chars+$/u", $value, $m ) ) { 05568 $paramName = 'link-url'; 05569 $this->mOutput->addExternalLink( $value ); 05570 if ( $this->mOptions->getExternalLinkTarget() ) { 05571 $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget(); 05572 } 05573 $validated = true; 05574 } 05575 } else { 05576 $linkTitle = Title::newFromText( $value ); 05577 if ( $linkTitle ) { 05578 $paramName = 'link-title'; 05579 $value = $linkTitle; 05580 $this->mOutput->addLink( $linkTitle ); 05581 $validated = true; 05582 } 05583 } 05584 break; 05585 case 'frameless': 05586 case 'framed': 05587 case 'thumbnail': 05588 // use first appearing option, discard others. 05589 $validated = ! $seenformat; 05590 $seenformat = true; 05591 break; 05592 default: 05593 # Most other things appear to be empty or numeric... 05594 $validated = ( $value === false || is_numeric( trim( $value ) ) ); 05595 } 05596 } 05597 05598 if ( $validated ) { 05599 $params[$type][$paramName] = $value; 05600 } 05601 } 05602 } 05603 if ( !$validated ) { 05604 $caption = $part; 05605 } 05606 } 05607 05608 # Process alignment parameters 05609 if ( $params['horizAlign'] ) { 05610 $params['frame']['align'] = key( $params['horizAlign'] ); 05611 } 05612 if ( $params['vertAlign'] ) { 05613 $params['frame']['valign'] = key( $params['vertAlign'] ); 05614 } 05615 05616 $params['frame']['caption'] = $caption; 05617 05618 # Will the image be presented in a frame, with the caption below? 05619 $imageIsFramed = isset( $params['frame']['frame'] ) 05620 || isset( $params['frame']['framed'] ) 05621 || isset( $params['frame']['thumbnail'] ) 05622 || isset( $params['frame']['manualthumb'] ); 05623 05624 # In the old days, [[Image:Foo|text...]] would set alt text. Later it 05625 # came to also set the caption, ordinary text after the image -- which 05626 # makes no sense, because that just repeats the text multiple times in 05627 # screen readers. It *also* came to set the title attribute. 05628 # 05629 # Now that we have an alt attribute, we should not set the alt text to 05630 # equal the caption: that's worse than useless, it just repeats the 05631 # text. This is the framed/thumbnail case. If there's no caption, we 05632 # use the unnamed parameter for alt text as well, just for the time be- 05633 # ing, if the unnamed param is set and the alt param is not. 05634 # 05635 # For the future, we need to figure out if we want to tweak this more, 05636 # e.g., introducing a title= parameter for the title; ignoring the un- 05637 # named parameter entirely for images without a caption; adding an ex- 05638 # plicit caption= parameter and preserving the old magic unnamed para- 05639 # meter for BC; ... 05640 if ( $imageIsFramed ) { # Framed image 05641 if ( $caption === '' && !isset( $params['frame']['alt'] ) ) { 05642 # No caption or alt text, add the filename as the alt text so 05643 # that screen readers at least get some description of the image 05644 $params['frame']['alt'] = $title->getText(); 05645 } 05646 # Do not set $params['frame']['title'] because tooltips don't make sense 05647 # for framed images 05648 } else { # Inline image 05649 if ( !isset( $params['frame']['alt'] ) ) { 05650 # No alt text, use the "caption" for the alt text 05651 if ( $caption !== '' ) { 05652 $params['frame']['alt'] = $this->stripAltText( $caption, $holders ); 05653 } else { 05654 # No caption, fall back to using the filename for the 05655 # alt text 05656 $params['frame']['alt'] = $title->getText(); 05657 } 05658 } 05659 # Use the "caption" for the tooltip text 05660 $params['frame']['title'] = $this->stripAltText( $caption, $holders ); 05661 } 05662 05663 wfRunHooks( 'ParserMakeImageParams', array( $title, $file, &$params, $this ) ); 05664 05665 # Linker does the rest 05666 $time = isset( $options['time'] ) ? $options['time'] : false; 05667 $ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'], 05668 $time, $descQuery, $this->mOptions->getThumbSize() ); 05669 05670 # Give the handler a chance to modify the parser object 05671 if ( $handler ) { 05672 $handler->parserTransformHook( $this, $file ); 05673 } 05674 05675 return $ret; 05676 } 05677 05683 protected function stripAltText( $caption, $holders ) { 05684 # Strip bad stuff out of the title (tooltip). We can't just use 05685 # replaceLinkHoldersText() here, because if this function is called 05686 # from replaceInternalLinks2(), mLinkHolders won't be up-to-date. 05687 if ( $holders ) { 05688 $tooltip = $holders->replaceText( $caption ); 05689 } else { 05690 $tooltip = $this->replaceLinkHoldersText( $caption ); 05691 } 05692 05693 # make sure there are no placeholders in thumbnail attributes 05694 # that are later expanded to html- so expand them now and 05695 # remove the tags 05696 $tooltip = $this->mStripState->unstripBoth( $tooltip ); 05697 $tooltip = Sanitizer::stripAllTags( $tooltip ); 05698 05699 return $tooltip; 05700 } 05701 05706 public function disableCache() { 05707 wfDebug( "Parser output marked as uncacheable.\n" ); 05708 if ( !$this->mOutput ) { 05709 throw new MWException( __METHOD__ . 05710 " can only be called when actually parsing something" ); 05711 } 05712 $this->mOutput->setCacheTime( -1 ); // old style, for compatibility 05713 $this->mOutput->updateCacheExpiry( 0 ); // new style, for consistency 05714 } 05715 05724 public function attributeStripCallback( &$text, $frame = false ) { 05725 $text = $this->replaceVariables( $text, $frame ); 05726 $text = $this->mStripState->unstripBoth( $text ); 05727 return $text; 05728 } 05729 05735 public function getTags() { 05736 return array_merge( 05737 array_keys( $this->mTransparentTagHooks ), 05738 array_keys( $this->mTagHooks ), 05739 array_keys( $this->mFunctionTagHooks ) 05740 ); 05741 } 05742 05753 public function replaceTransparentTags( $text ) { 05754 $matches = array(); 05755 $elements = array_keys( $this->mTransparentTagHooks ); 05756 $text = self::extractTagsAndParams( $elements, $text, $matches, $this->mUniqPrefix ); 05757 $replacements = array(); 05758 05759 foreach ( $matches as $marker => $data ) { 05760 list( $element, $content, $params, $tag ) = $data; 05761 $tagName = strtolower( $element ); 05762 if ( isset( $this->mTransparentTagHooks[$tagName] ) ) { 05763 $output = call_user_func_array( 05764 $this->mTransparentTagHooks[$tagName], 05765 array( $content, $params, $this ) 05766 ); 05767 } else { 05768 $output = $tag; 05769 } 05770 $replacements[$marker] = $output; 05771 } 05772 return strtr( $text, $replacements ); 05773 } 05774 05804 private function extractSections( $text, $sectionId, $mode, $newText = '' ) { 05805 global $wgTitle; # not generally used but removes an ugly failure mode 05806 05807 $magicScopeVariable = $this->lock(); 05808 $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true ); 05809 $outText = ''; 05810 $frame = $this->getPreprocessor()->newFrame(); 05811 05812 # Process section extraction flags 05813 $flags = 0; 05814 $sectionParts = explode( '-', $sectionId ); 05815 $sectionIndex = array_pop( $sectionParts ); 05816 foreach ( $sectionParts as $part ) { 05817 if ( $part === 'T' ) { 05818 $flags |= self::PTD_FOR_INCLUSION; 05819 } 05820 } 05821 05822 # Check for empty input 05823 if ( strval( $text ) === '' ) { 05824 # Only sections 0 and T-0 exist in an empty document 05825 if ( $sectionIndex == 0 ) { 05826 if ( $mode === 'get' ) { 05827 return ''; 05828 } else { 05829 return $newText; 05830 } 05831 } else { 05832 if ( $mode === 'get' ) { 05833 return $newText; 05834 } else { 05835 return $text; 05836 } 05837 } 05838 } 05839 05840 # Preprocess the text 05841 $root = $this->preprocessToDom( $text, $flags ); 05842 05843 # <h> nodes indicate section breaks 05844 # They can only occur at the top level, so we can find them by iterating the root's children 05845 $node = $root->getFirstChild(); 05846 05847 # Find the target section 05848 if ( $sectionIndex == 0 ) { 05849 # Section zero doesn't nest, level=big 05850 $targetLevel = 1000; 05851 } else { 05852 while ( $node ) { 05853 if ( $node->getName() === 'h' ) { 05854 $bits = $node->splitHeading(); 05855 if ( $bits['i'] == $sectionIndex ) { 05856 $targetLevel = $bits['level']; 05857 break; 05858 } 05859 } 05860 if ( $mode === 'replace' ) { 05861 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); 05862 } 05863 $node = $node->getNextSibling(); 05864 } 05865 } 05866 05867 if ( !$node ) { 05868 # Not found 05869 if ( $mode === 'get' ) { 05870 return $newText; 05871 } else { 05872 return $text; 05873 } 05874 } 05875 05876 # Find the end of the section, including nested sections 05877 do { 05878 if ( $node->getName() === 'h' ) { 05879 $bits = $node->splitHeading(); 05880 $curLevel = $bits['level']; 05881 if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) { 05882 break; 05883 } 05884 } 05885 if ( $mode === 'get' ) { 05886 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); 05887 } 05888 $node = $node->getNextSibling(); 05889 } while ( $node ); 05890 05891 # Write out the remainder (in replace mode only) 05892 if ( $mode === 'replace' ) { 05893 # Output the replacement text 05894 # Add two newlines on -- trailing whitespace in $newText is conventionally 05895 # stripped by the editor, so we need both newlines to restore the paragraph gap 05896 # Only add trailing whitespace if there is newText 05897 if ( $newText != "" ) { 05898 $outText .= $newText . "\n\n"; 05899 } 05900 05901 while ( $node ) { 05902 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); 05903 $node = $node->getNextSibling(); 05904 } 05905 } 05906 05907 if ( is_string( $outText ) ) { 05908 # Re-insert stripped tags 05909 $outText = rtrim( $this->mStripState->unstripBoth( $outText ) ); 05910 } 05911 05912 return $outText; 05913 } 05914 05929 public function getSection( $text, $sectionId, $defaultText = '' ) { 05930 return $this->extractSections( $text, $sectionId, 'get', $defaultText ); 05931 } 05932 05945 public function replaceSection( $oldText, $sectionId, $newText ) { 05946 return $this->extractSections( $oldText, $sectionId, 'replace', $newText ); 05947 } 05948 05954 public function getRevisionId() { 05955 return $this->mRevisionId; 05956 } 05957 05964 public function getRevisionObject() { 05965 if ( !is_null( $this->mRevisionObject ) ) { 05966 return $this->mRevisionObject; 05967 } 05968 if ( is_null( $this->mRevisionId ) ) { 05969 return null; 05970 } 05971 05972 $this->mRevisionObject = Revision::newFromId( $this->mRevisionId ); 05973 return $this->mRevisionObject; 05974 } 05975 05981 public function getRevisionTimestamp() { 05982 if ( is_null( $this->mRevisionTimestamp ) ) { 05983 wfProfileIn( __METHOD__ ); 05984 05985 global $wgContLang; 05986 05987 $revObject = $this->getRevisionObject(); 05988 $timestamp = $revObject ? $revObject->getTimestamp() : wfTimestampNow(); 05989 05990 # The cryptic '' timezone parameter tells to use the site-default 05991 # timezone offset instead of the user settings. 05992 # 05993 # Since this value will be saved into the parser cache, served 05994 # to other users, and potentially even used inside links and such, 05995 # it needs to be consistent for all visitors. 05996 $this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' ); 05997 05998 wfProfileOut( __METHOD__ ); 05999 } 06000 return $this->mRevisionTimestamp; 06001 } 06002 06008 public function getRevisionUser() { 06009 if ( is_null( $this->mRevisionUser ) ) { 06010 $revObject = $this->getRevisionObject(); 06011 06012 # if this template is subst: the revision id will be blank, 06013 # so just use the current user's name 06014 if ( $revObject ) { 06015 $this->mRevisionUser = $revObject->getUserText(); 06016 } elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) { 06017 $this->mRevisionUser = $this->getUser()->getName(); 06018 } 06019 } 06020 return $this->mRevisionUser; 06021 } 06022 06028 public function getRevisionSize() { 06029 if ( is_null( $this->mRevisionSize ) ) { 06030 $revObject = $this->getRevisionObject(); 06031 06032 # if this variable is subst: the revision id will be blank, 06033 # so just use the parser input size, because the own substituation 06034 # will change the size. 06035 if ( $revObject ) { 06036 $this->mRevisionSize = $revObject->getSize(); 06037 } elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) { 06038 $this->mRevisionSize = $this->mInputSize; 06039 } 06040 } 06041 return $this->mRevisionSize; 06042 } 06043 06049 public function setDefaultSort( $sort ) { 06050 $this->mDefaultSort = $sort; 06051 $this->mOutput->setProperty( 'defaultsort', $sort ); 06052 } 06053 06064 public function getDefaultSort() { 06065 if ( $this->mDefaultSort !== false ) { 06066 return $this->mDefaultSort; 06067 } else { 06068 return ''; 06069 } 06070 } 06071 06078 public function getCustomDefaultSort() { 06079 return $this->mDefaultSort; 06080 } 06081 06091 public function guessSectionNameFromWikiText( $text ) { 06092 # Strip out wikitext links(they break the anchor) 06093 $text = $this->stripSectionName( $text ); 06094 $text = Sanitizer::normalizeSectionNameWhitespace( $text ); 06095 return '#' . Sanitizer::escapeId( $text, 'noninitial' ); 06096 } 06097 06106 public function guessLegacySectionNameFromWikiText( $text ) { 06107 # Strip out wikitext links(they break the anchor) 06108 $text = $this->stripSectionName( $text ); 06109 $text = Sanitizer::normalizeSectionNameWhitespace( $text ); 06110 return '#' . Sanitizer::escapeId( $text, array( 'noninitial', 'legacy' ) ); 06111 } 06112 06127 public function stripSectionName( $text ) { 06128 # Strip internal link markup 06129 $text = preg_replace( '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2', $text ); 06130 $text = preg_replace( '/\[\[:?([^[]+)\|?\]\]/', '$1', $text ); 06131 06132 # Strip external link markup 06133 # @todo FIXME: Not tolerant to blank link text 06134 # I.E. [https://www.mediawiki.org] will render as [1] or something depending 06135 # on how many empty links there are on the page - need to figure that out. 06136 $text = preg_replace( '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2', $text ); 06137 06138 # Parse wikitext quotes (italics & bold) 06139 $text = $this->doQuotes( $text ); 06140 06141 # Strip HTML tags 06142 $text = StringUtils::delimiterReplace( '<', '>', '', $text ); 06143 return $text; 06144 } 06145 06156 public function testSrvus( $text, Title $title, ParserOptions $options, $outputType = self::OT_HTML ) { 06157 $magicScopeVariable = $this->lock(); 06158 $this->startParse( $title, $options, $outputType, true ); 06159 06160 $text = $this->replaceVariables( $text ); 06161 $text = $this->mStripState->unstripBoth( $text ); 06162 $text = Sanitizer::removeHTMLtags( $text ); 06163 return $text; 06164 } 06165 06172 public function testPst( $text, Title $title, ParserOptions $options ) { 06173 return $this->preSaveTransform( $text, $title, $options->getUser(), $options ); 06174 } 06175 06182 public function testPreprocess( $text, Title $title, ParserOptions $options ) { 06183 return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS ); 06184 } 06185 06202 public function markerSkipCallback( $s, $callback ) { 06203 $i = 0; 06204 $out = ''; 06205 while ( $i < strlen( $s ) ) { 06206 $markerStart = strpos( $s, $this->mUniqPrefix, $i ); 06207 if ( $markerStart === false ) { 06208 $out .= call_user_func( $callback, substr( $s, $i ) ); 06209 break; 06210 } else { 06211 $out .= call_user_func( $callback, substr( $s, $i, $markerStart - $i ) ); 06212 $markerEnd = strpos( $s, self::MARKER_SUFFIX, $markerStart ); 06213 if ( $markerEnd === false ) { 06214 $out .= substr( $s, $markerStart ); 06215 break; 06216 } else { 06217 $markerEnd += strlen( self::MARKER_SUFFIX ); 06218 $out .= substr( $s, $markerStart, $markerEnd - $markerStart ); 06219 $i = $markerEnd; 06220 } 06221 } 06222 } 06223 return $out; 06224 } 06225 06232 public function killMarkers( $text ) { 06233 return $this->mStripState->killMarkers( $text ); 06234 } 06235 06252 public function serializeHalfParsedText( $text ) { 06253 wfProfileIn( __METHOD__ ); 06254 $data = array( 06255 'text' => $text, 06256 'version' => self::HALF_PARSED_VERSION, 06257 'stripState' => $this->mStripState->getSubState( $text ), 06258 'linkHolders' => $this->mLinkHolders->getSubArray( $text ) 06259 ); 06260 wfProfileOut( __METHOD__ ); 06261 return $data; 06262 } 06263 06279 public function unserializeHalfParsedText( $data ) { 06280 if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) { 06281 throw new MWException( __METHOD__ . ': invalid version' ); 06282 } 06283 06284 # First, extract the strip state. 06285 $texts = array( $data['text'] ); 06286 $texts = $this->mStripState->merge( $data['stripState'], $texts ); 06287 06288 # Now renumber links 06289 $texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts ); 06290 06291 # Should be good to go. 06292 return $texts[0]; 06293 } 06294 06304 public function isValidHalfParsedText( $data ) { 06305 return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION; 06306 } 06307 06316 public function parseWidthParam( $value ) { 06317 $parsedWidthParam = array(); 06318 if ( $value === '' ) { 06319 return $parsedWidthParam; 06320 } 06321 $m = array(); 06322 # (bug 13500) In both cases (width/height and width only), 06323 # permit trailing "px" for backward compatibility. 06324 if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) { 06325 $width = intval( $m[1] ); 06326 $height = intval( $m[2] ); 06327 $parsedWidthParam['width'] = $width; 06328 $parsedWidthParam['height'] = $height; 06329 } elseif ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) { 06330 $width = intval( $value ); 06331 $parsedWidthParam['width'] = $width; 06332 } 06333 return $parsedWidthParam; 06334 } 06335 06345 protected function lock() { 06346 if ( $this->mInParse ) { 06347 throw new MWException( "Parser state cleared while parsing. " 06348 . "Did you call Parser::parse recursively?" ); 06349 } 06350 $this->mInParse = true; 06351 06352 $that = $this; 06353 $recursiveCheck = new ScopedCallback( function() use ( $that ) { 06354 $that->mInParse = false; 06355 } ); 06356 06357 return $recursiveCheck; 06358 } 06359 06370 public static function stripOuterParagraph( $html ) { 06371 $m = array(); 06372 if ( preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m ) ) { 06373 if ( strpos( $m[1], '</p>' ) === false ) { 06374 $html = $m[1]; 06375 } 06376 } 06377 06378 return $html; 06379 } 06380 06391 public function getFreshParser() { 06392 global $wgParserConf; 06393 if ( $this->mInParse ) { 06394 return new $wgParserConf['class']( $wgParserConf ); 06395 } else { 06396 return $this; 06397 } 06398 } 06399 }