MediaWiki
REL1_23
|
<?php
/**
 * Wikitext parser: turns wikitext into a ParserOutput (see parse()), or
 * expands templates/variables only (see preprocess()).
 *
 * NOTE: this listing is one chunk of a longer class; the class body continues
 * past the last method shown here.
 */
class Parser {
	/** Parser version string. */
	const VERSION = '1.6.4';

	/** Version number for half-parsed output (consumers are outside this chunk). */
	const HALF_PARSED_VERSION = 2;

	# Flags for Parser::setFunctionHook
	# Also available as global constants from Defines.php
	const SFH_NO_HASH = 1;
	const SFH_OBJECT_ARGS = 2;

	# Constants needed for external link processing
	# Everything except bracket, space, or control characters
	# \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
	# as well as U+3000 is IDEOGRAPHIC SPACE for bug 19052
	const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
	# Compiled with the /x flag, so the line break and indentation inside the
	# literal are insignificant.
	const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
		\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';

	# State constants for the definition list colon extraction
	const COLON_STATE_TEXT = 0;
	const COLON_STATE_TAG = 1;
	const COLON_STATE_TAGSTART = 2;
	const COLON_STATE_CLOSETAG = 3;
	const COLON_STATE_TAGSLASH = 4;
	const COLON_STATE_COMMENT = 5;
	const COLON_STATE_COMMENTDASH = 6;
	const COLON_STATE_COMMENTDASHDASH = 7;

	# Flags for preprocessToDom
	const PTD_FOR_INCLUSION = 1;

	# Allowed values for $this->mOutputType
	# Parameter to startExternalParse().
	const OT_HTML = 1; # like parse()
	const OT_WIKI = 2; # like preSaveTransform()
	const OT_PREPROCESS = 3; # like preprocess()
	const OT_MSG = 3; # same value as OT_PREPROCESS (presumably a legacy alias)
	const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.

	# Marker Suffix needs to be accessible statically.
	const MARKER_SUFFIX = "-QINU\x7f";

	# Markers used for wrapping the table of contents
	const TOC_START = '<mw:toc>';
	const TOC_END = '</mw:toc>';

	# Persistent:
	var $mTagHooks = array();
	var $mTransparentTagHooks = array();
	var $mFunctionHooks = array();
	var $mFunctionSynonyms = array( 0 => array(), 1 => array() );
	var $mFunctionTagHooks = array();
	var $mStripList = array();
	var $mDefaultStripList = array();
	var $mVarCache = array();
	var $mImageParams = array();
	var $mImageParamsMagicArray = array();
	var $mMarkerIndex = 0;
	var $mFirstCall = true;

	# Initialised by initialiseVariables()

	var $mVariables;

	var $mSubstWords;
	var $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor

	# Cleared with clearState():

	var $mOutput;
	var $mAutonumber, $mDTopen;

	var $mStripState;

	var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
	var $mLinkHolders;

	var $mLinkID;
	var $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
	var $mDefaultSort;
	var $mTplExpandCache; # empty-frame expansion cache
	var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
	var $mExpensiveFunctionCount; # number of expensive parser function calls
	var $mShowToc, $mForceTocPosition;

	var $mUser; # User object; only used when doing pre-save transform

	# Temporary
	# These are variables reset at least once per parse regardless of $clearState

	var $mOptions;

	var $mTitle; # Title context, used for self-link rendering and similar things
	var $mOutputType; # Output type, one of the OT_xxx constants
	var $ot; # Shortcut alias, see setOutputType()
	var $mRevisionObject; # The revision object of the specified revision ID
	var $mRevisionId; # ID to display in {{REVISIONID}} tags
	var $mRevisionTimestamp; # The timestamp of the specified revision ID
	var $mRevisionUser; # User to display in {{REVISIONUSER}} tag
	var $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
	var $mRevIdForTs; # The revision ID which was used to fetch the timestamp
	var $mInputSize = false; # For {{PAGESIZE}} on current page.

	var $mUniqPrefix;

	var $mLangLinkLanguages;

	/**
	 * Store the configuration, build the bracketed-external-link regex and
	 * choose the preprocessor implementation.
	 *
	 * @param array $conf Configuration; the only key read here is
	 *   'preprocessorClass', which overrides the automatic choice.
	 */
	public function __construct( $conf = array() ) {
		$this->mConf = $conf;
		$this->mUrlProtocols = wfUrlProtocols();
		$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
			self::EXT_LINK_URL_CLASS . '+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
		if ( isset( $conf['preprocessorClass'] ) ) {
			$this->mPreprocessorClass = $conf['preprocessorClass'];
		} elseif ( defined( 'HPHP_VERSION' ) ) {
			# Preprocessor_Hash is much faster than Preprocessor_DOM under HipHop
			$this->mPreprocessorClass = 'Preprocessor_Hash';
		} elseif ( extension_loaded( 'domxml' ) ) {
			# PECL extension that conflicts with the core DOM extension (bug 13770)
			wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
			$this->mPreprocessorClass = 'Preprocessor_Hash';
		} elseif ( extension_loaded( 'dom' ) ) {
			$this->mPreprocessorClass = 'Preprocessor_DOM';
		} else {
			$this->mPreprocessorClass = 'Preprocessor_Hash';
		}
		wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
	}

	/**
	 * Unset every property on destruction, mLinkHolders first.
	 */
	function __destruct() {
		if ( isset( $this->mLinkHolders ) ) {
			unset( $this->mLinkHolders );
		}
		foreach ( $this as $name => $value ) {
			unset( $this->$name );
		}
	}

	/**
	 * Run the 'ParserCloned' hook so hook handlers can react to the copy.
	 */
	function __clone() {
		wfRunHooks( 'ParserCloned', array( $this ) );
	}

	/**
	 * One-time initialisation: register core parser functions and tag hooks,
	 * initialise variables and run 'ParserFirstCallInit'. Does nothing on
	 * later calls.
	 */
	function firstCallInit() {
		if ( !$this->mFirstCall ) {
			return;
		}
		$this->mFirstCall = false;

		wfProfileIn( __METHOD__ );

		CoreParserFunctions::register( $this );
		CoreTagHooks::register( $this );
		$this->initialiseVariables();

		wfRunHooks( 'ParserFirstCallInit', array( &$this ) );
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Reset all per-parse state: fresh ParserOutput, StripState and link
	 * holder array, zeroed counters, emptied caches. Requires $this->mOptions
	 * to be set already, because the new output is registered as its watcher.
	 */
	function clearState() {
		wfProfileIn( __METHOD__ );
		if ( $this->mFirstCall ) {
			$this->firstCallInit();
		}
		$this->mOutput = new ParserOutput;
		$this->mOptions->registerWatcher( array( $this->mOutput, 'recordOption' ) );
		$this->mAutonumber = 0;
		$this->mLastSection = '';
		$this->mDTopen = false;
		$this->mIncludeCount = array();
		$this->mArgStack = false;
		$this->mInPre = false;
		$this->mLinkHolders = new LinkHolderArray( $this );
		$this->mLinkID = 0;
		$this->mRevisionObject = $this->mRevisionTimestamp =
			$this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null;
		$this->mVarCache = array();
		$this->mUser = null;
		$this->mLangLinkLanguages = array();

		# New random marker prefix for every parse; parse() strips it from input text.
		$this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
		$this->mStripState = new StripState( $this->mUniqPrefix );

		# Clear these on every parse, bug 4549
		$this->mTplExpandCache = $this->mTplRedirCache = $this->mTplDomCache = array();

		$this->mShowToc = true;
		$this->mForceTocPosition = false;
		$this->mIncludeSizes = array(
			'post-expand' => 0,
			'arg' => 0,
		);
		$this->mPPNodeCount = 0;
		$this->mGeneratedPPNodeCount = 0;
		$this->mHighestExpansionDepth = 0;
		$this->mDefaultSort = false;
		$this->mHeadings = array();
		$this->mDoubleUnderscores = array();
		$this->mExpensiveFunctionCount = 0;

		# Fix cloning
		if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
			$this->mPreprocessor = null;
		}

		wfRunHooks( 'ParserClearState', array( &$this ) );
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Convert wikitext to HTML and return the populated ParserOutput.
	 *
	 * @param string $text Wikitext to parse
	 * @param Title $title Title context, handed to startParse()
	 * @param ParserOptions $options
	 * @param bool $linestart Passed through to doBlockLevels()
	 * @param bool $clearState Passed to startParse(); when true the current
	 *   strip-marker prefix is also removed from $text
	 * @param int|null $revid When not null, used as the revision ID for this
	 *   parse; the previous revision fields are restored before returning
	 * @return ParserOutput $this->mOutput
	 */
	public function parse( $text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null ) {
		global $wgUseTidy, $wgAlwaysUseTidy, $wgShowHostnames;
		$fname = __METHOD__ . '-' . wfGetCaller();
		wfProfileIn( __METHOD__ );
		wfProfileIn( $fname );

		$this->startParse( $title, $options, self::OT_HTML, $clearState );

		$this->mInputSize = strlen( $text );
		if ( $this->mOptions->getEnableLimitReport() ) {
			$this->mOutput->resetParseStartTime();
		}

		# Remove the strip marker tag prefix from the input, if present.
		if ( $clearState ) {
			$text = str_replace( $this->mUniqPrefix, '', $text );
		}

		# Remember the revision fields so they can be restored at the end.
		$oldRevisionId = $this->mRevisionId;
		$oldRevisionObject = $this->mRevisionObject;
		$oldRevisionTimestamp = $this->mRevisionTimestamp;
		$oldRevisionUser = $this->mRevisionUser;
		$oldRevisionSize = $this->mRevisionSize;
		if ( $revid !== null ) {
			$this->mRevisionId = $revid;
			$this->mRevisionObject = null;
			$this->mRevisionTimestamp = null;
			$this->mRevisionUser = null;
			$this->mRevisionSize = null;
		}

		wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
		# No more strip!
		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
		$text = $this->internalParse( $text );
		wfRunHooks( 'ParserAfterParse', array( &$this, &$text, &$this->mStripState ) );

		$text = $this->mStripState->unstripGeneral( $text );

		# Clean up special characters, only run once, next-to-last before doBlockLevels
		# The non-breaking spaces are written as explicit &#160; references so
		# they cannot be confused with (or silently turned into) plain spaces.
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
			# french spaces, Guillemet-right
			'/(\\302\\253) /' => '\\1&#160;',
			'/&#160;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
		);
		$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

		$text = $this->doBlockLevels( $text, $linestart );

		$this->replaceLinkHolders( $text );

		# Content conversion, unless disabled by option, by the
		# 'nocontentconvert' double-underscore, or for interface messages.
		if ( !( $options->getDisableContentConversion()
			|| isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
		) {
			if ( !$this->mOptions->getInterfaceMessage() ) {
				# The position of the convert() call should not be changed. it
				# assumes that the links are all replaced and the only thing left
				# is the <nowiki> mark.
				$text = $this->getConverterLanguage()->convert( $text );
			}
		}

		# Title conversion, unless disabled or a display title is already set.
		if ( !( $options->getDisableTitleConversion()
			|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
			|| isset( $this->mDoubleUnderscores['notitleconvert'] )
			|| $this->mOutput->getDisplayTitle() !== false )
		) {
			$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
			if ( $convruletitle ) {
				$this->mOutput->setTitleText( $convruletitle );
			} else {
				$titleText = $this->getConverterLanguage()->convertTitle( $title );
				$this->mOutput->setTitleText( $titleText );
			}
		}

		$text = $this->mStripState->unstripNoWiki( $text );

		wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );

		$text = $this->replaceTransparentTags( $text );
		$text = $this->mStripState->unstripGeneral( $text );

		$text = Sanitizer::normalizeCharReferences( $text );

		if ( ( $wgUseTidy && $this->mOptions->getTidy() ) || $wgAlwaysUseTidy ) {
			$text = MWTidy::tidy( $text );
		} else {
			# attempt to sanitize at least some nesting problems
			# (bug #2702 and quite a few others)
			$tidyregs = array(
				# ''Something [http://www.cool.com cool''] -->
				# <i>Something</i><a href="http://www.cool.com"..><i>cool</i></a>
				'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
				# fix up an anchor inside another anchor, only
				# at least for a single single nested link (bug 3695)
				'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
				# fix div inside inline elements- doBlockLevels won't wrap a line which
				# contains a div, so fix it up here; replace
				# div with escaped text
				'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
				# remove empty italic or bold tag pairs, some
				# introduced by rules above
				'/<([bi])><\/\\1>/' => '',
			);

			$text = preg_replace(
				array_keys( $tidyregs ),
				array_values( $tidyregs ),
				$text );
		}

		if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
			$this->limitationWarn( 'expensive-parserfunction',
				$this->mExpensiveFunctionCount,
				$this->mOptions->getExpensiveParserFunctionLimit()
			);
		}

		wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );

		# Information on include size limits, for the benefit of users who try to skirt them
		if ( $this->mOptions->getEnableLimitReport() ) {
			$max = $this->mOptions->getMaxIncludeSize();

			$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
			if ( $cpuTime !== null ) {
				$this->mOutput->setLimitReportData( 'limitreport-cputime',
					sprintf( "%.3f", $cpuTime )
				);
			}

			$wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
			$this->mOutput->setLimitReportData( 'limitreport-walltime',
				sprintf( "%.3f", $wallTime )
			);

			$this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
				array( $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() )
			);
			$this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
				array( $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() )
			);
			$this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
				array( $this->mIncludeSizes['post-expand'], $max )
			);
			$this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
				array( $this->mIncludeSizes['arg'], $max )
			);
			$this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
				array( $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() )
			);
			$this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
				array( $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() )
			);
			wfRunHooks( 'ParserLimitReportPrepare', array( $this, $this->mOutput ) );

			$limitReport = "NewPP limit report\n";
			if ( $wgShowHostnames ) {
				$limitReport .= 'Parsed by ' . wfHostname() . "\n";
			}
			foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
				# A hook returning false takes over formatting of this entry.
				if ( wfRunHooks( 'ParserLimitReportFormat',
					array( $key, &$value, &$limitReport, false, false )
				) ) {
					$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
					$valueMsg = wfMessage( array( "$key-value-text", "$key-value" ) )
						->inLanguage( 'en' )->useDatabase( false );
					if ( !$valueMsg->exists() ) {
						$valueMsg = new RawMessage( '$1' );
					}
					if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
						$valueMsg->params( $value );
						$limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
					}
				}
			}
			// Since we're not really outputting HTML, decode the entities and
			// then re-encode the things that need hiding inside HTML comments.
			$limitReport = htmlspecialchars_decode( $limitReport );
			wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) );

			// Sanitize for comment. Note '‐' in the replacement is U+2010,
			// which looks much like the problematic '-'.
			$limitReport = str_replace( array( '-', '&' ), array( '‐', '&amp;' ), $limitReport );
			$text .= "\n<!-- \n$limitReport-->\n";

			if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
				wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
					$this->mTitle->getPrefixedDBkey() );
			}
		}
		$this->mOutput->setText( $text );

		$this->mRevisionId = $oldRevisionId;
		$this->mRevisionObject = $oldRevisionObject;
		$this->mRevisionTimestamp = $oldRevisionTimestamp;
		$this->mRevisionUser = $oldRevisionUser;
		$this->mRevisionSize = $oldRevisionSize;
		$this->mInputSize = false;
		wfProfileOut( $fname );
		wfProfileOut( __METHOD__ );

		return $this->mOutput;
	}

	/**
	 * Run the strip hooks and internalParse() on $text without resetting
	 * state; internalParse() is called with $isMain = false.
	 *
	 * @param string $text
	 * @param PPFrame|bool $frame Frame used for expansion, or false
	 * @return string
	 */
	function recursiveTagParse( $text, $frame = false ) {
		wfProfileIn( __METHOD__ );
		wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
		$text = $this->internalParse( $text, false, $frame );
		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Expand variables/templates in $text and unstrip all markers, after
	 * starting a fresh parse in OT_PREPROCESS mode.
	 *
	 * NOTE(review): $title has a default but precedes the required $options,
	 * so callers must always pass it; kept as-is for interface compatibility.
	 *
	 * @param string $text
	 * @param Title|null $title
	 * @param ParserOptions $options
	 * @param int|null $revid
	 * @return string
	 */
	function preprocess( $text, Title $title = null, ParserOptions $options, $revid = null ) {
		wfProfileIn( __METHOD__ );
		$this->startParse( $title, $options, self::OT_PREPROCESS, true );
		if ( $revid !== null ) {
			$this->mRevisionId = $revid;
		}
		wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
		$text = $this->replaceVariables( $text );
		$text = $this->mStripState->unstripBoth( $text );
		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Like preprocess(), but within the current parse: no state reset and
	 * no strip hooks.
	 *
	 * @param string $text
	 * @param PPFrame|bool $frame
	 * @return string
	 */
	public function recursivePreprocess( $text, $frame = false ) {
		wfProfileIn( __METHOD__ );
		$text = $this->replaceVariables( $text, $frame );
		$text = $this->mStripState->unstripBoth( $text );
		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Substitute $params into $text as a raw message, then expand it in
	 * OT_PLAIN mode with template and argument expansion disabled.
	 *
	 * @param string $text
	 * @param Title $title
	 * @param ParserOptions $options
	 * @param array $params Message parameters
	 * @return string
	 */
	public function getPreloadText( $text, Title $title, ParserOptions $options, $params = array() ) {
		$msg = new RawMessage( $text );
		$text = $msg->params( $params )->plain();

		# Parser (re)initialisation
		$this->startParse( $title, $options, self::OT_PLAIN, true );

		$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
		$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
		$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
		$text = $this->mStripState->unstripBoth( $text );
		return $text;
	}

	/**
	 * @return string Result of wfRandomString( 16 )
	 */
	public static function getRandomString() {
		return wfRandomString( 16 );
	}

	/**
	 * Set the user returned by getUser() in preference to the options' user.
	 *
	 * @param User|null $user
	 */
	function setUser( $user ) {
		$this->mUser = $user;
	}

	/**
	 * @return string The current strip-marker prefix, or '' if none is set yet
	 */
	public function uniqPrefix() {
		if ( !isset( $this->mUniqPrefix ) ) {
			# @todo FIXME: This is probably *horribly wrong*
			# LanguageConverter seems to want $wgParser's uniqPrefix, however
			# if this is called for a parser cache hit, the parser may not
			# have ever been initialized in the first place.
			# Not really sure what the heck is supposed to be going on here.
			return '';
			# throw new MWException( "Accessing uninitialized mUniqPrefix" );
		}
		return $this->mUniqPrefix;
	}

	/**
	 * Set the context title. A falsy value or FakeTitle is replaced by the
	 * title 'NO TITLE'; a title with a fragment is cloned and the fragment
	 * cleared so the caller's object is not modified.
	 *
	 * @param Title|null $t
	 */
	function setTitle( $t ) {
		if ( !$t || $t instanceof FakeTitle ) {
			$t = Title::newFromText( 'NO TITLE' );
		}

		if ( $t->hasFragment() ) {
			# Strip the fragment to avoid various odd effects
			$this->mTitle = clone $t;
			$this->mTitle->setFragment( '' );
		} else {
			$this->mTitle = $t;
		}
	}

	/**
	 * @return Title The context title
	 */
	function getTitle() {
		return $this->mTitle;
	}

	/**
	 * Combined accessor/mutator for the title, implemented with wfSetVar().
	 *
	 * @param Title|null $x
	 * @return mixed Whatever wfSetVar() returns
	 */
	function Title( $x = null ) {
		return wfSetVar( $this->mTitle, $x );
	}

	/**
	 * Set the output type and rebuild the $this->ot boolean shortcut array.
	 *
	 * @param int $ot One of the OT_xxx constants
	 */
	function setOutputType( $ot ) {
		$this->mOutputType = $ot;
		# Shortcut alias
		$this->ot = array(
			'html' => $ot == self::OT_HTML,
			'wiki' => $ot == self::OT_WIKI,
			'pre' => $ot == self::OT_PREPROCESS,
			'plain' => $ot == self::OT_PLAIN,
		);
	}

	/**
	 * Combined accessor/mutator for the output type, implemented with
	 * wfSetVar(). Unlike setOutputType() it does not touch $this->ot.
	 *
	 * @param int|null $x
	 * @return mixed Whatever wfSetVar() returns
	 */
	function OutputType( $x = null ) {
		return wfSetVar( $this->mOutputType, $x );
	}

	/**
	 * @return ParserOutput
	 */
	function getOutput() {
		return $this->mOutput;
	}

	/**
	 * @return ParserOptions
	 */
	function getOptions() {
		return $this->mOptions;
	}

	/**
	 * Combined accessor/mutator for the options, implemented with wfSetVar().
	 *
	 * @param ParserOptions|null $x
	 * @return mixed Whatever wfSetVar() returns
	 */
	function Options( $x = null ) {
		return wfSetVar( $this->mOptions, $x );
	}

	/**
	 * @return int The current link ID; the counter is incremented afterwards
	 */
	function nextLinkID() {
		return $this->mLinkID++;
	}

	/**
	 * @param int $id
	 */
	function setLinkID( $id ) {
		$this->mLinkID = $id;
	}

	/**
	 * Alias of getTargetLanguage().
	 *
	 * @return Language
	 */
	function getFunctionLang() {
		return $this->getTargetLanguage();
	}

	/**
	 * Resolve the language to parse for: the options' explicit target
	 * language, else the user language for interface messages, else the
	 * page language of the context title.
	 *
	 * @throws MWException If no language is forced and no title is set
	 * @return Language
	 */
	public function getTargetLanguage() {
		$target = $this->mOptions->getTargetLanguage();

		if ( $target !== null ) {
			return $target;
		} elseif ( $this->mOptions->getInterfaceMessage() ) {
			return $this->mOptions->getUserLangObj();
		} elseif ( is_null( $this->mTitle ) ) {
			throw new MWException( __METHOD__ . ': $this->mTitle is null' );
		}

		return $this->mTitle->getPageLanguage();
	}

	/**
	 * Alias of getTargetLanguage().
	 *
	 * @return Language
	 */
	function getConverterLanguage() {
		return $this->getTargetLanguage();
	}

	/**
	 * @return User The user set with setUser(), else the options' user
	 */
	function getUser() {
		if ( !is_null( $this->mUser ) ) {
			return $this->mUser;
		}
		return $this->mOptions->getUser();
	}

	/**
	 * Lazily instantiate the preprocessor class chosen in the constructor.
	 *
	 * @return Preprocessor
	 */
	function getPreprocessor() {
		if ( !isset( $this->mPreprocessor ) ) {
			$class = $this->mPreprocessorClass;
			$this->mPreprocessor = new $class( $this );
		}
		return $this->mPreprocessor;
	}

	/**
	 * Replace each occurrence of the given tags (and HTML comments) in $text
	 * with a unique marker and record what was removed.
	 *
	 * Each $matches entry is keyed by marker and holds:
	 * array( element name, inner content (null for <tag />), decoded
	 * attributes, full original text of the element ). An element with no
	 * closing tag runs to the end of the text.
	 *
	 * @param array $elements Tag names; joined into a regex alternation
	 *   without quoting, so they must be regex-safe
	 * @param string $text
	 * @param array &$matches Output; reset to an empty array first
	 * @param string $uniq_prefix Prefix for the generated markers
	 * @return string $text with the elements replaced by markers
	 */
	public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) {
		static $n = 1;
		$stripped = '';
		$matches = array();

		$taglist = implode( '|', $elements );
		$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";

		while ( $text != '' ) {
			$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
			$stripped .= $p[0];
			if ( count( $p ) < 5 ) {
				# No further opening tag or comment
				break;
			}
			if ( count( $p ) > 5 ) {
				# comment
				$element = $p[4];
				$attributes = '';
				$close = '';
				$inside = $p[5];
			} else {
				# tag
				$element = $p[1];
				$attributes = $p[2];
				$close = $p[3];
				$inside = $p[4];
			}

			$marker = "$uniq_prefix-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
			$stripped .= $marker;

			if ( $close === '/>' ) {
				# Empty element tag, <tag />
				$content = null;
				$text = $inside;
				$tail = null;
			} else {
				if ( $element === '!--' ) {
					$end = '/(-->)/';
				} else {
					$end = "/(<\\/$element\\s*>)/i";
				}
				$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
				$content = $q[0];
				if ( count( $q ) < 3 ) {
					# No end tag -- let it run out to the end of the text.
					$tail = '';
					$text = '';
				} else {
					$tail = $q[1];
					$text = $q[2];
				}
			}

			$matches[$marker] = array( $element,
				$content,
				Sanitizer::decodeTagAttributes( $attributes ),
				"<$element$attributes$close$content$tail" );
		}
		return $stripped;
	}

	/**
	 * @return array $this->mStripList
	 */
	function getStripList() {
		return $this->mStripList;
	}

	/**
	 * Store $text in the general strip state under a new marker.
	 *
	 * @param string $text
	 * @return string The marker to put in place of $text
	 */
	function insertStripItem( $text ) {
		$rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
		$this->mMarkerIndex++;
		$this->mStripState->addGeneral( $rnd, $text );
		return $rnd;
	}

	/**
	 * Convert wikitext table markup ({| |- | ! |+ |}) to HTML table tags,
	 * line by line. Nested tables are tracked with parallel stacks, one
	 * entry per open table.
	 *
	 * @param string $text
	 * @return string
	 */
	function doTableStuff( $text ) {
		wfProfileIn( __METHOD__ );

		$lines = StringUtils::explode( "\n", $text );
		$out = '';
		$td_history = array(); # Is currently a td tag open?
		$last_tag_history = array(); # Save history of last tag activated (td, th or caption)
		$tr_history = array(); # Is currently a tr tag open?
		$tr_attributes = array(); # history of tr attributes
		$has_opened_tr = array(); # Did this table open a <tr> element?
		$indent_level = 0; # indent level of the table

		foreach ( $lines as $outLine ) {
			$line = trim( $outLine );

			if ( $line === '' ) { # empty line, go to next line
				$out .= $outLine . "\n";
				continue;
			}

			$first_character = $line[0];
			$matches = array();

			if ( preg_match( '/^(:*)\{\|(.*)$/', $line, $matches ) ) {
				# First check if we are starting a new table
				$indent_level = strlen( $matches[1] );

				$attributes = $this->mStripState->unstripBoth( $matches[2] );
				$attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );

				$outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
				array_push( $td_history, false );
				array_push( $last_tag_history, '' );
				array_push( $tr_history, false );
				array_push( $tr_attributes, '' );
				array_push( $has_opened_tr, false );
			} elseif ( count( $td_history ) == 0 ) {
				# Don't do any of the following
				$out .= $outLine . "\n";
				continue;
			} elseif ( substr( $line, 0, 2 ) === '|}' ) {
				# We are ending a table
				$line = '</table>' . substr( $line, 2 );
				$last_tag = array_pop( $last_tag_history );

				if ( !array_pop( $has_opened_tr ) ) {
					$line = "<tr><td></td></tr>{$line}";
				}

				if ( array_pop( $tr_history ) ) {
					$line = "</tr>{$line}";
				}

				if ( array_pop( $td_history ) ) {
					$line = "</{$last_tag}>{$line}";
				}
				array_pop( $tr_attributes );
				$outLine = $line . str_repeat( '</dd></dl>', $indent_level );
			} elseif ( substr( $line, 0, 2 ) === '|-' ) {
				# Now we have a table row
				$line = preg_replace( '#^\|-+#', '', $line );

				# Whats after the tag is now only attributes
				$attributes = $this->mStripState->unstripBoth( $line );
				$attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
				array_pop( $tr_attributes );
				array_push( $tr_attributes, $attributes );

				$line = '';
				$last_tag = array_pop( $last_tag_history );
				array_pop( $has_opened_tr );
				array_push( $has_opened_tr, true );

				if ( array_pop( $tr_history ) ) {
					$line = '</tr>';
				}

				if ( array_pop( $td_history ) ) {
					$line = "</{$last_tag}>{$line}";
				}

				$outLine = $line;
				array_push( $tr_history, false );
				array_push( $td_history, false );
				array_push( $last_tag_history, '' );
			} elseif ( $first_character === '|' || $first_character === '!' || substr( $line, 0, 2 ) === '|+' ) {
				# This might be cell elements, td, th or captions
				if ( substr( $line, 0, 2 ) === '|+' ) {
					$first_character = '+';
					$line = substr( $line, 1 );
				}

				$line = substr( $line, 1 );

				if ( $first_character === '!' ) {
					$line = str_replace( '!!', '||', $line );
				}

				# Split up multiple cells on the same line.
				# FIXME : This can result in improper nesting of tags processed
				# by earlier parser steps, but should avoid splitting up eg
				# attribute values containing literal "||".
				$cells = StringUtils::explodeMarkup( '||', $line );

				$outLine = '';

				# Loop through each table cell
				foreach ( $cells as $cell ) {
					$previous = '';
					if ( $first_character !== '+' ) {
						# Open a row if none is open; the pending row attributes are consumed
						$tr_after = array_pop( $tr_attributes );
						if ( !array_pop( $tr_history ) ) {
							$previous = "<tr{$tr_after}>\n";
						}
						array_push( $tr_history, true );
						array_push( $tr_attributes, '' );
						array_pop( $has_opened_tr );
						array_push( $has_opened_tr, true );
					}

					$last_tag = array_pop( $last_tag_history );

					if ( array_pop( $td_history ) ) {
						$previous = "</{$last_tag}>\n{$previous}";
					}

					if ( $first_character === '|' ) {
						$last_tag = 'td';
					} elseif ( $first_character === '!' ) {
						$last_tag = 'th';
					} elseif ( $first_character === '+' ) {
						$last_tag = 'caption';
					} else {
						$last_tag = '';
					}

					array_push( $last_tag_history, $last_tag );

					# A cell could contain both parameters and data
					$cell_data = explode( '|', $cell, 2 );

					# Bug 553: Note that a '|' inside an invalid link should not
					# be mistaken as delimiting cell parameters
					if ( strpos( $cell_data[0], '[[' ) !== false ) {
						$cell = "{$previous}<{$last_tag}>{$cell}";
					} elseif ( count( $cell_data ) == 1 ) {
						$cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
					} else {
						$attributes = $this->mStripState->unstripBoth( $cell_data[0] );
						$attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
						$cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
					}

					$outLine .= $cell;
					array_push( $td_history, true );
				}
			}
			$out .= $outLine . "\n";
		}

		# Closing open td, tr && table
		# NOTE(review): a dangling cell is always closed with </td> here, even
		# when the open cell was a th or caption; left unchanged.
		while ( count( $td_history ) > 0 ) {
			if ( array_pop( $td_history ) ) {
				$out .= "</td>\n";
			}
			if ( array_pop( $tr_history ) ) {
				$out .= "</tr>\n";
			}
			if ( !array_pop( $has_opened_tr ) ) {
				$out .= "<tr><td></td></tr>\n";
			}

			$out .= "</table>\n";
		}

		# Remove trailing line-ending (b/c)
		if ( substr( $out, -1 ) === "\n" ) {
			$out = substr( $out, 0, -1 );
		}

		# special case: don't return empty table
		if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
			$out = '';
		}

		wfProfileOut( __METHOD__ );

		return $out;
	}

	/**
	 * Core wikitext transformation shared by parse() and
	 * recursiveTagParse(): variable expansion, HTML sanitising, tables,
	 * horizontal rules, double underscores, headings, links, quotes and
	 * magic links, in that order.
	 *
	 * @param string $text
	 * @param bool $isMain Passed through to formatHeadings()
	 * @param PPFrame|bool $frame Frame used for expansion; false to use
	 *   replaceVariables()
	 * @return string Unmodified-by-this-method $text if a
	 *   'ParserBeforeInternalParse' handler returns false
	 */
	function internalParse( $text, $isMain = true, $frame = false ) {
		wfProfileIn( __METHOD__ );

		$origText = $text;

		# Hook to suspend the parser in this state
		if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) {
			wfProfileOut( __METHOD__ );
			return $text;
		}

		# if $frame is provided, then use $frame for replacing any variables
		if ( $frame ) {
			# use frame depth to infer how include/noinclude tags should be handled
			# depth=0 means this is the top-level document; otherwise it's an included document
			if ( !$frame->depth ) {
				$flag = 0;
			} else {
				$flag = Parser::PTD_FOR_INCLUSION;
			}
			$dom = $this->preprocessToDom( $text, $flag );
			$text = $frame->expand( $dom );
		} else {
			# if $frame is not provided, then use old-style replaceVariables
			$text = $this->replaceVariables( $text );
		}

		wfRunHooks( 'InternalParseBeforeSanitize', array( &$this, &$text, &$this->mStripState ) );
		$text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ), false, array_keys( $this->mTransparentTagHooks ) );
		wfRunHooks( 'InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ) );

		# Tables need to come after variable replacement for things to work
		# properly; putting them before other transformations should keep
		# exciting things like link expansions from showing up in surprising
		# places.
		$text = $this->doTableStuff( $text );

		$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

		$text = $this->doDoubleUnderscore( $text );

		$text = $this->doHeadings( $text );
		$text = $this->replaceInternalLinks( $text );
		$text = $this->doAllQuotes( $text );
		$text = $this->replaceExternalLinks( $text );

		# replaceInternalLinks may sometimes leave behind
		# absolute URLs, which have to be masked to hide them from replaceExternalLinks
		$text = str_replace( $this->mUniqPrefix . 'NOPARSE', '', $text );

		$text = $this->doMagicLinks( $text );
		$text = $this->formatHeadings( $text, $origText, $isMain );

		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Link free external URLs and RFC / PMID / ISBN references, skipping
	 * text inside existing anchors and HTML tags.
	 *
	 * The pattern uses the /x flag with inline '#' comments, so the line
	 * breaks inside the regex literal are significant: each comment runs to
	 * the end of its line.
	 *
	 * @param string $text
	 * @return string
	 */
	function doMagicLinks( $text ) {
		wfProfileIn( __METHOD__ );
		$prots = wfUrlProtocolsWithoutProtRel();
		$urlChar = self::EXT_LINK_URL_CLASS;
		$text = preg_replace_callback(
			'!(?:                           # Start cases
				(<a[ \t\r\n>].*?</a>) |     # m[1]: Skip link text
				(<.*?>) |                   # m[2]: Skip stuff inside HTML elements' . "
				(\\b(?i:$prots)$urlChar+) |  # m[3]: Free external links" . '
				(?:RFC|PMID)\s+([0-9]+) |   # m[4]: RFC or PMID, capture number
				ISBN\s+(\b                  # m[5]: ISBN, capture number
					(?: 97[89] [\ \-]? )?   # optional 13-digit ISBN prefix
					(?: [0-9] [\ \-]? ){9}  # 9 digits with opt. delimiters
					[0-9Xx]                 # check digit
					\b)
			)!xu', array( &$this, 'magicLinkCallback' ), $text );
		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * preg_replace_callback() handler for doMagicLinks(); see that method's
	 * pattern for the meaning of the capture groups.
	 *
	 * @param array $m Match array
	 * @throws MWException If group 4 matched but the text starts with
	 *   neither 'RFC' nor 'PMID'
	 * @return string Replacement HTML, or $m[0] unchanged for skipped cases
	 */
	function magicLinkCallback( $m ) {
		if ( isset( $m[1] ) && $m[1] !== '' ) {
			# Skip anchor
			return $m[0];
		} elseif ( isset( $m[2] ) && $m[2] !== '' ) {
			# Skip HTML element
			return $m[0];
		} elseif ( isset( $m[3] ) && $m[3] !== '' ) {
			# Free external link
			return $this->makeFreeExternalLink( $m[0] );
		} elseif ( isset( $m[4] ) && $m[4] !== '' ) {
			# RFC or PMID
			if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
				$keyword = 'RFC';
				$urlmsg = 'rfcurl';
				$cssClass = 'mw-magiclink-rfc';
				$id = $m[4];
			} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
				$keyword = 'PMID';
				$urlmsg = 'pubmedurl';
				$cssClass = 'mw-magiclink-pmid';
				$id = $m[4];
			} else {
				throw new MWException( __METHOD__ . ': unrecognised match type "' .
					substr( $m[0], 0, 20 ) . '"' );
			}
			$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
			return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass );
		} elseif ( isset( $m[5] ) && $m[5] !== '' ) {
			# ISBN
			$isbn = $m[5];
			# Normalise for the Booksources title: drop separators, upper-case the check digit
			$num = strtr( $isbn, array(
				'-' => '',
				' ' => '',
				'x' => 'X',
			));
			$titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
			return '<a href="' .
				htmlspecialchars( $titleObj->getLocalURL() ) .
				"\" class=\"internal mw-magiclink-isbn\">ISBN $isbn</a>";
		} else {
			return $m[0];
		}
	}

	/**
	 * Turn a bare URL into an external link (or an inline image when
	 * maybeMakeExternalImage() accepts it). Anything from the first escaped
	 * '<' or '>' onwards, and trailing punctuation, is moved after the link.
	 *
	 * @param string $url
	 * @return string HTML followed by the trailing text
	 */
	function makeFreeExternalLink( $url ) {
		wfProfileIn( __METHOD__ );

		$trail = '';

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$m2 = array();
		if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
			$trail = substr( $url, $m2[0][1] ) . $trail;
			$url = substr( $url, 0, $m2[0][1] );
		}

		# Move trailing punctuation to $trail
		# NOTE(review): strspn() takes a literal character list, so the
		# backslash in this string is itself treated as trailing punctuation.
		$sep = ',;\.:!?';
		# If there is no left bracket, then consider right brackets fair game too
		if ( strpos( $url, '(' ) === false ) {
			$sep .= ')';
		}

		$numSepChars = strspn( strrev( $url ), $sep );
		if ( $numSepChars ) {
			$trail = substr( $url, -$numSepChars ) . $trail;
			$url = substr( $url, 0, -$numSepChars );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Is this an external image?
		$text = $this->maybeMakeExternalImage( $url );
		if ( $text === false ) {
			# Not an image, make a link
			$text = Linker::makeExternalLink( $url,
				$this->getConverterLanguage()->markNoConversion( $url, true ),
				true, 'free',
				$this->getExternalLinkAttribs( $url ) );
			# Register it in the output object...
			# Replace unnecessary URL escape codes with their equivalent characters
			$pasteurized = self::replaceUnusualEscapes( $url );
			$this->mOutput->addExternalLink( $pasteurized );
		}
		wfProfileOut( __METHOD__ );
		return $text . $trail;
	}

	/**
	 * Convert "== Heading ==" lines to <h1>..<h6> elements. Levels are
	 * processed from 6 down to 1 so the longest run of '=' wins.
	 *
	 * @param string $text
	 * @return string
	 */
	function doHeadings( $text ) {
		wfProfileIn( __METHOD__ );
		for ( $i = 6; $i >= 1; --$i ) {
			$h = str_repeat( '=', $i );
			$text = preg_replace( "/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text );
		}
		wfProfileOut( __METHOD__ );
		return $text;
	}

	function doAllQuotes( $text ) {
		wfProfileIn( __METHOD__ );
		$outtext = '';
		$lines = StringUtils::explode( "\n", $text );
		foreach ( $lines as $line ) {
			$outtext .= $this->doQuotes( $line ) .
"\n"; 01417 } 01418 $outtext = substr( $outtext, 0, -1 ); 01419 wfProfileOut( __METHOD__ ); 01420 return $outtext; 01421 } 01422 01430 public function doQuotes( $text ) { 01431 $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE ); 01432 $countarr = count( $arr ); 01433 if ( $countarr == 1 ) { 01434 return $text; 01435 } 01436 01437 // First, do some preliminary work. This may shift some apostrophes from 01438 // being mark-up to being text. It also counts the number of occurrences 01439 // of bold and italics mark-ups. 01440 $numbold = 0; 01441 $numitalics = 0; 01442 for ( $i = 1; $i < $countarr; $i += 2 ) { 01443 $thislen = strlen( $arr[$i] ); 01444 // If there are ever four apostrophes, assume the first is supposed to 01445 // be text, and the remaining three constitute mark-up for bold text. 01446 // (bug 13227: ''''foo'''' turns into ' ''' foo ' ''') 01447 if ( $thislen == 4 ) { 01448 $arr[$i - 1] .= "'"; 01449 $arr[$i] = "'''"; 01450 $thislen = 3; 01451 } elseif ( $thislen > 5 ) { 01452 // If there are more than 5 apostrophes in a row, assume they're all 01453 // text except for the last 5. 01454 // (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''') 01455 $arr[$i - 1] .= str_repeat( "'", $thislen - 5 ); 01456 $arr[$i] = "'''''"; 01457 $thislen = 5; 01458 } 01459 // Count the number of occurrences of bold and italics mark-ups. 01460 if ( $thislen == 2 ) { 01461 $numitalics++; 01462 } elseif ( $thislen == 3 ) { 01463 $numbold++; 01464 } elseif ( $thislen == 5 ) { 01465 $numitalics++; 01466 $numbold++; 01467 } 01468 } 01469 01470 // If there is an odd number of both bold and italics, it is likely 01471 // that one of the bold ones was meant to be an apostrophe followed 01472 // by italics. Which one we cannot know for certain, but it is more 01473 // likely to be one that has a single-letter word before it. 
01474 if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) { 01475 $firstsingleletterword = -1; 01476 $firstmultiletterword = -1; 01477 $firstspace = -1; 01478 for ( $i = 1; $i < $countarr; $i += 2 ) { 01479 if ( strlen( $arr[$i] ) == 3 ) { 01480 $x1 = substr( $arr[$i - 1], -1 ); 01481 $x2 = substr( $arr[$i - 1], -2, 1 ); 01482 if ( $x1 === ' ' ) { 01483 if ( $firstspace == -1 ) { 01484 $firstspace = $i; 01485 } 01486 } elseif ( $x2 === ' ' ) { 01487 if ( $firstsingleletterword == -1 ) { 01488 $firstsingleletterword = $i; 01489 // if $firstsingleletterword is set, we don't 01490 // look at the other options, so we can bail early. 01491 break; 01492 } 01493 } else { 01494 if ( $firstmultiletterword == -1 ) { 01495 $firstmultiletterword = $i; 01496 } 01497 } 01498 } 01499 } 01500 01501 // If there is a single-letter word, use it! 01502 if ( $firstsingleletterword > -1 ) { 01503 $arr[$firstsingleletterword] = "''"; 01504 $arr[$firstsingleletterword - 1] .= "'"; 01505 } elseif ( $firstmultiletterword > -1 ) { 01506 // If not, but there's a multi-letter word, use that one. 01507 $arr[$firstmultiletterword] = "''"; 01508 $arr[$firstmultiletterword - 1] .= "'"; 01509 } elseif ( $firstspace > -1 ) { 01510 // ... otherwise use the first one that has neither. 01511 // (notice that it is possible for all three to be -1 if, for example, 01512 // there is only one pentuple-apostrophe in the line) 01513 $arr[$firstspace] = "''"; 01514 $arr[$firstspace - 1] .= "'"; 01515 } 01516 } 01517 01518 // Now let's actually convert our apostrophic mush to HTML! 
01519 $output = ''; 01520 $buffer = ''; 01521 $state = ''; 01522 $i = 0; 01523 foreach ( $arr as $r ) { 01524 if ( ( $i % 2 ) == 0 ) { 01525 if ( $state === 'both' ) { 01526 $buffer .= $r; 01527 } else { 01528 $output .= $r; 01529 } 01530 } else { 01531 $thislen = strlen( $r ); 01532 if ( $thislen == 2 ) { 01533 if ( $state === 'i' ) { 01534 $output .= '</i>'; 01535 $state = ''; 01536 } elseif ( $state === 'bi' ) { 01537 $output .= '</i>'; 01538 $state = 'b'; 01539 } elseif ( $state === 'ib' ) { 01540 $output .= '</b></i><b>'; 01541 $state = 'b'; 01542 } elseif ( $state === 'both' ) { 01543 $output .= '<b><i>' . $buffer . '</i>'; 01544 $state = 'b'; 01545 } else { // $state can be 'b' or '' 01546 $output .= '<i>'; 01547 $state .= 'i'; 01548 } 01549 } elseif ( $thislen == 3 ) { 01550 if ( $state === 'b' ) { 01551 $output .= '</b>'; 01552 $state = ''; 01553 } elseif ( $state === 'bi' ) { 01554 $output .= '</i></b><i>'; 01555 $state = 'i'; 01556 } elseif ( $state === 'ib' ) { 01557 $output .= '</b>'; 01558 $state = 'i'; 01559 } elseif ( $state === 'both' ) { 01560 $output .= '<i><b>' . $buffer . '</b>'; 01561 $state = 'i'; 01562 } else { // $state can be 'i' or '' 01563 $output .= '<b>'; 01564 $state .= 'b'; 01565 } 01566 } elseif ( $thislen == 5 ) { 01567 if ( $state === 'b' ) { 01568 $output .= '</b><i>'; 01569 $state = 'i'; 01570 } elseif ( $state === 'i' ) { 01571 $output .= '</i><b>'; 01572 $state = 'b'; 01573 } elseif ( $state === 'bi' ) { 01574 $output .= '</i></b>'; 01575 $state = ''; 01576 } elseif ( $state === 'ib' ) { 01577 $output .= '</b></i>'; 01578 $state = ''; 01579 } elseif ( $state === 'both' ) { 01580 $output .= '<i><b>' . $buffer . '</b></i>'; 01581 $state = ''; 01582 } else { // ($state == '') 01583 $buffer = ''; 01584 $state = 'both'; 01585 } 01586 } 01587 } 01588 $i++; 01589 } 01590 // Now close all remaining tags. Notice that the order is important. 
01591 if ( $state === 'b' || $state === 'ib' ) { 01592 $output .= '</b>'; 01593 } 01594 if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) { 01595 $output .= '</i>'; 01596 } 01597 if ( $state === 'bi' ) { 01598 $output .= '</b>'; 01599 } 01600 // There might be lonely ''''', so make sure we have a buffer 01601 if ( $state === 'both' && $buffer ) { 01602 $output .= '<b><i>' . $buffer . '</i></b>'; 01603 } 01604 return $output; 01605 } 01606 01620 function replaceExternalLinks( $text ) { 01621 wfProfileIn( __METHOD__ ); 01622 01623 $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE ); 01624 if ( $bits === false ) { 01625 wfProfileOut( __METHOD__ ); 01626 throw new MWException( "PCRE needs to be compiled with --enable-unicode-properties in order for MediaWiki to function" ); 01627 } 01628 $s = array_shift( $bits ); 01629 01630 $i = 0; 01631 while ( $i < count( $bits ) ) { 01632 $url = $bits[$i++]; 01633 $i++; // protocol 01634 $text = $bits[$i++]; 01635 $trail = $bits[$i++]; 01636 01637 # The characters '<' and '>' (which were escaped by 01638 # removeHTMLtags()) should not be included in 01639 # URLs, per RFC 2396. 01640 $m2 = array(); 01641 if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) { 01642 $text = substr( $url, $m2[0][1] ) . ' ' . $text; 01643 $url = substr( $url, 0, $m2[0][1] ); 01644 } 01645 01646 # If the link text is an image URL, replace it with an <img> tag 01647 # This happened by accident in the original parser, but some people used it extensively 01648 $img = $this->maybeMakeExternalImage( $text ); 01649 if ( $img !== false ) { 01650 $text = $img; 01651 } 01652 01653 $dtrail = ''; 01654 01655 # Set linktype for CSS - if URL==text, link is essentially free 01656 $linktype = ( $text === $url ) ? 'free' : 'text'; 01657 01658 # No link text, e.g. [http://domain.tld/some.link] 01659 if ( $text == '' ) { 01660 # Autonumber 01661 $langObj = $this->getTargetLanguage(); 01662 $text = '[' . 
$langObj->formatNum( ++$this->mAutonumber ) . ']'; 01663 $linktype = 'autonumber'; 01664 } else { 01665 # Have link text, e.g. [http://domain.tld/some.link text]s 01666 # Check for trail 01667 list( $dtrail, $trail ) = Linker::splitTrail( $trail ); 01668 } 01669 01670 $text = $this->getConverterLanguage()->markNoConversion( $text ); 01671 01672 $url = Sanitizer::cleanUrl( $url ); 01673 01674 # Use the encoded URL 01675 # This means that users can paste URLs directly into the text 01676 # Funny characters like ö aren't valid in URLs anyway 01677 # This was changed in August 2004 01678 $s .= Linker::makeExternalLink( $url, $text, false, $linktype, 01679 $this->getExternalLinkAttribs( $url ) ) . $dtrail . $trail; 01680 01681 # Register link in the output object. 01682 # Replace unnecessary URL escape codes with the referenced character 01683 # This prevents spammers from hiding links from the filters 01684 $pasteurized = self::replaceUnusualEscapes( $url ); 01685 $this->mOutput->addExternalLink( $pasteurized ); 01686 } 01687 01688 wfProfileOut( __METHOD__ ); 01689 return $s; 01690 } 01691 01701 public static function getExternalLinkRel( $url = false, $title = null ) { 01702 global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions; 01703 $ns = $title ? 
$title->getNamespace() : false; 01704 if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions ) 01705 && !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) 01706 ) { 01707 return 'nofollow'; 01708 } 01709 return null; 01710 } 01711 01722 function getExternalLinkAttribs( $url = false ) { 01723 $attribs = array(); 01724 $attribs['rel'] = self::getExternalLinkRel( $url, $this->mTitle ); 01725 01726 if ( $this->mOptions->getExternalLinkTarget() ) { 01727 $attribs['target'] = $this->mOptions->getExternalLinkTarget(); 01728 } 01729 return $attribs; 01730 } 01731 01743 static function replaceUnusualEscapes( $url ) { 01744 return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', 01745 array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url ); 01746 } 01747 01756 private static function replaceUnusualEscapesCallback( $matches ) { 01757 $char = urldecode( $matches[0] ); 01758 $ord = ord( $char ); 01759 # Is it an unsafe or HTTP reserved character according to RFC 1738? 01760 if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) { 01761 # No, shouldn't be escaped 01762 return $char; 01763 } else { 01764 # Yes, leave it escaped 01765 return $matches[0]; 01766 } 01767 } 01768 01778 function maybeMakeExternalImage( $url ) { 01779 $imagesfrom = $this->mOptions->getAllowExternalImagesFrom(); 01780 $imagesexception = !empty( $imagesfrom ); 01781 $text = false; 01782 # $imagesfrom could be either a single string or an array of strings, parse out the latter 01783 if ( $imagesexception && is_array( $imagesfrom ) ) { 01784 $imagematch = false; 01785 foreach ( $imagesfrom as $match ) { 01786 if ( strpos( $url, $match ) === 0 ) { 01787 $imagematch = true; 01788 break; 01789 } 01790 } 01791 } elseif ( $imagesexception ) { 01792 $imagematch = ( strpos( $url, $imagesfrom ) === 0 ); 01793 } else { 01794 $imagematch = false; 01795 } 01796 if ( $this->mOptions->getAllowExternalImages() 01797 || ( $imagesexception && $imagematch ) ) { 01798 if ( preg_match( 
self::EXT_IMAGE_REGEX, $url ) ) { 01799 # Image found 01800 $text = Linker::makeExternalImage( $url ); 01801 } 01802 } 01803 if ( !$text && $this->mOptions->getEnableImageWhitelist() 01804 && preg_match( self::EXT_IMAGE_REGEX, $url ) ) { 01805 $whitelist = explode( "\n", wfMessage( 'external_image_whitelist' )->inContentLanguage()->text() ); 01806 foreach ( $whitelist as $entry ) { 01807 # Sanitize the regex fragment, make it case-insensitive, ignore blank entries/comments 01808 if ( strpos( $entry, '#' ) === 0 || $entry === '' ) { 01809 continue; 01810 } 01811 if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) { 01812 # Image matches a whitelist entry 01813 $text = Linker::makeExternalImage( $url ); 01814 break; 01815 } 01816 } 01817 } 01818 return $text; 01819 } 01820 01830 function replaceInternalLinks( $s ) { 01831 $this->mLinkHolders->merge( $this->replaceInternalLinks2( $s ) ); 01832 return $s; 01833 } 01834 01843 function replaceInternalLinks2( &$s ) { 01844 wfProfileIn( __METHOD__ ); 01845 01846 wfProfileIn( __METHOD__ . '-setup' ); 01847 static $tc = false, $e1, $e1_img; 01848 # the % is needed to support urlencoded titles as well 01849 if ( !$tc ) { 01850 $tc = Title::legalChars() . '#%'; 01851 # Match a link having the form [[namespace:link|alternate]]trail 01852 $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; 01853 # Match cases where there is no "]]", which might still be images 01854 $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; 01855 } 01856 01857 $holders = new LinkHolderArray( $this ); 01858 01859 # split the entire text string on occurrences of [[ 01860 $a = StringUtils::explode( '[[', ' ' . 
$s ); 01861 # get the first element (all text up to first [[), and remove the space we added 01862 $s = $a->current(); 01863 $a->next(); 01864 $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void" 01865 $s = substr( $s, 1 ); 01866 01867 $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension(); 01868 $e2 = null; 01869 if ( $useLinkPrefixExtension ) { 01870 # Match the end of a line for a word that's not followed by whitespace, 01871 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched 01872 global $wgContLang; 01873 $charset = $wgContLang->linkPrefixCharset(); 01874 $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu"; 01875 } 01876 01877 if ( is_null( $this->mTitle ) ) { 01878 wfProfileOut( __METHOD__ . '-setup' ); 01879 wfProfileOut( __METHOD__ ); 01880 throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" ); 01881 } 01882 $nottalk = !$this->mTitle->isTalkPage(); 01883 01884 if ( $useLinkPrefixExtension ) { 01885 $m = array(); 01886 if ( preg_match( $e2, $s, $m ) ) { 01887 $first_prefix = $m[2]; 01888 } else { 01889 $first_prefix = false; 01890 } 01891 } else { 01892 $prefix = ''; 01893 } 01894 01895 $useSubpages = $this->areSubpagesAllowed(); 01896 wfProfileOut( __METHOD__ . '-setup' ); 01897 01898 # Loop for each link 01899 for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) { 01900 # Check for excessive memory usage 01901 if ( $holders->isBig() ) { 01902 # Too big 01903 # Do the existence check, replace the link holders and clear the array 01904 $holders->replace( $s ); 01905 $holders->clear(); 01906 } 01907 01908 if ( $useLinkPrefixExtension ) { 01909 wfProfileIn( __METHOD__ . '-prefixhandling' ); 01910 if ( preg_match( $e2, $s, $m ) ) { 01911 $prefix = $m[2]; 01912 $s = $m[1]; 01913 } else { 01914 $prefix = ''; 01915 } 01916 # first link 01917 if ( $first_prefix ) { 01918 $prefix = $first_prefix; 01919 $first_prefix = false; 01920 } 01921 wfProfileOut( __METHOD__ . 
'-prefixhandling' ); 01922 } 01923 01924 $might_be_img = false; 01925 01926 wfProfileIn( __METHOD__ . "-e1" ); 01927 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt 01928 $text = $m[2]; 01929 # If we get a ] at the beginning of $m[3] that means we have a link that's something like: 01930 # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up, 01931 # the real problem is with the $e1 regex 01932 # See bug 1300. 01933 # 01934 # Still some problems for cases where the ] is meant to be outside punctuation, 01935 # and no image is in sight. See bug 2095. 01936 # 01937 if ( $text !== '' 01938 && substr( $m[3], 0, 1 ) === ']' 01939 && strpos( $text, '[' ) !== false 01940 ) { 01941 $text .= ']'; # so that replaceExternalLinks($text) works later 01942 $m[3] = substr( $m[3], 1 ); 01943 } 01944 # fix up urlencoded title texts 01945 if ( strpos( $m[1], '%' ) !== false ) { 01946 # Should anchors '#' also be rejected? 01947 $m[1] = str_replace( array( '<', '>' ), array( '<', '>' ), rawurldecode( $m[1] ) ); 01948 } 01949 $trail = $m[3]; 01950 } elseif ( preg_match( $e1_img, $line, $m ) ) { # Invalid, but might be an image with a link in its caption 01951 $might_be_img = true; 01952 $text = $m[2]; 01953 if ( strpos( $m[1], '%' ) !== false ) { 01954 $m[1] = rawurldecode( $m[1] ); 01955 } 01956 $trail = ""; 01957 } else { # Invalid form; output directly 01958 $s .= $prefix . '[[' . $line; 01959 wfProfileOut( __METHOD__ . "-e1" ); 01960 continue; 01961 } 01962 wfProfileOut( __METHOD__ . "-e1" ); 01963 wfProfileIn( __METHOD__ . "-misc" ); 01964 01965 # Don't allow internal links to pages containing 01966 # PROTO: where PROTO is a valid URL protocol; these 01967 # should be external links. 01968 if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $m[1] ) ) { 01969 $s .= $prefix . '[[' . $line; 01970 wfProfileOut( __METHOD__ . 
"-misc" ); 01971 continue; 01972 } 01973 01974 # Make subpage if necessary 01975 if ( $useSubpages ) { 01976 $link = $this->maybeDoSubpageLink( $m[1], $text ); 01977 } else { 01978 $link = $m[1]; 01979 } 01980 01981 $noforce = ( substr( $m[1], 0, 1 ) !== ':' ); 01982 if ( !$noforce ) { 01983 # Strip off leading ':' 01984 $link = substr( $link, 1 ); 01985 } 01986 01987 wfProfileOut( __METHOD__ . "-misc" ); 01988 wfProfileIn( __METHOD__ . "-title" ); 01989 $nt = Title::newFromText( $this->mStripState->unstripNoWiki( $link ) ); 01990 if ( $nt === null ) { 01991 $s .= $prefix . '[[' . $line; 01992 wfProfileOut( __METHOD__ . "-title" ); 01993 continue; 01994 } 01995 01996 $ns = $nt->getNamespace(); 01997 $iw = $nt->getInterwiki(); 01998 wfProfileOut( __METHOD__ . "-title" ); 01999 02000 if ( $might_be_img ) { # if this is actually an invalid link 02001 wfProfileIn( __METHOD__ . "-might_be_img" ); 02002 if ( $ns == NS_FILE && $noforce ) { # but might be an image 02003 $found = false; 02004 while ( true ) { 02005 # look at the next 'line' to see if we can close it there 02006 $a->next(); 02007 $next_line = $a->current(); 02008 if ( $next_line === false || $next_line === null ) { 02009 break; 02010 } 02011 $m = explode( ']]', $next_line, 3 ); 02012 if ( count( $m ) == 3 ) { 02013 # the first ]] closes the inner link, the second the image 02014 $found = true; 02015 $text .= "[[{$m[0]}]]{$m[1]}"; 02016 $trail = $m[2]; 02017 break; 02018 } elseif ( count( $m ) == 2 ) { 02019 # if there's exactly one ]] that's fine, we'll keep looking 02020 $text .= "[[{$m[0]}]]{$m[1]}"; 02021 } else { 02022 # if $next_line is invalid too, we need look no further 02023 $text .= '[[' . 
$next_line; 02024 break; 02025 } 02026 } 02027 if ( !$found ) { 02028 # we couldn't find the end of this imageLink, so output it raw 02029 # but don't ignore what might be perfectly normal links in the text we've examined 02030 $holders->merge( $this->replaceInternalLinks2( $text ) ); 02031 $s .= "{$prefix}[[$link|$text"; 02032 # note: no $trail, because without an end, there *is* no trail 02033 wfProfileOut( __METHOD__ . "-might_be_img" ); 02034 continue; 02035 } 02036 } else { # it's not an image, so output it raw 02037 $s .= "{$prefix}[[$link|$text"; 02038 # note: no $trail, because without an end, there *is* no trail 02039 wfProfileOut( __METHOD__ . "-might_be_img" ); 02040 continue; 02041 } 02042 wfProfileOut( __METHOD__ . "-might_be_img" ); 02043 } 02044 02045 $wasblank = ( $text == '' ); 02046 if ( $wasblank ) { 02047 $text = $link; 02048 } else { 02049 # Bug 4598 madness. Handle the quotes only if they come from the alternate part 02050 # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a> 02051 # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']] 02052 # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a> 02053 $text = $this->doQuotes( $text ); 02054 } 02055 02056 # Link not escaped by : , create the various objects 02057 if ( $noforce ) { 02058 # Interwikis 02059 wfProfileIn( __METHOD__ . "-interwiki" ); 02060 if ( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && Language::fetchLanguageName( $iw, null, 'mw' ) ) { 02061 // XXX: the above check prevents links to sites with identifiers that are not language codes 02062 02063 # Bug 24502: filter duplicates 02064 if ( !isset( $this->mLangLinkLanguages[$iw] ) ) { 02065 $this->mLangLinkLanguages[$iw] = true; 02066 $this->mOutput->addLanguageLink( $nt->getFullText() ); 02067 } 02068 02069 $s = rtrim( $s . $prefix ); 02070 $s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail; 02071 wfProfileOut( __METHOD__ . 
"-interwiki" ); 02072 continue; 02073 } 02074 wfProfileOut( __METHOD__ . "-interwiki" ); 02075 02076 if ( $ns == NS_FILE ) { 02077 wfProfileIn( __METHOD__ . "-image" ); 02078 if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) { 02079 if ( $wasblank ) { 02080 # if no parameters were passed, $text 02081 # becomes something like "File:Foo.png", 02082 # which we don't want to pass on to the 02083 # image generator 02084 $text = ''; 02085 } else { 02086 # recursively parse links inside the image caption 02087 # actually, this will parse them in any other parameters, too, 02088 # but it might be hard to fix that, and it doesn't matter ATM 02089 $text = $this->replaceExternalLinks( $text ); 02090 $holders->merge( $this->replaceInternalLinks2( $text ) ); 02091 } 02092 # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them 02093 $s .= $prefix . $this->armorLinks( 02094 $this->makeImage( $nt, $text, $holders ) ) . $trail; 02095 } else { 02096 $s .= $prefix . $trail; 02097 } 02098 wfProfileOut( __METHOD__ . "-image" ); 02099 continue; 02100 } 02101 02102 if ( $ns == NS_CATEGORY ) { 02103 wfProfileIn( __METHOD__ . "-category" ); 02104 $s = rtrim( $s . "\n" ); # bug 87 02105 02106 if ( $wasblank ) { 02107 $sortkey = $this->getDefaultSort(); 02108 } else { 02109 $sortkey = $text; 02110 } 02111 $sortkey = Sanitizer::decodeCharReferences( $sortkey ); 02112 $sortkey = str_replace( "\n", '', $sortkey ); 02113 $sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey ); 02114 $this->mOutput->addCategory( $nt->getDBkey(), $sortkey ); 02115 02119 $s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail; 02120 02121 wfProfileOut( __METHOD__ . "-category" ); 02122 continue; 02123 } 02124 } 02125 02126 # Self-link checking. For some languages, variants of the title are checked in 02127 # LinkHolderArray::doVariants() to allow batching the existence checks necessary 02128 # for linking to a different variant. 
02129 if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) { 02130 $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail ); 02131 continue; 02132 } 02133 02134 # NS_MEDIA is a pseudo-namespace for linking directly to a file 02135 # @todo FIXME: Should do batch file existence checks, see comment below 02136 if ( $ns == NS_MEDIA ) { 02137 wfProfileIn( __METHOD__ . "-media" ); 02138 # Give extensions a chance to select the file revision for us 02139 $options = array(); 02140 $descQuery = false; 02141 wfRunHooks( 'BeforeParserFetchFileAndTitle', 02142 array( $this, $nt, &$options, &$descQuery ) ); 02143 # Fetch and register the file (file title may be different via hooks) 02144 list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options ); 02145 # Cloak with NOPARSE to avoid replacement in replaceExternalLinks 02146 $s .= $prefix . $this->armorLinks( 02147 Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail; 02148 wfProfileOut( __METHOD__ . "-media" ); 02149 continue; 02150 } 02151 02152 wfProfileIn( __METHOD__ . "-always_known" ); 02153 # Some titles, such as valid special pages or files in foreign repos, should 02154 # be shown as bluelinks even though they're not included in the page table 02155 # 02156 # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do 02157 # batch file existence checks for NS_FILE and NS_MEDIA 02158 if ( $iw == '' && $nt->isAlwaysKnown() ) { 02159 $this->mOutput->addLink( $nt ); 02160 $s .= $this->makeKnownLinkHolder( $nt, $text, array(), $trail, $prefix ); 02161 } else { 02162 # Links will be added to the output link list after checking 02163 $s .= $holders->makeHolder( $nt, $text, array(), $trail, $prefix ); 02164 } 02165 wfProfileOut( __METHOD__ . 
"-always_known" ); 02166 } 02167 wfProfileOut( __METHOD__ ); 02168 return $holders; 02169 } 02170 02185 function makeKnownLinkHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) { 02186 list( $inside, $trail ) = Linker::splitTrail( $trail ); 02187 02188 if ( is_string( $query ) ) { 02189 $query = wfCgiToArray( $query ); 02190 } 02191 if ( $text == '' ) { 02192 $text = htmlspecialchars( $nt->getPrefixedText() ); 02193 } 02194 02195 $link = Linker::linkKnown( $nt, "$prefix$text$inside", array(), $query ); 02196 02197 return $this->armorLinks( $link ) . $trail; 02198 } 02199 02210 function armorLinks( $text ) { 02211 return preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/', 02212 "{$this->mUniqPrefix}NOPARSE$1", $text ); 02213 } 02214 02219 function areSubpagesAllowed() { 02220 # Some namespaces don't allow subpages 02221 return MWNamespace::hasSubpages( $this->mTitle->getNamespace() ); 02222 } 02223 02232 function maybeDoSubpageLink( $target, &$text ) { 02233 return Linker::normalizeSubpageLink( $this->mTitle, $target, $text ); 02234 } 02235 02242 function closeParagraph() { 02243 $result = ''; 02244 if ( $this->mLastSection != '' ) { 02245 $result = '</' . $this->mLastSection . 
">\n"; 02246 } 02247 $this->mInPre = false; 02248 $this->mLastSection = ''; 02249 return $result; 02250 } 02251 02262 function getCommon( $st1, $st2 ) { 02263 $fl = strlen( $st1 ); 02264 $shorter = strlen( $st2 ); 02265 if ( $fl < $shorter ) { 02266 $shorter = $fl; 02267 } 02268 02269 for ( $i = 0; $i < $shorter; ++$i ) { 02270 if ( $st1[$i] != $st2[$i] ) { 02271 break; 02272 } 02273 } 02274 return $i; 02275 } 02276 02286 function openList( $char ) { 02287 $result = $this->closeParagraph(); 02288 02289 if ( '*' === $char ) { 02290 $result .= "<ul>\n<li>"; 02291 } elseif ( '#' === $char ) { 02292 $result .= "<ol>\n<li>"; 02293 } elseif ( ':' === $char ) { 02294 $result .= "<dl>\n<dd>"; 02295 } elseif ( ';' === $char ) { 02296 $result .= "<dl>\n<dt>"; 02297 $this->mDTopen = true; 02298 } else { 02299 $result = '<!-- ERR 1 -->'; 02300 } 02301 02302 return $result; 02303 } 02304 02312 function nextItem( $char ) { 02313 if ( '*' === $char || '#' === $char ) { 02314 return "</li>\n<li>"; 02315 } elseif ( ':' === $char || ';' === $char ) { 02316 $close = "</dd>\n"; 02317 if ( $this->mDTopen ) { 02318 $close = "</dt>\n"; 02319 } 02320 if ( ';' === $char ) { 02321 $this->mDTopen = true; 02322 return $close . '<dt>'; 02323 } else { 02324 $this->mDTopen = false; 02325 return $close . '<dd>'; 02326 } 02327 } 02328 return '<!-- ERR 2 -->'; 02329 } 02330 02338 function closeList( $char ) { 02339 if ( '*' === $char ) { 02340 $text = "</li>\n</ul>"; 02341 } elseif ( '#' === $char ) { 02342 $text = "</li>\n</ol>"; 02343 } elseif ( ':' === $char ) { 02344 if ( $this->mDTopen ) { 02345 $this->mDTopen = false; 02346 $text = "</dt>\n</dl>"; 02347 } else { 02348 $text = "</dd>\n</dl>"; 02349 } 02350 } else { 02351 return '<!-- ERR 3 -->'; 02352 } 02353 return $text . "\n"; 02354 } 02365 function doBlockLevels( $text, $linestart ) { 02366 wfProfileIn( __METHOD__ ); 02367 02368 # Parsing through the text line by line. 
The main thing 02369 # happening here is handling of block-level elements p, pre, 02370 # and making lists from lines starting with * # : etc. 02371 # 02372 $textLines = StringUtils::explode( "\n", $text ); 02373 02374 $lastPrefix = $output = ''; 02375 $this->mDTopen = $inBlockElem = false; 02376 $prefixLength = 0; 02377 $paragraphStack = false; 02378 $inBlockquote = false; 02379 02380 foreach ( $textLines as $oLine ) { 02381 # Fix up $linestart 02382 if ( !$linestart ) { 02383 $output .= $oLine; 02384 $linestart = true; 02385 continue; 02386 } 02387 # * = ul 02388 # # = ol 02389 # ; = dt 02390 # : = dd 02391 02392 $lastPrefixLength = strlen( $lastPrefix ); 02393 $preCloseMatch = preg_match( '/<\\/pre/i', $oLine ); 02394 $preOpenMatch = preg_match( '/<pre/i', $oLine ); 02395 # If not in a <pre> element, scan for and figure out what prefixes are there. 02396 if ( !$this->mInPre ) { 02397 # Multiple prefixes may abut each other for nested lists. 02398 $prefixLength = strspn( $oLine, '*#:;' ); 02399 $prefix = substr( $oLine, 0, $prefixLength ); 02400 02401 # eh? 02402 # ; and : are both from definition-lists, so they're equivalent 02403 # for the purposes of determining whether or not we need to open/close 02404 # elements. 
02405 $prefix2 = str_replace( ';', ':', $prefix ); 02406 $t = substr( $oLine, $prefixLength ); 02407 $this->mInPre = (bool)$preOpenMatch; 02408 } else { 02409 # Don't interpret any other prefixes in preformatted text 02410 $prefixLength = 0; 02411 $prefix = $prefix2 = ''; 02412 $t = $oLine; 02413 } 02414 02415 # List generation 02416 if ( $prefixLength && $lastPrefix === $prefix2 ) { 02417 # Same as the last item, so no need to deal with nesting or opening stuff 02418 $output .= $this->nextItem( substr( $prefix, -1 ) ); 02419 $paragraphStack = false; 02420 02421 if ( substr( $prefix, -1 ) === ';' ) { 02422 # The one nasty exception: definition lists work like this: 02423 # ; title : definition text 02424 # So we check for : in the remainder text to split up the 02425 # title and definition, without b0rking links. 02426 $term = $t2 = ''; 02427 if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) { 02428 $t = $t2; 02429 $output .= $term . $this->nextItem( ':' ); 02430 } 02431 } 02432 } elseif ( $prefixLength || $lastPrefixLength ) { 02433 # We need to open or close prefixes, or both. 02434 02435 # Either open or close a level... 02436 $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix ); 02437 $paragraphStack = false; 02438 02439 # Close all the prefixes which aren't shared. 02440 while ( $commonPrefixLength < $lastPrefixLength ) { 02441 $output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] ); 02442 --$lastPrefixLength; 02443 } 02444 02445 # Continue the current prefix if appropriate. 02446 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) { 02447 $output .= $this->nextItem( $prefix[$commonPrefixLength - 1] ); 02448 } 02449 02450 # Open prefixes where appropriate. 
02451 while ( $prefixLength > $commonPrefixLength ) { 02452 $char = substr( $prefix, $commonPrefixLength, 1 ); 02453 $output .= $this->openList( $char ); 02454 02455 if ( ';' === $char ) { 02456 # @todo FIXME: This is dupe of code above 02457 if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) { 02458 $t = $t2; 02459 $output .= $term . $this->nextItem( ':' ); 02460 } 02461 } 02462 ++$commonPrefixLength; 02463 } 02464 $lastPrefix = $prefix2; 02465 } 02466 02467 # If we have no prefixes, go to paragraph mode. 02468 if ( 0 == $prefixLength ) { 02469 wfProfileIn( __METHOD__ . "-paragraph" ); 02470 # No prefix (not in list)--go to paragraph mode 02471 # XXX: use a stack for nestable elements like span, table and div 02472 $openmatch = preg_match( '/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS', $t ); 02473 $closematch = preg_match( 02474 '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|' . 02475 '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|' . $this->mUniqPrefix . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS', $t ); 02476 if ( $openmatch or $closematch ) { 02477 $paragraphStack = false; 02478 # TODO bug 5718: paragraph closed 02479 $output .= $this->closeParagraph(); 02480 if ( $preOpenMatch and !$preCloseMatch ) { 02481 $this->mInPre = true; 02482 } 02483 $bqOffset = 0; 02484 while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t, $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset ) ) { 02485 $inBlockquote = !$bqMatch[1][0]; // is this a close tag? 02486 $bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] ); 02487 } 02488 $inBlockElem = !$closematch; 02489 } elseif ( !$inBlockElem && !$this->mInPre ) { 02490 if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' || trim( $t ) != '' ) and !$inBlockquote ) { 02491 # pre 02492 if ( $this->mLastSection !== 'pre' ) { 02493 $paragraphStack = false; 02494 $output .= $this->closeParagraph() . 
'<pre>';
							$this->mLastSection = 'pre';
						}
						$t = substr( $t, 1 );
					} else {
						# paragraph
						if ( trim( $t ) === '' ) {
							if ( $paragraphStack ) {
								$output .= $paragraphStack . '<br />';
								$paragraphStack = false;
								$this->mLastSection = 'p';
							} else {
								if ( $this->mLastSection !== 'p' ) {
									$output .= $this->closeParagraph();
									$this->mLastSection = '';
									$paragraphStack = '<p>';
								} else {
									$paragraphStack = '</p><p>';
								}
							}
						} else {
							if ( $paragraphStack ) {
								$output .= $paragraphStack;
								$paragraphStack = false;
								$this->mLastSection = 'p';
							} elseif ( $this->mLastSection !== 'p' ) {
								$output .= $this->closeParagraph() . '<p>';
								$this->mLastSection = 'p';
							}
						}
					}
				}
				wfProfileOut( __METHOD__ . "-paragraph" );
			}
			# somewhere above we forget to get out of pre block (bug 785)
			if ( $preCloseMatch && $this->mInPre ) {
				$this->mInPre = false;
			}
			if ( $paragraphStack === false ) {
				$output .= $t . "\n";
			}
		}
		while ( $prefixLength ) {
			$output .= $this->closeList( $prefix2[$prefixLength - 1] );
			--$prefixLength;
		}
		if ( $this->mLastSection != '' ) {
			$output .= '</' . $this->mLastSection . '>';
			$this->mLastSection = '';
		}

		wfProfileOut( __METHOD__ );
		return $output;
	}

	/**
	 * Split a string on the first ':' that is not inside a tag, an element
	 * or an HTML comment.
	 *
	 * The string is walked with a small state machine (COLON_STATE_*) that
	 * tracks whether the cursor is in plain text, inside a tag, or inside a
	 * comment, and counts open elements in $stack. A colon only counts when
	 * the machine is in text state with no open elements.
	 *
	 * @param string $str String to split
	 * @param string &$before Set to everything before the ':' on success
	 * @param string &$after Set to everything after the ':' on success
	 * @throws MWException If the state machine reaches an unknown state
	 * @return int|bool Byte offset of the ':' in $str, or false if no
	 *   suitable colon exists ($before and $after are then left untouched)
	 */
	function findColonNoLinks( $str, &$before, &$after ) {
		wfProfileIn( __METHOD__ );

		$pos = strpos( $str, ':' );
		if ( $pos === false ) {
			# Nothing to find!
			wfProfileOut( __METHOD__ );
			return false;
		}

		$lt = strpos( $str, '<' );
		if ( $lt === false || $lt > $pos ) {
			# Easy; no tag nesting to worry about
			$before = substr( $str, 0, $pos );
			$after = substr( $str, $pos + 1 );
			wfProfileOut( __METHOD__ );
			return $pos;
		}

		# Ugly state machine to walk through avoiding tags.
		$state = self::COLON_STATE_TEXT;
		$stack = 0;
		$len = strlen( $str );
		for ( $i = 0; $i < $len; $i++ ) {
			$c = $str[$i];

			switch ( $state ) {
				# (Using the number is a performance hack for common cases)
				case 0: # self::COLON_STATE_TEXT:
					switch ( $c ) {
						case "<":
							# Could be either a <start> tag or an </end> tag
							$state = self::COLON_STATE_TAGSTART;
							break;
						case ":":
							if ( $stack == 0 ) {
								# We found it!
								$before = substr( $str, 0, $i );
								$after = substr( $str, $i + 1 );
								wfProfileOut( __METHOD__ );
								return $i;
							}
							# Embedded in a tag; don't break it.
							break;
						default:
							# Skip ahead looking for something interesting
							$colon = strpos( $str, ':', $i );
							if ( $colon === false ) {
								# Nothing else interesting
								wfProfileOut( __METHOD__ );
								return false;
							}
							$lt = strpos( $str, '<', $i );
							if ( $stack === 0 ) {
								if ( $lt === false || $colon < $lt ) {
									# We found it! Report the colon's offset, not the
									# offset the skip-ahead started from, so that all
									# success paths return the same thing.
									$before = substr( $str, 0, $colon );
									$after = substr( $str, $colon + 1 );
									wfProfileOut( __METHOD__ );
									return $colon;
								}
							}
							if ( $lt === false ) {
								# We're nested ($stack > 0 here, otherwise we would
								# have returned above) and there are no tags left to
								# close the nesting, so no colon can ever qualify.
								# Leave both switches AND the for loop; "break 2"
								# would only leave the switches and rescan the rest
								# of the string one character at a time.
								break 3;
							}
							# Skip ahead to next tag start
							$i = $lt;
							$state = self::COLON_STATE_TAGSTART;
					}
					break;
				case 1: # self::COLON_STATE_TAG:
					# In a <tag>
					switch ( $c ) {
						case ">":
							$stack++;
							$state = self::COLON_STATE_TEXT;
							break;
						case "/":
							# Slash may be followed by >?
							$state = self::COLON_STATE_TAGSLASH;
							break;
						default:
							# ignore
					}
					break;
				case 2: # self::COLON_STATE_TAGSTART:
					switch ( $c ) {
						case "/":
							$state = self::COLON_STATE_CLOSETAG;
							break;
						case "!":
							$state = self::COLON_STATE_COMMENT;
							break;
						case ">":
							# Illegal early close? This shouldn't happen D:
							$state = self::COLON_STATE_TEXT;
							break;
						default:
							$state = self::COLON_STATE_TAG;
					}
					break;
				case 3: # self::COLON_STATE_CLOSETAG:
					# In a </tag>
					if ( $c === ">" ) {
						$stack--;
						if ( $stack < 0 ) {
							wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" );
							wfProfileOut( __METHOD__ );
							return false;
						}
						$state = self::COLON_STATE_TEXT;
					}
					break;
				case self::COLON_STATE_TAGSLASH:
					if ( $c === ">" ) {
						# Yes, a self-closed tag <blah/>
						$state = self::COLON_STATE_TEXT;
					} else {
						# Probably we're jumping the gun, and this is an attribute
						$state = self::COLON_STATE_TAG;
					}
					break;
				case 5: # self::COLON_STATE_COMMENT:
					if ( $c === "-" ) {
						$state = self::COLON_STATE_COMMENTDASH;
					}
					break;
				case self::COLON_STATE_COMMENTDASH:
					if ( $c === "-" ) {
						$state = self::COLON_STATE_COMMENTDASHDASH;
					} else {
						$state = self::COLON_STATE_COMMENT;
					}
					break;
				case self::COLON_STATE_COMMENTDASHDASH:
					if ( $c === ">" ) {
						$state = self::COLON_STATE_TEXT;
					} else {
						$state = self::COLON_STATE_COMMENT;
					}
					break;
				default:
					wfProfileOut( __METHOD__ );
					throw new MWException( "State machine error in " . __METHOD__ );
			}
		}

		# Ran out of input without finding a usable colon.
		if ( $stack > 0 ) {
			wfDebug( __METHOD__ . ": Invalid input; not enough close tags (stack $stack, state $state)\n" );
		}
		wfProfileOut( __METHOD__ );
		return false;
	}

	/**
	 * Return the value of a magic variable, identified by its magic word ID
	 * (for example 'pagename' or 'currentmonth').
	 *
	 * Most values are stored in $this->mVarCache under $index before being
	 * returned; a few cases return directly without being cached. IDs not
	 * handled here are passed to the 'ParserGetVariableValueSwitch' hook.
	 *
	 * @param string $index Magic word ID
	 * @param bool|PPFrame $frame Forwarded to the ParserGetVariableValueSwitch hook
	 * @throws MWException If no title is set on the parser
	 * @return mixed Value of the variable, or null when nothing supplies one
	 */
	function getVariableValue( $index, $frame = false ) {
		global $wgContLang, $wgSitename, $wgServer;
		global $wgArticlePath, $wgScriptPath, $wgStylePath;

		if ( is_null( $this->mTitle ) ) {
			// If no title set, bad things are going to happen
			// later. Title should always be set since this
			// should only be called in the middle of a parse
			// operation (but the unit-tests do funky stuff)
			throw new MWException( __METHOD__ . ' Should only be '
				.
' called while parsing (no title set)' );
		}

		# Serve a previously computed value unless a hook handler vetoes
		# use of the cache by returning false.
		if ( wfRunHooks( 'ParserGetVariableValueVarCache', array( &$this, &$this->mVarCache ) ) ) {
			if ( isset( $this->mVarCache[$index] ) ) {
				return $this->mVarCache[$index];
			}
		}

		# Timestamp that all CURRENT*/LOCAL* variables are derived from;
		# hook handlers may replace it.
		$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
		wfRunHooks( 'ParserGetVariableValueTs', array( &$this, &$ts ) );

		$pageLang = $this->getFunctionLang();

		switch ( $index ) {
			case 'currentmonth':
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) );
				break;
			case 'currentmonth1':
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) );
				break;
			case 'currentmonthname':
				$value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
				break;
			case 'currentmonthnamegen':
				$value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
				break;
			case 'currentmonthabbrev':
				$value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
				break;
			case 'currentday':
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) );
				break;
			case 'currentday2':
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) );
				break;
			case 'localmonth':
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) );
				break;
			case 'localmonth1':
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
				break;
			case 'localmonthname':
				$value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
				break;
			case 'localmonthnamegen':
				$value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
				break;
			case 'localmonthabbrev':
				$value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
				break;
			case 'localday':
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) );
				break;
			case 'localday2':
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) );
				break;
			case 'pagename':
				$value = wfEscapeWikiText( $this->mTitle->getText() );
				break;
			case 'pagenamee':
				$value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
				break;
			case 'fullpagename':
				$value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
				break;
			case 'fullpagenamee':
				$value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
				break;
			case 'subpagename':
				$value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
				break;
			case 'subpagenamee':
				$value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
				break;
			case 'rootpagename':
				$value = wfEscapeWikiText( $this->mTitle->getRootText() );
				break;
			case 'rootpagenamee':
				$value = wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getRootText() ) ) );
				break;
			case 'basepagename':
				$value = wfEscapeWikiText( $this->mTitle->getBaseText() );
				break;
			case 'basepagenamee':
				$value = wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) ) );
				break;
			case 'talkpagename':
				# Empty when the title cannot have a talk page
				$value = $this->mTitle->canTalk()
					? wfEscapeWikiText( $this->mTitle->getTalkPage()->getPrefixedText() )
					: '';
				break;
			case 'talkpagenamee':
				$value = $this->mTitle->canTalk()
					? wfEscapeWikiText( $this->mTitle->getTalkPage()->getPrefixedURL() )
					: '';
				break;
			case 'subjectpagename':
				$value = wfEscapeWikiText( $this->mTitle->getSubjectPage()->getPrefixedText() );
				break;
			case 'subjectpagenamee':
				$value = wfEscapeWikiText( $this->mTitle->getSubjectPage()->getPrefixedURL() );
				break;
			case 'pageid': // requested in bug 23427
				$pageid = $this->getTitle()->getArticleID();
				if ( $pageid == 0 ) {
					# 0 means the page doesn't exist in the database,
					# which means the user is previewing a new page.
					# The vary-revision flag must be set, because the magic word
					# will have a different value once the page is saved.
					$this->mOutput->setFlag( 'vary-revision' );
					wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
				}
				$value = $pageid ?: null;
				break;

			# Every REVISION* variable below sets the vary-revision flag to
			# let the edit saving system know the page should be parsed
			# *after* a revision ID has been assigned.
			case 'revisionid':
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision...\n" );
				$value = $this->mRevisionId;
				break;
			case 'revisionday':
				# (vary-revision here is for null edits)
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" );
				$value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
				break;
			case 'revisionday2':
				# (vary-revision here is for null edits)
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" );
				$value = substr( $this->getRevisionTimestamp(), 6, 2 );
				break;
			case 'revisionmonth':
				# (vary-revision here is for null edits)
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" );
				$value = substr( $this->getRevisionTimestamp(), 4, 2 );
				break;
			case 'revisionmonth1':
				# (vary-revision here is for null edits)
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" );
				$value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
				break;
			case 'revisionyear':
				# (vary-revision here is for null edits)
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" );
				$value = substr( $this->getRevisionTimestamp(), 0, 4 );
				break;
			case 'revisiontimestamp':
				# (vary-revision here is for null edits)
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
				$value = $this->getRevisionTimestamp();
				break;
			case 'revisionuser':
				# (vary-revision here is for null edits)
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" );
				$value = $this->getRevisionUser();
				break;
			case 'revisionsize':
				# (vary-revision here is for null edits)
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{REVISIONSIZE}} used, setting vary-revision...\n" );
				$value = $this->getRevisionSize();
				break;

			case 'namespace':
				$value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
				break;
			case 'namespacee':
				$value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
				break;
			case 'namespacenumber':
				$value = $this->mTitle->getNamespace();
				break;
			case 'talkspace':
				$value = $this->mTitle->canTalk() ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() ) : '';
				break;
			case 'talkspacee':
				$value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
				break;
			case 'subjectspace':
				$value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
				break;
			case 'subjectspacee':
				$value = wfUrlencode( $this->mTitle->getSubjectNsText() );
				break;
			case 'currentdayname':
				# format( 'w' ) is 0-based; getWeekdayName() is called with value + 1
				$value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
				break;
			case 'currentyear':
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
				break;
			case 'currenttime':
				$value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
				break;
			case 'currenthour':
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
				break;
			case 'currentweek':
				# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
				# int to remove the padding
				$value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
				break;
			case 'currentdow':
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
				break;
			case 'localdayname':
				$value = $pageLang->getWeekdayName( (int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1 );
				break;
			case 'localyear':
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
				break;
			case 'localtime':
				$value = $pageLang->time( MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ), false, false );
				break;
			case 'localhour':
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
				break;
			case 'localweek':
				# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
				# int to remove the padding
				$value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
				break;
			case 'localdow':
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
				break;
			case 'numberofarticles':
				$value = $pageLang->formatNum( SiteStats::articles() );
				break;
			case 'numberoffiles':
				$value = $pageLang->formatNum( SiteStats::images() );
				break;
			case 'numberofusers':
				$value = $pageLang->formatNum( SiteStats::users() );
				break;
			case 'numberofactiveusers':
				$value = $pageLang->formatNum( SiteStats::activeUsers() );
				break;
			case 'numberofpages':
				$value = $pageLang->formatNum( SiteStats::pages() );
				break;
			case 'numberofadmins':
				$value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
				break;
			case 'numberofedits':
				$value = $pageLang->formatNum( SiteStats::edits() );
				break;
			case 'numberofviews':
				global $wgDisableCounters;
				$value = !$wgDisableCounters ? $pageLang->formatNum( SiteStats::views() ) : '';
				break;
			case 'currenttimestamp':
				$value = wfTimestamp( TS_MW, $ts );
				break;
			case 'localtimestamp':
				$value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
				break;
			case 'currentversion':
				$value = SpecialVersion::getVersion();
				break;

			# The cases below return immediately, so their values are not
			# written to $this->mVarCache.
			case 'articlepath':
				return $wgArticlePath;
			case 'sitename':
				return $wgSitename;
			case 'server':
				return $wgServer;
			case 'servername':
				$serverParts = wfParseUrl( $wgServer );
				return $serverParts && isset( $serverParts['host'] ) ? $serverParts['host'] : $wgServer;
			case 'scriptpath':
				return $wgScriptPath;
			case 'stylepath':
				return $wgStylePath;
			case 'directionmark':
				return $pageLang->getDirMark();
			case 'contentlanguage':
				global $wgLanguageCode;
				return $wgLanguageCode;

			case 'cascadingsources':
				$value = CoreParserFunctions::cascadingsources( $this );
				break;
			default:
				# Unknown here: let hook handlers supply the value (and do
				# any caching themselves via the mVarCache reference).
				$ret = null;
				wfRunHooks( 'ParserGetVariableValueSwitch', array( &$this, &$this->mVarCache, &$index, &$ret, &$frame ) );
				return $ret;
		}

		if ( $index ) {
			$this->mVarCache[$index] = $value;
		}

		return $value;
	}

	/**
	 * Build the MagicWordArray objects used to recognise variables
	 * ($this->mVariables) and substitution modifiers ($this->mSubstWords).
	 */
	function initialiseVariables() {
		wfProfileIn( __METHOD__ );
		$varIds = MagicWord::getVariableIDs();
		$substModifierIds = MagicWord::getSubstIDs();

		$this->mVariables = new MagicWordArray( $varIds );
		$this->mSubstWords = new MagicWordArray( $substModifierIds );
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Run wikitext through the preprocessor and return the resulting
	 * document object.
	 *
	 * @param string $text Wikitext to preprocess
	 * @param int $flags Passed through to the preprocessor's
	 *   preprocessToObj(), e.g. self::PTD_FOR_INCLUSION
	 * @return mixed Whatever the preprocessor's preprocessToObj() returns
	 */
	function preprocessToDom( $text, $flags = 0 ) {
		return $this->getPreprocessor()->preprocessToObj( $text, $flags );
	}

	/**
	 * Split a string into its leading whitespace, its trimmed core and its
	 * trailing whitespace.
	 *
	 * @param string $s
	 * @return array Three strings: ( leading whitespace, trimmed text,
	 *   trailing whitespace )
	 */
	public static function splitWhitespace( $s ) {
		$withoutLead = ltrim( $s );
		$leading = substr( $s, 0, strlen( $s ) - strlen( $withoutLead ) );
		$core = rtrim( $withoutLead );
		$trailingLen = strlen( $withoutLead ) - strlen( $core );
		# substr() with a negative zero offset would return the whole string,
		# so the no-trailing-whitespace case is handled explicitly.
		$trailing = ( $trailingLen > 0 ) ? substr( $withoutLead, -$trailingLen ) : '';
		return array( $leading, $core, $trailing );
	}

	/**
	 * Preprocess $text and expand it in the given frame.
	 *
	 * @param string $text Wikitext to expand; returned unchanged when it is
	 *   empty or longer than the configured maximum include size
	 * @param bool|PPFrame|array $frame Frame to expand in. False creates a
	 *   new frame; any other non-PPFrame value is wrapped in a custom frame
	 * @param bool $argsOnly When true, expansion uses PPFrame::NO_TEMPLATES
	 * @return string
	 */
	function replaceVariables( $text, $frame = false, $argsOnly = false ) {
		# Is there any text? Also, Prevent too big inclusions!
03139 if ( strlen( $text ) < 1 || strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) { 03140 return $text; 03141 } 03142 wfProfileIn( __METHOD__ ); 03143 03144 if ( $frame === false ) { 03145 $frame = $this->getPreprocessor()->newFrame(); 03146 } elseif ( !( $frame instanceof PPFrame ) ) { 03147 wfDebug( __METHOD__ . " called using plain parameters instead of a PPFrame instance. Creating custom frame.\n" ); 03148 $frame = $this->getPreprocessor()->newCustomFrame( $frame ); 03149 } 03150 03151 $dom = $this->preprocessToDom( $text ); 03152 $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0; 03153 $text = $frame->expand( $dom, $flags ); 03154 03155 wfProfileOut( __METHOD__ ); 03156 return $text; 03157 } 03158 03166 static function createAssocArgs( $args ) { 03167 $assocArgs = array(); 03168 $index = 1; 03169 foreach ( $args as $arg ) { 03170 $eqpos = strpos( $arg, '=' ); 03171 if ( $eqpos === false ) { 03172 $assocArgs[$index++] = $arg; 03173 } else { 03174 $name = trim( substr( $arg, 0, $eqpos ) ); 03175 $value = trim( substr( $arg, $eqpos + 1 ) ); 03176 if ( $value === false ) { 03177 $value = ''; 03178 } 03179 if ( $name !== false ) { 03180 $assocArgs[$name] = $value; 03181 } 03182 } 03183 } 03184 03185 return $assocArgs; 03186 } 03187 03212 function limitationWarn( $limitationType, $current = '', $max = '' ) { 03213 # does no harm if $current and $max are present but are unnecessary for the message 03214 $warning = wfMessage( "$limitationType-warning" )->numParams( $current, $max ) 03215 ->inLanguage( $this->mOptions->getUserLangObj() )->text(); 03216 $this->mOutput->addWarning( $warning ); 03217 $this->addTrackingCategory( "$limitationType-category" ); 03218 } 03219 03233 function braceSubstitution( $piece, $frame ) { 03234 wfProfileIn( __METHOD__ ); 03235 wfProfileIn( __METHOD__ . 
'-setup' ); 03236 03237 # Flags 03238 $found = false; # $text has been filled 03239 $nowiki = false; # wiki markup in $text should be escaped 03240 $isHTML = false; # $text is HTML, armour it against wikitext transformation 03241 $forceRawInterwiki = false; # Force interwiki transclusion to be done in raw mode not rendered 03242 $isChildObj = false; # $text is a DOM node needing expansion in a child frame 03243 $isLocalObj = false; # $text is a DOM node needing expansion in the current frame 03244 03245 # Title object, where $text came from 03246 $title = false; 03247 03248 # $part1 is the bit before the first |, and must contain only title characters. 03249 # Various prefixes will be stripped from it later. 03250 $titleWithSpaces = $frame->expand( $piece['title'] ); 03251 $part1 = trim( $titleWithSpaces ); 03252 $titleText = false; 03253 03254 # Original title text preserved for various purposes 03255 $originalTitle = $part1; 03256 03257 # $args is a list of argument nodes, starting from index 0, not including $part1 03258 # @todo FIXME: If piece['parts'] is null then the call to getLength() below won't work b/c this $args isn't an object 03259 $args = ( null == $piece['parts'] ) ? array() : $piece['parts']; 03260 wfProfileOut( __METHOD__ . '-setup' ); 03261 03262 $titleProfileIn = null; // profile templates 03263 03264 # SUBST 03265 wfProfileIn( __METHOD__ . '-modifiers' ); 03266 if ( !$found ) { 03267 03268 $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 ); 03269 03270 # Possibilities for substMatch: "subst", "safesubst" or FALSE 03271 # Decide whether to expand template or keep wikitext as-is. 
03272 if ( $this->ot['wiki'] ) { 03273 if ( $substMatch === false ) { 03274 $literal = true; # literal when in PST with no prefix 03275 } else { 03276 $literal = false; # expand when in PST with subst: or safesubst: 03277 } 03278 } else { 03279 if ( $substMatch == 'subst' ) { 03280 $literal = true; # literal when not in PST with plain subst: 03281 } else { 03282 $literal = false; # expand when not in PST with safesubst: or no prefix 03283 } 03284 } 03285 if ( $literal ) { 03286 $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); 03287 $isLocalObj = true; 03288 $found = true; 03289 } 03290 } 03291 03292 # Variables 03293 if ( !$found && $args->getLength() == 0 ) { 03294 $id = $this->mVariables->matchStartToEnd( $part1 ); 03295 if ( $id !== false ) { 03296 $text = $this->getVariableValue( $id, $frame ); 03297 if ( MagicWord::getCacheTTL( $id ) > -1 ) { 03298 $this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) ); 03299 } 03300 $found = true; 03301 } 03302 } 03303 03304 # MSG, MSGNW and RAW 03305 if ( !$found ) { 03306 # Check for MSGNW: 03307 $mwMsgnw = MagicWord::get( 'msgnw' ); 03308 if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) { 03309 $nowiki = true; 03310 } else { 03311 # Remove obsolete MSG: 03312 $mwMsg = MagicWord::get( 'msg' ); 03313 $mwMsg->matchStartAndRemove( $part1 ); 03314 } 03315 03316 # Check for RAW: 03317 $mwRaw = MagicWord::get( 'raw' ); 03318 if ( $mwRaw->matchStartAndRemove( $part1 ) ) { 03319 $forceRawInterwiki = true; 03320 } 03321 } 03322 wfProfileOut( __METHOD__ . '-modifiers' ); 03323 03324 # Parser functions 03325 if ( !$found ) { 03326 wfProfileIn( __METHOD__ . 
'-pfunc' ); 03327 03328 $colonPos = strpos( $part1, ':' ); 03329 if ( $colonPos !== false ) { 03330 $func = substr( $part1, 0, $colonPos ); 03331 $funcArgs = array( trim( substr( $part1, $colonPos + 1 ) ) ); 03332 for ( $i = 0; $i < $args->getLength(); $i++ ) { 03333 $funcArgs[] = $args->item( $i ); 03334 } 03335 try { 03336 $result = $this->callParserFunction( $frame, $func, $funcArgs ); 03337 } catch ( Exception $ex ) { 03338 wfProfileOut( __METHOD__ . '-pfunc' ); 03339 wfProfileOut( __METHOD__ ); 03340 throw $ex; 03341 } 03342 03343 # The interface for parser functions allows for extracting 03344 # flags into the local scope. Extract any forwarded flags 03345 # here. 03346 extract( $result ); 03347 } 03348 wfProfileOut( __METHOD__ . '-pfunc' ); 03349 } 03350 03351 # Finish mangling title and then check for loops. 03352 # Set $title to a Title object and $titleText to the PDBK 03353 if ( !$found ) { 03354 $ns = NS_TEMPLATE; 03355 # Split the title into page and subpage 03356 $subpage = ''; 03357 $relative = $this->maybeDoSubpageLink( $part1, $subpage ); 03358 if ( $part1 !== $relative ) { 03359 $part1 = $relative; 03360 $ns = $this->mTitle->getNamespace(); 03361 } 03362 $title = Title::newFromText( $part1, $ns ); 03363 if ( $title ) { 03364 $titleText = $title->getPrefixedText(); 03365 # Check for language variants if the template is not found 03366 if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) { 03367 $this->getConverterLanguage()->findVariantLink( $part1, $title, true ); 03368 } 03369 # Do recursion depth check 03370 $limit = $this->mOptions->getMaxTemplateDepth(); 03371 if ( $frame->depth >= $limit ) { 03372 $found = true; 03373 $text = '<span class="error">' 03374 . wfMessage( 'parser-template-recursion-depth-warning' ) 03375 ->numParams( $limit )->inContentLanguage()->text() 03376 . 
'</span>'; 03377 } 03378 } 03379 } 03380 03381 # Load from database 03382 if ( !$found && $title ) { 03383 if ( !Profiler::instance()->isPersistent() ) { 03384 # Too many unique items can kill profiling DBs/collectors 03385 $titleProfileIn = __METHOD__ . "-title-" . $title->getPrefixedDBkey(); 03386 wfProfileIn( $titleProfileIn ); // template in 03387 } 03388 wfProfileIn( __METHOD__ . '-loadtpl' ); 03389 if ( !$title->isExternal() ) { 03390 if ( $title->isSpecialPage() 03391 && $this->mOptions->getAllowSpecialInclusion() 03392 && $this->ot['html'] 03393 ) { 03394 // Pass the template arguments as URL parameters. 03395 // "uselang" will have no effect since the Language object 03396 // is forced to the one defined in ParserOptions. 03397 $pageArgs = array(); 03398 for ( $i = 0; $i < $args->getLength(); $i++ ) { 03399 $bits = $args->item( $i )->splitArg(); 03400 if ( strval( $bits['index'] ) === '' ) { 03401 $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) ); 03402 $value = trim( $frame->expand( $bits['value'] ) ); 03403 $pageArgs[$name] = $value; 03404 } 03405 } 03406 03407 // Create a new context to execute the special page 03408 $context = new RequestContext; 03409 $context->setTitle( $title ); 03410 $context->setRequest( new FauxRequest( $pageArgs ) ); 03411 $context->setUser( $this->getUser() ); 03412 $context->setLanguage( $this->mOptions->getUserLangObj() ); 03413 $ret = SpecialPageFactory::capturePath( $title, $context ); 03414 if ( $ret ) { 03415 $text = $context->getOutput()->getHTML(); 03416 $this->mOutput->addOutputPageMetadata( $context->getOutput() ); 03417 $found = true; 03418 $isHTML = true; 03419 $this->disableCache(); 03420 } 03421 } elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) { 03422 $found = false; # access denied 03423 wfDebug( __METHOD__ . ": template inclusion denied for " . 03424 $title->getPrefixedDBkey() . 
"\n" ); 03425 } else { 03426 list( $text, $title ) = $this->getTemplateDom( $title ); 03427 if ( $text !== false ) { 03428 $found = true; 03429 $isChildObj = true; 03430 } 03431 } 03432 03433 # If the title is valid but undisplayable, make a link to it 03434 if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) { 03435 $text = "[[:$titleText]]"; 03436 $found = true; 03437 } 03438 } elseif ( $title->isTrans() ) { 03439 # Interwiki transclusion 03440 if ( $this->ot['html'] && !$forceRawInterwiki ) { 03441 $text = $this->interwikiTransclude( $title, 'render' ); 03442 $isHTML = true; 03443 } else { 03444 $text = $this->interwikiTransclude( $title, 'raw' ); 03445 # Preprocess it like a template 03446 $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION ); 03447 $isChildObj = true; 03448 } 03449 $found = true; 03450 } 03451 03452 # Do infinite loop check 03453 # This has to be done after redirect resolution to avoid infinite loops via redirects 03454 if ( !$frame->loopCheck( $title ) ) { 03455 $found = true; 03456 $text = '<span class="error">' 03457 . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text() 03458 . '</span>'; 03459 wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" ); 03460 } 03461 wfProfileOut( __METHOD__ . 
'-loadtpl' ); 03462 } 03463 03464 # If we haven't found text to substitute by now, we're done 03465 # Recover the source wikitext and return it 03466 if ( !$found ) { 03467 $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); 03468 if ( $titleProfileIn ) { 03469 wfProfileOut( $titleProfileIn ); // template out 03470 } 03471 wfProfileOut( __METHOD__ ); 03472 return array( 'object' => $text ); 03473 } 03474 03475 # Expand DOM-style return values in a child frame 03476 if ( $isChildObj ) { 03477 # Clean up argument array 03478 $newFrame = $frame->newChild( $args, $title ); 03479 03480 if ( $nowiki ) { 03481 $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG ); 03482 } elseif ( $titleText !== false && $newFrame->isEmpty() ) { 03483 # Expansion is eligible for the empty-frame cache 03484 if ( isset( $this->mTplExpandCache[$titleText] ) ) { 03485 $text = $this->mTplExpandCache[$titleText]; 03486 } else { 03487 $text = $newFrame->expand( $text ); 03488 $this->mTplExpandCache[$titleText] = $text; 03489 } 03490 } else { 03491 # Uncached expansion 03492 $text = $newFrame->expand( $text ); 03493 } 03494 } 03495 if ( $isLocalObj && $nowiki ) { 03496 $text = $frame->expand( $text, PPFrame::RECOVER_ORIG ); 03497 $isLocalObj = false; 03498 } 03499 03500 if ( $titleProfileIn ) { 03501 wfProfileOut( $titleProfileIn ); // template out 03502 } 03503 03504 # Replace raw HTML by a placeholder 03505 if ( $isHTML ) { 03506 $text = $this->insertStripItem( $text ); 03507 } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) { 03508 # Escape nowiki-style return values 03509 $text = wfEscapeWikiText( $text ); 03510 } elseif ( is_string( $text ) 03511 && !$piece['lineStart'] 03512 && preg_match( '/^(?:{\\||:|;|#|\*)/', $text ) 03513 ) { 03514 # Bug 529: if the template begins with a table or block-level 03515 # element, it should be treated as beginning a new line. 03516 # This behavior is somewhat controversial. 03517 $text = "\n" . 
$text;
		}

		if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
			# Error, oversize inclusion
			if ( $titleText !== false ) {
				# Make a working, properly escaped link if possible (bug 23588)
				$text = "[[:$titleText]]";
			} else {
				# This will probably not be a working link, but at least it may
				# provide some hint of where the problem is.
				# preg_replace() returns the new string and leaves its subject
				# untouched, so the result has to be assigned for the leading
				# colon to actually be stripped before one is re-added below.
				$originalTitle = preg_replace( '/^:/', '', $originalTitle );
				$text = "[[:$originalTitle]]";
			}
			$text .= $this->insertStripItem( '<!-- WARNING: template omitted, post-expand include size too large -->' );
			$this->limitationWarn( 'post-expand-template-inclusion' );
		}

		if ( $isLocalObj ) {
			$ret = array( 'object' => $text );
		} else {
			$ret = array( 'text' => $text );
		}

		wfProfileOut( __METHOD__ );
		return $ret;
	}

	/**
	 * Look up a parser function by name and invoke its registered hook.
	 *
	 * The name is first matched against the case-sensitive synonym table
	 * ($this->mFunctionSynonyms[1]); failing that it is lower-cased with the
	 * content language and matched against the case-insensitive table
	 * ($this->mFunctionSynonyms[0]).
	 *
	 * @param PPFrame $frame Frame used to expand PPNode arguments
	 * @param string $function Function name as written by the caller
	 * @param array $args Arguments; values may be PPNode objects or plain values
	 * @return array Always contains 'found'. When the function is unknown the
	 *  result is array( 'found' => false ). Otherwise it is the hook's own
	 *  result array (index 0 moved to 'text'), with 'found' => true added, and
	 *  with 'text' converted to a DOM plus 'isChildObj' => true when the hook
	 *  returned a false 'noparse' flag.
	 * @throws MWException If the registered callback is not callable
	 */
	public function callParserFunction( $frame, $function, array $args = array() ) {
		global $wgContLang;

		wfProfileIn( __METHOD__ );

		# Case sensitive functions
		if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
			$function = $this->mFunctionSynonyms[1][$function];
		} else {
			# Case insensitive functions
			$function = $wgContLang->lc( $function );
			if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
				$function = $this->mFunctionSynonyms[0][$function];
			} else {
				wfProfileOut( __METHOD__ );
				return array( 'found' => false );
			}
		}

		wfProfileIn( __METHOD__ . '-pfunc-' . $function );
		list( $callback, $flags ) = $this->mFunctionHooks[$function];

		# Workaround for PHP bug 35229 and similar
		if ( !is_callable( $callback ) ) {
			# Close both profiling sections before bailing out
			wfProfileOut( __METHOD__ . '-pfunc-' . $function );
			wfProfileOut( __METHOD__ );
			throw new MWException( "Function hook for $function is not callable\n" );
		}

		# The parser itself is always the first callback argument
		$allArgs = array( &$this );
		if ( $flags & self::SFH_OBJECT_ARGS ) {
			# Convert arguments to PPNodes and collect for appending to $allArgs
			$funcArgs = array();
			foreach ( $args as $k => $v ) {
				if ( $v instanceof PPNode || $k === 0 ) {
					$funcArgs[] = $v;
				} else {
					$funcArgs[] = $this->mPreprocessor->newPartNodeArray( array( $k => $v ) )->item( 0 );
				}
			}

			# Add a frame parameter, and pass the arguments as an array
			$allArgs[] = $frame;
			$allArgs[] = $funcArgs;
		} else {
			# Convert arguments to plain text and append to $allArgs
			foreach ( $args as $k => $v ) {
				if ( $v instanceof PPNode ) {
					$allArgs[] = trim( $frame->expand( $v ) );
				} elseif ( is_int( $k ) && $k >= 0 ) {
					$allArgs[] = trim( $v );
				} else {
					# Named argument: hand it over in "name=value" form
					$allArgs[] = trim( "$k=$v" );
				}
			}
		}

		$result = call_user_func_array( $callback, $allArgs );

		# The interface for function hooks allows them to return a wikitext
		# string or an array containing the string and any flags. This mungs
		# things around to match what this method should return.
		if ( !is_array( $result ) ) {
			$result = array(
				'found' => true,
				'text' => $result,
			);
		} else {
			if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
				$result['text'] = $result[0];
			}
			unset( $result[0] );
			# Array union: a 'found' value supplied by the hook wins
			$result += array(
				'found' => true,
			);
		}

		$noparse = true;
		$preprocessFlags = 0;
		if ( isset( $result['noparse'] ) ) {
			$noparse = $result['noparse'];
		}
		if ( isset( $result['preprocessFlags'] ) ) {
			$preprocessFlags = $result['preprocessFlags'];
		}

		if ( !$noparse ) {
			$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
			$result['isChildObj'] = true;
		}
		wfProfileOut( __METHOD__ . '-pfunc-' . $function );
		wfProfileOut( __METHOD__ );

		return $result;
	}

	/**
	 * Get the preprocessed DOM of a template, using the per-parse caches.
	 *
	 * $this->mTplRedirCache maps a prefixed DB key to the (namespace, DB key)
	 * of the title it resolved to; $this->mTplDomCache maps a prefixed DB key
	 * to the DOM, or to false when fetching the text failed.
	 *
	 * @param Title $title
	 * @return array array( DOM or false, Title the text was finally taken from )
	 */
	function getTemplateDom( $title ) {
		$cacheTitle = $title;
		$titleText = $title->getPrefixedDBkey();

		if ( isset( $this->mTplRedirCache[$titleText] ) ) {
			list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
			$title = Title::makeTitle( $ns, $dbk );
			$titleText = $title->getPrefixedDBkey();
		}
		if ( isset( $this->mTplDomCache[$titleText] ) ) {
			return array( $this->mTplDomCache[$titleText], $title );
		}

		# Cache miss, go to the database
		list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

		if ( $text === false ) {
			$this->mTplDomCache[$titleText] = false;
			return array( false, $title );
		}

		$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
		$this->mTplDomCache[$titleText] = $dom;

		# Remember where the requested title ended up when it differs
		if ( !$title->equals( $cacheTitle ) ) {
			$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
				array( $title->getNamespace(), $title->getDBkey() );
		}

		return array( $dom, $title );
	}

	/**
	 * Fetch the unparsed text of a template through the template callback
	 * from ParserOptions and register every dependency the callback reports
	 * with the parser output.
	 *
	 * @param Title $title
	 * @return array array( text or false, final Title )
	 */
	function fetchTemplateAndTitle( $title ) {
		$templateCb = $this->mOptions->getTemplateCallback(); # Defaults to Parser::statelessFetchTemplate()
		$stuff = call_user_func( $templateCb, $title, $this );
		$text = $stuff['text'];
		$finalTitle = isset( $stuff['finalTitle'] ) ? $stuff['finalTitle'] : $title;
		if ( isset( $stuff['deps'] ) ) {
			foreach ( $stuff['deps'] as $dep ) {
				$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );
				if ( $dep['title']->equals( $this->getTitle() ) ) {
					// If we transclude ourselves, the final result
					// will change based on the new version of the page
					$this->mOutput->setFlag( 'vary-revision' );
				}
			}
		}
		return array( $text, $finalTitle );
	}

	/**
	 * Fetch the unparsed text of a template and register a reference to it.
	 * Convenience wrapper returning only the text part of
	 * fetchTemplateAndTitle().
	 *
	 * @param Title $title
	 * @return string|bool
	 */
	function fetchTemplate( $title ) {
		$rv = $this->fetchTemplateAndTitle( $title );
		return $rv[0];
	}

	/**
	 * Static function to get a template.
	 * Can be overridden via ParserOptions::setTemplateCallback().
	 *
	 * Follows at most one redirect (the loop runs at most twice) and records
	 * every title it looked at in the returned 'deps' list.
	 *
	 * @param Title $title
	 * @param Parser|bool $parser Passed through to the
	 *  BeforeParserFetchTemplateAndtitle hook
	 * @return array With keys 'text' (string, or false on failure),
	 *  'finalTitle' (Title) and 'deps' (list of arrays with keys 'title',
	 *  'page_id' and 'rev_id')
	 */
	static function statelessFetchTemplate( $title, $parser = false ) {
		$text = $skip = false;
		$finalTitle = $title;
		$deps = array();

		# Loop to fetch the article, with up to 1 redirect
		for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
			# Give extensions a chance to select the revision instead
			$id = false; # Assume current
			wfRunHooks( 'BeforeParserFetchTemplateAndtitle',
				array( $parser, $title, &$skip, &$id ) );

			if ( $skip ) {
				$text = false;
				$deps[] = array(
					'title' => $title,
					'page_id' => $title->getArticleID(),
					'rev_id' => null
				);
				break;
			}
			# Get the revision
			$rev = $id
				? Revision::newFromId( $id )
				: Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
			$rev_id = $rev ? $rev->getId() : 0;
			# If there is no current revision, there is no page
			if ( $id === false && !$rev ) {
				$linkCache = LinkCache::singleton();
				$linkCache->addBadLinkObj( $title );
			}

			$deps[] = array(
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => $rev_id );
			if ( $rev && !$title->equals( $rev->getTitle() ) ) {
				# We fetched a rev from a different title; register it too...
				$deps[] = array(
					'title' => $rev->getTitle(),
					'page_id' => $rev->getPage(),
					'rev_id' => $rev_id );
			}

			if ( $rev ) {
				$content = $rev->getContent();
				$text = $content ? $content->getWikitextForTransclusion() : null;

				if ( $text === false || $text === null ) {
					$text = false;
					break;
				}
			} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
				# No revision, but the page may still exist as an interface message
				global $wgContLang;
				$message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
				if ( !$message->exists() ) {
					$text = false;
					break;
				}
				$content = $message->content();
				$text = $message->plain();
			} else {
				break;
			}
			if ( !$content ) {
				break;
			}
			# Redirect?
			$finalTitle = $title;
			$title = $content->getRedirectTarget();
		}
		return array(
			'text' => $text,
			'finalTitle' => $finalTitle,
			'deps' => $deps );
	}

	/**
	 * Fetch a file and register a reference to it.
	 * Convenience wrapper returning only the file part of fetchFileAndTitle().
	 *
	 * @param Title $title
	 * @param array $options Passed through to fetchFileAndTitle()
	 * @return File|bool
	 */
	function fetchFile( $title, $options = array() ) {
		$res = $this->fetchFileAndTitle( $title, $options );
		return $res[0];
	}

	/**
	 * Fetch a file and its title, and register the file as an image
	 * dependency of the parser output (under the requested title and, when
	 * the file's own title differs, under that title as well).
	 *
	 * @param Title $title
	 * @param array $options Passed through to fetchFileNoRegister()
	 * @return array array( File or false, Title of file )
	 */
	function fetchFileAndTitle( $title, $options = array() ) {
		$file = $this->fetchFileNoRegister( $title, $options );

		$time = $file ? $file->getTimestamp() : false;
		$sha1 = $file ? $file->getSha1() : false;
		# Register the file as a dependency...
$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
		if ( $file && !$title->equals( $file->getTitle() ) ) {
			# Update fetched file title
			$title = $file->getTitle();
			$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
		}
		return array( $file, $title );
	}

	/**
	 * Helper for fetchFileAndTitle(): look the file up without registering
	 * it as a dependency.
	 *
	 * @param Title $title
	 * @param array $options Recognised keys, in order of precedence:
	 *  'broken' (force "no file"), 'sha1' (look up by content key; the whole
	 *  options array is forwarded), otherwise a lookup by title
	 * @return File|bool
	 */
	protected function fetchFileNoRegister( $title, $options = array() ) {
		if ( isset( $options['broken'] ) ) {
			$file = false; // broken thumbnail forced by hook
		} elseif ( isset( $options['sha1'] ) ) { // get by (sha1,timestamp)
			$file = RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
		} else { // get by (name,timestamp)
			$file = wfFindFile( $title, $options );
		}
		return $file;
	}

	/**
	 * Transclude an interwiki link.
	 *
	 * @param Title $title
	 * @param string $action Value of the "action" URL parameter of the request
	 * @return string The fetched text, or a localised error message when
	 *  scary transclusion is disabled or the URL exceeds 255 bytes
	 */
	function interwikiTransclude( $title, $action ) {
		global $wgEnableScaryTranscluding;

		if ( !$wgEnableScaryTranscluding ) {
			return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
		}

		$url = $title->getFullURL( array( 'action' => $action ) );

		if ( strlen( $url ) > 255 ) {
			return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
		}
		return $this->fetchScaryTemplateMaybeFromCache( $url );
	}

	/**
	 * Return the content behind a URL, served from the `transcache` table
	 * when an entry no older than $wgTranscludeCacheExpiry seconds exists,
	 * and fetched over HTTP (then stored in the table) otherwise.
	 *
	 * @param string $url
	 * @return string Content, or a localised error message when the request failed
	 */
	function fetchScaryTemplateMaybeFromCache( $url ) {
		global $wgTranscludeCacheExpiry;
		$dbr = wfGetDB( DB_SLAVE );
		$tsCond = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
		$obj = $dbr->selectRow( 'transcache', array( 'tc_time', 'tc_contents' ),
			array( 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $tsCond ) ) );
		if ( $obj ) {
			return $obj->tc_contents;
		}

		$req = MWHttpRequest::factory( $url );
		$status = $req->execute(); // Status object
		if ( $status->isOK() ) {
			$text = $req->getContent();
		} elseif ( $req->getStatus() != 200 ) { // Though we failed to fetch the content, this status is useless.
			return wfMessage( 'scarytranscludefailed-httpstatus', $url, $req->getStatus() /* HTTP status */ )->inContentLanguage()->text();
		} else {
			return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
		}

		# Only successful fetches are written back to the cache table
		$dbw = wfGetDB( DB_MASTER );
		$dbw->replace( 'transcache', array( 'tc_url' ), array(
			'tc_url' => $url,
			'tc_time' => $dbw->timestamp( time() ),
			'tc_contents' => $text
		) );
		return $text;
	}

	/**
	 * Triple brace replacement -- used for template arguments.
	 *
	 * @param array $piece Must contain 'title' and 'parts'
	 * @param PPFrame $frame
	 * @return array Either array( 'object' => ... ) (the supplied default, or
	 *  the re-assembled original {{{...}}} source when nothing matched) or
	 *  array( 'text' => ... ) (the argument value from the frame)
	 */
	function argSubstitution( $piece, $frame ) {
		wfProfileIn( __METHOD__ );

		$error = false;
		$parts = $piece['parts'];
		$nameWithSpaces = $frame->expand( $piece['title'] );
		$argName = trim( $nameWithSpaces );
		$object = false;
		$text = $frame->getArgument( $argName );
		if ( $text === false && $parts->getLength() > 0
			&& ( $this->ot['html']
				|| $this->ot['pre']
				|| ( $this->ot['wiki'] && $frame->isTemplate() )
			)
		) {
			# No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
		if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
			$error = '<!-- WARNING: argument omitted, expansion size too large -->';
			$this->limitationWarn( 'post-expand-template-argument' );
		}

		if ( $text === false && $object === false ) {
			# No match anywhere
			$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
		}
		if ( $error !== false ) {
			$text .= $error;
		}
		if ( $object !== false ) {
			$ret = array( 'object' => $object );
		} else {
			$ret = array( 'text' => $text );
		}

		wfProfileOut( __METHOD__ );
		return $ret;
	}

	/**
	 * Return the text to be used for a given extension tag.
	 * This is the ghost of strip().
	 *
	 * In HTML output (or for function tag hooks in HTML/preprocess output)
	 * the registered hook is called; otherwise the tag is re-serialised as
	 * source text. Unless the marker type is 'none', the result is stored in
	 * the strip state and a strip marker is returned in its place.
	 *
	 * @param array $params Uses the keys 'name' and, when set, 'attr',
	 *  'inner', 'attributes' and 'close'
	 * @param PPFrame $frame
	 * @return string The hook output itself (marker type 'none') or a strip marker
	 * @throws MWException If a registered hook is not callable or a hook
	 *  asks for an unknown marker type
	 */
	function extensionSubstitution( $params, $frame ) {
		$name = $frame->expand( $params['name'] );
		$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
		$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
		$marker = "{$this->mUniqPrefix}-$name-" . sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

		$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
			( $this->ot['html'] || $this->ot['pre'] );
		if ( $isFunctionTag ) {
			$markerType = 'none';
		} else {
			$markerType = 'general';
		}
		if ( $this->ot['html'] || $isFunctionTag ) {
			$name = strtolower( $name );
			$attributes = Sanitizer::decodeTagAttributes( $attrText );
			if ( isset( $params['attributes'] ) ) {
				# Array union: attributes parsed from the tag text take precedence
				$attributes = $attributes + $params['attributes'];
			}

			if ( isset( $this->mTagHooks[$name] ) ) {
				# Workaround for PHP bug 35229 and similar
				if ( !is_callable( $this->mTagHooks[$name] ) ) {
					throw new MWException( "Tag hook for $name is not callable\n" );
				}
				$output = call_user_func_array( $this->mTagHooks[$name],
					array( $content, $attributes, $this, $frame ) );
			} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
				list( $callback, ) = $this->mFunctionTagHooks[$name];
				if ( !is_callable( $callback ) ) {
					throw new MWException( "Tag hook for $name is not callable\n" );
				}

				$output = call_user_func_array( $callback, array( &$this, $frame, $content, $attributes ) );
			} else {
				$output = '<span class="error">Invalid tag extension name: ' .
					htmlspecialchars( $name ) . '</span>';
			}

			if ( is_array( $output ) ) {
				# A hook may return array( $text, 'markerType' => ... ) to
				# override $markerType. Read just that flag instead of
				# extract()ing the whole array, which would let a hook
				# silently overwrite any other local variable of this method.
				$flags = $output;
				$output = $flags[0];
				if ( array_key_exists( 'markerType', $flags ) ) {
					$markerType = $flags['markerType'];
				}
			}
		} else {
			if ( is_null( $attrText ) ) {
				$attrText = '';
			}
			if ( isset( $params['attributes'] ) ) {
				foreach ( $params['attributes'] as $attrName => $attrValue ) {
					$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
						htmlspecialchars( $attrValue ) . '"';
				}
			}
			if ( $content === null ) {
				$output = "<$name$attrText/>";
			} else {
				# isset() treats a missing key like a null one without
				# raising an undefined-index notice
				$close = !isset( $params['close'] ) ? '' : $frame->expand( $params['close'] );
				$output = "<$name$attrText>$content$close";
			}
		}

		if ( $markerType === 'none' ) {
			return $output;
		} elseif ( $markerType === 'nowiki' ) {
			$this->mStripState->addNoWiki( $marker, $output );
		} elseif ( $markerType === 'general' ) {
			$this->mStripState->addGeneral( $marker, $output );
		} else {
			throw new MWException( __METHOD__ . ': invalid marker type' );
		}
		return $marker;
	}

	/**
	 * Increment an include size counter.
	 *
	 * @param string $type Key into $this->mIncludeSizes
	 * @param int $size Amount to add
	 * @return bool False (and the counter is left unchanged) if adding $size
	 *  would exceed ParserOptions::getMaxIncludeSize(), true otherwise
	 */
	function incrementIncludeSize( $type, $size ) {
		if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize() ) {
			return false;
		} else {
			$this->mIncludeSizes[$type] += $size;
			return true;
		}
	}

	/**
	 * Increment the expensive function count.
	 *
	 * @return bool False if the count, after incrementing, exceeds
	 *  ParserOptions::getExpensiveParserFunctionLimit()
	 */
	function incrementExpensiveFunctionCount() {
		$this->mExpensiveFunctionCount++;
		return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
	}

	/**
	 * Strip double-underscore items like __NOGALLERY__ and __NOTOC__ from
	 * the text and apply their effects to the parser state and output.
	 *
	 * @param string $text
	 * @return string The text with the magic words removed (the first
	 *  __TOC__ is replaced by the '<!--MWTOC-->' placeholder)
	 */
	function doDoubleUnderscore( $text ) {
		wfProfileIn( __METHOD__ );

		# The position of __TOC__ needs to be recorded
		$mw = MagicWord::get( 'toc' );
		if ( $mw->match( $text ) ) {
			$this->mShowToc = true;
			$this->mForceTocPosition = true;

			# Set a placeholder. At the end we'll fill it in with the TOC.
			$text = $mw->replace( '<!--MWTOC-->', $text, 1 );

			# Only keep the first one.
			$text = $mw->replace( '', $text );
		}

		# Now match and remove the rest of them
		$mwa = MagicWord::getDoubleUnderscoreArray();
		$this->mDoubleUnderscores = $mwa->matchAndRemove( $text );

		if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
			$this->mOutput->mNoGallery = true;
		}
		if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) {
			$this->mShowToc = false;
		}
		if ( isset( $this->mDoubleUnderscores['hiddencat'] ) && $this->mTitle->getNamespace() == NS_CATEGORY ) {
			$this->addTrackingCategory( 'hidden-category-category' );
		}
		# (bug 8068) Allow control over whether robots index a page.
		#
		# @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! This
		# is not desirable, the last one on the page should win.
		if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) {
			$this->mOutput->setIndexPolicy( 'noindex' );
			$this->addTrackingCategory( 'noindex-category' );
		}
		if ( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ) {
			$this->mOutput->setIndexPolicy( 'index' );
			$this->addTrackingCategory( 'index-category' );
		}

		# Cache all double underscores in the database
		foreach ( $this->mDoubleUnderscores as $key => $val ) {
			$this->mOutput->setProperty( $key, '' );
		}

		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Add a tracking category to the output, taking its name from a
	 * system message.
	 *
	 * @param string $msg Message key
	 * @return bool Whether the category was added; false for pages in the
	 *  Special namespace, when the message text is "-", or when the message
	 *  text is not a valid category title
	 */
	public function addTrackingCategory( $msg ) {
		if ( $this->mTitle->getNamespace() === NS_SPECIAL ) {
			wfDebug( __METHOD__ .
": Not adding tracking category $msg to special page!\n" );
			return false;
		}
		// Important to parse with correct title (bug 31469)
		$cat = wfMessage( $msg )
			->title( $this->getTitle() )
			->inContentLanguage()
			->text();

		# Allow tracking categories to be disabled by setting them to "-"
		if ( $cat === '-' ) {
			return false;
		}

		$containerCategory = Title::makeTitleSafe( NS_CATEGORY, $cat );
		if ( $containerCategory ) {
			$this->mOutput->addCategory( $containerCategory->getDBkey(), $this->getDefaultSort() );
			return true;
		} else {
			wfDebug( __METHOD__ . ": [[MediaWiki:$msg]] is not a valid title!\n" );
			return false;
		}
	}

	/**
	 * Rebuild every <h1>..<h6> element in $text as a proper headline with
	 * anchor, optional auto-number and edit-section marker, and build the
	 * table of contents.
	 *
	 * Side effects on the parser output: edit section tokens, new-section
	 * link flags, the TOC HTML (when a TOC is shown) and, when $isMain is
	 * true, the section list. The parser's output type is switched to
	 * OT_WIKI while the headings are processed and restored afterwards.
	 *
	 * @param string $text HTML containing the headings
	 * @param string $origText Original, unprocessed wikitext; preprocessed
	 *  again here to locate each heading's offset
	 * @param bool $isMain When true, the section list is stored in the
	 *  output and (unless __TOC__ fixed its position) the TOC is appended
	 *  to the lead section
	 * @return string The text with rebuilt headings and the TOC inserted
	 */
	function formatHeadings( $text, $origText, $isMain = true ) {
		global $wgMaxTocLevel, $wgExperimentalHtmlIds;

		# Inhibit editsection links if requested in the page
		if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
			$maybeShowEditLink = $showEditLink = false;
		} else {
			$maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */
			$showEditLink = $this->mOptions->getEditSection();
		}
		if ( $showEditLink ) {
			$this->mOutput->setEditSectionTokens( true );
		}

		# Get all headlines for numbering them and adding funky stuff like [edit]
		# links - this is for later, but we need the number of headlines right now
		$matches = array();
		$numMatches = preg_match_all( '/<H(?P<level>[1-6])(?P<attrib>.*?' . '>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i', $text, $matches );

		# if there are fewer than 4 headlines in the article, do not show TOC
		# unless it's been explicitly enabled.
		$enoughToc = $this->mShowToc &&
			( ( $numMatches >= 4 ) || $this->mForceTocPosition );

		# Allow user to stipulate that a page should have a "new section"
		# link added via __NEWSECTIONLINK__
		if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
			$this->mOutput->setNewSection( true );
		}

		# Allow user to remove the "new section"
		# link via __NONEWSECTIONLINK__
		if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
			$this->mOutput->hideNewSection( true );
		}

		# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
		# override above conditions and always show TOC above first header
		if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
			$this->mShowToc = true;
			$enoughToc = true;
		}

		# headline counter
		$headlineCount = 0;
		$numVisible = 0;

		# Ugh .. the TOC should have neat indentation levels which can be
		# passed to the skin functions. These are determined here
		$toc = '';
		$full = '';
		$head = array();
		$sublevelCount = array();
		$levelCount = array();
		$level = 0;
		$prevlevel = 0;
		$toclevel = 0;
		$prevtoclevel = 0;
		$markerRegex = "{$this->mUniqPrefix}-h-(\d+)-" . self::MARKER_SUFFIX;
		$baseTitleText = $this->mTitle->getPrefixedDBkey();
		$oldType = $this->mOutputType;
		$this->setOutputType( self::OT_WIKI );
		$frame = $this->getPreprocessor()->newFrame();
		$root = $this->preprocessToDom( $origText );
		$node = $root->getFirstChild();
		$byteOffset = 0;
		$tocraw = array();
		$refers = array();

		# $matches[3] is the 'header' capture group of the regex above
		foreach ( $matches[3] as $headline ) {
			$isTemplate = false;
			$titleText = false;
			$sectionIndex = false;
			$numbering = '';
			$markerMatches = array();
			if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
				$serial = $markerMatches[1];
				list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
				$isTemplate = ( $titleText != $baseTitleText );
				$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
			}

			if ( $toclevel ) {
				$prevlevel = $level;
			}
			$level = $matches[1][$headlineCount];

			if ( $level > $prevlevel ) {
				# Increase TOC level
				$toclevel++;
				$sublevelCount[$toclevel] = 0;
				if ( $toclevel < $wgMaxTocLevel ) {
					$prevtoclevel = $toclevel;
					$toc .= Linker::tocIndent();
					$numVisible++;
				}
			} elseif ( $level < $prevlevel && $toclevel > 1 ) {
				# Decrease TOC level, find level to jump to

				for ( $i = $toclevel; $i > 0; $i-- ) {
					if ( $levelCount[$i] == $level ) {
						# Found last matching level
						$toclevel = $i;
						break;
					} elseif ( $levelCount[$i] < $level ) {
						# Found first matching level below current level
						$toclevel = $i + 1;
						break;
					}
				}
				if ( $i == 0 ) {
					$toclevel = 1;
				}
				if ( $toclevel < $wgMaxTocLevel ) {
					if ( $prevtoclevel < $wgMaxTocLevel ) {
						# Unindent only if the previous toc level was shown :p
						$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
						$prevtoclevel = $toclevel;
					} else {
						$toc .= Linker::tocLineEnd();
					}
				}
			} else {
				# No change in level, end TOC line
				if ( $toclevel < $wgMaxTocLevel ) {
					$toc .= Linker::tocLineEnd();
				}
			}

			$levelCount[$toclevel] = $level;

			# count number of headlines for each level
			$sublevelCount[$toclevel]++;
			$dot = 0;
			for ( $i = 1; $i <= $toclevel; $i++ ) {
				if ( !empty( $sublevelCount[$i] ) ) {
					if ( $dot ) {
						$numbering .= '.';
					}
					$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
					$dot = 1;
				}
			}

			# The safe header is a version of the header text safe to use for links

			# Remove link placeholders by the link text.
			#     <!--LINK number-->
			# turns into
			#     link text with suffix
			# Do this before unstrip since link text can contain strip markers
			$safeHeadline = $this->replaceLinkHoldersText( $headline );

			# Avoid insertion of weird stuff like <math> by expanding the relevant sections
			$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

			# Strip out HTML (first regex removes any tag not allowed)
			# Allowed tags are:
			# * <sup> and <sub> (bug 8393)
			# * <i> (bug 26375)
			# * <b> (r105284)
			# * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
			#
			# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
			# to allow setting directionality in toc items.
			$tocline = preg_replace(
				array( '#<(?!/?(span|sup|sub|i|b)(?: [^>]*)?>).*?' . '>#', '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|i|b))(?: .*?)?' . '>#' ),
				array( '', '<$1>' ),
				$safeHeadline
			);
			$tocline = trim( $tocline );

			# For the anchor, strip out HTML-y stuff period
			$safeHeadline = preg_replace( '/<.*?' . '>/', '', $safeHeadline );
			$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

			# Save headline for section edit hint before it's escaped
			$headlineHint = $safeHeadline;

			if ( $wgExperimentalHtmlIds ) {
				# For reverse compatibility, provide an id that's
				# HTML4-compatible, like we used to.
				#
				# It may be worth noting, academically, that it's possible for
				# the legacy anchor to conflict with a non-legacy headline
				# anchor on the page.  In this case likely the "correct" thing
				# would be to either drop the legacy anchors or make sure
				# they're numbered first.  However, this would require people
				# to type in section names like "abc_.D7.93.D7.90.D7.A4"
				# manually, so let's not bother worrying about it.
				$legacyHeadline = Sanitizer::escapeId( $safeHeadline,
					array( 'noninitial', 'legacy' ) );
				$safeHeadline = Sanitizer::escapeId( $safeHeadline );

				if ( $legacyHeadline == $safeHeadline ) {
					# No reason to have both (in fact, we can't)
					$legacyHeadline = false;
				}
			} else {
				$legacyHeadline = false;
				$safeHeadline = Sanitizer::escapeId( $safeHeadline,
					'noninitial' );
			}

			# HTML names must be case-insensitively unique (bug 10721).
			# This does not apply to Unicode characters per
			# http://dev.w3.org/html5/spec/infrastructure.html#case-sensitivity-and-string-comparison
			# @todo FIXME: We may be changing them depending on the current locale.
			$arrayKey = strtolower( $safeHeadline );
			if ( $legacyHeadline === false ) {
				$legacyArrayKey = false;
			} else {
				$legacyArrayKey = strtolower( $legacyHeadline );
			}

			# count how many in assoc. array so we can track dupes in anchors
			if ( isset( $refers[$arrayKey] ) ) {
				$refers[$arrayKey]++;
			} else {
				$refers[$arrayKey] = 1;
			}
			# Only count the legacy anchor when there is one. Using false as
			# an array key casts it to the integer key 0, which is also the
			# key PHP uses for the string "0" -- so every heading without a
			# legacy anchor used to bump the counter of a heading whose
			# anchor is "0", giving that heading a bogus "_N" suffix.
			if ( $legacyArrayKey !== false ) {
				if ( isset( $refers[$legacyArrayKey] ) ) {
					$refers[$legacyArrayKey]++;
				} else {
					$refers[$legacyArrayKey] = 1;
				}
			}

			# Don't number the heading if it is the only one (looks silly)
			if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
				# the two are different if the line contains a link
				$headline = Html::element( 'span', array( 'class' => 'mw-headline-number' ), $numbering ) . ' ' . $headline;
			}

			# Create the anchor for linking from the TOC to the section
			$anchor = $safeHeadline;
			$legacyAnchor = $legacyHeadline;
			if ( $refers[$arrayKey] > 1 ) {
				$anchor .= '_' . $refers[$arrayKey];
			}
			if ( $legacyHeadline !== false && $refers[$legacyArrayKey] > 1 ) {
				$legacyAnchor .= '_' . $refers[$legacyArrayKey];
			}
			if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
				$toc .= Linker::tocLine( $anchor, $tocline,
					$numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
			}

			# Add the section to the section tree
			# Find the DOM node for this header
			$noOffset = ( $isTemplate || $sectionIndex === false );
			while ( $node && !$noOffset ) {
				if ( $node->getName() === 'h' ) {
					$bits = $node->splitHeading();
					if ( $bits['i'] == $sectionIndex ) {
						break;
					}
				}
				$byteOffset += mb_strlen( $this->mStripState->unstripBoth(
					$frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
				$node = $node->getNextSibling();
			}
			$tocraw[] = array(
				'toclevel' => $toclevel,
				'level' => $level,
				'line' => $tocline,
				'number' => $numbering,
				'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
				'fromtitle' => $titleText,
				'byteoffset' => ( $noOffset ? null : $byteOffset ),
				'anchor' => $anchor,
			);

			# give headline the correct <h#> tag
			if ( $maybeShowEditLink && $sectionIndex !== false ) {
				// Output edit section links as markers with styles that can be customized by skins
				if ( $isTemplate ) {
					# Put a T flag in the section identifier, to indicate to extractSections()
					# that sections inside <includeonly> should be counted.
					$editlinkArgs = array( $titleText, "T-$sectionIndex"/*, null */ );
				} else {
					$editlinkArgs = array( $this->mTitle->getPrefixedText(), $sectionIndex, $headlineHint );
				}
				// We use a bit of pesudo-xml for editsection markers. The language converter is run later on
				// Using a UNIQ style marker leads to the converter screwing up the tokens when it converts stuff
				// And trying to insert strip tags fails too. At this point all real inputted tags have already been escaped
				// so we don't have to worry about a user trying to input one of these markers directly.
				// We use a page and section attribute to stop the language converter from converting these important bits
				// of data, but put the headline hint inside a content block because the language converter is supposed to
				// be able to convert that piece of data.
				$editlink = '<mw:editsection page="' . htmlspecialchars( $editlinkArgs[0] );
				$editlink .= '" section="' . htmlspecialchars( $editlinkArgs[1] ) . '"';
				if ( isset( $editlinkArgs[2] ) ) {
					$editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>';
				} else {
					$editlink .= '/>';
				}
			} else {
				$editlink = '';
			}
			$head[$headlineCount] = Linker::makeHeadline( $level,
				$matches['attrib'][$headlineCount], $anchor, $headline,
				$editlink, $legacyAnchor );

			$headlineCount++;
		}

		$this->setOutputType( $oldType );

		# Never ever show TOC if no headers
		if ( $numVisible < 1 ) {
			$enoughToc = false;
		}

		if ( $enoughToc ) {
			if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
				$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
			}
			$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
			$this->mOutput->setTOCHTML( $toc );
			$toc = self::TOC_START . $toc . self::TOC_END;
		}

		if ( $isMain ) {
			$this->mOutput->setSections( $tocraw );
		}

		# split up and insert constructed headlines
		$blocks = preg_split( '/<H[1-6].*?' . '>[\s\S]*?<\/H[1-6]>/i', $text );
		$i = 0;

		// build an array of document sections
		$sections = array();
		foreach ( $blocks as $block ) {
			// $head is zero-based, sections aren't.
			if ( empty( $head[$i - 1] ) ) {
				$sections[$i] = $block;
			} else {
				$sections[$i] = $head[$i - 1] . $block;
			}

			# Let hook handlers modify the section text in place
			wfRunHooks( 'ParserSectionCreate', array( $this, $i, &$sections[$i], $showEditLink ) );

			$i++;
		}

		if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
			// append the TOC at the beginning
			// Top anchor now in skin
			$sections[0] = $sections[0] . $toc . "\n";
		}

		$full .= join( '', $sections );

		if ( $this->mForceTocPosition ) {
			return str_replace( '<!--MWTOC-->', $toc, $full );
		} else {
			return $full;
		}
	}

	/**
	 * Transform wiki markup when saving a page: normalise CRLF line endings
	 * to LF, run the second pre-save pass when the options ask for it, and
	 * unstrip the result.
	 *
	 * The parser's user is set to $user for the duration of the call and
	 * reset to null before returning.
	 *
	 * @param string $text The text to transform
	 * @param Title $title The Title object for the current article
	 * @param User $user The User object describing the current user
	 * @param ParserOptions $options Parsing options
	 * @param bool $clearState Passed through to startParse()
	 * @return string The altered wiki markup
	 */
	public function preSaveTransform( $text, Title $title, User $user, ParserOptions $options, $clearState = true ) {
		$this->startParse( $title, $options, self::OT_WIKI, $clearState );
		$this->setUser( $user );

		$pairs = array(
			"\r\n" => "\n",
		);
		$text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text );
		if ( $options->getPreSaveTransform() ) {
			$text = $this->pstPass2( $text, $user );
		}
		$text = $this->mStripState->unstripBoth( $text );

		$this->setUser( null ); #Reset

		return $text;
	}

	private function pstPass2( $text, $user ) {
		global $wgContLang;

		# Note: This is the timestamp saved as hardcoded wikitext to
		# the database, we use $wgContLang here in order to give
		# everyone the same signature and use the default one rather
		# than the one selected in each user's preferences.
		# (see also bug 12815)
		$ts = $this->mOptions->getTimestamp();
		$timestamp = MWTimestamp::getLocalInstance( $ts );
		$ts = $timestamp->format( 'YmdHis' );
		$tzMsg = $timestamp->format( 'T' );  # might vary on DST changeover!

		# Allow translation of timezones through wiki. format() can return
		# whatever crap the system uses, localised or not, so we cannot
		# ship premade translations.
		$key = 'timezone-' . strtolower( trim( $tzMsg ) );
		$msg = wfMessage( $key )->inContentLanguage();
		if ( $msg->exists() ) {
			$tzMsg = $msg->text();
		}

		$d = $wgContLang->timeanddate( $ts, false, false ) .
" ($tzMsg)"; 04637 04638 # Variable replacement 04639 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags 04640 $text = $this->replaceVariables( $text ); 04641 04642 # This works almost by chance, as the replaceVariables are done before the getUserSig(), 04643 # which may corrupt this parser instance via its wfMessage()->text() call- 04644 04645 # Signatures 04646 $sigText = $this->getUserSig( $user ); 04647 $text = strtr( $text, array( 04648 '~~~~~' => $d, 04649 '~~~~' => "$sigText $d", 04650 '~~~' => $sigText 04651 ) ); 04652 04653 # Context links ("pipe tricks"): [[|name]] and [[name (context)|]] 04654 $tc = '[' . Title::legalChars() . ']'; 04655 $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii! 04656 04657 $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/"; # [[ns:page (context)|]] 04658 $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?($tc+))\\|]]/"; # [[ns:page(context)|]] (double-width brackets, added in r40257) 04659 $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |,)$tc+|)\\|]]/"; # [[ns:page (context), context|]] (using either single or double-width comma) 04660 $p2 = "/\[\[\\|($tc+)]]/"; # [[|page]] (reverse pipe trick: add context from page title) 04661 04662 # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]" 04663 $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text ); 04664 $text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text ); 04665 $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text ); 04666 04667 $t = $this->mTitle->getText(); 04668 $m = array(); 04669 if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) { 04670 $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text ); 04671 } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) { 04672 $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text ); 04673 } else { 04674 # if there's no context, don't bother duplicating the title 04675 $text = preg_replace( $p2, '[[\\1]]', $text ); 04676 } 04677 04678 # 
Trim trailing whitespace 04679 $text = rtrim( $text ); 04680 04681 return $text; 04682 } 04683 04698 function getUserSig( &$user, $nickname = false, $fancySig = null ) { 04699 global $wgMaxSigChars; 04700 04701 $username = $user->getName(); 04702 04703 # If not given, retrieve from the user object. 04704 if ( $nickname === false ) { 04705 $nickname = $user->getOption( 'nickname' ); 04706 } 04707 04708 if ( is_null( $fancySig ) ) { 04709 $fancySig = $user->getBoolOption( 'fancysig' ); 04710 } 04711 04712 $nickname = $nickname == null ? $username : $nickname; 04713 04714 if ( mb_strlen( $nickname ) > $wgMaxSigChars ) { 04715 $nickname = $username; 04716 wfDebug( __METHOD__ . ": $username has overlong signature.\n" ); 04717 } elseif ( $fancySig !== false ) { 04718 # Sig. might contain markup; validate this 04719 if ( $this->validateSig( $nickname ) !== false ) { 04720 # Validated; clean up (if needed) and return it 04721 return $this->cleanSig( $nickname, true ); 04722 } else { 04723 # Failed to validate; fall back to the default 04724 $nickname = $username; 04725 wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" ); 04726 } 04727 } 04728 04729 # Make sure nickname doesnt get a sig in a sig 04730 $nickname = self::cleanSigInSig( $nickname ); 04731 04732 # If we're still here, make it a link to the user page 04733 $userText = wfEscapeWikiText( $username ); 04734 $nickText = wfEscapeWikiText( $nickname ); 04735 $msgName = $user->isAnon() ? 'signature-anon' : 'signature'; 04736 04737 return wfMessage( $msgName, $userText, $nickText )->inContentLanguage()->title( $this->getTitle() )->text(); 04738 } 04739 04746 function validateSig( $text ) { 04747 return Xml::isWellFormedXmlFragment( $text ) ? 
$text : false;
	}

	/**
	 * Clean up signature text: force substitution of templates, remove
	 * nested ~~~ signatures and expand the result through the preprocessor.
	 *
	 * @param string $text Signature wikitext
	 * @param bool $parsing True when called from inside a running parse; when
	 *   false a fresh OT_PREPROCESS parse is started on $wgTitle and the
	 *   result is unstripped before being returned
	 * @return string Cleaned signature text (unchanged if the
	 *   getCleanSignatures() option is off)
	 */
	public function cleanSig( $text, $parsing = false ) {
		if ( !$parsing ) {
			global $wgTitle;
			$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
		}

		# Option to disable this feature
		if ( !$this->mOptions->getCleanSignatures() ) {
			return $text;
		}

		# @todo FIXME: Regex doesn't respect extension tags or nowiki
		#  => Move this logic to braceSubstitution()
		$substWord = MagicWord::get( 'subst' );
		# Match every "{{" that is not already followed by the subst magic word
		$substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
		$substText = '{{' . $substWord->getSynonym( 0 );

		$text = preg_replace( $substRegex, $substText, $text );
		$text = self::cleanSigInSig( $text );
		$dom = $this->preprocessToDom( $text );
		$frame = $this->getPreprocessor()->newFrame();
		$text = $frame->expand( $dom );

		if ( !$parsing ) {
			$text = $this->mStripState->unstripBoth( $text );
		}

		return $text;
	}

	/**
	 * Strip runs of 3 to 5 tildes so a signature cannot contain a signature.
	 *
	 * @param string $text
	 * @return string
	 */
	public static function cleanSigInSig( $text ) {
		$text = preg_replace( '/~{3,5}/', '', $text );
		return $text;
	}

	/**
	 * Public entry point for setting up the parser state; forwards all
	 * arguments to startParse().
	 *
	 * @param Title|null $title
	 * @param ParserOptions $options
	 * @param int $outputType One of the OT_xxx constants
	 * @param bool $clearState
	 */
	public function startExternalParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) {
		$this->startParse( $title, $options, $outputType, $clearState );
	}

	/**
	 * Set title, options and output type, and optionally clear the state.
	 *
	 * @param Title|null $title
	 * @param ParserOptions $options
	 * @param int $outputType One of the OT_xxx constants
	 * @param bool $clearState
	 */
	private function startParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) {
		$this->setTitle( $title );
		$this->mOptions = $options;
		$this->setOutputType( $outputType );
		if ( $clearState ) {
			$this->clearState();
		}
	}

	/**
	 * Run the preprocessor over a message text.
	 *
	 * A static flag guards against re-entrant calls: a nested call returns
	 * its input untouched.
	 *
	 * @param string $text The text to preprocess
	 * @param ParserOptions $options
	 * @param Title|null $title Title context; $wgTitle is used when omitted
	 * @return string
	 * @throws Exception Whatever preprocess() throws is re-thrown after the
	 *   guard and the profiler section have been released
	 */
	public function transformMsg( $text, $options, $title = null ) {
		static $executing = false;

		# Guard against infinite recursion
		if ( $executing ) {
			return $text;
		}
		$executing = true;

		wfProfileIn( __METHOD__ );
		if ( !$title ) {
			global $wgTitle;
			$title = $wgTitle;
		}

		try {
			$text = $this->preprocess( $text, $title, $options );
		} catch ( Exception $e ) {
			# Release the guard, otherwise every later call in this request
			# would silently return its input untransformed.
			# (try/catch + rethrow rather than "finally", which needs PHP 5.5.)
			$executing = false;
			wfProfileOut( __METHOD__ );
			throw $e;
		}

		$executing = false;
		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Register a callback for an extension tag and add the tag to the
	 * strip list.
	 *
	 * @param string $tag Tag name (lower-cased before use)
	 * @param mixed $callback The callback to store for this tag
	 * @throws MWException If the tag name contains '<', '>', CR or LF
	 * @return mixed|null The previously registered callback, or null
	 */
	public function setHook( $tag, $callback ) {
		$tag = strtolower( $tag );
		if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
			throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
		}
		$oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
		$this->mTagHooks[$tag] = $callback;
		if ( !in_array( $tag, $this->mStripList ) ) {
			$this->mStripList[] = $tag;
		}

		return $oldVal;
	}

	/**
	 * Register a callback for a transparent tag. Unlike setHook(), the tag
	 * is not added to the strip list.
	 *
	 * @param string $tag Tag name (lower-cased before use)
	 * @param mixed $callback The callback to store for this tag
	 * @throws MWException If the tag name contains '<', '>', CR or LF
	 * @return mixed|null The previously registered callback, or null
	 */
	function setTransparentTagHook( $tag, $callback ) {
		$tag = strtolower( $tag );
		if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
			# The message names this method (it used to say "setTransparentHook")
			throw new MWException( "Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call" );
		}
		$oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
		$this->mTransparentTagHooks[$tag] = $callback;

		return $oldVal;
	}

	/**
	 * Remove all tag hooks and function tag hooks, and reset the strip list
	 * to its default.
	 */
	function clearTagHooks() {
		$this->mTagHooks = array();
		$this->mFunctionTagHooks = array();
		$this->mStripList = $this->mDefaultStripList;
	}

	/**
	 * Register a parser function callback under a magic word ID.
	 *
	 * @param string $id The magic word ID
	 * @param mixed $callback The callback to store
	 * @param int $flags Bitfield of SFH_xxx flags; with SFH_NO_HASH the
	 *   synonyms are registered without a leading '#'
	 * @throws MWException If $id is not a magic word identifier; nothing is
	 *   registered in that case
	 * @return mixed|null The previously registered callback, or null
	 */
	public function setFunctionHook( $id, $callback, $flags = 0 ) {
		global $wgContLang;

		# Validate first so a bad ID cannot leave a half-registered hook behind
		$mw = MagicWord::get( $id );
		if ( !$mw ) {
			throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
		}

		$oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
		$this->mFunctionHooks[$id] = array( $callback, $flags );

		# Add to function cache
		$synonyms = $mw->getSynonyms();
		$sensitive = intval( $mw->isCaseSensitive() );

		foreach ( $synonyms as $syn ) {
			# Case
			if ( !$sensitive ) {
				$syn = $wgContLang->lc( $syn );
			}
			# Add leading hash
			if ( !( $flags & self::SFH_NO_HASH ) ) {
				$syn = '#' . $syn;
			}
			# Remove trailing colon
			if ( substr( $syn, -1, 1 ) === ':' ) {
				$syn = substr( $syn, 0, -1 );
			}
			$this->mFunctionSynonyms[$sensitive][$syn] = $id;
		}
		return $oldVal;
	}

	/**
	 * Get the IDs of all registered function hooks.
	 *
	 * @return array
	 */
	function getFunctionHooks() {
		return array_keys( $this->mFunctionHooks );
	}

	/**
	 * Register a function tag hook and add the tag to the strip list.
	 *
	 * @param string $tag Tag name (lower-cased before use)
	 * @param mixed $callback The callback to store
	 * @param int $flags Stored alongside the callback
	 * @throws MWException If the tag name contains '<', '>', CR or LF
	 * @return array|null The previous array( callback, flags ) entry, or null
	 */
	function setFunctionTagHook( $tag, $callback, $flags ) {
		$tag = strtolower( $tag );
		if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
			throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
		}
		$old = isset( $this->mFunctionTagHooks[$tag] ) ?
			$this->mFunctionTagHooks[$tag] : null;
		$this->mFunctionTagHooks[$tag] = array( $callback, $flags );

		if ( !in_array( $tag, $this->mStripList ) ) {
			$this->mStripList[] = $tag;
		}

		return $old;
	}

	/**
	 * Replace link placeholders in $text via the link holder array.
	 *
	 * @param string $text Passed by reference to the method
	 * @param int $options Not used by this method
	 * @return mixed Whatever $this->mLinkHolders->replace() returns
	 */
	function replaceLinkHolders( &$text, $options = 0 ) {
		return $this->mLinkHolders->replace( $text );
	}

	/**
	 * Replace link placeholders via $this->mLinkHolders->replaceText().
	 *
	 * @param string $text
	 * @return string
	 */
	function replaceLinkHoldersText( $text ) {
		return $this->mLinkHolders->replaceText( $text );
	}

	/**
	 * Build an image gallery from wikitext (one image per line) and return
	 * its HTML.
	 *
	 * @param string $text Gallery body
	 * @param array $params Tag attributes ('mode', 'caption', 'perrow', ...)
	 * @return string HTML
	 */
	function renderImageGallery( $text, $params ) {
		wfProfileIn( __METHOD__ );

		$mode = false;
		if ( isset( $params['mode'] ) ) {
			$mode = $params['mode'];
		}

		try {
			$ig = ImageGalleryBase::factory( $mode );
		} catch ( MWException $e ) {
			// If invalid type set, fallback to default.
05095 $ig = ImageGalleryBase::factory( false ); 05096 } 05097 05098 $ig->setContextTitle( $this->mTitle ); 05099 $ig->setShowBytes( false ); 05100 $ig->setShowFilename( false ); 05101 $ig->setParser( $this ); 05102 $ig->setHideBadImages(); 05103 $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) ); 05104 05105 if ( isset( $params['showfilename'] ) ) { 05106 $ig->setShowFilename( true ); 05107 } else { 05108 $ig->setShowFilename( false ); 05109 } 05110 if ( isset( $params['caption'] ) ) { 05111 $caption = $params['caption']; 05112 $caption = htmlspecialchars( $caption ); 05113 $caption = $this->replaceInternalLinks( $caption ); 05114 $ig->setCaptionHtml( $caption ); 05115 } 05116 if ( isset( $params['perrow'] ) ) { 05117 $ig->setPerRow( $params['perrow'] ); 05118 } 05119 if ( isset( $params['widths'] ) ) { 05120 $ig->setWidths( $params['widths'] ); 05121 } 05122 if ( isset( $params['heights'] ) ) { 05123 $ig->setHeights( $params['heights'] ); 05124 } 05125 $ig->setAdditionalOptions( $params ); 05126 05127 wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) ); 05128 05129 $lines = StringUtils::explode( "\n", $text ); 05130 foreach ( $lines as $line ) { 05131 # match lines like these: 05132 # Image:someimage.jpg|This is some image 05133 $matches = array(); 05134 preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches ); 05135 # Skip empty lines 05136 if ( count( $matches ) == 0 ) { 05137 continue; 05138 } 05139 05140 if ( strpos( $matches[0], '%' ) !== false ) { 05141 $matches[1] = rawurldecode( $matches[1] ); 05142 } 05143 $title = Title::newFromText( $matches[1], NS_FILE ); 05144 if ( is_null( $title ) ) { 05145 # Bogus title. Ignore these so we don't bomb out later. 05146 continue; 05147 } 05148 05149 # We need to get what handler the file uses, to figure out parameters. 
05150 # Note, a hook can overide the file name, and chose an entirely different 05151 # file (which potentially could be of a different type and have different handler). 05152 $options = array(); 05153 $descQuery = false; 05154 wfRunHooks( 'BeforeParserFetchFileAndTitle', 05155 array( $this, $title, &$options, &$descQuery ) ); 05156 # Don't register it now, as ImageGallery does that later. 05157 $file = $this->fetchFileNoRegister( $title, $options ); 05158 $handler = $file ? $file->getHandler() : false; 05159 05160 wfProfileIn( __METHOD__ . '-getMagicWord' ); 05161 $paramMap = array( 05162 'img_alt' => 'gallery-internal-alt', 05163 'img_link' => 'gallery-internal-link', 05164 ); 05165 if ( $handler ) { 05166 $paramMap = $paramMap + $handler->getParamMap(); 05167 // We don't want people to specify per-image widths. 05168 // Additionally the width parameter would need special casing anyhow. 05169 unset( $paramMap['img_width'] ); 05170 } 05171 05172 $mwArray = new MagicWordArray( array_keys( $paramMap ) ); 05173 wfProfileOut( __METHOD__ . '-getMagicWord' ); 05174 05175 $label = ''; 05176 $alt = ''; 05177 $link = ''; 05178 $handlerOptions = array(); 05179 if ( isset( $matches[3] ) ) { 05180 // look for an |alt= definition while trying not to break existing 05181 // captions with multiple pipes (|) in it, until a more sensible grammar 05182 // is defined for images in galleries 05183 05184 // FIXME: Doing recursiveTagParse at this stage, and the trim before 05185 // splitting on '|' is a bit odd, and different from makeImage. 
05186 $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) ); 05187 $parameterMatches = StringUtils::explode( '|', $matches[3] ); 05188 05189 foreach ( $parameterMatches as $parameterMatch ) { 05190 list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch ); 05191 if ( $magicName ) { 05192 $paramName = $paramMap[$magicName]; 05193 05194 switch ( $paramName ) { 05195 case 'gallery-internal-alt': 05196 $alt = $this->stripAltText( $match, false ); 05197 break; 05198 case 'gallery-internal-link': 05199 $linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) ); 05200 $chars = self::EXT_LINK_URL_CLASS; 05201 $prots = $this->mUrlProtocols; 05202 //check to see if link matches an absolute url, if not then it must be a wiki link. 05203 if ( preg_match( "/^($prots)$chars+$/u", $linkValue ) ) { 05204 $link = $linkValue; 05205 } else { 05206 $localLinkTitle = Title::newFromText( $linkValue ); 05207 if ( $localLinkTitle !== null ) { 05208 $link = $localLinkTitle->getLocalURL(); 05209 } 05210 } 05211 break; 05212 default: 05213 // Must be a handler specific parameter. 05214 if ( $handler->validateParam( $paramName, $match ) ) { 05215 $handlerOptions[$paramName] = $match; 05216 } else { 05217 // Guess not. Append it to the caption. 05218 wfDebug( "$parameterMatch failed parameter validation\n" ); 05219 $label .= '|' . $parameterMatch; 05220 } 05221 } 05222 05223 } else { 05224 // concatenate all other pipes 05225 $label .= '|' . 
$parameterMatch;
					}
				}
				// remove the first pipe
				$label = substr( $label, 1 );
			}

			$ig->add( $title, $label, $alt, $link, $handlerOptions );
		}
		$html = $ig->toHTML();
		wfProfileOut( __METHOD__ );
		return $html;
	}

	/**
	 * Get the image parameter map and the matching MagicWordArray for a
	 * media handler. Results are cached per handler class ('' when there is
	 * no handler).
	 *
	 * @param object|bool $handler Media handler, or false
	 * @return array array( parameter map, MagicWordArray )
	 */
	function getImageParams( $handler ) {
		# Static lists shared by every call
		static $internalParamNames = array(
			'horizAlign' => array( 'left', 'right', 'center', 'none' ),
			'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
				'bottom', 'text-bottom' ),
			'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless',
				'upright', 'border', 'link', 'alt', 'class' ),
		);
		static $internalParamMap;

		$cacheKey = $handler ? get_class( $handler ) : '';

		if ( !isset( $this->mImageParams[$cacheKey] ) ) {
			# Build the magic-word => array( type, name ) map on first use
			if ( !$internalParamMap ) {
				$internalParamMap = array();
				foreach ( $internalParamNames as $group => $members ) {
					foreach ( $members as $member ) {
						$magicId = str_replace( '-', '_', "img_$member" );
						$internalParamMap[$magicId] = array( $group, $member );
					}
				}
			}

			# Start from the internal parameters, then add the handler's own
			$map = $internalParamMap;
			if ( $handler ) {
				foreach ( $handler->getParamMap() as $magicId => $handlerParam ) {
					$map[$magicId] = array( 'handler', $handlerParam );
				}
			}
			$this->mImageParams[$cacheKey] = $map;
			$this->mImageParamsMagicArray[$cacheKey] = new MagicWordArray( array_keys( $map ) );
		}

		return array( $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] );
	}

	/**
	 * Parse image options text and use it to make an image.
	 *
	 * @param Title $title
	 * @param string $options Pipe-separated option text
	 * @param object|bool $holders Link holder array passed to stripAltText(), or false
	 * @return string HTML returned by Linker::makeImageLink()
	 */
	function makeImage( $title, $options, $holders = false ) {
		# Check if the options text is of the form "options|alt text"
		# Options are:
		#  * thumbnail  make a thumbnail with
enlarge-icon and caption, alignment depends on lang 05295 # * left no resizing, just left align. label is used for alt= only 05296 # * right same, but right aligned 05297 # * none same, but not aligned 05298 # * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox 05299 # * center center the image 05300 # * frame Keep original image size, no magnify-button. 05301 # * framed Same as "frame" 05302 # * frameless like 'thumb' but without a frame. Keeps user preferences for width 05303 # * upright reduce width for upright images, rounded to full __0 px 05304 # * border draw a 1px border around the image 05305 # * alt Text for HTML alt attribute (defaults to empty) 05306 # * class Set a class for img node 05307 # * link Set the target of the image link. Can be external, interwiki, or local 05308 # vertical-align values (no % or length right now): 05309 # * baseline 05310 # * sub 05311 # * super 05312 # * top 05313 # * text-top 05314 # * middle 05315 # * bottom 05316 # * text-bottom 05317 05318 $parts = StringUtils::explode( "|", $options ); 05319 05320 # Give extensions a chance to select the file revision for us 05321 $options = array(); 05322 $descQuery = false; 05323 wfRunHooks( 'BeforeParserFetchFileAndTitle', 05324 array( $this, $title, &$options, &$descQuery ) ); 05325 # Fetch and register the file (file title may be different via hooks) 05326 list( $file, $title ) = $this->fetchFileAndTitle( $title, $options ); 05327 05328 # Get parameter map 05329 $handler = $file ? 
$file->getHandler() : false; 05330 05331 list( $paramMap, $mwArray ) = $this->getImageParams( $handler ); 05332 05333 if ( !$file ) { 05334 $this->addTrackingCategory( 'broken-file-category' ); 05335 } 05336 05337 # Process the input parameters 05338 $caption = ''; 05339 $params = array( 'frame' => array(), 'handler' => array(), 05340 'horizAlign' => array(), 'vertAlign' => array() ); 05341 foreach ( $parts as $part ) { 05342 $part = trim( $part ); 05343 list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part ); 05344 $validated = false; 05345 if ( isset( $paramMap[$magicName] ) ) { 05346 list( $type, $paramName ) = $paramMap[$magicName]; 05347 05348 # Special case; width and height come in one variable together 05349 if ( $type === 'handler' && $paramName === 'width' ) { 05350 $parsedWidthParam = $this->parseWidthParam( $value ); 05351 if ( isset( $parsedWidthParam['width'] ) ) { 05352 $width = $parsedWidthParam['width']; 05353 if ( $handler->validateParam( 'width', $width ) ) { 05354 $params[$type]['width'] = $width; 05355 $validated = true; 05356 } 05357 } 05358 if ( isset( $parsedWidthParam['height'] ) ) { 05359 $height = $parsedWidthParam['height']; 05360 if ( $handler->validateParam( 'height', $height ) ) { 05361 $params[$type]['height'] = $height; 05362 $validated = true; 05363 } 05364 } 05365 # else no validation -- bug 13436 05366 } else { 05367 if ( $type === 'handler' ) { 05368 # Validate handler parameter 05369 $validated = $handler->validateParam( $paramName, $value ); 05370 } else { 05371 # Validate internal parameters 05372 switch ( $paramName ) { 05373 case 'manualthumb': 05374 case 'alt': 05375 case 'class': 05376 # @todo FIXME: Possibly check validity here for 05377 # manualthumb? downstream behavior seems odd with 05378 # missing manual thumbs. 
05379 $validated = true; 05380 $value = $this->stripAltText( $value, $holders ); 05381 break; 05382 case 'link': 05383 $chars = self::EXT_LINK_URL_CLASS; 05384 $prots = $this->mUrlProtocols; 05385 if ( $value === '' ) { 05386 $paramName = 'no-link'; 05387 $value = true; 05388 $validated = true; 05389 } elseif ( preg_match( "/^(?i)$prots/", $value ) ) { 05390 if ( preg_match( "/^((?i)$prots)$chars+$/u", $value, $m ) ) { 05391 $paramName = 'link-url'; 05392 $this->mOutput->addExternalLink( $value ); 05393 if ( $this->mOptions->getExternalLinkTarget() ) { 05394 $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget(); 05395 } 05396 $validated = true; 05397 } 05398 } else { 05399 $linkTitle = Title::newFromText( $value ); 05400 if ( $linkTitle ) { 05401 $paramName = 'link-title'; 05402 $value = $linkTitle; 05403 $this->mOutput->addLink( $linkTitle ); 05404 $validated = true; 05405 } 05406 } 05407 break; 05408 default: 05409 # Most other things appear to be empty or numeric... 05410 $validated = ( $value === false || is_numeric( trim( $value ) ) ); 05411 } 05412 } 05413 05414 if ( $validated ) { 05415 $params[$type][$paramName] = $value; 05416 } 05417 } 05418 } 05419 if ( !$validated ) { 05420 $caption = $part; 05421 } 05422 } 05423 05424 # Process alignment parameters 05425 if ( $params['horizAlign'] ) { 05426 $params['frame']['align'] = key( $params['horizAlign'] ); 05427 } 05428 if ( $params['vertAlign'] ) { 05429 $params['frame']['valign'] = key( $params['vertAlign'] ); 05430 } 05431 05432 $params['frame']['caption'] = $caption; 05433 05434 # Will the image be presented in a frame, with the caption below? 05435 $imageIsFramed = isset( $params['frame']['frame'] ) 05436 || isset( $params['frame']['framed'] ) 05437 || isset( $params['frame']['thumbnail'] ) 05438 || isset( $params['frame']['manualthumb'] ); 05439 05440 # In the old days, [[Image:Foo|text...]] would set alt text. 
Later it 05441 # came to also set the caption, ordinary text after the image -- which 05442 # makes no sense, because that just repeats the text multiple times in 05443 # screen readers. It *also* came to set the title attribute. 05444 # 05445 # Now that we have an alt attribute, we should not set the alt text to 05446 # equal the caption: that's worse than useless, it just repeats the 05447 # text. This is the framed/thumbnail case. If there's no caption, we 05448 # use the unnamed parameter for alt text as well, just for the time be- 05449 # ing, if the unnamed param is set and the alt param is not. 05450 # 05451 # For the future, we need to figure out if we want to tweak this more, 05452 # e.g., introducing a title= parameter for the title; ignoring the un- 05453 # named parameter entirely for images without a caption; adding an ex- 05454 # plicit caption= parameter and preserving the old magic unnamed para- 05455 # meter for BC; ... 05456 if ( $imageIsFramed ) { # Framed image 05457 if ( $caption === '' && !isset( $params['frame']['alt'] ) ) { 05458 # No caption or alt text, add the filename as the alt text so 05459 # that screen readers at least get some description of the image 05460 $params['frame']['alt'] = $title->getText(); 05461 } 05462 # Do not set $params['frame']['title'] because tooltips don't make sense 05463 # for framed images 05464 } else { # Inline image 05465 if ( !isset( $params['frame']['alt'] ) ) { 05466 # No alt text, use the "caption" for the alt text 05467 if ( $caption !== '' ) { 05468 $params['frame']['alt'] = $this->stripAltText( $caption, $holders ); 05469 } else { 05470 # No caption, fall back to using the filename for the 05471 # alt text 05472 $params['frame']['alt'] = $title->getText(); 05473 } 05474 } 05475 # Use the "caption" for the tooltip text 05476 $params['frame']['title'] = $this->stripAltText( $caption, $holders ); 05477 } 05478 05479 wfRunHooks( 'ParserMakeImageParams', array( $title, $file, &$params, $this ) ); 05480 
# Linker does the rest
		$time = isset( $options['time'] ) ? $options['time'] : false;
		$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
			$time, $descQuery, $this->mOptions->getThumbSize() );

		# Give the handler a chance to modify the parser object
		if ( $handler ) {
			$handler->parserTransformHook( $this, $file );
		}

		return $ret;
	}

	/**
	 * Turn a caption into plain text suitable for an alt or title attribute.
	 *
	 * @param string $caption
	 * @param object|bool $holders Link holder array to resolve placeholders
	 *   with, or false to use the parser's own
	 * @return string Caption with link holders replaced, strip markers
	 *   expanded and all tags removed
	 */
	protected function stripAltText( $caption, $holders ) {
		# We can't always use replaceLinkHoldersText() here: when called from
		# replaceInternalLinks2(), mLinkHolders won't be up-to-date, so the
		# caller-supplied holder array takes precedence.
		$plain = $holders
			? $holders->replaceText( $caption )
			: $this->replaceLinkHoldersText( $caption );

		# Expand placeholders now, so none survive into the attribute and get
		# turned into HTML later, then drop the tags.
		return Sanitizer::stripAllTags( $this->mStripState->unstripBoth( $plain ) );
	}

	/**
	 * Mark the current parser output as uncacheable.
	 *
	 * @throws MWException If there is no current output object
	 */
	function disableCache() {
		wfDebug( "Parser output marked as uncacheable.\n" );
		if ( !$this->mOutput ) {
			throw new MWException( __METHOD__ .
				" can only be called when actually parsing something" );
		}
		// Both the old-style and the new-style setter are called
		$this->mOutput->setCacheTime( -1 );
		$this->mOutput->updateCacheExpiry( 0 );
	}

	/**
	 * Replace variables in $text and expand strip markers.
	 *
	 * @param string $text Modified in place (passed by reference)
	 * @param object|bool $frame Frame handed to replaceVariables()
	 * @return string The resulting text
	 */
	function attributeStripCallback( &$text, $frame = false ) {
		$text = $this->mStripState->unstripBoth(
			$this->replaceVariables( $text, $frame )
		);
		return $text;
	}

	/**
	 * List every registered tag name: transparent tag hooks first, then tag
	 * hooks, then function tag hooks.
	 *
	 * @return array
	 */
	function getTags() {
		$transparent = array_keys( $this->mTransparentTagHooks );
		$regular = array_keys( $this->mTagHooks );
		$functionTags = array_keys( $this->mFunctionTagHooks );

		return array_merge( $transparent, $regular, $functionTags );
	}

	/**
	 * Replace transparent tags in $text with the output of their callbacks.
	 *
	 * @param string $text
	 * @return string
	 */
	function replaceTransparentTags( $text ) {
		$found = array();
		$tagNames = array_keys( $this->mTransparentTagHooks );
		$text = self::extractTagsAndParams( $tagNames, $text, $found, $this->mUniqPrefix );

		$swap = array();
		foreach ( $found as $marker => $info ) {
			list( $element, $content, $params, $tag ) = $info;
			$hookKey = strtolower( $element );
			# Tags without a registered callback are put back unchanged
			$swap[$marker] = isset( $this->mTransparentTagHooks[$hookKey] )
				? call_user_func_array( $this->mTransparentTagHooks[$hookKey], array( $content, $params, $this ) )
				: $tag;
		}

		return strtr( $text, $swap );
	}

	/**
	 * Shared implementation of getSection() and replaceSection().
	 *
	 * @param string $text Page wikitext
	 * @param string $section Section index, optionally prefixed with flags
	 *   separated by '-' (a 'T' flag sets PTD_FOR_INCLUSION)
	 * @param string $mode 'get' or 'replace'
	 * @param string $newText Replacement text in replace mode, default
	 *   return value in get mode
	 * @return string
	 */
	private function extractSections( $text, $section, $mode, $newText = '' ) {
		global $wgTitle; # not generally used but removes an ugly failure mode
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
		$outText = '';
		$frame = $this->getPreprocessor()->newFrame();

		# Process section extraction flags
		$flags = 0;
		$sectionParts = explode( '-', $section );
		$sectionIndex = array_pop( $sectionParts );
		foreach ( $sectionParts as $part ) {
			if ( $part === 'T' ) {
				$flags |= self::PTD_FOR_INCLUSION;
			}
		}

		# Check for
empty input 05630 if ( strval( $text ) === '' ) { 05631 # Only sections 0 and T-0 exist in an empty document 05632 if ( $sectionIndex == 0 ) { 05633 if ( $mode === 'get' ) { 05634 return ''; 05635 } else { 05636 return $newText; 05637 } 05638 } else { 05639 if ( $mode === 'get' ) { 05640 return $newText; 05641 } else { 05642 return $text; 05643 } 05644 } 05645 } 05646 05647 # Preprocess the text 05648 $root = $this->preprocessToDom( $text, $flags ); 05649 05650 # <h> nodes indicate section breaks 05651 # They can only occur at the top level, so we can find them by iterating the root's children 05652 $node = $root->getFirstChild(); 05653 05654 # Find the target section 05655 if ( $sectionIndex == 0 ) { 05656 # Section zero doesn't nest, level=big 05657 $targetLevel = 1000; 05658 } else { 05659 while ( $node ) { 05660 if ( $node->getName() === 'h' ) { 05661 $bits = $node->splitHeading(); 05662 if ( $bits['i'] == $sectionIndex ) { 05663 $targetLevel = $bits['level']; 05664 break; 05665 } 05666 } 05667 if ( $mode === 'replace' ) { 05668 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); 05669 } 05670 $node = $node->getNextSibling(); 05671 } 05672 } 05673 05674 if ( !$node ) { 05675 # Not found 05676 if ( $mode === 'get' ) { 05677 return $newText; 05678 } else { 05679 return $text; 05680 } 05681 } 05682 05683 # Find the end of the section, including nested sections 05684 do { 05685 if ( $node->getName() === 'h' ) { 05686 $bits = $node->splitHeading(); 05687 $curLevel = $bits['level']; 05688 if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) { 05689 break; 05690 } 05691 } 05692 if ( $mode === 'get' ) { 05693 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); 05694 } 05695 $node = $node->getNextSibling(); 05696 } while ( $node ); 05697 05698 # Write out the remainder (in replace mode only) 05699 if ( $mode === 'replace' ) { 05700 # Output the replacement text 05701 # Add two newlines on -- trailing whitespace in $newText is conventionally 
05702 # stripped by the editor, so we need both newlines to restore the paragraph gap 05703 # Only add trailing whitespace if there is newText 05704 if ( $newText != "" ) { 05705 $outText .= $newText . "\n\n"; 05706 } 05707 05708 while ( $node ) { 05709 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); 05710 $node = $node->getNextSibling(); 05711 } 05712 } 05713 05714 if ( is_string( $outText ) ) { 05715 # Re-insert stripped tags 05716 $outText = rtrim( $this->mStripState->unstripBoth( $outText ) ); 05717 } 05718 05719 return $outText; 05720 } 05721 05734 public function getSection( $text, $section, $deftext = '' ) { 05735 return $this->extractSections( $text, $section, "get", $deftext ); 05736 } 05737 05748 public function replaceSection( $oldtext, $section, $text ) { 05749 return $this->extractSections( $oldtext, $section, "replace", $text ); 05750 } 05751 05757 function getRevisionId() { 05758 return $this->mRevisionId; 05759 } 05760 05767 public function getRevisionObject() { 05768 if ( !is_null( $this->mRevisionObject ) ) { 05769 return $this->mRevisionObject; 05770 } 05771 if ( is_null( $this->mRevisionId ) ) { 05772 return null; 05773 } 05774 05775 $this->mRevisionObject = Revision::newFromId( $this->mRevisionId ); 05776 return $this->mRevisionObject; 05777 } 05778 05783 function getRevisionTimestamp() { 05784 if ( is_null( $this->mRevisionTimestamp ) ) { 05785 wfProfileIn( __METHOD__ ); 05786 05787 global $wgContLang; 05788 05789 $revObject = $this->getRevisionObject(); 05790 $timestamp = $revObject ? $revObject->getTimestamp() : wfTimestampNow(); 05791 05792 # The cryptic '' timezone parameter tells to use the site-default 05793 # timezone offset instead of the user settings. 05794 # 05795 # Since this value will be saved into the parser cache, served 05796 # to other users, and potentially even used inside links and such, 05797 # it needs to be consistent for all visitors. 
05798 $this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' ); 05799 05800 wfProfileOut( __METHOD__ ); 05801 } 05802 return $this->mRevisionTimestamp; 05803 } 05804 05810 function getRevisionUser() { 05811 if ( is_null( $this->mRevisionUser ) ) { 05812 $revObject = $this->getRevisionObject(); 05813 05814 # if this template is subst: the revision id will be blank, 05815 # so just use the current user's name 05816 if ( $revObject ) { 05817 $this->mRevisionUser = $revObject->getUserText(); 05818 } elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) { 05819 $this->mRevisionUser = $this->getUser()->getName(); 05820 } 05821 } 05822 return $this->mRevisionUser; 05823 } 05824 05830 function getRevisionSize() { 05831 if ( is_null( $this->mRevisionSize ) ) { 05832 $revObject = $this->getRevisionObject(); 05833 05834 # if this variable is subst: the revision id will be blank, 05835 # so just use the parser input size, because the own substituation 05836 # will change the size. 05837 if ( $revObject ) { 05838 $this->mRevisionSize = $revObject->getSize(); 05839 } elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) { 05840 $this->mRevisionSize = $this->mInputSize; 05841 } 05842 } 05843 return $this->mRevisionSize; 05844 } 05845 05851 public function setDefaultSort( $sort ) { 05852 $this->mDefaultSort = $sort; 05853 $this->mOutput->setProperty( 'defaultsort', $sort ); 05854 } 05855 05866 public function getDefaultSort() { 05867 if ( $this->mDefaultSort !== false ) { 05868 return $this->mDefaultSort; 05869 } else { 05870 return ''; 05871 } 05872 } 05873 05880 public function getCustomDefaultSort() { 05881 return $this->mDefaultSort; 05882 } 05883 05893 public function guessSectionNameFromWikiText( $text ) { 05894 # Strip out wikitext links(they break the anchor) 05895 $text = $this->stripSectionName( $text ); 05896 $text = Sanitizer::normalizeSectionNameWhitespace( $text ); 05897 return '#' . 
Sanitizer::escapeId( $text, 'noninitial' ); 05898 } 05899 05908 public function guessLegacySectionNameFromWikiText( $text ) { 05909 # Strip out wikitext links(they break the anchor) 05910 $text = $this->stripSectionName( $text ); 05911 $text = Sanitizer::normalizeSectionNameWhitespace( $text ); 05912 return '#' . Sanitizer::escapeId( $text, array( 'noninitial', 'legacy' ) ); 05913 } 05914 05929 public function stripSectionName( $text ) { 05930 # Strip internal link markup 05931 $text = preg_replace( '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2', $text ); 05932 $text = preg_replace( '/\[\[:?([^[]+)\|?\]\]/', '$1', $text ); 05933 05934 # Strip external link markup 05935 # @todo FIXME: Not tolerant to blank link text 05936 # I.E. [https://www.mediawiki.org] will render as [1] or something depending 05937 # on how many empty links there are on the page - need to figure that out. 05938 $text = preg_replace( '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2', $text ); 05939 05940 # Parse wikitext quotes (italics & bold) 05941 $text = $this->doQuotes( $text ); 05942 05943 # Strip HTML tags 05944 $text = StringUtils::delimiterReplace( '<', '>', '', $text ); 05945 return $text; 05946 } 05947 05958 function testSrvus( $text, Title $title, ParserOptions $options, $outputType = self::OT_HTML ) { 05959 $this->startParse( $title, $options, $outputType, true ); 05960 05961 $text = $this->replaceVariables( $text ); 05962 $text = $this->mStripState->unstripBoth( $text ); 05963 $text = Sanitizer::removeHTMLtags( $text ); 05964 return $text; 05965 } 05966 05973 function testPst( $text, Title $title, ParserOptions $options ) { 05974 return $this->preSaveTransform( $text, $title, $options->getUser(), $options ); 05975 } 05976 05983 function testPreprocess( $text, Title $title, ParserOptions $options ) { 05984 return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS ); 05985 } 05986 06003 function markerSkipCallback( $s, $callback ) { 06004 $i = 0; 06005 $out = ''; 
06006 while ( $i < strlen( $s ) ) { 06007 $markerStart = strpos( $s, $this->mUniqPrefix, $i ); 06008 if ( $markerStart === false ) { 06009 $out .= call_user_func( $callback, substr( $s, $i ) ); 06010 break; 06011 } else { 06012 $out .= call_user_func( $callback, substr( $s, $i, $markerStart - $i ) ); 06013 $markerEnd = strpos( $s, self::MARKER_SUFFIX, $markerStart ); 06014 if ( $markerEnd === false ) { 06015 $out .= substr( $s, $markerStart ); 06016 break; 06017 } else { 06018 $markerEnd += strlen( self::MARKER_SUFFIX ); 06019 $out .= substr( $s, $markerStart, $markerEnd - $markerStart ); 06020 $i = $markerEnd; 06021 } 06022 } 06023 } 06024 return $out; 06025 } 06026 06033 function killMarkers( $text ) { 06034 return $this->mStripState->killMarkers( $text ); 06035 } 06036 06053 function serializeHalfParsedText( $text ) { 06054 wfProfileIn( __METHOD__ ); 06055 $data = array( 06056 'text' => $text, 06057 'version' => self::HALF_PARSED_VERSION, 06058 'stripState' => $this->mStripState->getSubState( $text ), 06059 'linkHolders' => $this->mLinkHolders->getSubArray( $text ) 06060 ); 06061 wfProfileOut( __METHOD__ ); 06062 return $data; 06063 } 06064 06080 function unserializeHalfParsedText( $data ) { 06081 if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) { 06082 throw new MWException( __METHOD__ . ': invalid version' ); 06083 } 06084 06085 # First, extract the strip state. 06086 $texts = array( $data['text'] ); 06087 $texts = $this->mStripState->merge( $data['stripState'], $texts ); 06088 06089 # Now renumber links 06090 $texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts ); 06091 06092 # Should be good to go. 
06093 return $texts[0]; 06094 } 06095 06105 function isValidHalfParsedText( $data ) { 06106 return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION; 06107 } 06108 06117 public function parseWidthParam( $value ) { 06118 $parsedWidthParam = array(); 06119 if ( $value === '' ) { 06120 return $parsedWidthParam; 06121 } 06122 $m = array(); 06123 # (bug 13500) In both cases (width/height and width only), 06124 # permit trailing "px" for backward compatibility. 06125 if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) { 06126 $width = intval( $m[1] ); 06127 $height = intval( $m[2] ); 06128 $parsedWidthParam['width'] = $width; 06129 $parsedWidthParam['height'] = $height; 06130 } elseif ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) { 06131 $width = intval( $value ); 06132 $parsedWidthParam['width'] = $width; 06133 } 06134 return $parsedWidthParam; 06135 } 06136 }