MediaWiki
REL1_21
|
00001 <?php 00068 class Parser { 00074 const VERSION = '1.6.4'; 00075 00080 const HALF_PARSED_VERSION = 2; 00081 00082 # Flags for Parser::setFunctionHook 00083 # Also available as global constants from Defines.php 00084 const SFH_NO_HASH = 1; 00085 const SFH_OBJECT_ARGS = 2; 00086 00087 # Constants needed for external link processing 00088 # Everything except bracket, space, or control characters 00089 # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20 00090 # as well as U+3000 is IDEOGRAPHIC SPACE for bug 19052 00091 const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]'; 00092 const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+) 00093 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu'; 00094 00095 # State constants for the definition list colon extraction 00096 const COLON_STATE_TEXT = 0; 00097 const COLON_STATE_TAG = 1; 00098 const COLON_STATE_TAGSTART = 2; 00099 const COLON_STATE_CLOSETAG = 3; 00100 const COLON_STATE_TAGSLASH = 4; 00101 const COLON_STATE_COMMENT = 5; 00102 const COLON_STATE_COMMENTDASH = 6; 00103 const COLON_STATE_COMMENTDASHDASH = 7; 00104 00105 # Flags for preprocessToDom 00106 const PTD_FOR_INCLUSION = 1; 00107 00108 # Allowed values for $this->mOutputType 00109 # Parameter to startExternalParse(). 00110 const OT_HTML = 1; # like parse() 00111 const OT_WIKI = 2; # like preSaveTransform() 00112 const OT_PREPROCESS = 3; # like preprocess() 00113 const OT_MSG = 3; 00114 const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged. 00115 00116 # Marker Suffix needs to be accessible staticly. 
00117 const MARKER_SUFFIX = "-QINU\x7f"; 00118 00119 # Persistent: 00120 var $mTagHooks = array(); 00121 var $mTransparentTagHooks = array(); 00122 var $mFunctionHooks = array(); 00123 var $mFunctionSynonyms = array( 0 => array(), 1 => array() ); 00124 var $mFunctionTagHooks = array(); 00125 var $mStripList = array(); 00126 var $mDefaultStripList = array(); 00127 var $mVarCache = array(); 00128 var $mImageParams = array(); 00129 var $mImageParamsMagicArray = array(); 00130 var $mMarkerIndex = 0; 00131 var $mFirstCall = true; 00132 00133 # Initialised by initialiseVariables() 00134 00138 var $mVariables; 00139 00143 var $mSubstWords; 00144 var $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor 00145 00146 # Cleared with clearState(): 00147 00150 var $mOutput; 00151 var $mAutonumber, $mDTopen; 00152 00156 var $mStripState; 00157 00158 var $mIncludeCount, $mArgStack, $mLastSection, $mInPre; 00162 var $mLinkHolders; 00163 00164 var $mLinkID; 00165 var $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth; 00166 var $mDefaultSort; 00167 var $mTplExpandCache; # empty-frame expansion cache 00168 var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores; 00169 var $mExpensiveFunctionCount; # number of expensive parser function calls 00170 var $mShowToc, $mForceTocPosition; 00171 00175 var $mUser; # User object; only used when doing pre-save transform 00176 00177 # Temporary 00178 # These are variables reset at least once per parse regardless of $clearState 00179 00183 var $mOptions; 00184 00188 var $mTitle; # Title context, used for self-link rendering and similar things 00189 var $mOutputType; # Output type, one of the OT_xxx constants 00190 var $ot; # Shortcut alias, see setOutputType() 00191 var $mRevisionObject; # The revision object of the specified revision ID 00192 var $mRevisionId; # ID to display in {{REVISIONID}} tags 00193 var $mRevisionTimestamp; # The timestamp of the specified 
revision ID 00194 var $mRevisionUser; # User to display in {{REVISIONUSER}} tag 00195 var $mRevIdForTs; # The revision ID which was used to fetch the timestamp 00196 00200 var $mUniqPrefix; 00201 00207 var $mLangLinkLanguages; 00208 00214 public function __construct( $conf = array() ) { 00215 $this->mConf = $conf; 00216 $this->mUrlProtocols = wfUrlProtocols(); 00217 $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' . 00218 self::EXT_LINK_URL_CLASS . '+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su'; 00219 if ( isset( $conf['preprocessorClass'] ) ) { 00220 $this->mPreprocessorClass = $conf['preprocessorClass']; 00221 } elseif ( defined( 'MW_COMPILED' ) ) { 00222 # Preprocessor_Hash is much faster than Preprocessor_DOM in compiled mode 00223 $this->mPreprocessorClass = 'Preprocessor_Hash'; 00224 } elseif ( extension_loaded( 'domxml' ) ) { 00225 # PECL extension that conflicts with the core DOM extension (bug 13770) 00226 wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" ); 00227 $this->mPreprocessorClass = 'Preprocessor_Hash'; 00228 } elseif ( extension_loaded( 'dom' ) ) { 00229 $this->mPreprocessorClass = 'Preprocessor_DOM'; 00230 } else { 00231 $this->mPreprocessorClass = 'Preprocessor_Hash'; 00232 } 00233 wfDebug( __CLASS__ . 
": using preprocessor: {$this->mPreprocessorClass}\n" ); 00234 } 00235 00239 function __destruct() { 00240 if ( isset( $this->mLinkHolders ) ) { 00241 unset( $this->mLinkHolders ); 00242 } 00243 foreach ( $this as $name => $value ) { 00244 unset( $this->$name ); 00245 } 00246 } 00247 00251 function __clone() { 00252 wfRunHooks( 'ParserCloned', array( $this ) ); 00253 } 00254 00258 function firstCallInit() { 00259 if ( !$this->mFirstCall ) { 00260 return; 00261 } 00262 $this->mFirstCall = false; 00263 00264 wfProfileIn( __METHOD__ ); 00265 00266 CoreParserFunctions::register( $this ); 00267 CoreTagHooks::register( $this ); 00268 $this->initialiseVariables(); 00269 00270 wfRunHooks( 'ParserFirstCallInit', array( &$this ) ); 00271 wfProfileOut( __METHOD__ ); 00272 } 00273 00279 function clearState() { 00280 wfProfileIn( __METHOD__ ); 00281 if ( $this->mFirstCall ) { 00282 $this->firstCallInit(); 00283 } 00284 $this->mOutput = new ParserOutput; 00285 $this->mOptions->registerWatcher( array( $this->mOutput, 'recordOption' ) ); 00286 $this->mAutonumber = 0; 00287 $this->mLastSection = ''; 00288 $this->mDTopen = false; 00289 $this->mIncludeCount = array(); 00290 $this->mArgStack = false; 00291 $this->mInPre = false; 00292 $this->mLinkHolders = new LinkHolderArray( $this ); 00293 $this->mLinkID = 0; 00294 $this->mRevisionObject = $this->mRevisionTimestamp = 00295 $this->mRevisionId = $this->mRevisionUser = null; 00296 $this->mVarCache = array(); 00297 $this->mUser = null; 00298 $this->mLangLinkLanguages = array(); 00299 00310 $this->mUniqPrefix = "\x7fUNIQ" . 
self::getRandomString();
		$this->mStripState = new StripState( $this->mUniqPrefix );

		# Clear these on every parse, bug 4549
		$this->mTplExpandCache = $this->mTplRedirCache = $this->mTplDomCache = array();

		$this->mShowToc = true;
		$this->mForceTocPosition = false;
		$this->mIncludeSizes = array(
			'post-expand' => 0,
			'arg' => 0,
		);
		$this->mPPNodeCount = 0;
		$this->mGeneratedPPNodeCount = 0;
		$this->mHighestExpansionDepth = 0;
		$this->mDefaultSort = false;
		$this->mHeadings = array();
		$this->mDoubleUnderscores = array();
		$this->mExpensiveFunctionCount = 0;

		# Fix cloning
		if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
			$this->mPreprocessor = null;
		}

		wfRunHooks( 'ParserClearState', array( &$this ) );
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Convert wikitext to HTML and store the result in the parser output object.
	 *
	 * @param string $text Wikitext to parse
	 * @param Title $title Title passed to startParse() and used for title conversion
	 * @param ParserOptions $options Options passed to startParse()
	 * @param bool $linestart Passed through to doBlockLevels()
	 * @param bool $clearState Passed through to startParse(); when true, any
	 *   occurrence of the current strip-marker prefix is also removed from $text
	 * @param int|null $revid When not null, used as the revision ID for the
	 *   duration of this call; the cached revision object, timestamp and user
	 *   are reset and all four values are restored before returning
	 * @return ParserOutput The object held in $this->mOutput, with its text set
	 */
	public function parse( $text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null ) {
		global $wgUseTidy, $wgAlwaysUseTidy;
		$fname = __METHOD__ . '-' . wfGetCaller();
		wfProfileIn( __METHOD__ );
		wfProfileIn( $fname );

		$this->startParse( $title, $options, self::OT_HTML, $clearState );

		# Remove the strip marker tag prefix from the input, if present.
		if ( $clearState ) {
			$text = str_replace( $this->mUniqPrefix, '', $text );
		}

		# Remember the revision state so it can be restored at the end
		$oldRevisionId = $this->mRevisionId;
		$oldRevisionObject = $this->mRevisionObject;
		$oldRevisionTimestamp = $this->mRevisionTimestamp;
		$oldRevisionUser = $this->mRevisionUser;
		if ( $revid !== null ) {
			$this->mRevisionId = $revid;
			$this->mRevisionObject = null;
			$this->mRevisionTimestamp = null;
			$this->mRevisionUser = null;
		}

		wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
		# No more strip!
		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
		$text = $this->internalParse( $text );
		wfRunHooks( 'ParserAfterParse', array( &$this, &$text, &$this->mStripState ) );

		$text = $this->mStripState->unstripGeneral( $text );

		# Clean up special characters, only run once, next-to-last before doBlockLevels
		# The non-breaking space is written as the character reference &#160; so
		# that the third rule can recognise and undo what the first rule inserted.
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
			# french spaces, Guillemet-right
			'/(\\302\\253) /' => '\\1&#160;',
			'/&#160;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
		);
		$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

		$text = $this->doBlockLevels( $text, $linestart );

		$this->replaceLinkHolders( $text );

		# Content conversion, unless disabled by option or by __NOCONTENTCONVERT__
		if ( !( $options->getDisableContentConversion()
				|| isset( $this->mDoubleUnderscores['nocontentconvert'] ) ) )
		{
			if ( !$this->mOptions->getInterfaceMessage() ) {
				# The position of the convert() call should not be changed. it
				# assumes that the links are all replaced and the only thing left
				# is the <nowiki> mark.
				$text = $this->getConverterLanguage()->convert( $text );
			}
		}

		# Title conversion, unless disabled or a display title was already set
		if ( !( $options->getDisableTitleConversion()
				|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
				|| isset( $this->mDoubleUnderscores['notitleconvert'] )
				|| $this->mOutput->getDisplayTitle() !== false ) )
		{
			$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
			if ( $convruletitle ) {
				$this->mOutput->setTitleText( $convruletitle );
			} else {
				$titleText = $this->getConverterLanguage()->convertTitle( $title );
				$this->mOutput->setTitleText( $titleText );
			}
		}

		$text = $this->mStripState->unstripNoWiki( $text );

		wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );

		$text = $this->replaceTransparentTags( $text );
		$text = $this->mStripState->unstripGeneral( $text );

		$text = Sanitizer::normalizeCharReferences( $text );

		if ( ( $wgUseTidy && $this->mOptions->getTidy() ) || $wgAlwaysUseTidy ) {
			$text = MWTidy::tidy( $text );
		} else {
			# attempt to sanitize at least some nesting problems
			# (bug #2702 and quite a few others)
			$tidyregs = array(
				# ''Something [http://www.cool.com cool''] -->
				# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
				'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
				# fix up an anchor inside another anchor, only
				# at least for a single single nested link (bug 3695)
				'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
				# fix div inside inline elements- doBlockLevels won't wrap a line which
				# contains a div, so fix it up here; replace
				# div with escaped text
				'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
				# remove empty italic or bold tag pairs, some
				# introduced by rules above
				'/<([bi])><\/\\1>/' => '',
			);

			$text = preg_replace(
				array_keys( $tidyregs ),
				array_values( $tidyregs ),
				$text );
		}

		if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
			$this->limitationWarn( 'expensive-parserfunction',
				$this->mExpensiveFunctionCount,
				$this->mOptions->getExpensiveParserFunctionLimit()
			);
		}

		wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );

		# Information on include size limits, for the benefit of users who try to skirt them
		if ( $this->mOptions->getEnableLimitReport() ) {
			$max = $this->mOptions->getMaxIncludeSize();
			$PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/{$this->mOptions->getExpensiveParserFunctionLimit()}\n";
			$limitReport =
				"NewPP limit report\n" .
				"Preprocessor visited node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" .
				"Preprocessor generated node count: " .
					"{$this->mGeneratedPPNodeCount}/{$this->mOptions->getMaxGeneratedPPNodeCount()}\n" .
				"Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" .
				"Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n" .
				"Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n" .
				$PFreport;
			wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) );

			// Sanitize for comment. Note '‐' in the replacement is U+2010,
			// which looks much like the problematic '-'.
			// '&' must become the entity '&amp;'; replacing it with itself
			// would leave the report unsanitized.
			$limitReport = str_replace( array( '-', '&' ), array( '‐', '&amp;' ), $limitReport );

			$text .= "\n<!-- \n$limitReport-->\n";

			if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
				wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
					$this->mTitle->getPrefixedDBkey() );
			}
		}
		$this->mOutput->setText( $text );

		# Restore the revision state saved at the top of the method
		$this->mRevisionId = $oldRevisionId;
		$this->mRevisionObject = $oldRevisionObject;
		$this->mRevisionTimestamp = $oldRevisionTimestamp;
		$this->mRevisionUser = $oldRevisionUser;
		wfProfileOut( $fname );
		wfProfileOut( __METHOD__ );

		return $this->mOutput;
	}

	/**
	 * Run the ParserBeforeStrip / ParserAfterStrip hooks and internalParse()
	 * on a piece of text, without starting a new parse.
	 *
	 * internalParse() is called with $isMain = false.
	 *
	 * @param string $text Text to process
	 * @param PPFrame|bool $frame Frame passed to internalParse(), or false for none
	 * @return string Result of internalParse()
	 */
	function recursiveTagParse( $text, $frame=false ) {
		wfProfileIn( __METHOD__ );
		wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
		$text = $this->internalParse( $text, false, $frame );
		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Start a fresh parse in OT_PREPROCESS mode, replace variables in the
	 * text and unstrip all strip markers.
	 *
	 * @param string $text Text to preprocess
	 * @param Title|null $title Passed to startParse()
	 * @param ParserOptions $options Passed to startParse()
	 * @param int|null $revid When not null, stored as the current revision ID
	 * @return string
	 */
	function preprocess( $text, Title $title = null, ParserOptions $options, $revid = null ) {
		wfProfileIn( __METHOD__ );
		$this->startParse( $title, $options, self::OT_PREPROCESS, true );
		if ( $revid !== null ) {
			$this->mRevisionId = $revid;
		}
		wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
		$text = $this->replaceVariables( $text );
		$text = $this->mStripState->unstripBoth( $text );
		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Replace variables in the text using the given frame and unstrip all
	 * strip markers, without starting a new parse.
	 *
	 * @param string $text Text to preprocess
	 * @param PPFrame|bool $frame Frame passed to replaceVariables(), or false for none
	 * @return string
	 */
	public function recursivePreprocess( $text, $frame = false ) {
		wfProfileIn( __METHOD__ );
		$text = $this->replaceVariables( $text, $frame );
		$text = $this->mStripState->unstripBoth( $text );
		wfProfileOut( __METHOD__
); 00581 return $text; 00582 } 00583 00596 public function getPreloadText( $text, Title $title, ParserOptions $options ) { 00597 # Parser (re)initialisation 00598 $this->startParse( $title, $options, self::OT_PLAIN, true ); 00599 00600 $flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES; 00601 $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION ); 00602 $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags ); 00603 $text = $this->mStripState->unstripBoth( $text ); 00604 return $text; 00605 } 00606 00612 public static function getRandomString() { 00613 return wfRandomString( 16 ); 00614 } 00615 00622 function setUser( $user ) { 00623 $this->mUser = $user; 00624 } 00625 00631 public function uniqPrefix() { 00632 if ( !isset( $this->mUniqPrefix ) ) { 00633 # @todo FIXME: This is probably *horribly wrong* 00634 # LanguageConverter seems to want $wgParser's uniqPrefix, however 00635 # if this is called for a parser cache hit, the parser may not 00636 # have ever been initialized in the first place. 00637 # Not really sure what the heck is supposed to be going on here. 
00638 return ''; 00639 # throw new MWException( "Accessing uninitialized mUniqPrefix" ); 00640 } 00641 return $this->mUniqPrefix; 00642 } 00643 00649 function setTitle( $t ) { 00650 if ( !$t || $t instanceof FakeTitle ) { 00651 $t = Title::newFromText( 'NO TITLE' ); 00652 } 00653 00654 if ( strval( $t->getFragment() ) !== '' ) { 00655 # Strip the fragment to avoid various odd effects 00656 $this->mTitle = clone $t; 00657 $this->mTitle->setFragment( '' ); 00658 } else { 00659 $this->mTitle = $t; 00660 } 00661 } 00662 00668 function getTitle() { 00669 return $this->mTitle; 00670 } 00671 00678 function Title( $x = null ) { 00679 return wfSetVar( $this->mTitle, $x ); 00680 } 00681 00687 function setOutputType( $ot ) { 00688 $this->mOutputType = $ot; 00689 # Shortcut alias 00690 $this->ot = array( 00691 'html' => $ot == self::OT_HTML, 00692 'wiki' => $ot == self::OT_WIKI, 00693 'pre' => $ot == self::OT_PREPROCESS, 00694 'plain' => $ot == self::OT_PLAIN, 00695 ); 00696 } 00697 00704 function OutputType( $x = null ) { 00705 return wfSetVar( $this->mOutputType, $x ); 00706 } 00707 00713 function getOutput() { 00714 return $this->mOutput; 00715 } 00716 00722 function getOptions() { 00723 return $this->mOptions; 00724 } 00725 00732 function Options( $x = null ) { 00733 return wfSetVar( $this->mOptions, $x ); 00734 } 00735 00739 function nextLinkID() { 00740 return $this->mLinkID++; 00741 } 00742 00746 function setLinkID( $id ) { 00747 $this->mLinkID = $id; 00748 } 00749 00754 function getFunctionLang() { 00755 return $this->getTargetLanguage(); 00756 } 00757 00767 public function getTargetLanguage() { 00768 $target = $this->mOptions->getTargetLanguage(); 00769 00770 if ( $target !== null ) { 00771 return $target; 00772 } elseif( $this->mOptions->getInterfaceMessage() ) { 00773 return $this->mOptions->getUserLangObj(); 00774 } elseif( is_null( $this->mTitle ) ) { 00775 throw new MWException( __METHOD__ . 
': $this->mTitle is null' ); 00776 } 00777 00778 return $this->mTitle->getPageLanguage(); 00779 } 00780 00784 function getConverterLanguage() { 00785 return $this->getTargetLanguage(); 00786 } 00787 00794 function getUser() { 00795 if ( !is_null( $this->mUser ) ) { 00796 return $this->mUser; 00797 } 00798 return $this->mOptions->getUser(); 00799 } 00800 00806 function getPreprocessor() { 00807 if ( !isset( $this->mPreprocessor ) ) { 00808 $class = $this->mPreprocessorClass; 00809 $this->mPreprocessor = new $class( $this ); 00810 } 00811 return $this->mPreprocessor; 00812 } 00813 00834 public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) { 00835 static $n = 1; 00836 $stripped = ''; 00837 $matches = array(); 00838 00839 $taglist = implode( '|', $elements ); 00840 $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i"; 00841 00842 while ( $text != '' ) { 00843 $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE ); 00844 $stripped .= $p[0]; 00845 if ( count( $p ) < 5 ) { 00846 break; 00847 } 00848 if ( count( $p ) > 5 ) { 00849 # comment 00850 $element = $p[4]; 00851 $attributes = ''; 00852 $close = ''; 00853 $inside = $p[5]; 00854 } else { 00855 # tag 00856 $element = $p[1]; 00857 $attributes = $p[2]; 00858 $close = $p[3]; 00859 $inside = $p[4]; 00860 } 00861 00862 $marker = "$uniq_prefix-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX; 00863 $stripped .= $marker; 00864 00865 if ( $close === '/>' ) { 00866 # Empty element tag, <tag /> 00867 $content = null; 00868 $text = $inside; 00869 $tail = null; 00870 } else { 00871 if ( $element === '!--' ) { 00872 $end = '/(-->)/'; 00873 } else { 00874 $end = "/(<\\/$element\\s*>)/i"; 00875 } 00876 $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE ); 00877 $content = $q[0]; 00878 if ( count( $q ) < 3 ) { 00879 # No end tag -- let it run out to the end of the text. 
$tail = '';
					$text = '';
				} else {
					$tail = $q[1];
					$text = $q[2];
				}
			}

			# Record element name, inner content, decoded attributes and the full original text
			$matches[$marker] = array( $element,
				$content,
				Sanitizer::decodeTagAttributes( $attributes ),
				"<$element$attributes$close$content$tail" );
		}
		return $stripped;
	}

	/**
	 * Get the strip list.
	 *
	 * @return array The value of $this->mStripList
	 */
	function getStripList() {
		return $this->mStripList;
	}

	/**
	 * Register a piece of text with the strip state under a newly generated
	 * marker and return that marker.
	 *
	 * The marker is built from the unique prefix, the running marker index
	 * (incremented on every call) and MARKER_SUFFIX.
	 *
	 * @param string $text Text to store via StripState::addGeneral()
	 * @return string The generated marker
	 */
	function insertStripItem( $text ) {
		$rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
		$this->mMarkerIndex++;
		$this->mStripState->addGeneral( $rnd, $text );
		return $rnd;
	}

	/**
	 * Convert wiki table markup to HTML table markup, one input line at a time.
	 *
	 * Recognised line starts: "{|" (optionally preceded by colons) opens a
	 * table, "|}" closes it, "|-" starts a row, "|+" a caption, "|" a data
	 * cell and "!" a header cell. Nested tables are tracked with parallel
	 * stacks that hold one entry per currently open table. Tables still open
	 * when the input ends are closed automatically.
	 *
	 * @param string $text
	 * @return string
	 */
	function doTableStuff( $text ) {
		wfProfileIn( __METHOD__ );

		$lines = StringUtils::explode( "\n", $text );
		$out = '';
		$td_history = array(); # Is currently a td tag open?
		$last_tag_history = array(); # Save history of last tag activated (td, th or caption)
		$tr_history = array(); # Is currently a tr tag open?
		$tr_attributes = array(); # history of tr attributes
		$has_opened_tr = array(); # Did this table open a <tr> element?
		$indent_level = 0; # indent level of the table

		foreach ( $lines as $outLine ) {
			$line = trim( $outLine );

			if ( $line === '' ) { # empty line, go to next line
				$out .= $outLine . "\n";
				continue;
			}

			$first_character = $line[0];
			$matches = array();

			if ( preg_match( '/^(:*)\{\|(.*)$/', $line, $matches ) ) {
				# First check if we are starting a new table
				$indent_level = strlen( $matches[1] );

				$attributes = $this->mStripState->unstripBoth( $matches[2] );
				$attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );

				$outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
				array_push( $td_history, false );
				array_push( $last_tag_history, '' );
				array_push( $tr_history, false );
				array_push( $tr_attributes, '' );
				array_push( $has_opened_tr, false );
			} elseif ( count( $td_history ) == 0 ) {
				# Don't do any of the following
				$out .= $outLine . "\n";
				continue;
			} elseif ( substr( $line, 0, 2 ) === '|}' ) {
				# We are ending a table
				$line = '</table>' . substr( $line, 2 );
				$last_tag = array_pop( $last_tag_history );

				if ( !array_pop( $has_opened_tr ) ) {
					$line = "<tr><td></td></tr>{$line}";
				}

				if ( array_pop( $tr_history ) ) {
					$line = "</tr>{$line}";
				}

				if ( array_pop( $td_history ) ) {
					$line = "</{$last_tag}>{$line}";
				}
				array_pop( $tr_attributes );
				$outLine = $line . str_repeat( '</dd></dl>', $indent_level );
			} elseif ( substr( $line, 0, 2 ) === '|-' ) {
				# Now we have a table row
				$line = preg_replace( '#^\|-+#', '', $line );

				# Whats after the tag is now only attributes
				$attributes = $this->mStripState->unstripBoth( $line );
				$attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
				array_pop( $tr_attributes );
				array_push( $tr_attributes, $attributes );

				$line = '';
				$last_tag = array_pop( $last_tag_history );
				array_pop( $has_opened_tr );
				array_push( $has_opened_tr, true );

				if ( array_pop( $tr_history ) ) {
					$line = '</tr>';
				}

				if ( array_pop( $td_history ) ) {
					$line = "</{$last_tag}>{$line}";
				}

				$outLine = $line;
				array_push( $tr_history, false );
				array_push( $td_history, false );
				array_push( $last_tag_history, '' );
			} elseif ( $first_character === '|' || $first_character === '!' || substr( $line, 0, 2 ) === '|+' ) {
				# This might be cell elements, td, th or captions
				if ( substr( $line, 0, 2 ) === '|+' ) {
					$first_character = '+';
					$line = substr( $line, 1 );
				}

				$line = substr( $line, 1 );

				if ( $first_character === '!' ) {
					$line = str_replace( '!!', '||', $line );
				}

				# Split up multiple cells on the same line.
				# FIXME : This can result in improper nesting of tags processed
				# by earlier parser steps, but should avoid splitting up eg
				# attribute values containing literal "||".
				$cells = StringUtils::explodeMarkup( '||', $line );

				$outLine = '';

				# Loop through each table cell
				foreach ( $cells as $cell ) {
					$previous = '';
					if ( $first_character !== '+' ) {
						$tr_after = array_pop( $tr_attributes );
						if ( !array_pop( $tr_history ) ) {
							$previous = "<tr{$tr_after}>\n";
						}
						array_push( $tr_history, true );
						array_push( $tr_attributes, '' );
						array_pop( $has_opened_tr );
						array_push( $has_opened_tr, true );
					}

					$last_tag = array_pop( $last_tag_history );

					if ( array_pop( $td_history ) ) {
						$previous = "</{$last_tag}>\n{$previous}";
					}

					if ( $first_character === '|' ) {
						$last_tag = 'td';
					} elseif ( $first_character === '!' ) {
						$last_tag = 'th';
					} elseif ( $first_character === '+' ) {
						$last_tag = 'caption';
					} else {
						$last_tag = '';
					}

					array_push( $last_tag_history, $last_tag );

					# A cell could contain both parameters and data
					$cell_data = explode( '|', $cell, 2 );

					# Bug 553: Note that a '|' inside an invalid link should not
					# be mistaken as delimiting cell parameters
					if ( strpos( $cell_data[0], '[[' ) !== false ) {
						$cell = "{$previous}<{$last_tag}>{$cell}";
					} elseif ( count( $cell_data ) == 1 ) {
						$cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
					} else {
						$attributes = $this->mStripState->unstripBoth( $cell_data[0] );
						$attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
						$cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
					}

					$outLine .= $cell;
					array_push( $td_history, true );
				}
			}
			$out .= $outLine . "\n";
		}

		# Closing open td, tr && table
		while ( count( $td_history ) > 0 ) {
			# Close the open cell with the tag that opened it (td, th or
			# caption), the same way the '|}' branch above does.
			$last_tag = array_pop( $last_tag_history );
			if ( array_pop( $td_history ) ) {
				$out .= "</{$last_tag}>\n";
			}
			if ( array_pop( $tr_history ) ) {
				$out .= "</tr>\n";
			}
			if ( !array_pop( $has_opened_tr ) ) {
				$out .= "<tr><td></td></tr>\n";
			}

			$out .= "</table>\n";
		}

		# Remove trailing line-ending (b/c)
		if ( substr( $out, -1 ) === "\n" ) {
			$out = substr( $out, 0, -1 );
		}

		# special case: don't return empty table
		if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
			$out = '';
		}

		wfProfileOut( __METHOD__ );

		return $out;
	}

	function internalParse( $text, $isMain = true, $frame = false ) {
		wfProfileIn( __METHOD__ );

		$origText = $text;

		# Hook to suspend the parser in this state
		if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text,
&$this->mStripState ) ) ) { 01136 wfProfileOut( __METHOD__ ); 01137 return $text; 01138 } 01139 01140 # if $frame is provided, then use $frame for replacing any variables 01141 if ( $frame ) { 01142 # use frame depth to infer how include/noinclude tags should be handled 01143 # depth=0 means this is the top-level document; otherwise it's an included document 01144 if ( !$frame->depth ) { 01145 $flag = 0; 01146 } else { 01147 $flag = Parser::PTD_FOR_INCLUSION; 01148 } 01149 $dom = $this->preprocessToDom( $text, $flag ); 01150 $text = $frame->expand( $dom ); 01151 } else { 01152 # if $frame is not provided, then use old-style replaceVariables 01153 $text = $this->replaceVariables( $text ); 01154 } 01155 01156 wfRunHooks( 'InternalParseBeforeSanitize', array( &$this, &$text, &$this->mStripState ) ); 01157 $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ), false, array_keys( $this->mTransparentTagHooks ) ); 01158 wfRunHooks( 'InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ) ); 01159 01160 # Tables need to come after variable replacement for things to work 01161 # properly; putting them before other transformations should keep 01162 # exciting things like link expansions from showing up in surprising 01163 # places. 01164 $text = $this->doTableStuff( $text ); 01165 01166 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text ); 01167 01168 $text = $this->doDoubleUnderscore( $text ); 01169 01170 $text = $this->doHeadings( $text ); 01171 $text = $this->replaceInternalLinks( $text ); 01172 $text = $this->doAllQuotes( $text ); 01173 $text = $this->replaceExternalLinks( $text ); 01174 01175 # replaceInternalLinks may sometimes leave behind 01176 # absolute URLs, which have to be masked to hide them from replaceExternalLinks 01177 $text = str_replace( $this->mUniqPrefix . 
'NOPARSE', '', $text ); 01178 01179 $text = $this->doMagicLinks( $text ); 01180 $text = $this->formatHeadings( $text, $origText, $isMain ); 01181 01182 wfProfileOut( __METHOD__ ); 01183 return $text; 01184 } 01185 01197 function doMagicLinks( $text ) { 01198 wfProfileIn( __METHOD__ ); 01199 $prots = wfUrlProtocolsWithoutProtRel(); 01200 $urlChar = self::EXT_LINK_URL_CLASS; 01201 $text = preg_replace_callback( 01202 '!(?: # Start cases 01203 (<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text 01204 (<.*?>) | # m[2]: Skip stuff inside HTML elements' . " 01205 (\\b(?i:$prots)$urlChar+) | # m[3]: Free external links" . ' 01206 (?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number 01207 ISBN\s+(\b # m[5]: ISBN, capture number 01208 (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix 01209 (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters 01210 [0-9Xx] # check digit 01211 \b) 01212 )!xu', array( &$this, 'magicLinkCallback' ), $text ); 01213 wfProfileOut( __METHOD__ ); 01214 return $text; 01215 } 01216 01222 function magicLinkCallback( $m ) { 01223 if ( isset( $m[1] ) && $m[1] !== '' ) { 01224 # Skip anchor 01225 return $m[0]; 01226 } elseif ( isset( $m[2] ) && $m[2] !== '' ) { 01227 # Skip HTML element 01228 return $m[0]; 01229 } elseif ( isset( $m[3] ) && $m[3] !== '' ) { 01230 # Free external link 01231 return $this->makeFreeExternalLink( $m[0] ); 01232 } elseif ( isset( $m[4] ) && $m[4] !== '' ) { 01233 # RFC or PMID 01234 if ( substr( $m[0], 0, 3 ) === 'RFC' ) { 01235 $keyword = 'RFC'; 01236 $urlmsg = 'rfcurl'; 01237 $CssClass = 'mw-magiclink-rfc'; 01238 $id = $m[4]; 01239 } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) { 01240 $keyword = 'PMID'; 01241 $urlmsg = 'pubmedurl'; 01242 $CssClass = 'mw-magiclink-pmid'; 01243 $id = $m[4]; 01244 } else { 01245 throw new MWException( __METHOD__ . ': unrecognised match type "' . 01246 substr( $m[0], 0, 20 ) . 
'"' ); 01247 } 01248 $url = wfMessage( $urlmsg, $id )->inContentLanguage()->text(); 01249 return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $CssClass ); 01250 } elseif ( isset( $m[5] ) && $m[5] !== '' ) { 01251 # ISBN 01252 $isbn = $m[5]; 01253 $num = strtr( $isbn, array( 01254 '-' => '', 01255 ' ' => '', 01256 'x' => 'X', 01257 )); 01258 $titleObj = SpecialPage::getTitleFor( 'Booksources', $num ); 01259 return'<a href="' . 01260 htmlspecialchars( $titleObj->getLocalUrl() ) . 01261 "\" class=\"internal mw-magiclink-isbn\">ISBN $isbn</a>"; 01262 } else { 01263 return $m[0]; 01264 } 01265 } 01266 01275 function makeFreeExternalLink( $url ) { 01276 wfProfileIn( __METHOD__ ); 01277 01278 $trail = ''; 01279 01280 # The characters '<' and '>' (which were escaped by 01281 # removeHTMLtags()) should not be included in 01282 # URLs, per RFC 2396. 01283 $m2 = array(); 01284 if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) { 01285 $trail = substr( $url, $m2[0][1] ) . $trail; 01286 $url = substr( $url, 0, $m2[0][1] ); 01287 } 01288 01289 # Move trailing punctuation to $trail 01290 $sep = ',;\.:!?'; 01291 # If there is no left bracket, then consider right brackets fair game too 01292 if ( strpos( $url, '(' ) === false ) { 01293 $sep .= ')'; 01294 } 01295 01296 $numSepChars = strspn( strrev( $url ), $sep ); 01297 if ( $numSepChars ) { 01298 $trail = substr( $url, -$numSepChars ) . $trail; 01299 $url = substr( $url, 0, -$numSepChars ); 01300 } 01301 01302 $url = Sanitizer::cleanUrl( $url ); 01303 01304 # Is this an external image? 01305 $text = $this->maybeMakeExternalImage( $url ); 01306 if ( $text === false ) { 01307 # Not an image, make a link 01308 $text = Linker::makeExternalLink( $url, 01309 $this->getConverterLanguage()->markNoConversion( $url, true ), 01310 true, 'free', 01311 $this->getExternalLinkAttribs( $url ) ); 01312 # Register it in the output object... 
01313 # Replace unnecessary URL escape codes with their equivalent characters 01314 $pasteurized = self::replaceUnusualEscapes( $url ); 01315 $this->mOutput->addExternalLink( $pasteurized ); 01316 } 01317 wfProfileOut( __METHOD__ ); 01318 return $text . $trail; 01319 } 01320 01330 function doHeadings( $text ) { 01331 wfProfileIn( __METHOD__ ); 01332 for ( $i = 6; $i >= 1; --$i ) { 01333 $h = str_repeat( '=', $i ); 01334 $text = preg_replace( "/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text ); 01335 } 01336 wfProfileOut( __METHOD__ ); 01337 return $text; 01338 } 01339 01348 function doAllQuotes( $text ) { 01349 wfProfileIn( __METHOD__ ); 01350 $outtext = ''; 01351 $lines = StringUtils::explode( "\n", $text ); 01352 foreach ( $lines as $line ) { 01353 $outtext .= $this->doQuotes( $line ) . "\n"; 01354 } 01355 $outtext = substr( $outtext, 0, -1 ); 01356 wfProfileOut( __METHOD__ ); 01357 return $outtext; 01358 } 01359 01367 public function doQuotes( $text ) { 01368 $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE ); 01369 if ( count( $arr ) == 1 ) { 01370 return $text; 01371 } else { 01372 # First, do some preliminary work. This may shift some apostrophes from 01373 # being mark-up to being text. It also counts the number of occurrences 01374 # of bold and italics mark-ups. 01375 $numbold = 0; 01376 $numitalics = 0; 01377 for ( $i = 0; $i < count( $arr ); $i++ ) { 01378 if ( ( $i % 2 ) == 1 ) { 01379 # If there are ever four apostrophes, assume the first is supposed to 01380 # be text, and the remaining three constitute mark-up for bold text. 01381 if ( strlen( $arr[$i] ) == 4 ) { 01382 $arr[$i-1] .= "'"; 01383 $arr[$i] = "'''"; 01384 } elseif ( strlen( $arr[$i] ) > 5 ) { 01385 # If there are more than 5 apostrophes in a row, assume they're all 01386 # text except for the last 5. 01387 $arr[$i-1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 ); 01388 $arr[$i] = "'''''"; 01389 } 01390 # Count the number of occurrences of bold and italics mark-ups. 
01391 # We are not counting sequences of five apostrophes. 01392 if ( strlen( $arr[$i] ) == 2 ) { 01393 $numitalics++; 01394 } elseif ( strlen( $arr[$i] ) == 3 ) { 01395 $numbold++; 01396 } elseif ( strlen( $arr[$i] ) == 5 ) { 01397 $numitalics++; 01398 $numbold++; 01399 } 01400 } 01401 } 01402 01403 # If there is an odd number of both bold and italics, it is likely 01404 # that one of the bold ones was meant to be an apostrophe followed 01405 # by italics. Which one we cannot know for certain, but it is more 01406 # likely to be one that has a single-letter word before it. 01407 if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) { 01408 $i = 0; 01409 $firstsingleletterword = -1; 01410 $firstmultiletterword = -1; 01411 $firstspace = -1; 01412 foreach ( $arr as $r ) { 01413 if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) ) { 01414 $x1 = substr( $arr[$i-1], -1 ); 01415 $x2 = substr( $arr[$i-1], -2, 1 ); 01416 if ( $x1 === ' ' ) { 01417 if ( $firstspace == -1 ) { 01418 $firstspace = $i; 01419 } 01420 } elseif ( $x2 === ' ' ) { 01421 if ( $firstsingleletterword == -1 ) { 01422 $firstsingleletterword = $i; 01423 } 01424 } else { 01425 if ( $firstmultiletterword == -1 ) { 01426 $firstmultiletterword = $i; 01427 } 01428 } 01429 } 01430 $i++; 01431 } 01432 01433 # If there is a single-letter word, use it! 01434 if ( $firstsingleletterword > -1 ) { 01435 $arr[$firstsingleletterword] = "''"; 01436 $arr[$firstsingleletterword-1] .= "'"; 01437 } elseif ( $firstmultiletterword > -1 ) { 01438 # If not, but there's a multi-letter word, use that one. 01439 $arr[$firstmultiletterword] = "''"; 01440 $arr[$firstmultiletterword-1] .= "'"; 01441 } elseif ( $firstspace > -1 ) { 01442 # ... otherwise use the first one that has neither. 
01443 # (notice that it is possible for all three to be -1 if, for example, 01444 # there is only one pentuple-apostrophe in the line) 01445 $arr[$firstspace] = "''"; 01446 $arr[$firstspace-1] .= "'"; 01447 } 01448 } 01449 01450 # Now let's actually convert our apostrophic mush to HTML! 01451 $output = ''; 01452 $buffer = ''; 01453 $state = ''; 01454 $i = 0; 01455 foreach ( $arr as $r ) { 01456 if ( ( $i % 2 ) == 0 ) { 01457 if ( $state === 'both' ) { 01458 $buffer .= $r; 01459 } else { 01460 $output .= $r; 01461 } 01462 } else { 01463 if ( strlen( $r ) == 2 ) { 01464 if ( $state === 'i' ) { 01465 $output .= '</i>'; $state = ''; 01466 } elseif ( $state === 'bi' ) { 01467 $output .= '</i>'; $state = 'b'; 01468 } elseif ( $state === 'ib' ) { 01469 $output .= '</b></i><b>'; $state = 'b'; 01470 } elseif ( $state === 'both' ) { 01471 $output .= '<b><i>' . $buffer . '</i>'; $state = 'b'; 01472 } else { # $state can be 'b' or '' 01473 $output .= '<i>'; $state .= 'i'; 01474 } 01475 } elseif ( strlen( $r ) == 3 ) { 01476 if ( $state === 'b' ) { 01477 $output .= '</b>'; $state = ''; 01478 } elseif ( $state === 'bi' ) { 01479 $output .= '</i></b><i>'; $state = 'i'; 01480 } elseif ( $state === 'ib' ) { 01481 $output .= '</b>'; $state = 'i'; 01482 } elseif ( $state === 'both' ) { 01483 $output .= '<i><b>' . $buffer . '</b>'; $state = 'i'; 01484 } else { # $state can be 'i' or '' 01485 $output .= '<b>'; $state .= 'b'; 01486 } 01487 } elseif ( strlen( $r ) == 5 ) { 01488 if ( $state === 'b' ) { 01489 $output .= '</b><i>'; $state = 'i'; 01490 } elseif ( $state === 'i' ) { 01491 $output .= '</i><b>'; $state = 'b'; 01492 } elseif ( $state === 'bi' ) { 01493 $output .= '</i></b>'; $state = ''; 01494 } elseif ( $state === 'ib' ) { 01495 $output .= '</b></i>'; $state = ''; 01496 } elseif ( $state === 'both' ) { 01497 $output .= '<i><b>' . $buffer . 
'</b></i>'; $state = ''; 01498 } else { # ($state == '') 01499 $buffer = ''; $state = 'both'; 01500 } 01501 } 01502 } 01503 $i++; 01504 } 01505 # Now close all remaining tags. Notice that the order is important. 01506 if ( $state === 'b' || $state === 'ib' ) { 01507 $output .= '</b>'; 01508 } 01509 if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) { 01510 $output .= '</i>'; 01511 } 01512 if ( $state === 'bi' ) { 01513 $output .= '</b>'; 01514 } 01515 # There might be lonely ''''', so make sure we have a buffer 01516 if ( $state === 'both' && $buffer ) { 01517 $output .= '<b><i>' . $buffer . '</i></b>'; 01518 } 01519 return $output; 01520 } 01521 } 01522 01536 function replaceExternalLinks( $text ) { 01537 wfProfileIn( __METHOD__ ); 01538 01539 $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE ); 01540 if ( $bits === false ) { 01541 throw new MWException( "PCRE needs to be compiled with --enable-unicode-properties in order for MediaWiki to function" ); 01542 } 01543 $s = array_shift( $bits ); 01544 01545 $i = 0; 01546 while ( $i<count( $bits ) ) { 01547 $url = $bits[$i++]; 01548 $i++; // protocol 01549 $text = $bits[$i++]; 01550 $trail = $bits[$i++]; 01551 01552 # The characters '<' and '>' (which were escaped by 01553 # removeHTMLtags()) should not be included in 01554 # URLs, per RFC 2396. 01555 $m2 = array(); 01556 if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) { 01557 $text = substr( $url, $m2[0][1] ) . ' ' . $text; 01558 $url = substr( $url, 0, $m2[0][1] ); 01559 } 01560 01561 # If the link text is an image URL, replace it with an <img> tag 01562 # This happened by accident in the original parser, but some people used it extensively 01563 $img = $this->maybeMakeExternalImage( $text ); 01564 if ( $img !== false ) { 01565 $text = $img; 01566 } 01567 01568 $dtrail = ''; 01569 01570 # Set linktype for CSS - if URL==text, link is essentially free 01571 $linktype = ( $text === $url ) ? 
'free' : 'text'; 01572 01573 # No link text, e.g. [http://domain.tld/some.link] 01574 if ( $text == '' ) { 01575 # Autonumber 01576 $langObj = $this->getTargetLanguage(); 01577 $text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']'; 01578 $linktype = 'autonumber'; 01579 } else { 01580 # Have link text, e.g. [http://domain.tld/some.link text]s 01581 # Check for trail 01582 list( $dtrail, $trail ) = Linker::splitTrail( $trail ); 01583 } 01584 01585 $text = $this->getConverterLanguage()->markNoConversion( $text ); 01586 01587 $url = Sanitizer::cleanUrl( $url ); 01588 01589 # Use the encoded URL 01590 # This means that users can paste URLs directly into the text 01591 # Funny characters like ö aren't valid in URLs anyway 01592 # This was changed in August 2004 01593 $s .= Linker::makeExternalLink( $url, $text, false, $linktype, 01594 $this->getExternalLinkAttribs( $url ) ) . $dtrail . $trail; 01595 01596 # Register link in the output object. 01597 # Replace unnecessary URL escape codes with the referenced character 01598 # This prevents spammers from hiding links from the filters 01599 $pasteurized = self::replaceUnusualEscapes( $url ); 01600 $this->mOutput->addExternalLink( $pasteurized ); 01601 } 01602 01603 wfProfileOut( __METHOD__ ); 01604 return $s; 01605 } 01615 public static function getExternalLinkRel( $url = false, $title = null ) { 01616 global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions; 01617 $ns = $title ? 
$title->getNamespace() : false; 01618 if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions ) && 01619 !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) 01620 { 01621 return 'nofollow'; 01622 } 01623 return null; 01624 } 01635 function getExternalLinkAttribs( $url = false ) { 01636 $attribs = array(); 01637 $attribs['rel'] = self::getExternalLinkRel( $url, $this->mTitle ); 01638 01639 if ( $this->mOptions->getExternalLinkTarget() ) { 01640 $attribs['target'] = $this->mOptions->getExternalLinkTarget(); 01641 } 01642 return $attribs; 01643 } 01644 01656 static function replaceUnusualEscapes( $url ) { 01657 return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', 01658 array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url ); 01659 } 01660 01669 private static function replaceUnusualEscapesCallback( $matches ) { 01670 $char = urldecode( $matches[0] ); 01671 $ord = ord( $char ); 01672 # Is it an unsafe or HTTP reserved character according to RFC 1738? 01673 if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) { 01674 # No, shouldn't be escaped 01675 return $char; 01676 } else { 01677 # Yes, leave it escaped 01678 return $matches[0]; 01679 } 01680 } 01681 01691 function maybeMakeExternalImage( $url ) { 01692 $imagesfrom = $this->mOptions->getAllowExternalImagesFrom(); 01693 $imagesexception = !empty( $imagesfrom ); 01694 $text = false; 01695 # $imagesfrom could be either a single string or an array of strings, parse out the latter 01696 if ( $imagesexception && is_array( $imagesfrom ) ) { 01697 $imagematch = false; 01698 foreach ( $imagesfrom as $match ) { 01699 if ( strpos( $url, $match ) === 0 ) { 01700 $imagematch = true; 01701 break; 01702 } 01703 } 01704 } elseif ( $imagesexception ) { 01705 $imagematch = ( strpos( $url, $imagesfrom ) === 0 ); 01706 } else { 01707 $imagematch = false; 01708 } 01709 if ( $this->mOptions->getAllowExternalImages() 01710 || ( $imagesexception && $imagematch ) ) { 01711 if ( preg_match( 
self::EXT_IMAGE_REGEX, $url ) ) { 01712 # Image found 01713 $text = Linker::makeExternalImage( $url ); 01714 } 01715 } 01716 if ( !$text && $this->mOptions->getEnableImageWhitelist() 01717 && preg_match( self::EXT_IMAGE_REGEX, $url ) ) { 01718 $whitelist = explode( "\n", wfMessage( 'external_image_whitelist' )->inContentLanguage()->text() ); 01719 foreach ( $whitelist as $entry ) { 01720 # Sanitize the regex fragment, make it case-insensitive, ignore blank entries/comments 01721 if ( strpos( $entry, '#' ) === 0 || $entry === '' ) { 01722 continue; 01723 } 01724 if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) { 01725 # Image matches a whitelist entry 01726 $text = Linker::makeExternalImage( $url ); 01727 break; 01728 } 01729 } 01730 } 01731 return $text; 01732 } 01733 01743 function replaceInternalLinks( $s ) { 01744 $this->mLinkHolders->merge( $this->replaceInternalLinks2( $s ) ); 01745 return $s; 01746 } 01747 01756 function replaceInternalLinks2( &$s ) { 01757 wfProfileIn( __METHOD__ ); 01758 01759 wfProfileIn( __METHOD__ . '-setup' ); 01760 static $tc = false, $e1, $e1_img; 01761 # the % is needed to support urlencoded titles as well 01762 if ( !$tc ) { 01763 $tc = Title::legalChars() . '#%'; 01764 # Match a link having the form [[namespace:link|alternate]]trail 01765 $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD"; 01766 # Match cases where there is no "]]", which might still be images 01767 $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; 01768 } 01769 01770 $holders = new LinkHolderArray( $this ); 01771 01772 # split the entire text string on occurrences of [[ 01773 $a = StringUtils::explode( '[[', ' ' . 
$s ); 01774 # get the first element (all text up to first [[), and remove the space we added 01775 $s = $a->current(); 01776 $a->next(); 01777 $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void" 01778 $s = substr( $s, 1 ); 01779 01780 $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension(); 01781 $e2 = null; 01782 if ( $useLinkPrefixExtension ) { 01783 # Match the end of a line for a word that's not followed by whitespace, 01784 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched 01785 $e2 = wfMessage( 'linkprefix' )->inContentLanguage()->text(); 01786 } 01787 01788 if ( is_null( $this->mTitle ) ) { 01789 wfProfileOut( __METHOD__ . '-setup' ); 01790 wfProfileOut( __METHOD__ ); 01791 throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" ); 01792 } 01793 $nottalk = !$this->mTitle->isTalkPage(); 01794 01795 if ( $useLinkPrefixExtension ) { 01796 $m = array(); 01797 if ( preg_match( $e2, $s, $m ) ) { 01798 $first_prefix = $m[2]; 01799 } else { 01800 $first_prefix = false; 01801 } 01802 } else { 01803 $prefix = ''; 01804 } 01805 01806 $useSubpages = $this->areSubpagesAllowed(); 01807 wfProfileOut( __METHOD__ . '-setup' ); 01808 01809 # Loop for each link 01810 for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) { 01811 # Check for excessive memory usage 01812 if ( $holders->isBig() ) { 01813 # Too big 01814 # Do the existence check, replace the link holders and clear the array 01815 $holders->replace( $s ); 01816 $holders->clear(); 01817 } 01818 01819 if ( $useLinkPrefixExtension ) { 01820 wfProfileIn( __METHOD__ . '-prefixhandling' ); 01821 if ( preg_match( $e2, $s, $m ) ) { 01822 $prefix = $m[2]; 01823 $s = $m[1]; 01824 } else { 01825 $prefix = ''; 01826 } 01827 # first link 01828 if ( $first_prefix ) { 01829 $prefix = $first_prefix; 01830 $first_prefix = false; 01831 } 01832 wfProfileOut( __METHOD__ . 
'-prefixhandling' ); 01833 } 01834 01835 $might_be_img = false; 01836 01837 wfProfileIn( __METHOD__ . "-e1" ); 01838 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt 01839 $text = $m[2]; 01840 # If we get a ] at the beginning of $m[3] that means we have a link that's something like: 01841 # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up, 01842 # the real problem is with the $e1 regex 01843 # See bug 1300. 01844 # 01845 # Still some problems for cases where the ] is meant to be outside punctuation, 01846 # and no image is in sight. See bug 2095. 01847 # 01848 if ( $text !== '' && 01849 substr( $m[3], 0, 1 ) === ']' && 01850 strpos( $text, '[' ) !== false 01851 ) 01852 { 01853 $text .= ']'; # so that replaceExternalLinks($text) works later 01854 $m[3] = substr( $m[3], 1 ); 01855 } 01856 # fix up urlencoded title texts 01857 if ( strpos( $m[1], '%' ) !== false ) { 01858 # Should anchors '#' also be rejected? 01859 $m[1] = str_replace( array( '<', '>' ), array( '<', '>' ), rawurldecode( $m[1] ) ); 01860 } 01861 $trail = $m[3]; 01862 } elseif ( preg_match( $e1_img, $line, $m ) ) { # Invalid, but might be an image with a link in its caption 01863 $might_be_img = true; 01864 $text = $m[2]; 01865 if ( strpos( $m[1], '%' ) !== false ) { 01866 $m[1] = rawurldecode( $m[1] ); 01867 } 01868 $trail = ""; 01869 } else { # Invalid form; output directly 01870 $s .= $prefix . '[[' . $line; 01871 wfProfileOut( __METHOD__ . "-e1" ); 01872 continue; 01873 } 01874 wfProfileOut( __METHOD__ . "-e1" ); 01875 wfProfileIn( __METHOD__ . "-misc" ); 01876 01877 # Don't allow internal links to pages containing 01878 # PROTO: where PROTO is a valid URL protocol; these 01879 # should be external links. 01880 if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $m[1] ) ) { 01881 $s .= $prefix . '[[' . $line; 01882 wfProfileOut( __METHOD__ . 
"-misc" ); 01883 continue; 01884 } 01885 01886 # Make subpage if necessary 01887 if ( $useSubpages ) { 01888 $link = $this->maybeDoSubpageLink( $m[1], $text ); 01889 } else { 01890 $link = $m[1]; 01891 } 01892 01893 $noforce = ( substr( $m[1], 0, 1 ) !== ':' ); 01894 if ( !$noforce ) { 01895 # Strip off leading ':' 01896 $link = substr( $link, 1 ); 01897 } 01898 01899 wfProfileOut( __METHOD__ . "-misc" ); 01900 wfProfileIn( __METHOD__ . "-title" ); 01901 $nt = Title::newFromText( $this->mStripState->unstripNoWiki( $link ) ); 01902 if ( $nt === null ) { 01903 $s .= $prefix . '[[' . $line; 01904 wfProfileOut( __METHOD__ . "-title" ); 01905 continue; 01906 } 01907 01908 $ns = $nt->getNamespace(); 01909 $iw = $nt->getInterWiki(); 01910 wfProfileOut( __METHOD__ . "-title" ); 01911 01912 if ( $might_be_img ) { # if this is actually an invalid link 01913 wfProfileIn( __METHOD__ . "-might_be_img" ); 01914 if ( $ns == NS_FILE && $noforce ) { # but might be an image 01915 $found = false; 01916 while ( true ) { 01917 # look at the next 'line' to see if we can close it there 01918 $a->next(); 01919 $next_line = $a->current(); 01920 if ( $next_line === false || $next_line === null ) { 01921 break; 01922 } 01923 $m = explode( ']]', $next_line, 3 ); 01924 if ( count( $m ) == 3 ) { 01925 # the first ]] closes the inner link, the second the image 01926 $found = true; 01927 $text .= "[[{$m[0]}]]{$m[1]}"; 01928 $trail = $m[2]; 01929 break; 01930 } elseif ( count( $m ) == 2 ) { 01931 # if there's exactly one ]] that's fine, we'll keep looking 01932 $text .= "[[{$m[0]}]]{$m[1]}"; 01933 } else { 01934 # if $next_line is invalid too, we need look no further 01935 $text .= '[[' . 
$next_line; 01936 break; 01937 } 01938 } 01939 if ( !$found ) { 01940 # we couldn't find the end of this imageLink, so output it raw 01941 # but don't ignore what might be perfectly normal links in the text we've examined 01942 $holders->merge( $this->replaceInternalLinks2( $text ) ); 01943 $s .= "{$prefix}[[$link|$text"; 01944 # note: no $trail, because without an end, there *is* no trail 01945 wfProfileOut( __METHOD__ . "-might_be_img" ); 01946 continue; 01947 } 01948 } else { # it's not an image, so output it raw 01949 $s .= "{$prefix}[[$link|$text"; 01950 # note: no $trail, because without an end, there *is* no trail 01951 wfProfileOut( __METHOD__ . "-might_be_img" ); 01952 continue; 01953 } 01954 wfProfileOut( __METHOD__ . "-might_be_img" ); 01955 } 01956 01957 $wasblank = ( $text == '' ); 01958 if ( $wasblank ) { 01959 $text = $link; 01960 } else { 01961 # Bug 4598 madness. Handle the quotes only if they come from the alternate part 01962 # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a> 01963 # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']] 01964 # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a> 01965 $text = $this->doQuotes( $text ); 01966 } 01967 01968 # Link not escaped by : , create the various objects 01969 if ( $noforce ) { 01970 # Interwikis 01971 wfProfileIn( __METHOD__ . "-interwiki" ); 01972 if ( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && Language::fetchLanguageName( $iw, null, 'mw' ) ) { 01973 // XXX: the above check prevents links to sites with identifiers that are not language codes 01974 01975 # Bug 24502: filter duplicates 01976 if ( !isset( $this->mLangLinkLanguages[$iw] ) ) { 01977 $this->mLangLinkLanguages[$iw] = true; 01978 $this->mOutput->addLanguageLink( $nt->getFullText() ); 01979 } 01980 01981 $s = rtrim( $s . $prefix ); 01982 $s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail; 01983 wfProfileOut( __METHOD__ . 
"-interwiki" ); 01984 continue; 01985 } 01986 wfProfileOut( __METHOD__ . "-interwiki" ); 01987 01988 if ( $ns == NS_FILE ) { 01989 wfProfileIn( __METHOD__ . "-image" ); 01990 if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) { 01991 if ( $wasblank ) { 01992 # if no parameters were passed, $text 01993 # becomes something like "File:Foo.png", 01994 # which we don't want to pass on to the 01995 # image generator 01996 $text = ''; 01997 } else { 01998 # recursively parse links inside the image caption 01999 # actually, this will parse them in any other parameters, too, 02000 # but it might be hard to fix that, and it doesn't matter ATM 02001 $text = $this->replaceExternalLinks( $text ); 02002 $holders->merge( $this->replaceInternalLinks2( $text ) ); 02003 } 02004 # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them 02005 $s .= $prefix . $this->armorLinks( 02006 $this->makeImage( $nt, $text, $holders ) ) . $trail; 02007 } else { 02008 $s .= $prefix . $trail; 02009 } 02010 wfProfileOut( __METHOD__ . "-image" ); 02011 continue; 02012 } 02013 02014 if ( $ns == NS_CATEGORY ) { 02015 wfProfileIn( __METHOD__ . "-category" ); 02016 $s = rtrim( $s . "\n" ); # bug 87 02017 02018 if ( $wasblank ) { 02019 $sortkey = $this->getDefaultSort(); 02020 } else { 02021 $sortkey = $text; 02022 } 02023 $sortkey = Sanitizer::decodeCharReferences( $sortkey ); 02024 $sortkey = str_replace( "\n", '', $sortkey ); 02025 $sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey ); 02026 $this->mOutput->addCategory( $nt->getDBkey(), $sortkey ); 02027 02032 $s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail; 02033 02034 wfProfileOut( __METHOD__ . 
"-category" ); 02035 continue; 02036 } 02037 } 02038 02039 # Self-link checking 02040 if ( $nt->getFragment() === '' && $ns != NS_SPECIAL ) { 02041 if ( $nt->equals( $this->mTitle ) || ( !$nt->isKnown() && in_array( 02042 $this->mTitle->getPrefixedText(), 02043 $this->getConverterLanguage()->autoConvertToAllVariants( $nt->getPrefixedText() ), 02044 true 02045 ) ) ) { 02046 $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail ); 02047 continue; 02048 } 02049 } 02050 02051 # NS_MEDIA is a pseudo-namespace for linking directly to a file 02052 # @todo FIXME: Should do batch file existence checks, see comment below 02053 if ( $ns == NS_MEDIA ) { 02054 wfProfileIn( __METHOD__ . "-media" ); 02055 # Give extensions a chance to select the file revision for us 02056 $options = array(); 02057 $descQuery = false; 02058 wfRunHooks( 'BeforeParserFetchFileAndTitle', 02059 array( $this, $nt, &$options, &$descQuery ) ); 02060 # Fetch and register the file (file title may be different via hooks) 02061 list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options ); 02062 # Cloak with NOPARSE to avoid replacement in replaceExternalLinks 02063 $s .= $prefix . $this->armorLinks( 02064 Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail; 02065 wfProfileOut( __METHOD__ . "-media" ); 02066 continue; 02067 } 02068 02069 wfProfileIn( __METHOD__ . 
"-always_known" ); 02070 # Some titles, such as valid special pages or files in foreign repos, should 02071 # be shown as bluelinks even though they're not included in the page table 02072 # 02073 # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do 02074 # batch file existence checks for NS_FILE and NS_MEDIA 02075 if ( $iw == '' && $nt->isAlwaysKnown() ) { 02076 $this->mOutput->addLink( $nt ); 02077 $s .= $this->makeKnownLinkHolder( $nt, $text, array(), $trail, $prefix ); 02078 } else { 02079 # Links will be added to the output link list after checking 02080 $s .= $holders->makeHolder( $nt, $text, array(), $trail, $prefix ); 02081 } 02082 wfProfileOut( __METHOD__ . "-always_known" ); 02083 } 02084 wfProfileOut( __METHOD__ ); 02085 return $holders; 02086 } 02087 02102 function makeKnownLinkHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) { 02103 list( $inside, $trail ) = Linker::splitTrail( $trail ); 02104 02105 if ( is_string( $query ) ) { 02106 $query = wfCgiToArray( $query ); 02107 } 02108 if ( $text == '' ) { 02109 $text = htmlspecialchars( $nt->getPrefixedText() ); 02110 } 02111 02112 $link = Linker::linkKnown( $nt, "$prefix$text$inside", array(), $query ); 02113 02114 return $this->armorLinks( $link ) . $trail; 02115 } 02116 02127 function armorLinks( $text ) { 02128 return preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/', 02129 "{$this->mUniqPrefix}NOPARSE$1", $text ); 02130 } 02131 02136 function areSubpagesAllowed() { 02137 # Some namespaces don't allow subpages 02138 return MWNamespace::hasSubpages( $this->mTitle->getNamespace() ); 02139 } 02140 02149 function maybeDoSubpageLink( $target, &$text ) { 02150 return Linker::normalizeSubpageLink( $this->mTitle, $target, $text ); 02151 } 02152 02159 function closeParagraph() { 02160 $result = ''; 02161 if ( $this->mLastSection != '' ) { 02162 $result = '</' . $this->mLastSection . 
">\n"; 02163 } 02164 $this->mInPre = false; 02165 $this->mLastSection = ''; 02166 return $result; 02167 } 02168 02179 function getCommon( $st1, $st2 ) { 02180 $fl = strlen( $st1 ); 02181 $shorter = strlen( $st2 ); 02182 if ( $fl < $shorter ) { 02183 $shorter = $fl; 02184 } 02185 02186 for ( $i = 0; $i < $shorter; ++$i ) { 02187 if ( $st1[$i] != $st2[$i] ) { 02188 break; 02189 } 02190 } 02191 return $i; 02192 } 02193 02203 function openList( $char ) { 02204 $result = $this->closeParagraph(); 02205 02206 if ( '*' === $char ) { 02207 $result .= '<ul><li>'; 02208 } elseif ( '#' === $char ) { 02209 $result .= '<ol><li>'; 02210 } elseif ( ':' === $char ) { 02211 $result .= '<dl><dd>'; 02212 } elseif ( ';' === $char ) { 02213 $result .= '<dl><dt>'; 02214 $this->mDTopen = true; 02215 } else { 02216 $result = '<!-- ERR 1 -->'; 02217 } 02218 02219 return $result; 02220 } 02221 02229 function nextItem( $char ) { 02230 if ( '*' === $char || '#' === $char ) { 02231 return '</li><li>'; 02232 } elseif ( ':' === $char || ';' === $char ) { 02233 $close = '</dd>'; 02234 if ( $this->mDTopen ) { 02235 $close = '</dt>'; 02236 } 02237 if ( ';' === $char ) { 02238 $this->mDTopen = true; 02239 return $close . '<dt>'; 02240 } else { 02241 $this->mDTopen = false; 02242 return $close . '<dd>'; 02243 } 02244 } 02245 return '<!-- ERR 2 -->'; 02246 } 02247 02255 function closeList( $char ) { 02256 if ( '*' === $char ) { 02257 $text = '</li></ul>'; 02258 } elseif ( '#' === $char ) { 02259 $text = '</li></ol>'; 02260 } elseif ( ':' === $char ) { 02261 if ( $this->mDTopen ) { 02262 $this->mDTopen = false; 02263 $text = '</dt></dl>'; 02264 } else { 02265 $text = '</dd></dl>'; 02266 } 02267 } else { 02268 return '<!-- ERR 3 -->'; 02269 } 02270 return $text . "\n"; 02271 } 02282 function doBlockLevels( $text, $linestart ) { 02283 wfProfileIn( __METHOD__ ); 02284 02285 # Parsing through the text line by line. 
The main thing 02286 # happening here is handling of block-level elements p, pre, 02287 # and making lists from lines starting with * # : etc. 02288 # 02289 $textLines = StringUtils::explode( "\n", $text ); 02290 02291 $lastPrefix = $output = ''; 02292 $this->mDTopen = $inBlockElem = false; 02293 $prefixLength = 0; 02294 $paragraphStack = false; 02295 02296 foreach ( $textLines as $oLine ) { 02297 # Fix up $linestart 02298 if ( !$linestart ) { 02299 $output .= $oLine; 02300 $linestart = true; 02301 continue; 02302 } 02303 # * = ul 02304 # # = ol 02305 # ; = dt 02306 # : = dd 02307 02308 $lastPrefixLength = strlen( $lastPrefix ); 02309 $preCloseMatch = preg_match( '/<\\/pre/i', $oLine ); 02310 $preOpenMatch = preg_match( '/<pre/i', $oLine ); 02311 # If not in a <pre> element, scan for and figure out what prefixes are there. 02312 if ( !$this->mInPre ) { 02313 # Multiple prefixes may abut each other for nested lists. 02314 $prefixLength = strspn( $oLine, '*#:;' ); 02315 $prefix = substr( $oLine, 0, $prefixLength ); 02316 02317 # eh? 02318 # ; and : are both from definition-lists, so they're equivalent 02319 # for the purposes of determining whether or not we need to open/close 02320 # elements. 
02321 $prefix2 = str_replace( ';', ':', $prefix ); 02322 $t = substr( $oLine, $prefixLength ); 02323 $this->mInPre = (bool)$preOpenMatch; 02324 } else { 02325 # Don't interpret any other prefixes in preformatted text 02326 $prefixLength = 0; 02327 $prefix = $prefix2 = ''; 02328 $t = $oLine; 02329 } 02330 02331 # List generation 02332 if ( $prefixLength && $lastPrefix === $prefix2 ) { 02333 # Same as the last item, so no need to deal with nesting or opening stuff 02334 $output .= $this->nextItem( substr( $prefix, -1 ) ); 02335 $paragraphStack = false; 02336 02337 if ( substr( $prefix, -1 ) === ';' ) { 02338 # The one nasty exception: definition lists work like this: 02339 # ; title : definition text 02340 # So we check for : in the remainder text to split up the 02341 # title and definition, without b0rking links. 02342 $term = $t2 = ''; 02343 if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) { 02344 $t = $t2; 02345 $output .= $term . $this->nextItem( ':' ); 02346 } 02347 } 02348 } elseif ( $prefixLength || $lastPrefixLength ) { 02349 # We need to open or close prefixes, or both. 02350 02351 # Either open or close a level... 02352 $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix ); 02353 $paragraphStack = false; 02354 02355 # Close all the prefixes which aren't shared. 02356 while ( $commonPrefixLength < $lastPrefixLength ) { 02357 $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] ); 02358 --$lastPrefixLength; 02359 } 02360 02361 # Continue the current prefix if appropriate. 02362 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) { 02363 $output .= $this->nextItem( $prefix[$commonPrefixLength-1] ); 02364 } 02365 02366 # Open prefixes where appropriate. 
02367 while ( $prefixLength > $commonPrefixLength ) { 02368 $char = substr( $prefix, $commonPrefixLength, 1 ); 02369 $output .= $this->openList( $char ); 02370 02371 if ( ';' === $char ) { 02372 # @todo FIXME: This is dupe of code above 02373 if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) { 02374 $t = $t2; 02375 $output .= $term . $this->nextItem( ':' ); 02376 } 02377 } 02378 ++$commonPrefixLength; 02379 } 02380 $lastPrefix = $prefix2; 02381 } 02382 02383 # If we have no prefixes, go to paragraph mode. 02384 if ( 0 == $prefixLength ) { 02385 wfProfileIn( __METHOD__ . "-paragraph" ); 02386 # No prefix (not in list)--go to paragraph mode 02387 # XXX: use a stack for nestable elements like span, table and div 02388 $openmatch = preg_match( '/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS', $t ); 02389 $closematch = preg_match( 02390 '/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'. 02391 '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS', $t ); 02392 if ( $openmatch or $closematch ) { 02393 $paragraphStack = false; 02394 # TODO bug 5718: paragraph closed 02395 $output .= $this->closeParagraph(); 02396 if ( $preOpenMatch and !$preCloseMatch ) { 02397 $this->mInPre = true; 02398 } 02399 $inBlockElem = !$closematch; 02400 } elseif ( !$inBlockElem && !$this->mInPre ) { 02401 if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' || trim( $t ) != '' ) ) { 02402 # pre 02403 if ( $this->mLastSection !== 'pre' ) { 02404 $paragraphStack = false; 02405 $output .= $this->closeParagraph() . '<pre>'; 02406 $this->mLastSection = 'pre'; 02407 } 02408 $t = substr( $t, 1 ); 02409 } else { 02410 # paragraph 02411 if ( trim( $t ) === '' ) { 02412 if ( $paragraphStack ) { 02413 $output .= $paragraphStack . 
'<br />';
								$paragraphStack = false;
								$this->mLastSection = 'p';
							} else {
								if ( $this->mLastSection !== 'p' ) {
									$output .= $this->closeParagraph();
									$this->mLastSection = '';
									$paragraphStack = '<p>';
								} else {
									$paragraphStack = '</p><p>';
								}
							}
						} else {
							if ( $paragraphStack ) {
								$output .= $paragraphStack;
								$paragraphStack = false;
								$this->mLastSection = 'p';
							} elseif ( $this->mLastSection !== 'p' ) {
								$output .= $this->closeParagraph() . '<p>';
								$this->mLastSection = 'p';
							}
						}
					}
				}
				wfProfileOut( __METHOD__ . "-paragraph" );
			}
			# somewhere above we forget to get out of pre block (bug 785)
			if ( $preCloseMatch && $this->mInPre ) {
				$this->mInPre = false;
			}
			if ( $paragraphStack === false ) {
				$output .= $t . "\n";
			}
		}
		while ( $prefixLength ) {
			$output .= $this->closeList( $prefix2[$prefixLength-1] );
			--$prefixLength;
		}
		if ( $this->mLastSection != '' ) {
			$output .= '</' . $this->mLastSection . '>';
			$this->mLastSection = '';
		}

		wfProfileOut( __METHOD__ );
		return $output;
	}

	/**
	 * Split a string on the first ':' that is not nested inside a
	 * <tag>...</tag> pair, so that the split can never cut markup in half.
	 *
	 * Colons inside a tag's own angle brackets or inside an HTML comment
	 * are skipped as well, because the state machine only looks for ':'
	 * while it is in the plain-text state.
	 *
	 * @param string $str The string to split
	 * @param string &$before Set to everything before the ':' (only when one is found)
	 * @param string &$after Set to everything after the ':' (only when one is found)
	 * @throws MWException If the state machine ends up in an unknown state
	 * @return int|bool Byte offset of the ':' within $str, or false if no
	 *   usable colon exists. Offset 0 is a valid result, so callers must
	 *   compare against false with a strict operator.
	 */
	function findColonNoLinks( $str, &$before, &$after ) {
		wfProfileIn( __METHOD__ );

		$pos = strpos( $str, ':' );
		if ( $pos === false ) {
			# Nothing to find!
			wfProfileOut( __METHOD__ );
			return false;
		}

		$lt = strpos( $str, '<' );
		if ( $lt === false || $lt > $pos ) {
			# Easy; no tag nesting to worry about
			$before = substr( $str, 0, $pos );
			$after = substr( $str, $pos+1 );
			wfProfileOut( __METHOD__ );
			return $pos;
		}

		# Ugly state machine to walk through avoiding tags.
		# $stack counts the currently open (not self-closed) tags.
		$state = self::COLON_STATE_TEXT;
		$stack = 0;
		$len = strlen( $str );
		for( $i = 0; $i < $len; $i++ ) {
			$c = $str[$i];

			switch( $state ) {
			# (Using the number is a performance hack for common cases)
			case 0: # self::COLON_STATE_TEXT:
				switch( $c ) {
				case "<":
					# Could be either a <start> tag or an </end> tag
					$state = self::COLON_STATE_TAGSTART;
					break;
				case ":":
					if ( $stack == 0 ) {
						# We found it!
						$before = substr( $str, 0, $i );
						$after = substr( $str, $i + 1 );
						wfProfileOut( __METHOD__ );
						return $i;
					}
					# Embedded in a tag; don't break it.
					break;
				default:
					# Skip ahead looking for something interesting
					$colon = strpos( $str, ':', $i );
					if ( $colon === false ) {
						# Nothing else interesting
						wfProfileOut( __METHOD__ );
						return false;
					}
					$lt = strpos( $str, '<', $i );
					if ( $stack === 0 ) {
						if ( $lt === false || $colon < $lt ) {
							# We found it!
							$before = substr( $str, 0, $colon );
							$after = substr( $str, $colon + 1 );
							wfProfileOut( __METHOD__ );
							# Report the colon's offset, not the scan position
							# we skipped ahead from, to match the other
							# success paths and $before/$after.
							return $colon;
						}
					}
					if ( $lt === false ) {
						# Nothing else interesting to find; abort!
						# We're nested, but there's no close tags left. Abort!
						# "break" counts switch statements too, so 3 levels
						# are needed to leave both switches and the for loop.
						break 3;
					}
					# Skip ahead to next tag start
					$i = $lt;
					$state = self::COLON_STATE_TAGSTART;
				}
				break;
			case 1: # self::COLON_STATE_TAG:
				# In a <tag>
				switch( $c ) {
				case ">":
					$stack++;
					$state = self::COLON_STATE_TEXT;
					break;
				case "/":
					# Slash may be followed by >?
					$state = self::COLON_STATE_TAGSLASH;
					break;
				default:
					# ignore
				}
				break;
			case 2: # self::COLON_STATE_TAGSTART:
				switch( $c ) {
				case "/":
					$state = self::COLON_STATE_CLOSETAG;
					break;
				case "!":
					$state = self::COLON_STATE_COMMENT;
					break;
				case ">":
					# Illegal early close? This shouldn't happen D:
					$state = self::COLON_STATE_TEXT;
					break;
				default:
					$state = self::COLON_STATE_TAG;
				}
				break;
			case 3: # self::COLON_STATE_CLOSETAG:
				# In a </tag>
				if ( $c === ">" ) {
					$stack--;
					if ( $stack < 0 ) {
						wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" );
						wfProfileOut( __METHOD__ );
						return false;
					}
					$state = self::COLON_STATE_TEXT;
				}
				break;
			case self::COLON_STATE_TAGSLASH:
				if ( $c === ">" ) {
					# Yes, a self-closed tag <blah/>
					# (does not change $stack: nothing was opened)
					$state = self::COLON_STATE_TEXT;
				} else {
					# Probably we're jumping the gun, and this is an attribute
					$state = self::COLON_STATE_TAG;
				}
				break;
			case 5: # self::COLON_STATE_COMMENT:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASH;
				}
				break;
			case self::COLON_STATE_COMMENTDASH:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASHDASH;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			case self::COLON_STATE_COMMENTDASHDASH:
				if ( $c === ">" ) {
					$state = self::COLON_STATE_TEXT;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			default:
				throw new MWException( "State machine error in " . __METHOD__ );
			}
		}
		if ( $stack > 0 ) {
			wfDebug( __METHOD__ . ": Invalid input; not enough close tags (stack $stack, state $state)\n" );
			wfProfileOut( __METHOD__ );
			return false;
		}
		wfProfileOut( __METHOD__ );
		return false;
	}

	/**
	 * Return the value of a magic variable, selected by its ID.
	 *
	 * @param string $index Magic variable ID, e.g. 'pagename'
	 * @param bool|PPFrame $frame Passed on to the
	 *   ParserGetVariableValueSwitch hook for IDs not handled here
	 * @throws MWException If no title is set on the parser
	 * @return mixed The variable's value; null for an unknown ID that no
	 *   hook supplied a value for
	 */
	function getVariableValue( $index, $frame = false ) {
		global $wgContLang, $wgSitename, $wgServer;
		global $wgArticlePath, $wgScriptPath, $wgStylePath;

		if ( is_null( $this->mTitle ) ) {
			// If no title set, bad things are going to happen
			// later. Title should always be set since this
			// should only be called in the middle of a parse
			// operation (but the unit-tests do funky stuff)
			throw new MWException( __METHOD__ . ' Should only be '
				. ' called while parsing (no title set)' );
		}

		if ( wfRunHooks( 'ParserGetVariableValueVarCache', array( &$this, &$this->mVarCache ) ) ) {
			if ( isset( $this->mVarCache[$index] ) ) {
				return $this->mVarCache[$index];
			}
		}

		$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
		wfRunHooks( 'ParserGetVariableValueTs', array( &$this, &$ts ) );

		# Use the time zone
		global $wgLocaltimezone;
		if ( isset( $wgLocaltimezone ) ) {
			$oldtz = date_default_timezone_get();
			date_default_timezone_set( $wgLocaltimezone );
		}

		$localTimestamp = date( 'YmdHis', $ts );
		$localMonth = date( 'm', $ts );
		$localMonth1 = date( 'n', $ts );
		$localMonthName = date( 'n', $ts );
		$localDay = date( 'j', $ts );
		$localDay2 = date( 'd', $ts );
		$localDayOfWeek = date( 'w', $ts );
		$localWeek = date( 'W', $ts );
		$localYear = date( 'Y', $ts );
		$localHour = date( 'H', $ts );
		if ( isset( $wgLocaltimezone ) ) {
			date_default_timezone_set( $oldtz );
		}

		$pageLang = $this->getFunctionLang();

		switch ( $index ) {
			case 'currentmonth':
				$value = $pageLang->formatNum( gmdate( 'm', $ts ) );
				break;
case 'currentmonth1': 02691 $value = $pageLang->formatNum( gmdate( 'n', $ts ) ); 02692 break; 02693 case 'currentmonthname': 02694 $value = $pageLang->getMonthName( gmdate( 'n', $ts ) ); 02695 break; 02696 case 'currentmonthnamegen': 02697 $value = $pageLang->getMonthNameGen( gmdate( 'n', $ts ) ); 02698 break; 02699 case 'currentmonthabbrev': 02700 $value = $pageLang->getMonthAbbreviation( gmdate( 'n', $ts ) ); 02701 break; 02702 case 'currentday': 02703 $value = $pageLang->formatNum( gmdate( 'j', $ts ) ); 02704 break; 02705 case 'currentday2': 02706 $value = $pageLang->formatNum( gmdate( 'd', $ts ) ); 02707 break; 02708 case 'localmonth': 02709 $value = $pageLang->formatNum( $localMonth ); 02710 break; 02711 case 'localmonth1': 02712 $value = $pageLang->formatNum( $localMonth1 ); 02713 break; 02714 case 'localmonthname': 02715 $value = $pageLang->getMonthName( $localMonthName ); 02716 break; 02717 case 'localmonthnamegen': 02718 $value = $pageLang->getMonthNameGen( $localMonthName ); 02719 break; 02720 case 'localmonthabbrev': 02721 $value = $pageLang->getMonthAbbreviation( $localMonthName ); 02722 break; 02723 case 'localday': 02724 $value = $pageLang->formatNum( $localDay ); 02725 break; 02726 case 'localday2': 02727 $value = $pageLang->formatNum( $localDay2 ); 02728 break; 02729 case 'pagename': 02730 $value = wfEscapeWikiText( $this->mTitle->getText() ); 02731 break; 02732 case 'pagenamee': 02733 $value = wfEscapeWikiText( $this->mTitle->getPartialURL() ); 02734 break; 02735 case 'fullpagename': 02736 $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() ); 02737 break; 02738 case 'fullpagenamee': 02739 $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() ); 02740 break; 02741 case 'subpagename': 02742 $value = wfEscapeWikiText( $this->mTitle->getSubpageText() ); 02743 break; 02744 case 'subpagenamee': 02745 $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() ); 02746 break; 02747 case 'basepagename': 02748 $value = wfEscapeWikiText( 
$this->mTitle->getBaseText() ); 02749 break; 02750 case 'basepagenamee': 02751 $value = wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) ) ); 02752 break; 02753 case 'talkpagename': 02754 if ( $this->mTitle->canTalk() ) { 02755 $talkPage = $this->mTitle->getTalkPage(); 02756 $value = wfEscapeWikiText( $talkPage->getPrefixedText() ); 02757 } else { 02758 $value = ''; 02759 } 02760 break; 02761 case 'talkpagenamee': 02762 if ( $this->mTitle->canTalk() ) { 02763 $talkPage = $this->mTitle->getTalkPage(); 02764 $value = wfEscapeWikiText( $talkPage->getPrefixedUrl() ); 02765 } else { 02766 $value = ''; 02767 } 02768 break; 02769 case 'subjectpagename': 02770 $subjPage = $this->mTitle->getSubjectPage(); 02771 $value = wfEscapeWikiText( $subjPage->getPrefixedText() ); 02772 break; 02773 case 'subjectpagenamee': 02774 $subjPage = $this->mTitle->getSubjectPage(); 02775 $value = wfEscapeWikiText( $subjPage->getPrefixedUrl() ); 02776 break; 02777 case 'pageid': // requested in bug 23427 02778 $pageid = $this->getTitle()->getArticleId(); 02779 if( $pageid == 0 ) { 02780 # 0 means the page doesn't exist in the database, 02781 # which means the user is previewing a new page. 02782 # The vary-revision flag must be set, because the magic word 02783 # will have a different value once the page is saved. 02784 $this->mOutput->setFlag( 'vary-revision' ); 02785 wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" ); 02786 } 02787 $value = $pageid ? $pageid : null; 02788 break; 02789 case 'revisionid': 02790 # Let the edit saving system know we should parse the page 02791 # *after* a revision ID has been assigned. 02792 $this->mOutput->setFlag( 'vary-revision' ); 02793 wfDebug( __METHOD__ . 
": {{REVISIONID}} used, setting vary-revision...\n" ); 02794 $value = $this->mRevisionId; 02795 break; 02796 case 'revisionday': 02797 # Let the edit saving system know we should parse the page 02798 # *after* a revision ID has been assigned. This is for null edits. 02799 $this->mOutput->setFlag( 'vary-revision' ); 02800 wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" ); 02801 $value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) ); 02802 break; 02803 case 'revisionday2': 02804 # Let the edit saving system know we should parse the page 02805 # *after* a revision ID has been assigned. This is for null edits. 02806 $this->mOutput->setFlag( 'vary-revision' ); 02807 wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" ); 02808 $value = substr( $this->getRevisionTimestamp(), 6, 2 ); 02809 break; 02810 case 'revisionmonth': 02811 # Let the edit saving system know we should parse the page 02812 # *after* a revision ID has been assigned. This is for null edits. 02813 $this->mOutput->setFlag( 'vary-revision' ); 02814 wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" ); 02815 $value = substr( $this->getRevisionTimestamp(), 4, 2 ); 02816 break; 02817 case 'revisionmonth1': 02818 # Let the edit saving system know we should parse the page 02819 # *after* a revision ID has been assigned. This is for null edits. 02820 $this->mOutput->setFlag( 'vary-revision' ); 02821 wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" ); 02822 $value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) ); 02823 break; 02824 case 'revisionyear': 02825 # Let the edit saving system know we should parse the page 02826 # *after* a revision ID has been assigned. This is for null edits. 02827 $this->mOutput->setFlag( 'vary-revision' ); 02828 wfDebug( __METHOD__ . 
": {{REVISIONYEAR}} used, setting vary-revision...\n" ); 02829 $value = substr( $this->getRevisionTimestamp(), 0, 4 ); 02830 break; 02831 case 'revisiontimestamp': 02832 # Let the edit saving system know we should parse the page 02833 # *after* a revision ID has been assigned. This is for null edits. 02834 $this->mOutput->setFlag( 'vary-revision' ); 02835 wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" ); 02836 $value = $this->getRevisionTimestamp(); 02837 break; 02838 case 'revisionuser': 02839 # Let the edit saving system know we should parse the page 02840 # *after* a revision ID has been assigned. This is for null edits. 02841 $this->mOutput->setFlag( 'vary-revision' ); 02842 wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" ); 02843 $value = $this->getRevisionUser(); 02844 break; 02845 case 'namespace': 02846 $value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) ); 02847 break; 02848 case 'namespacee': 02849 $value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) ); 02850 break; 02851 case 'namespacenumber': 02852 $value = $this->mTitle->getNamespace(); 02853 break; 02854 case 'talkspace': 02855 $value = $this->mTitle->canTalk() ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() ) : ''; 02856 break; 02857 case 'talkspacee': 02858 $value = $this->mTitle->canTalk() ? 
wfUrlencode( $this->mTitle->getTalkNsText() ) : ''; 02859 break; 02860 case 'subjectspace': 02861 $value = $this->mTitle->getSubjectNsText(); 02862 break; 02863 case 'subjectspacee': 02864 $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) ); 02865 break; 02866 case 'currentdayname': 02867 $value = $pageLang->getWeekdayName( gmdate( 'w', $ts ) + 1 ); 02868 break; 02869 case 'currentyear': 02870 $value = $pageLang->formatNum( gmdate( 'Y', $ts ), true ); 02871 break; 02872 case 'currenttime': 02873 $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false ); 02874 break; 02875 case 'currenthour': 02876 $value = $pageLang->formatNum( gmdate( 'H', $ts ), true ); 02877 break; 02878 case 'currentweek': 02879 # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to 02880 # int to remove the padding 02881 $value = $pageLang->formatNum( (int)gmdate( 'W', $ts ) ); 02882 break; 02883 case 'currentdow': 02884 $value = $pageLang->formatNum( gmdate( 'w', $ts ) ); 02885 break; 02886 case 'localdayname': 02887 $value = $pageLang->getWeekdayName( $localDayOfWeek + 1 ); 02888 break; 02889 case 'localyear': 02890 $value = $pageLang->formatNum( $localYear, true ); 02891 break; 02892 case 'localtime': 02893 $value = $pageLang->time( $localTimestamp, false, false ); 02894 break; 02895 case 'localhour': 02896 $value = $pageLang->formatNum( $localHour, true ); 02897 break; 02898 case 'localweek': 02899 # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to 02900 # int to remove the padding 02901 $value = $pageLang->formatNum( (int)$localWeek ); 02902 break; 02903 case 'localdow': 02904 $value = $pageLang->formatNum( $localDayOfWeek ); 02905 break; 02906 case 'numberofarticles': 02907 $value = $pageLang->formatNum( SiteStats::articles() ); 02908 break; 02909 case 'numberoffiles': 02910 $value = $pageLang->formatNum( SiteStats::images() ); 02911 break; 02912 case 'numberofusers': 02913 $value = $pageLang->formatNum( SiteStats::users() ); 02914 break; 02915 case 
'numberofactiveusers': 02916 $value = $pageLang->formatNum( SiteStats::activeUsers() ); 02917 break; 02918 case 'numberofpages': 02919 $value = $pageLang->formatNum( SiteStats::pages() ); 02920 break; 02921 case 'numberofadmins': 02922 $value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) ); 02923 break; 02924 case 'numberofedits': 02925 $value = $pageLang->formatNum( SiteStats::edits() ); 02926 break; 02927 case 'numberofviews': 02928 global $wgDisableCounters; 02929 $value = !$wgDisableCounters ? $pageLang->formatNum( SiteStats::views() ) : ''; 02930 break; 02931 case 'currenttimestamp': 02932 $value = wfTimestamp( TS_MW, $ts ); 02933 break; 02934 case 'localtimestamp': 02935 $value = $localTimestamp; 02936 break; 02937 case 'currentversion': 02938 $value = SpecialVersion::getVersion(); 02939 break; 02940 case 'articlepath': 02941 return $wgArticlePath; 02942 case 'sitename': 02943 return $wgSitename; 02944 case 'server': 02945 return $wgServer; 02946 case 'servername': 02947 $serverParts = wfParseUrl( $wgServer ); 02948 return $serverParts && isset( $serverParts['host'] ) ? 
$serverParts['host'] : $wgServer;
			case 'scriptpath':
				return $wgScriptPath;
			case 'stylepath':
				return $wgStylePath;
			case 'directionmark':
				return $pageLang->getDirMark();
			case 'contentlanguage':
				global $wgLanguageCode;
				return $wgLanguageCode;
			default:
				$ret = null;
				if ( wfRunHooks( 'ParserGetVariableValueSwitch', array( &$this, &$this->mVarCache, &$index, &$ret, &$frame ) ) ) {
					return $ret;
				} else {
					return null;
				}
		}

		if ( $index ) {
			$this->mVarCache[$index] = $value;
		}

		return $value;
	}

	/**
	 * Build the MagicWordArray objects for variable IDs and subst IDs and
	 * store them in $this->mVariables and $this->mSubstWords.
	 */
	function initialiseVariables() {
		wfProfileIn( __METHOD__ );
		$variableIDs = MagicWord::getVariableIDs();
		$substIDs = MagicWord::getSubstIDs();

		$this->mVariables = new MagicWordArray( $variableIDs );
		$this->mSubstWords = new MagicWordArray( $substIDs );
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Preprocess wikitext into a document tree by delegating to the
	 * preprocessor's preprocessToObj().
	 *
	 * @param string $text The wikitext to preprocess
	 * @param int $flags Bit field of PTD_* flags (e.g. self::PTD_FOR_INCLUSION),
	 *   passed through unchanged
	 * @return mixed Whatever the preprocessor's preprocessToObj() returns
	 */
	function preprocessToDom( $text, $flags = 0 ) {
		$dom = $this->getPreprocessor()->preprocessToObj( $text, $flags );
		return $dom;
	}

	/**
	 * Split a string into its leading whitespace, its trimmed content and
	 * its trailing whitespace.
	 *
	 * @param string $s
	 * @return array array( leading whitespace, trimmed text, trailing whitespace )
	 */
	public static function splitWhitespace( $s ) {
		$ltrimmed = ltrim( $s );
		# Whatever ltrim() removed is the leading whitespace
		$w1 = substr( $s, 0, strlen( $s ) - strlen( $ltrimmed ) );
		$trimmed = rtrim( $ltrimmed );
		$diff = strlen( $ltrimmed ) - strlen( $trimmed );
		if ( $diff > 0 ) {
			$w2 = substr( $ltrimmed, -$diff );
		} else {
			# substr( $x, -0 ) would return the whole string, so special-case it
			$w2 = '';
		}
		return array( $w1, $trimmed, $w2 );
	}

	/**
	 * Expand the preprocessor constructs in a piece of wikitext by
	 * preprocessing it to a DOM and expanding that DOM in a frame.
	 *
	 * The text is returned untouched when it is empty or longer than the
	 * maximum include size from the parser options.
	 *
	 * @param string $text The text to transform
	 * @param bool|PPFrame|array $frame Frame to expand in. False creates a
	 *   new frame; any other non-PPFrame value is handed to
	 *   newCustomFrame() to build one.
	 * @param bool $argsOnly If true, expand with PPFrame::NO_TEMPLATES
	 * @return string
	 */
	function replaceVariables( $text, $frame = false, $argsOnly = false ) {
		# Is there any text? Also, Prevent too big inclusions!
		if ( strlen( $text ) < 1 || strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) {
			return $text;
		}
		wfProfileIn( __METHOD__ );

		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} elseif ( !( $frame instanceof PPFrame ) ) {
			wfDebug( __METHOD__ . " called using plain parameters instead of a PPFrame instance. Creating custom frame.\n" );
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}

		$dom = $this->preprocessToDom( $text );
		$flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
		$text = $frame->expand( $dom, $flags );

		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Turn a list of raw argument strings into an associative array.
	 *
	 * An argument without '=' is stored under the next positional index,
	 * starting at 1. An argument with '=' is split at the first '=' into a
	 * trimmed name and a trimmed value and stored under that name, so a
	 * later argument with the same name overwrites an earlier one.
	 *
	 * @param array $args List of argument strings
	 * @return array
	 */
	static function createAssocArgs( $args ) {
		$assocArgs = array();
		$index = 1;
		foreach ( $args as $arg ) {
			$eqpos = strpos( $arg, '=' );
			if ( $eqpos === false ) {
				$assocArgs[$index++] = $arg;
			} else {
				# trim() always returns a string ('' for an empty or failed
				# substr()), so no check against false is needed here.
				$name = trim( substr( $arg, 0, $eqpos ) );
				$value = trim( substr( $arg, $eqpos+1 ) );
				$assocArgs[$name] = $value;
			}
		}

		return $assocArgs;
	}

	/**
	 * Add a "<limitationType>-warning" message to the parser output and put
	 * the page in the "<limitationType>-category" tracking category.
	 *
	 * @param string $limitationType Prefix of the message and category keys
	 * @param string|int $current Current value, passed to the message as a
	 *   numeric parameter
	 * @param string|int $max Maximum allowed value, passed to the message
	 *   as a numeric parameter
	 */
	function limitationWarn( $limitationType, $current = '', $max = '' ) {
		# does no harm if $current and $max are present but are unnecessary for the message
		$warning = wfMessage( "$limitationType-warning" )->numParams( $current, $max )
			->inContentLanguage()->escaped();
		$this->mOutput->addWarning( $warning );
		$this->addTrackingCategory( "$limitationType-category" );
	}

	/**
	 * Expand one {{...}} construct: a subst/variable/parser function/template
	 * call described by a preprocessor piece.
	 *
	 * @param array $piece Uses the keys 'title', 'parts' and 'lineStart'
	 * @param PPFrame $frame The frame the construct is expanded in
	 * @return array Either array( 'object' => ... ) or array( 'text' => ... )
	 */
	function braceSubstitution( $piece, $frame ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		wfProfileIn( __METHOD__ .
'-setup' ); 03151 03152 # Flags 03153 $found = false; # $text has been filled 03154 $nowiki = false; # wiki markup in $text should be escaped 03155 $isHTML = false; # $text is HTML, armour it against wikitext transformation 03156 $forceRawInterwiki = false; # Force interwiki transclusion to be done in raw mode not rendered 03157 $isChildObj = false; # $text is a DOM node needing expansion in a child frame 03158 $isLocalObj = false; # $text is a DOM node needing expansion in the current frame 03159 03160 # Title object, where $text came from 03161 $title = false; 03162 03163 # $part1 is the bit before the first |, and must contain only title characters. 03164 # Various prefixes will be stripped from it later. 03165 $titleWithSpaces = $frame->expand( $piece['title'] ); 03166 $part1 = trim( $titleWithSpaces ); 03167 $titleText = false; 03168 03169 # Original title text preserved for various purposes 03170 $originalTitle = $part1; 03171 03172 # $args is a list of argument nodes, starting from index 0, not including $part1 03173 # @todo FIXME: If piece['parts'] is null then the call to getLength() below won't work b/c this $args isn't an object 03174 $args = ( null == $piece['parts'] ) ? array() : $piece['parts']; 03175 wfProfileOut( __METHOD__ . '-setup' ); 03176 03177 $titleProfileIn = null; // profile templates 03178 03179 # SUBST 03180 wfProfileIn( __METHOD__ . '-modifiers' ); 03181 if ( !$found ) { 03182 03183 $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 ); 03184 03185 # Possibilities for substMatch: "subst", "safesubst" or FALSE 03186 # Decide whether to expand template or keep wikitext as-is. 
03187 if ( $this->ot['wiki'] ) { 03188 if ( $substMatch === false ) { 03189 $literal = true; # literal when in PST with no prefix 03190 } else { 03191 $literal = false; # expand when in PST with subst: or safesubst: 03192 } 03193 } else { 03194 if ( $substMatch == 'subst' ) { 03195 $literal = true; # literal when not in PST with plain subst: 03196 } else { 03197 $literal = false; # expand when not in PST with safesubst: or no prefix 03198 } 03199 } 03200 if ( $literal ) { 03201 $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); 03202 $isLocalObj = true; 03203 $found = true; 03204 } 03205 } 03206 03207 # Variables 03208 if ( !$found && $args->getLength() == 0 ) { 03209 $id = $this->mVariables->matchStartToEnd( $part1 ); 03210 if ( $id !== false ) { 03211 $text = $this->getVariableValue( $id, $frame ); 03212 if ( MagicWord::getCacheTTL( $id ) > -1 ) { 03213 $this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) ); 03214 } 03215 $found = true; 03216 } 03217 } 03218 03219 # MSG, MSGNW and RAW 03220 if ( !$found ) { 03221 # Check for MSGNW: 03222 $mwMsgnw = MagicWord::get( 'msgnw' ); 03223 if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) { 03224 $nowiki = true; 03225 } else { 03226 # Remove obsolete MSG: 03227 $mwMsg = MagicWord::get( 'msg' ); 03228 $mwMsg->matchStartAndRemove( $part1 ); 03229 } 03230 03231 # Check for RAW: 03232 $mwRaw = MagicWord::get( 'raw' ); 03233 if ( $mwRaw->matchStartAndRemove( $part1 ) ) { 03234 $forceRawInterwiki = true; 03235 } 03236 } 03237 wfProfileOut( __METHOD__ . '-modifiers' ); 03238 03239 # Parser functions 03240 if ( !$found ) { 03241 wfProfileIn( __METHOD__ . 
'-pfunc' ); 03242 03243 $colonPos = strpos( $part1, ':' ); 03244 if ( $colonPos !== false ) { 03245 $func = substr( $part1, 0, $colonPos ); 03246 $funcArgs = array( trim( substr( $part1, $colonPos + 1 ) ) ); 03247 for ( $i = 0; $i < $args->getLength(); $i++ ) { 03248 $funcArgs[] = $args->item( $i ); 03249 } 03250 try { 03251 $result = $this->callParserFunction( $frame, $func, $funcArgs ); 03252 } catch ( Exception $ex ) { 03253 wfProfileOut( __METHOD__ . '-pfunc' ); 03254 throw $ex; 03255 } 03256 03257 # The interface for parser functions allows for extracting 03258 # flags into the local scope. Extract any forwarded flags 03259 # here. 03260 extract( $result ); 03261 } 03262 wfProfileOut( __METHOD__ . '-pfunc' ); 03263 } 03264 03265 # Finish mangling title and then check for loops. 03266 # Set $title to a Title object and $titleText to the PDBK 03267 if ( !$found ) { 03268 $ns = NS_TEMPLATE; 03269 # Split the title into page and subpage 03270 $subpage = ''; 03271 $part1 = $this->maybeDoSubpageLink( $part1, $subpage ); 03272 if ( $subpage !== '' ) { 03273 $ns = $this->mTitle->getNamespace(); 03274 } 03275 $title = Title::newFromText( $part1, $ns ); 03276 if ( $title ) { 03277 $titleText = $title->getPrefixedText(); 03278 # Check for language variants if the template is not found 03279 if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) { 03280 $this->getConverterLanguage()->findVariantLink( $part1, $title, true ); 03281 } 03282 # Do recursion depth check 03283 $limit = $this->mOptions->getMaxTemplateDepth(); 03284 if ( $frame->depth >= $limit ) { 03285 $found = true; 03286 $text = '<span class="error">' 03287 . wfMessage( 'parser-template-recursion-depth-warning' ) 03288 ->numParams( $limit )->inContentLanguage()->text() 03289 . 
'</span>'; 03290 } 03291 } 03292 } 03293 03294 # Load from database 03295 if ( !$found && $title ) { 03296 if ( !Profiler::instance()->isPersistent() ) { 03297 # Too many unique items can kill profiling DBs/collectors 03298 $titleProfileIn = __METHOD__ . "-title-" . $title->getDBKey(); 03299 wfProfileIn( $titleProfileIn ); // template in 03300 } 03301 wfProfileIn( __METHOD__ . '-loadtpl' ); 03302 if ( !$title->isExternal() ) { 03303 if ( $title->isSpecialPage() 03304 && $this->mOptions->getAllowSpecialInclusion() 03305 && $this->ot['html'] ) 03306 { 03307 // Pass the template arguments as URL parameters. 03308 // "uselang" will have no effect since the Language object 03309 // is forced to the one defined in ParserOptions. 03310 $pageArgs = array(); 03311 for ( $i = 0; $i < $args->getLength(); $i++ ) { 03312 $bits = $args->item( $i )->splitArg(); 03313 if ( strval( $bits['index'] ) === '' ) { 03314 $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) ); 03315 $value = trim( $frame->expand( $bits['value'] ) ); 03316 $pageArgs[$name] = $value; 03317 } 03318 } 03319 03320 // Create a new context to execute the special page 03321 $context = new RequestContext; 03322 $context->setTitle( $title ); 03323 $context->setRequest( new FauxRequest( $pageArgs ) ); 03324 $context->setUser( $this->getUser() ); 03325 $context->setLanguage( $this->mOptions->getUserLangObj() ); 03326 $ret = SpecialPageFactory::capturePath( $title, $context ); 03327 if ( $ret ) { 03328 $text = $context->getOutput()->getHTML(); 03329 $this->mOutput->addOutputPageMetadata( $context->getOutput() ); 03330 $found = true; 03331 $isHTML = true; 03332 $this->disableCache(); 03333 } 03334 } elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) { 03335 $found = false; # access denied 03336 wfDebug( __METHOD__ . ": template inclusion denied for " . 
$title->getPrefixedDBkey() ); 03337 } else { 03338 list( $text, $title ) = $this->getTemplateDom( $title ); 03339 if ( $text !== false ) { 03340 $found = true; 03341 $isChildObj = true; 03342 } 03343 } 03344 03345 # If the title is valid but undisplayable, make a link to it 03346 if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) { 03347 $text = "[[:$titleText]]"; 03348 $found = true; 03349 } 03350 } elseif ( $title->isTrans() ) { 03351 # Interwiki transclusion 03352 if ( $this->ot['html'] && !$forceRawInterwiki ) { 03353 $text = $this->interwikiTransclude( $title, 'render' ); 03354 $isHTML = true; 03355 } else { 03356 $text = $this->interwikiTransclude( $title, 'raw' ); 03357 # Preprocess it like a template 03358 $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION ); 03359 $isChildObj = true; 03360 } 03361 $found = true; 03362 } 03363 03364 # Do infinite loop check 03365 # This has to be done after redirect resolution to avoid infinite loops via redirects 03366 if ( !$frame->loopCheck( $title ) ) { 03367 $found = true; 03368 $text = '<span class="error">' 03369 . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text() 03370 . '</span>'; 03371 wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" ); 03372 } 03373 wfProfileOut( __METHOD__ . 
'-loadtpl' ); 03374 } 03375 03376 # If we haven't found text to substitute by now, we're done 03377 # Recover the source wikitext and return it 03378 if ( !$found ) { 03379 $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); 03380 if ( $titleProfileIn ) { 03381 wfProfileOut( $titleProfileIn ); // template out 03382 } 03383 wfProfileOut( __METHOD__ ); 03384 return array( 'object' => $text ); 03385 } 03386 03387 # Expand DOM-style return values in a child frame 03388 if ( $isChildObj ) { 03389 # Clean up argument array 03390 $newFrame = $frame->newChild( $args, $title ); 03391 03392 if ( $nowiki ) { 03393 $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG ); 03394 } elseif ( $titleText !== false && $newFrame->isEmpty() ) { 03395 # Expansion is eligible for the empty-frame cache 03396 if ( isset( $this->mTplExpandCache[$titleText] ) ) { 03397 $text = $this->mTplExpandCache[$titleText]; 03398 } else { 03399 $text = $newFrame->expand( $text ); 03400 $this->mTplExpandCache[$titleText] = $text; 03401 } 03402 } else { 03403 # Uncached expansion 03404 $text = $newFrame->expand( $text ); 03405 } 03406 } 03407 if ( $isLocalObj && $nowiki ) { 03408 $text = $frame->expand( $text, PPFrame::RECOVER_ORIG ); 03409 $isLocalObj = false; 03410 } 03411 03412 if ( $titleProfileIn ) { 03413 wfProfileOut( $titleProfileIn ); // template out 03414 } 03415 03416 # Replace raw HTML by a placeholder 03417 if ( $isHTML ) { 03418 $text = $this->insertStripItem( $text ); 03419 } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) { 03420 # Escape nowiki-style return values 03421 $text = wfEscapeWikiText( $text ); 03422 } elseif ( is_string( $text ) 03423 && !$piece['lineStart'] 03424 && preg_match( '/^(?:{\\||:|;|#|\*)/', $text ) ) 03425 { 03426 # Bug 529: if the template begins with a table or block-level 03427 # element, it should be treated as beginning a new line. 03428 # This behavior is somewhat controversial. 03429 $text = "\n" . 
/**
 * Call the parser function registered under $function and normalise the
 * value returned by its hook.
 *
 * The name is looked up among the case-sensitive synonyms first; failing
 * that it is lower-cased with the content language and looked up among the
 * case-insensitive ones.
 *
 * @param $frame Frame used to expand PPNode arguments; passed on to hooks
 *   registered with the SFH_OBJECT_ARGS flag
 * @param string $function Function name to look up
 * @param array $args Function arguments; values may be PPNode objects or
 *   plain values
 * @return array array( 'found' => false ) when no synonym matches. Otherwise
 *   the hook's result as an array with a 'found' key (true unless the hook set
 *   it) and, when the hook supplied one, a 'text' key (element 0 of the hook's
 *   array is moved there if 'text' was not set). If the hook sets 'noparse' to
 *   a false value, 'text' is replaced by the result of preprocessToDom() and
 *   'isChildObj' is set to true.
 * @throws MWException If the registered callback is not callable
 */
public function callParserFunction( $frame, $function, array $args = array() ) {
	global $wgContLang;

	wfProfileIn( __METHOD__ );

	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $wgContLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			wfProfileOut( __METHOD__ );
			return array( 'found' => false );
		}
	}

	wfProfileIn( __METHOD__ . '-pfunc-' . $function );
	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	# Workaround for PHP bug 35229 and similar
	if ( !is_callable( $callback ) ) {
		wfProfileOut( __METHOD__ . '-pfunc-' . $function );
		wfProfileOut( __METHOD__ );
		# This is a parser *function* hook, so say so (the old text said "Tag hook")
		throw new MWException( "Function hook for $function is not callable\n" );
	}

	# The parser itself is always the first argument handed to the hook
	$allArgs = array( &$this );
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = array();
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode || $k === 0 ) {
				$funcArgs[] = $v;
			} else {
				$funcArgs[] = $this->mPreprocessor->newPartNodeArray( array( $k => $v ) )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				# Named (or negative-index) argument: rebuild "key=value"
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = call_user_func_array( $callback, $allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = array(
			'found' => true,
			'text' => $result,
		);
	} else {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		# Array union: only adds 'found' when the hook did not set it itself
		$result += array(
			'found' => true,
		);
	}

	$noparse = true;
	$preprocessFlags = 0;
	if ( isset( $result['noparse'] ) ) {
		$noparse = $result['noparse'];
	}
	if ( isset( $result['preprocessFlags'] ) ) {
		$preprocessFlags = $result['preprocessFlags'];
	}

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}
	wfProfileOut( __METHOD__ . '-pfunc-' . $function );
	wfProfileOut( __METHOD__ );

	return $result;
}

/**
 * Get the preprocessed DOM of a template, using the per-parse caches
 * mTplRedirCache (title key => resolved namespace/DB key) and mTplDomCache
 * (title key => DOM, or false when no text was found).
 *
 * @param $title Title-like object of the template (must provide
 *   getPrefixedDBkey() and equals())
 * @return array array( $dom, $title ): $dom is false when the template text
 *   could not be fetched; $title is the title the text was finally taken from
 */
function getTemplateDom( $title ) {
	$cacheTitle = $title;
	$titleText = $title->getPrefixedDBkey();

	# A redirect resolved earlier in this parse: jump straight to its target
	if ( isset( $this->mTplRedirCache[$titleText] ) ) {
		list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
		$title = Title::makeTitle( $ns, $dbk );
		$titleText = $title->getPrefixedDBkey();
	}
	if ( isset( $this->mTplDomCache[$titleText] ) ) {
		return array( $this->mTplDomCache[$titleText], $title );
	}

	# Cache miss, go to the database
	list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $text === false ) {
		$this->mTplDomCache[$titleText] = false;
		return array( false, $title );
	}

	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$titleText] = $dom;

	# The fetch ended on a different title: remember the mapping
	if ( !$title->equals( $cacheTitle ) ) {
		$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
			array( $title->getNamespace(), $title->getDBkey() );
	}

	return array( $dom, $title );
}
/**
 * Fetch the text of a template through the template callback configured in
 * the parser options, and register every reported dependency with the
 * parser output.
 *
 * @param $title Title of the template to fetch
 * @return array array( $text, $finalTitle ); $finalTitle falls back to $title
 *   when the callback does not report one
 */
function fetchTemplateAndTitle( $title ) {
	# Defaults to Parser::statelessFetchTemplate()
	$fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );
	$text = $fetched['text'];

	if ( isset( $fetched['finalTitle'] ) ) {
		$finalTitle = $fetched['finalTitle'];
	} else {
		$finalTitle = $title;
	}

	if ( isset( $fetched['deps'] ) ) {
		foreach ( $fetched['deps'] as $dependency ) {
			$this->mOutput->addTemplate(
				$dependency['title'],
				$dependency['page_id'],
				$dependency['rev_id']
			);
		}
	}

	return array( $text, $finalTitle );
}

/**
 * Fetch only the text of a template; see fetchTemplateAndTitle().
 *
 * @param $title Title of the template to fetch
 * @return mixed First element of fetchTemplateAndTitle()'s result
 */
function fetchTemplate( $title ) {
	list( $text ) = $this->fetchTemplateAndTitle( $title );
	return $text;
}

/**
 * Static template fetcher that does not touch parser state.
 *
 * Loads the requested page and follows at most one redirect. The
 * 'BeforeParserFetchTemplateAndtitle' hook may skip the fetch or pick a
 * specific revision id. Pages in NS_MEDIAWIKI that have no revision are
 * served from the corresponding interface message, if it exists.
 *
 * @param $title Title of the template
 * @param $parser Passed through to the hook; defaults to false
 * @return array With keys 'text' (the text, or false when unavailable),
 *   'finalTitle' (the last title a fetch was attempted for) and 'deps' (list of
 *   arrays with 'title', 'page_id' and 'rev_id')
 */
static function statelessFetchTemplate( $title, $parser = false ) {
	global $wgContLang;

	$text = false;
	$skip = false;
	$finalTitle = $title;
	$deps = array();

	# Two passes at most: the page itself, then the target of one redirect
	for ( $pass = 0; $pass < 2 && is_object( $title ); $pass++ ) {
		# Give extensions a chance to select the revision instead
		$id = false; # Assume current
		wfRunHooks(
			'BeforeParserFetchTemplateAndtitle',
			array( $parser, $title, &$skip, &$id )
		);

		if ( $skip ) {
			$text = false;
			$deps[] = array(
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null,
			);
			break;
		}

		# Get the revision
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} else {
			$rev = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
		}

		$revId = 0;
		if ( $rev ) {
			$revId = $rev->getId();
		} elseif ( $id === false ) {
			# If there is no current revision, there is no page
			LinkCache::singleton()->addBadLinkObj( $title );
		}

		$deps[] = array(
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $revId,
		);
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			# We fetched a rev from a different title; register it too...
			$deps[] = array(
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $revId,
			);
		}

		if ( $rev ) {
			$content = $rev->getContent();
			$text = $content ? $content->getWikitextForTransclusion() : null;

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			$message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}

		if ( !$content ) {
			break;
		}

		# Redirect?
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}

	return array(
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps,
	);
}

/**
 * Fetch a file; see fetchFileAndTitle().
 *
 * @param $title Title of the file
 * @param array $options Lookup options, passed through unchanged
 * @return mixed First element of fetchFileAndTitle()'s result
 */
function fetchFile( $title, $options = array() ) {
	list( $file ) = $this->fetchFileAndTitle( $title, $options );
	return $file;
}

/**
 * Fetch a file and the title it was found under, registering the file as
 * an image dependency of the parser output.
 *
 * @param $title Title of the file
 * @param array $options Lookup options: 'broken' forces a failed lookup,
 *   'sha1' switches to lookup by key; otherwise lookup is by name
 * @return array array( $file, $title ); $file is false when 'broken' is set,
 *   otherwise whatever the lookup returned
 */
function fetchFileAndTitle( $title, $options = array() ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		$file = false;
	} elseif ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		$file = RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
	} else {
		// get by (name,timestamp)
		$file = wfFindFile( $title, $options );
	}

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	if ( $file && !$title->equals( $file->getTitle() ) ) {
		# Update fetched file title
		$title = $file->getTitle();
		if ( $file->getRedirectedTitle() === null ) {
			# This file was not a redirect, but the title does not match.
			# Register under the new name because otherwise the link will
			# get lost.
			$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
		}
	}

	return array( $file, $title );
}

/**
 * Transclude a page from another wiki over HTTP ("scary transclusion").
 *
 * @param $title Title whose full URL is requested
 * @param string $action Value placed in the URL's "action" query parameter
 * @return string The fetched text, or a content-language message when the
 *   feature is disabled or the URL is longer than 255 bytes
 */
function interwikiTransclude( $title, $action ) {
	global $wgEnableScaryTranscluding;

	$refusal = null;
	if ( !$wgEnableScaryTranscluding ) {
		$refusal = 'scarytranscludedisabled';
	} else {
		$url = $title->getFullUrl( "action=$action" );
		if ( strlen( $url ) > 255 ) {
			$refusal = 'scarytranscludetoolong';
		}
	}

	if ( $refusal !== null ) {
		return wfMessage( $refusal )->inContentLanguage()->text();
	}
	return $this->fetchScaryTemplateMaybeFromCache( $url );
}
/**
 * Resolve a triple-brace argument ({{{name|default}}}) against a frame.
 *
 * @param array $piece Must contain 'title' (expanded to obtain the argument
 *   name) and 'parts' (node list; item 0 holds the default value, if any)
 * @param $frame Frame queried with getArgument()
 * @return array array( 'object' => … ) when the default value or the literal
 *   brace construct is used, otherwise array( 'text' => … )
 */
function argSubstitution( $piece, $frame ) {
	wfProfileIn( __METHOD__ );

	$error = false;
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$argName = trim( $nameWithSpaces );
	$object = false;
	$text = $frame->getArgument( $argName );
	if ( $text === false && $parts->getLength() > 0
		&& (
			$this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() )
		)
	) {
		# No match in frame, use the supplied default
		$object = $parts->item( 0 )->getChildren();
	}
	if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
		$error = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# No match anywhere
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}
	if ( $error !== false ) {
		$text .= $error;
	}
	if ( $object !== false ) {
		$ret = array( 'object' => $object );
	} else {
		$ret = array( 'text' => $text );
	}

	wfProfileOut( __METHOD__ );
	return $ret;
}

/**
 * Render an extension tag, or re-serialise it when hooks are not run for
 * the current output type.
 *
 * Hooks run when the output type is HTML, or when the tag is a function tag
 * and the output type is HTML or "pre". Otherwise the tag is rebuilt as text
 * from its name, attributes, inner content and closing tag.
 *
 * @param array $params May contain 'name', 'attr', 'inner', 'close' (nodes
 *   expanded via $frame) and 'attributes' (extra name => value pairs)
 * @param $frame Frame used for expansion and passed to the hooks
 * @return string The hook output itself when the marker type is 'none',
 *   otherwise a strip marker under which the output has been stored
 * @throws MWException If a registered hook is not callable, or a hook asks
 *   for an unknown marker type
 */
function extensionSubstitution( $params, $frame ) {
	$name = $frame->expand( $params['name'] );
	$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
	$marker = "{$this->mUniqPrefix}-$name-" . sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	if ( $isFunctionTag ) {
		$markerType = 'none';
	} else {
		$markerType = 'general';
	}
	if ( $this->ot['html'] || $isFunctionTag ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			# Array union: attributes parsed from the tag text take precedence
			$attributes = $attributes + $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			# Workaround for PHP bug 35229 and similar
			if ( !is_callable( $this->mTagHooks[$name] ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}
			$output = call_user_func_array( $this->mTagHooks[$name],
				array( $content, $attributes, $this, $frame ) );
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}

			$output = call_user_func_array( $callback, array( &$this, $frame, $content, $attributes ) );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			# A hook may return array( $html, 'markerType' => … ). Read only
			# the flag that is meant to be overridable; extract()ing the whole
			# array would let any other key clobber locals such as $output
			# or $marker.
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		if ( is_null( $attrText ) ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			# isset() treats a missing key like null without raising a notice
			$close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}

/**
 * Add $size to the running include-size counter of the given type, unless
 * that would exceed the maximum include size from the parser options.
 *
 * @param string $type Key into mIncludeSizes (callers in this file use
 *   'post-expand' and 'arg')
 * @param int $size Amount to add
 * @return bool False, leaving the counter unchanged, if the limit would be
 *   exceeded; true after the counter has been increased
 */
function incrementIncludeSize( $type, $size ) {
	if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	} else {
		$this->mIncludeSizes[$type] += $size;
		return true;
	}
}

/**
 * Count one more expensive parser function call.
 *
 * @return bool True while the count, including this call, is within the
 *   expensive parser function limit from the parser options
 */
function incrementExpensiveFunctionCount() {
	$this->mExpensiveFunctionCount++;
	return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
}
/**
 * Add the tracking category named by an interface message to the output.
 *
 * Nothing is added when the current title is in NS_SPECIAL, when the
 * message text is "-", or when the text does not make a valid category title.
 *
 * @param string $msg Message key whose content-language text names the category
 * @return bool Whether the category was added
 */
public function addTrackingCategory( $msg ) {
	if ( $this->mTitle->getNamespace() === NS_SPECIAL ) {
		wfDebug( __METHOD__ . ": Not adding tracking category $msg to special page!\n" );
		return false;
	}

	// Important to parse with correct title (bug 31469)
	$categoryName = wfMessage( $msg )->title( $this->getTitle() )->inContentLanguage()->text();

	# Allow tracking categories to be disabled by setting them to "-"
	if ( $categoryName === '-' ) {
		return false;
	}

	$categoryTitle = Title::makeTitleSafe( NS_CATEGORY, $categoryName );
	if ( !$categoryTitle ) {
		wfDebug( __METHOD__ . ": [[MediaWiki:$msg]] is not a valid title!\n" );
		return false;
	}

	$this->mOutput->addCategory( $categoryTitle->getDBkey(), $this->getDefaultSort() );
	return true;
}
04121 $enoughToc = $this->mShowToc && 04122 ( ( $numMatches >= 4 ) || $this->mForceTocPosition ); 04123 04124 # Allow user to stipulate that a page should have a "new section" 04125 # link added via __NEWSECTIONLINK__ 04126 if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) { 04127 $this->mOutput->setNewSection( true ); 04128 } 04129 04130 # Allow user to remove the "new section" 04131 # link via __NONEWSECTIONLINK__ 04132 if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) { 04133 $this->mOutput->hideNewSection( true ); 04134 } 04135 04136 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML, 04137 # override above conditions and always show TOC above first header 04138 if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) { 04139 $this->mShowToc = true; 04140 $enoughToc = true; 04141 } 04142 04143 # headline counter 04144 $headlineCount = 0; 04145 $numVisible = 0; 04146 04147 # Ugh .. the TOC should have neat indentation levels which can be 04148 # passed to the skin functions. These are determined here 04149 $toc = ''; 04150 $full = ''; 04151 $head = array(); 04152 $sublevelCount = array(); 04153 $levelCount = array(); 04154 $level = 0; 04155 $prevlevel = 0; 04156 $toclevel = 0; 04157 $prevtoclevel = 0; 04158 $markerRegex = "{$this->mUniqPrefix}-h-(\d+)-" . 
self::MARKER_SUFFIX; 04159 $baseTitleText = $this->mTitle->getPrefixedDBkey(); 04160 $oldType = $this->mOutputType; 04161 $this->setOutputType( self::OT_WIKI ); 04162 $frame = $this->getPreprocessor()->newFrame(); 04163 $root = $this->preprocessToDom( $origText ); 04164 $node = $root->getFirstChild(); 04165 $byteOffset = 0; 04166 $tocraw = array(); 04167 $refers = array(); 04168 04169 foreach ( $matches[3] as $headline ) { 04170 $isTemplate = false; 04171 $titleText = false; 04172 $sectionIndex = false; 04173 $numbering = ''; 04174 $markerMatches = array(); 04175 if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) { 04176 $serial = $markerMatches[1]; 04177 list( $titleText, $sectionIndex ) = $this->mHeadings[$serial]; 04178 $isTemplate = ( $titleText != $baseTitleText ); 04179 $headline = preg_replace( "/^$markerRegex/", "", $headline ); 04180 } 04181 04182 if ( $toclevel ) { 04183 $prevlevel = $level; 04184 } 04185 $level = $matches[1][$headlineCount]; 04186 04187 if ( $level > $prevlevel ) { 04188 # Increase TOC level 04189 $toclevel++; 04190 $sublevelCount[$toclevel] = 0; 04191 if ( $toclevel < $wgMaxTocLevel ) { 04192 $prevtoclevel = $toclevel; 04193 $toc .= Linker::tocIndent(); 04194 $numVisible++; 04195 } 04196 } elseif ( $level < $prevlevel && $toclevel > 1 ) { 04197 # Decrease TOC level, find level to jump to 04198 04199 for ( $i = $toclevel; $i > 0; $i-- ) { 04200 if ( $levelCount[$i] == $level ) { 04201 # Found last matching level 04202 $toclevel = $i; 04203 break; 04204 } elseif ( $levelCount[$i] < $level ) { 04205 # Found first matching level below current level 04206 $toclevel = $i + 1; 04207 break; 04208 } 04209 } 04210 if ( $i == 0 ) { 04211 $toclevel = 1; 04212 } 04213 if ( $toclevel < $wgMaxTocLevel ) { 04214 if ( $prevtoclevel < $wgMaxTocLevel ) { 04215 # Unindent only if the previous toc level was shown :p 04216 $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel ); 04217 $prevtoclevel = $toclevel; 04218 } else { 04219 $toc .= 
Linker::tocLineEnd(); 04220 } 04221 } 04222 } else { 04223 # No change in level, end TOC line 04224 if ( $toclevel < $wgMaxTocLevel ) { 04225 $toc .= Linker::tocLineEnd(); 04226 } 04227 } 04228 04229 $levelCount[$toclevel] = $level; 04230 04231 # count number of headlines for each level 04232 $sublevelCount[$toclevel]++; 04233 $dot = 0; 04234 for( $i = 1; $i <= $toclevel; $i++ ) { 04235 if ( !empty( $sublevelCount[$i] ) ) { 04236 if ( $dot ) { 04237 $numbering .= '.'; 04238 } 04239 $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] ); 04240 $dot = 1; 04241 } 04242 } 04243 04244 # The safe header is a version of the header text safe to use for links 04245 04246 # Remove link placeholders by the link text. 04247 # <!--LINK number--> 04248 # turns into 04249 # link text with suffix 04250 # Do this before unstrip since link text can contain strip markers 04251 $safeHeadline = $this->replaceLinkHoldersText( $headline ); 04252 04253 # Avoid insertion of weird stuff like <math> by expanding the relevant sections 04254 $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline ); 04255 04256 # Strip out HTML (first regex removes any tag not allowed) 04257 # Allowed tags are: 04258 # * <sup> and <sub> (bug 8393) 04259 # * <i> (bug 26375) 04260 # * <b> (r105284) 04261 # * <span dir="rtl"> and <span dir="ltr"> (bug 35167) 04262 # 04263 # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>, 04264 # to allow setting directionality in toc items. 
04265 $tocline = preg_replace( 04266 array( '#<(?!/?(span|sup|sub|i|b)(?: [^>]*)?>).*?'.'>#', '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|i|b))(?: .*?)?'.'>#' ), 04267 array( '', '<$1>' ), 04268 $safeHeadline 04269 ); 04270 $tocline = trim( $tocline ); 04271 04272 # For the anchor, strip out HTML-y stuff period 04273 $safeHeadline = preg_replace( '/<.*?'.'>/', '', $safeHeadline ); 04274 $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline ); 04275 04276 # Save headline for section edit hint before it's escaped 04277 $headlineHint = $safeHeadline; 04278 04279 if ( $wgHtml5 && $wgExperimentalHtmlIds ) { 04280 # For reverse compatibility, provide an id that's 04281 # HTML4-compatible, like we used to. 04282 # 04283 # It may be worth noting, academically, that it's possible for 04284 # the legacy anchor to conflict with a non-legacy headline 04285 # anchor on the page. In this case likely the "correct" thing 04286 # would be to either drop the legacy anchors or make sure 04287 # they're numbered first. However, this would require people 04288 # to type in section names like "abc_.D7.93.D7.90.D7.A4" 04289 # manually, so let's not bother worrying about it. 04290 $legacyHeadline = Sanitizer::escapeId( $safeHeadline, 04291 array( 'noninitial', 'legacy' ) ); 04292 $safeHeadline = Sanitizer::escapeId( $safeHeadline ); 04293 04294 if ( $legacyHeadline == $safeHeadline ) { 04295 # No reason to have both (in fact, we can't) 04296 $legacyHeadline = false; 04297 } 04298 } else { 04299 $legacyHeadline = false; 04300 $safeHeadline = Sanitizer::escapeId( $safeHeadline, 04301 'noninitial' ); 04302 } 04303 04304 # HTML names must be case-insensitively unique (bug 10721). 04305 # This does not apply to Unicode characters per 04306 # http://dev.w3.org/html5/spec/infrastructure.html#case-sensitivity-and-string-comparison 04307 # @todo FIXME: We may be changing them depending on the current locale. 
04308 $arrayKey = strtolower( $safeHeadline ); 04309 if ( $legacyHeadline === false ) { 04310 $legacyArrayKey = false; 04311 } else { 04312 $legacyArrayKey = strtolower( $legacyHeadline ); 04313 } 04314 04315 # count how many in assoc. array so we can track dupes in anchors 04316 if ( isset( $refers[$arrayKey] ) ) { 04317 $refers[$arrayKey]++; 04318 } else { 04319 $refers[$arrayKey] = 1; 04320 } 04321 if ( isset( $refers[$legacyArrayKey] ) ) { 04322 $refers[$legacyArrayKey]++; 04323 } else { 04324 $refers[$legacyArrayKey] = 1; 04325 } 04326 04327 # Don't number the heading if it is the only one (looks silly) 04328 if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) { 04329 # the two are different if the line contains a link 04330 $headline = Html::element( 'span', array( 'class' => 'mw-headline-number' ), $numbering ) . ' ' . $headline; 04331 } 04332 04333 # Create the anchor for linking from the TOC to the section 04334 $anchor = $safeHeadline; 04335 $legacyAnchor = $legacyHeadline; 04336 if ( $refers[$arrayKey] > 1 ) { 04337 $anchor .= '_' . $refers[$arrayKey]; 04338 } 04339 if ( $legacyHeadline !== false && $refers[$legacyArrayKey] > 1 ) { 04340 $legacyAnchor .= '_' . $refers[$legacyArrayKey]; 04341 } 04342 if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) { 04343 $toc .= Linker::tocLine( $anchor, $tocline, 04344 $numbering, $toclevel, ( $isTemplate ? 
false : $sectionIndex ) ); 04345 } 04346 04347 # Add the section to the section tree 04348 # Find the DOM node for this header 04349 while ( $node && !$isTemplate ) { 04350 if ( $node->getName() === 'h' ) { 04351 $bits = $node->splitHeading(); 04352 if ( $bits['i'] == $sectionIndex ) { 04353 break; 04354 } 04355 } 04356 $byteOffset += mb_strlen( $this->mStripState->unstripBoth( 04357 $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) ); 04358 $node = $node->getNextSibling(); 04359 } 04360 $tocraw[] = array( 04361 'toclevel' => $toclevel, 04362 'level' => $level, 04363 'line' => $tocline, 04364 'number' => $numbering, 04365 'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex, 04366 'fromtitle' => $titleText, 04367 'byteoffset' => ( $isTemplate ? null : $byteOffset ), 04368 'anchor' => $anchor, 04369 ); 04370 04371 # give headline the correct <h#> tag 04372 if ( $maybeShowEditLink && $sectionIndex !== false ) { 04373 // Output edit section links as markers with styles that can be customized by skins 04374 if ( $isTemplate ) { 04375 # Put a T flag in the section identifier, to indicate to extractSections() 04376 # that sections inside <includeonly> should be counted. 04377 $editlinkArgs = array( $titleText, "T-$sectionIndex"/*, null */ ); 04378 } else { 04379 $editlinkArgs = array( $this->mTitle->getPrefixedText(), $sectionIndex, $headlineHint ); 04380 } 04381 // We use a bit of pesudo-xml for editsection markers. The language converter is run later on 04382 // Using a UNIQ style marker leads to the converter screwing up the tokens when it converts stuff 04383 // And trying to insert strip tags fails too. At this point all real inputted tags have already been escaped 04384 // so we don't have to worry about a user trying to input one of these markers directly. 
04385 // We use a page and section attribute to stop the language converter from converting these important bits 04386 // of data, but put the headline hint inside a content block because the language converter is supposed to 04387 // be able to convert that piece of data. 04388 $editlink = '<mw:editsection page="' . htmlspecialchars( $editlinkArgs[0] ); 04389 $editlink .= '" section="' . htmlspecialchars( $editlinkArgs[1] ) . '"'; 04390 if ( isset( $editlinkArgs[2] ) ) { 04391 $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>'; 04392 } else { 04393 $editlink .= '/>'; 04394 } 04395 } else { 04396 $editlink = ''; 04397 } 04398 $head[$headlineCount] = Linker::makeHeadline( $level, 04399 $matches['attrib'][$headlineCount], $anchor, $headline, 04400 $editlink, $legacyAnchor ); 04401 04402 $headlineCount++; 04403 } 04404 04405 $this->setOutputType( $oldType ); 04406 04407 # Never ever show TOC if no headers 04408 if ( $numVisible < 1 ) { 04409 $enoughToc = false; 04410 } 04411 04412 if ( $enoughToc ) { 04413 if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) { 04414 $toc .= Linker::tocUnindent( $prevtoclevel - 1 ); 04415 } 04416 $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() ); 04417 $this->mOutput->setTOCHTML( $toc ); 04418 } 04419 04420 if ( $isMain ) { 04421 $this->mOutput->setSections( $tocraw ); 04422 } 04423 04424 # split up and insert constructed headlines 04425 $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text ); 04426 $i = 0; 04427 04428 // build an array of document sections 04429 $sections = array(); 04430 foreach ( $blocks as $block ) { 04431 // $head is zero-based, sections aren't. 04432 if ( empty( $head[$i - 1] ) ) { 04433 $sections[$i] = $block; 04434 } else { 04435 $sections[$i] = $head[$i - 1] . 
$block; 04436 } 04437 04448 wfRunHooks( 'ParserSectionCreate', array( $this, $i, &$sections[$i], $showEditLink ) ); 04449 04450 $i++; 04451 } 04452 04453 if ( $enoughToc && $isMain && !$this->mForceTocPosition ) { 04454 // append the TOC at the beginning 04455 // Top anchor now in skin 04456 $sections[0] = $sections[0] . $toc . "\n"; 04457 } 04458 04459 $full .= join( '', $sections ); 04460 04461 if ( $this->mForceTocPosition ) { 04462 return str_replace( '<!--MWTOC-->', $toc, $full ); 04463 } else { 04464 return $full; 04465 } 04466 } 04467 04479 public function preSaveTransform( $text, Title $title, User $user, ParserOptions $options, $clearState = true ) { 04480 $this->startParse( $title, $options, self::OT_WIKI, $clearState ); 04481 $this->setUser( $user ); 04482 04483 $pairs = array( 04484 "\r\n" => "\n", 04485 ); 04486 $text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text ); 04487 if( $options->getPreSaveTransform() ) { 04488 $text = $this->pstPass2( $text, $user ); 04489 } 04490 $text = $this->mStripState->unstripBoth( $text ); 04491 04492 $this->setUser( null ); #Reset 04493 04494 return $text; 04495 } 04496 04506 function pstPass2( $text, $user ) { 04507 global $wgContLang, $wgLocaltimezone; 04508 04509 # Note: This is the timestamp saved as hardcoded wikitext to 04510 # the database, we use $wgContLang here in order to give 04511 # everyone the same signature and use the default one rather 04512 # than the one selected in each user's preferences. 04513 # (see also bug 12815) 04514 $ts = $this->mOptions->getTimestamp(); 04515 if ( isset( $wgLocaltimezone ) ) { 04516 $tz = $wgLocaltimezone; 04517 } else { 04518 $tz = date_default_timezone_get(); 04519 } 04520 04521 $unixts = wfTimestamp( TS_UNIX, $ts ); 04522 $oldtz = date_default_timezone_get(); 04523 date_default_timezone_set( $tz ); 04524 $ts = date( 'YmdHis', $unixts ); 04525 $tzMsg = date( 'T', $unixts ); # might vary on DST changeover! 
04526 04527 # Allow translation of timezones through wiki. date() can return 04528 # whatever crap the system uses, localised or not, so we cannot 04529 # ship premade translations. 04530 $key = 'timezone-' . strtolower( trim( $tzMsg ) ); 04531 $msg = wfMessage( $key )->inContentLanguage(); 04532 if ( $msg->exists() ) { 04533 $tzMsg = $msg->text(); 04534 } 04535 04536 date_default_timezone_set( $oldtz ); 04537 04538 $d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)"; 04539 04540 # Variable replacement 04541 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags 04542 $text = $this->replaceVariables( $text ); 04543 04544 # This works almost by chance, as the replaceVariables are done before the getUserSig(), 04545 # which may corrupt this parser instance via its wfMessage()->text() call- 04546 04547 # Signatures 04548 $sigText = $this->getUserSig( $user ); 04549 $text = strtr( $text, array( 04550 '~~~~~' => $d, 04551 '~~~~' => "$sigText $d", 04552 '~~~' => $sigText 04553 ) ); 04554 04555 # Context links ("pipe tricks"): [[|name]] and [[name (context)|]] 04556 $tc = '[' . Title::legalChars() . ']'; 04557 $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii! 
/**
 * Build the signature wikitext for a user.
 *
 * The nickname and the "fancy signature" flag are taken from the user's
 * options unless the caller passes them in. A fancy signature that is
 * well-formed XML is cleaned with cleanSig() and returned directly; in
 * every other case the 'signature' / 'signature-anon' message is rendered
 * with the escaped user name and nickname.
 *
 * @param $user User Object providing getName(), getOption(), getBoolOption() and isAnon()
 * @param string|bool $nickname Nickname to use, or false to read the 'nickname' option
 * @param bool|null $fancySig Treat the nickname as raw markup; null reads the 'fancysig' option
 * @return string Signature text
 */
function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	global $wgMaxSigChars;

	$username = $user->getName();

	# Fill in whatever the caller did not supply from the user object.
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	# No nickname set: sign with the plain user name
	if ( $nickname == null ) {
		$nickname = $username;
	}

	$tooLong = mb_strlen( $nickname ) > $wgMaxSigChars;

	# A fancy signature of acceptable length that validates is cleaned up
	# (if needed) and used as-is.
	if ( !$tooLong && $fancySig !== false && $this->validateSig( $nickname ) !== false ) {
		return $this->cleanSig( $nickname, true );
	}

	if ( $tooLong ) {
		wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
		$nickname = $username;
	} elseif ( $fancySig !== false ) {
		# Markup failed validation; fall back to the default
		wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
		$nickname = $username;
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# Still here, so render the default signature message
	$escapedUser = wfEscapeWikiText( $username );
	$escapedNick = wfEscapeWikiText( $nickname );
	$messageKey = $user->isAnon() ? 'signature-anon' : 'signature';

	$message = wfMessage( $messageKey, $escapedUser, $escapedNick );
	return $message->inContentLanguage()->title( $this->getTitle() )->text();
}

/**
 * Check that a user's signature is well-formed XML.
 *
 * @param string $text Signature markup
 * @return string|bool The unchanged text when it is a well-formed XML
 *   fragment, false otherwise
 */
function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}
	return false;
}

/**
 * Clean up signature text: force substitution of every template call,
 * strip nested signature tildes and expand the result.
 *
 * @param string $text Signature markup
 * @param bool $parsing True when called from within an ongoing parse; when
 *   false a fresh parse is started with $wgTitle and the strip markers are
 *   unstripped before returning
 * @return string Cleaned text, or the input unchanged when the
 *   clean-signatures option is off
 */
public function cleanSig( $text, $parsing = false ) {
	if ( !$parsing ) {
		global $wgTitle;
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$subst = MagicWord::get( 'subst' );
	$pattern = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
	$replacement = '{{' . $subst->getSynonym( 0 );

	# Every "{{" not already followed by the subst keyword gets it prepended
	$cleaned = self::cleanSigInSig( preg_replace( $pattern, $replacement, $text ) );

	$dom = $this->preprocessToDom( $cleaned );
	$expanded = $this->getPreprocessor()->newFrame()->expand( $dom );

	if ( $parsing ) {
		return $expanded;
	}
	return $this->mStripState->unstripBoth( $expanded );
}
/**
 * Strip ~~~, ~~~~ and ~~~~~ out of signatures.
 *
 * @param string $text
 * @return string Text with every run of three to five tildes removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}

/**
 * Set up some variables which are usually set up in parse()
 * so that an external function can call some class members with confidence.
 *
 * @param $title Title|null
 * @param $options ParserOptions
 * @param int $outputType One of the OT_xxx constants
 * @param bool $clearState Whether to reset the per-parse state
 */
public function startExternalParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) {
	$this->startParse( $title, $options, $outputType, $clearState );
}

/**
 * Store the title, options and output type for a parse and optionally
 * clear the parser state.
 *
 * @param $title Title|null
 * @param $options ParserOptions
 * @param int $outputType One of the OT_xxx constants
 * @param bool $clearState Whether to call clearState()
 */
private function startParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );
	if ( $clearState ) {
		$this->clearState();
	}
}

/**
 * Run the preprocessor over a message text.
 *
 * A static flag guards against re-entry: a call made while another call
 * is still running returns $text untouched.
 *
 * @param string $text Message text
 * @param $options ParserOptions
 * @param $title Title|null Title context; defaults to $wgTitle
 * @return string Preprocessed text (or the input when re-entered)
 * @throws Exception Anything thrown by preprocess() is re-thrown after the
 *   guard flag and profiling section have been released
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	wfProfileIn( __METHOD__ );
	if ( !$title ) {
		global $wgTitle;
		$title = $wgTitle;
	}

	# The guard is static, so it must be released even when preprocess()
	# fails; otherwise every later message would be returned untransformed
	# for the rest of the request. Catch-and-rethrow is used because
	# "finally" is not available on the PHP versions this file targets.
	try {
		$text = $this->preprocess( $text, $title, $options );
	} catch ( Exception $e ) {
		$executing = false;
		wfProfileOut( __METHOD__ );
		throw $e;
	}

	$executing = false;
	wfProfileOut( __METHOD__ );
	return $text;
}

/**
 * Register a callback for an extension tag such as <tag>...</tag>.
 *
 * The tag name is lower-cased and added to the strip list if it is not
 * there already.
 *
 * @param string $tag Tag name, without angle brackets
 * @param $callback Mixed Callback stored for the tag
 * @return Mixed|null The callback previously registered for the tag, if any
 * @throws MWException If the tag name contains <, >, CR or LF
 */
public function setHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
	$this->mTagHooks[$tag] = $callback;
	if ( !in_array( $tag, $this->mStripList ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
/**
 * Register a callback for a "transparent" tag hook.
 *
 * Unlike setHook(), the tag is not added to the strip list; the callback
 * is invoked from replaceTransparentTags().
 *
 * @param string $tag Tag name, without angle brackets
 * @param $callback Mixed Callback stored for the tag
 * @return Mixed|null The callback previously registered for the tag, if any
 * @throws MWException If the tag name contains <, >, CR or LF
 */
function setTransparentTagHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		# The message names this method, so the failing call can be found
		throw new MWException( "Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call" );
	}
	$oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}

/**
 * Remove all tag hooks and function tag hooks and reset the strip list
 * to its default.
 */
function clearTagHooks() {
	$this->mTagHooks = array();
	$this->mFunctionTagHooks = array();
	$this->mStripList = $this->mDefaultStripList;
}

/**
 * Register a parser function callback under a magic word ID.
 *
 * Every synonym of the magic word is entered into the function synonym
 * table: lower-cased when the word is case-insensitive, prefixed with '#'
 * unless SFH_NO_HASH is set, and with one trailing colon removed.
 *
 * @param string $id Magic word ID
 * @param $callback Mixed Callback stored for the function
 * @param int $flags Bit field of SFH_* flags
 * @return Mixed|null The callback previously registered under $id, if any
 * @throws MWException If no magic word object is available for $id
 */
public function setFunctionHook( $id, $callback, $flags = 0 ) {
	global $wgContLang;

	# Resolve the magic word before touching any state, so that a bad $id
	# cannot leave a hook registered without any synonyms.
	$mw = MagicWord::get( $id );
	if ( !$mw ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
	$this->mFunctionHooks[$id] = array( $callback, $flags );

	# Add to function cache
	$synonyms = $mw->getSynonyms();
	$sensitive = intval( $mw->isCaseSensitive() );

	foreach ( $synonyms as $syn ) {
		# Case
		if ( !$sensitive ) {
			$syn = $wgContLang->lc( $syn );
		}
		# Add leading hash
		if ( !( $flags & self::SFH_NO_HASH ) ) {
			$syn = '#' . $syn;
		}
		# Remove trailing colon
		if ( substr( $syn, -1, 1 ) === ':' ) {
			$syn = substr( $syn, 0, -1 );
		}
		$this->mFunctionSynonyms[$sensitive][$syn] = $id;
	}
	return $oldVal;
}
/**
 * List the IDs of all registered parser function hooks.
 *
 * @return Array Keys of the function hook table
 */
function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );
	return $hookIds;
}

/**
 * Register a function tag hook and make sure the tag is on the strip list.
 *
 * @param string $tag Tag name, without angle brackets
 * @param $callback Mixed Callback stored for the tag
 * @param $flags Mixed Flags stored alongside the callback
 * @return Array|null The (callback, flags) pair previously registered, if any
 * @throws MWException If the tag name contains <, >, CR or LF
 */
function setFunctionTagHook( $tag, $callback, $flags ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
	}

	$previous = null;
	if ( isset( $this->mFunctionTagHooks[$tag] ) ) {
		$previous = $this->mFunctionTagHooks[$tag];
	}
	$this->mFunctionTagHooks[$tag] = array( $callback, $flags );

	$alreadyStripped = in_array( $tag, $this->mStripList );
	if ( !$alreadyStripped ) {
		$this->mStripList[] = $tag;
	}

	return $previous;
}

/**
 * Replace link placeholders in $text using the current link holder array.
 *
 * @param string $text Passed by reference to the link holder array
 * @param int $options Not used by this method
 * @return Mixed Whatever the link holder array's replace() returns
 */
function replaceLinkHolders( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	return $holders->replace( $text );
}

/**
 * Replace link placeholders with plain text of links.
 *
 * @param string $text
 * @return string
 */
function replaceLinkHoldersText( $text ) {
	$holders = $this->mLinkHolders;
	return $holders->replaceText( $text );
}
/**
 * Render the contents of a gallery tag as HTML.
 *
 * Each non-empty input line has the form "File name|parameters"; the
 * parameters are split on "|" and may contain an alt= and a link=
 * definition, everything else is joined back together as the label.
 *
 * @param string $text Tag contents, one image per line
 * @param array $params Tag attributes; 'showfilename', 'caption', 'perrow',
 *   'widths' and 'heights' are read here
 * @return string HTML produced by the ImageGallery object
 */
function renderImageGallery( $text, $params ) {
	$gallery = new ImageGallery();
	$gallery->setContextTitle( $this->mTitle );
	$gallery->setShowBytes( false );
	$gallery->setShowFilename( false );
	$gallery->setParser( $this );
	$gallery->setHideBadImages();
	$gallery->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );

	$gallery->setShowFilename( isset( $params['showfilename'] ) );
	if ( isset( $params['caption'] ) ) {
		$captionHtml = htmlspecialchars( $params['caption'] );
		$gallery->setCaptionHtml( $this->replaceInternalLinks( $captionHtml ) );
	}
	if ( isset( $params['perrow'] ) ) {
		$gallery->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$gallery->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$gallery->setHeights( $params['heights'] );
	}

	wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$gallery ) );

	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = array();
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		if ( !$matches ) {
			# Skip empty lines
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( $title === null ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		$label = '';
		$alt = '';
		$link = '';
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			$pieces = StringUtils::explode( '|', $matches[3] );
			$altWord = MagicWord::get( 'img_alt' );
			$linkWord = MagicWord::get( 'img_link' );

			foreach ( $pieces as $piece ) {
				$altValue = $altWord->matchVariableStartToEnd( $piece );
				if ( $altValue ) {
					$alt = $this->stripAltText( $altValue, false );
					continue;
				}

				$linkParam = $linkWord->matchVariableStartToEnd( $piece );
				if ( !$linkParam ) {
					// concatenate all other pipes
					$label .= '|' . $piece;
					continue;
				}

				$linkValue = strip_tags( $this->replaceLinkHoldersText( $linkParam ) );
				$chars = self::EXT_LINK_URL_CLASS;
				$prots = $this->mUrlProtocols;
				// an absolute URL is used as-is, anything else must be a wiki link
				if ( preg_match( "/^($prots)$chars+$/u", $linkValue ) ) {
					$link = $linkValue;
				} else {
					$target = Title::newFromText( $linkValue );
					if ( $target !== null ) {
						$link = $target->getLocalURL();
					}
				}
			}
			// remove the first pipe
			$label = substr( $label, 1 );
		}

		$gallery->add( $title, $label, $alt, $link );
	}
	return $gallery->toHTML();
}
/**
 * Get the image parameter map and matching magic word array for a handler.
 *
 * Results are cached per handler class (the empty string when there is
 * no handler). The built-in parameters map "img_<name>" magic words, with
 * dashes turned into underscores, to a (type, name) pair; the handler's own
 * parameters are added with type 'handler'.
 *
 * @param $handler Mixed Handler object providing getParamMap(), or a falsy value
 * @return Array Two elements: the parameter map and a MagicWordArray of its keys
 */
function getImageParams( $handler ) {
	$cacheKey = $handler ? get_class( $handler ) : '';

	if ( isset( $this->mImageParams[$cacheKey] ) ) {
		return array( $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] );
	}

	# Built-in parameter names, grouped by the parameter type they belong to
	static $builtinNames = array(
		'horizAlign' => array( 'left', 'right', 'center', 'none' ),
		'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ),
		'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ),
	);
	# Built once per request and shared between handler classes
	static $builtinMap;
	if ( !$builtinMap ) {
		$builtinMap = array();
		foreach ( $builtinNames as $group => $members ) {
			foreach ( $members as $member ) {
				$magicId = str_replace( '-', '_', 'img_' . $member );
				$builtinMap[$magicId] = array( $group, $member );
			}
		}
	}

	# Add handler params
	$map = $builtinMap;
	if ( $handler ) {
		foreach ( $handler->getParamMap() as $magicId => $handlerParam ) {
			$map[$magicId] = array( 'handler', $handlerParam );
		}
	}

	$this->mImageParams[$cacheKey] = $map;
	$this->mImageParamsMagicArray[$cacheKey] = new MagicWordArray( array_keys( $map ) );

	return array( $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] );
}
/**
 * Parse image options text and use it to make an image.
 *
 * The pipe-separated option text is matched against the parameter map from
 * getImageParams(). Recognised and valid options are collected by type
 * ('frame', 'handler', 'horizAlign', 'vertAlign'); the last part that fails
 * to validate becomes the caption. The collected parameters are then handed
 * to Linker::makeImageLink().
 *
 * @param $title Title File title; may be replaced by fetchFileAndTitle()
 * @param string $options Pipe-separated option text
 * @param $holders Mixed Link holder object passed on to stripAltText(), or false
 * @return Mixed The result of Linker::makeImageLink()
 */
function makeImage( $title, $options, $holders = false ) {
	# Check if the options text is of the form "options|alt text"
	# Options are:
	#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	#  * left       no resizing, just left align. label is used for alt= only
	#  * right      same, but right aligned
	#  * none       same, but not aligned
	#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	#  * center     center the image
	#  * frame      Keep original image size, no magnify-button.
	#  * framed     Same as "frame"
	#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	#  * upright    reduce width for upright images, rounded to full __0 px
	#  * border     draw a 1px border around the image
	#  * alt        Text for HTML alt attribute (defaults to empty)
	#  * class      Set a class for img node
	#  * link       Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	#  * baseline
	#  * sub
	#  * super
	#  * top
	#  * text-top
	#  * middle
	#  * bottom
	#  * text-bottom

	$parts = StringUtils::explode( "|", $options );

	# Give extensions a chance to select the file revision for us
	# ($options is reused from here on as the array passed to the hook and
	# to fetchFileAndTitle(); the option text now lives in $parts)
	$options = array();
	$descQuery = false;
	wfRunHooks( 'BeforeParserFetchFileAndTitle',
		array( $this, $title, &$options, &$descQuery ) );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;

	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Process the input parameters
	$caption = '';
	$params = array( 'frame' => array(), 'handler' => array(),
		'horizAlign' => array(), 'vertAlign' => array() );
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		# Stays false unless one of the branches below accepts the part
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			# Special case; width and height come in one variable together
			if ( $type === 'handler' && $paramName === 'width' ) {
				$parsedWidthParam = $this->parseWidthParam( $value );
				if( isset( $parsedWidthParam['width'] ) ) {
					$width = $parsedWidthParam['width'];
					if ( $handler->validateParam( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if( isset( $parsedWidthParam['height'] ) ) {
					$height = $parsedWidthParam['height'];
					if ( $handler->validateParam( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- bug 13436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch( $paramName ) {
					case 'manualthumb':
					case 'alt':
					case 'class':
						# @todo FIXME: Possibly check validity here for
						# manualthumb? downstream behavior seems odd with
						# missing manual thumbs.
						$validated = true;
						$value = $this->stripAltText( $value, $holders );
						break;
					case 'link':
						# The value is stored under 'no-link', 'link-url' or
						# 'link-title' depending on what it turns out to be
						$chars = self::EXT_LINK_URL_CLASS;
						$prots = $this->mUrlProtocols;
						if ( $value === '' ) {
							$paramName = 'no-link';
							$value = true;
							$validated = true;
						} elseif ( preg_match( "/^(?i)$prots/", $value ) ) {
							if ( preg_match( "/^((?i)$prots)$chars+$/u", $value, $m ) ) {
								$paramName = 'link-url';
								$this->mOutput->addExternalLink( $value );
								if ( $this->mOptions->getExternalLinkTarget() ) {
									$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
								}
								$validated = true;
							}
						} else {
							$linkTitle = Title::newFromText( $value );
							if ( $linkTitle ) {
								$paramName = 'link-title';
								$value = $linkTitle;
								$this->mOutput->addLink( $linkTitle );
								$validated = true;
							}
						}
						break;
					default:
						# Most other things appear to be empty or numeric...
						$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		# Anything not accepted as a parameter is taken as the caption;
		# a later unrecognised part overwrites an earlier one
		if ( !$validated ) {
			$caption = $part;
		}
	}

	# Process alignment parameters
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = isset( $params['frame']['frame'] ) ||
		isset( $params['frame']['framed'] ) ||
		isset( $params['frame']['thumbnail'] ) ||
		isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text.  Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers.  It *also* came to set the title attribute.
	#
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text.  This is the framed/thumbnail case.  If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time
	# being, if the unnamed param is set and the alt param is not.
	#
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the
	# unnamed parameter entirely for images without a caption; adding an
	# explicit caption= parameter and preserving the old magic unnamed
	# parameter for BC; ...
	if ( $imageIsFramed ) { # Framed image
		if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $title->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else { # Inline image
		if ( !isset( $params['frame']['alt'] ) ) {
			# No alt text, use the "caption" for the alt text
			if ( $caption !== '' ) {
				$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
			} else {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $title->getText();
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}

	wfRunHooks( 'ParserMakeImageParams', array( $title, $file, &$params, $this ) );

	# Linker does the rest
	$time = isset( $options['time'] ) ? $options['time'] : false;
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
/**
 * Turn a caption into plain text suitable for an alt or title attribute.
 *
 * @param string $caption
 * @param $holders Mixed Link holder object to resolve placeholders with;
 *   when falsy the parser's own link holders are used
 * @return string Caption with link placeholders replaced, strip markers
 *   expanded and all tags removed
 */
protected function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip).  We can't just use
	# replaceLinkHoldersText() here, because if this function is called
	# from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
	$plain = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# make sure there are no placeholders in thumbnail attributes
	# that are later expanded to html- so expand them now and
	# remove the tags
	$expanded = $this->mStripState->unstripBoth( $plain );

	return Sanitizer::stripAllTags( $expanded );
}

/**
 * Mark the current parser output as uncacheable.
 *
 * @throws MWException If there is no parser output object, i.e. no parse
 *   is in progress
 */
function disableCache() {
	wfDebug( "Parser output marked as uncacheable.\n" );

	$output = $this->mOutput;
	if ( !$output ) {
		throw new MWException( __METHOD__ .
			" can only be called when actually parsing something" );
	}

	$output->setCacheTime( -1 ); // old style, for compatibility
	$output->updateCacheExpiry( 0 ); // new style, for consistency
}
/**
 * Callback for attribute text: expand variables, then unstrip markers.
 *
 * @param string $text Modified in place as well as returned
 * @param $frame Mixed Frame passed through to replaceVariables()
 * @return string
 */
function attributeStripCallback( &$text, $frame = false ) {
	# $text is a reference, so each step is written back to the caller
	$text = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $text );

	return $text;
}

/**
 * List every registered tag name: transparent tag hooks first, then tag
 * hooks, then function tag hooks.
 *
 * @return Array
 */
function getTags() {
	$transparent = array_keys( $this->mTransparentTagHooks );
	$regular = array_keys( $this->mTagHooks );
	$functionTags = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparent, $regular, $functionTags );
}

/**
 * Replace transparent tags in $text with the output of their callbacks.
 *
 * Each callback receives the tag content, its parameters and this parser.
 * A tag with no registered callback is put back as it was.
 *
 * @param string $text
 * @return string
 */
function replaceTransparentTags( $text ) {
	$found = array();
	$tagNames = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $tagNames, $text, $found, $this->mUniqPrefix );

	$swaps = array();
	foreach ( $found as $marker => $info ) {
		list( $element, $content, $params, $rawTag ) = $info;
		$key = strtolower( $element );

		if ( !isset( $this->mTransparentTagHooks[$key] ) ) {
			$swaps[$marker] = $rawTag;
			continue;
		}

		$hook = $this->mTransparentTagHooks[$key];
		$swaps[$marker] = call_user_func_array( $hook, array( $content, $params, $this ) );
	}

	return strtr( $text, $swaps );
}
/**
 * Break wikitext input into sections, and either pull or replace
 * some particular section's text.
 *
 * The text is preprocessed to a DOM whose top-level <h> nodes mark the
 * section breaks. A section runs from its heading up to the next heading of
 * the same or a higher level, so nested subsections are included.
 *
 * @param string $text Page wikitext
 * @param string $section Section index, optionally preceded by dash-separated
 *   flags; the flag "T" sets PTD_FOR_INCLUSION for the preprocessor
 *   (e.g. "2" or "T-2"). Index 0 is the text before the first heading.
 * @param string $mode Either "get" or "replace"
 * @param string $newText Replacement text in "replace" mode; the value
 *   returned for a missing section in "get" mode
 * @return string In "get" mode the section text, or $newText when the
 *   section does not exist. In "replace" mode the whole text with the
 *   section replaced, or $text unchanged when the section does not exist.
 */
private function extractSections( $text, $section, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$outText = '';
	$frame = $this->getPreprocessor()->newFrame();

	# Process section extraction flags
	$flags = 0;
	$sectionParts = explode( '-', $section );
	# The last dash-separated part is the index, everything before it is a flag
	$sectionIndex = array_pop( $sectionParts );
	foreach ( $sectionParts as $part ) {
		if ( $part === 'T' ) {
			$flags |= self::PTD_FOR_INCLUSION;
		}
	}

	# Check for empty input
	if ( strval( $text ) === '' ) {
		# Only sections 0 and T-0 exist in an empty document
		if ( $sectionIndex == 0 ) {
			if ( $mode === 'get' ) {
				return '';
			} else {
				return $newText;
			}
		} else {
			if ( $mode === 'get' ) {
				return $newText;
			} else {
				return $text;
			}
		}
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks
	# They can only occur at the top level, so we can find them by iterating the root's children
	$node = $root->getFirstChild();

	# Find the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		while ( $node ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					$targetLevel = $bits['level'];
					break;
				}
			}
			# In replace mode everything before the target section is
			# copied to the output unchanged
			if ( $mode === 'replace' ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
			$node = $node->getNextSibling();
		}
	}

	if ( !$node ) {
		# Not found
		if ( $mode === 'get' ) {
			return $newText;
		} else {
			return $text;
		}
	}

	# Find the end of the section, including nested sections
	do {
		if ( $node->getName() === 'h' ) {
			$bits = $node->splitHeading();
			$curLevel = $bits['level'];
			# Stop at the first other heading that is not nested below the target
			if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
				break;
			}
		}
		if ( $mode === 'get' ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
		$node = $node->getNextSibling();
	} while ( $node );

	# Write out the remainder (in replace mode only)
	if ( $mode === 'replace' ) {
		# Output the replacement text
		# Add two newlines on -- trailing whitespace in $newText is conventionally
		# stripped by the editor, so we need both newlines to restore the paragraph gap
		# Only add trailing whitespace if there is newText
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		while ( $node ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			$node = $node->getNextSibling();
		}
	}

	if ( is_string( $outText ) ) {
		# Re-insert stripped tags
		$outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
	}

	return $outText;
}
/**
 * Get the text of one section of a page.
 *
 * @param string $text Page wikitext
 * @param string $section Section identifier as understood by extractSections()
 * @param string $deftext Text to return when the section does not exist
 * @return string
 */
public function getSection( $text, $section, $deftext = '' ) {
	return $this->extractSections( $text, $section, 'get', $deftext );
}

/**
 * Replace the text of one section of a page.
 *
 * @param string $oldtext Current page wikitext
 * @param string $section Section identifier as understood by extractSections()
 * @param string $text New text for the section
 * @return string Page wikitext with the section replaced
 */
public function replaceSection( $oldtext, $section, $text ) {
	return $this->extractSections( $oldtext, $section, 'replace', $text );
}

/**
 * Get the ID of the revision being parsed.
 *
 * @return Mixed The stored revision ID
 */
function getRevisionId() {
	return $this->mRevisionId;
}

/**
 * Get the revision object for the stored revision ID, loading it on first use.
 *
 * @return Revision|null Null when no revision ID is set
 */
protected function getRevisionObject() {
	$needsLoad = $this->mRevisionObject === null && $this->mRevisionId !== null;
	if ( $needsLoad ) {
		$this->mRevisionObject = Revision::newFromId( $this->mRevisionId );
	}

	return $this->mRevisionObject;
}

/**
 * Get the timestamp associated with the current revision, adjusted with
 * the content language's userAdjust().
 *
 * The current time is used when no revision object is available. The
 * result is cached on the parser.
 *
 * @return Mixed
 */
function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp !== null ) {
		return $this->mRevisionTimestamp;
	}

	wfProfileIn( __METHOD__ );

	global $wgContLang;

	$revision = $this->getRevisionObject();
	if ( $revision ) {
		$rawTimestamp = $revision->getTimestamp();
	} else {
		$rawTimestamp = wfTimestampNow();
	}

	# The cryptic '' timezone parameter tells to use the site-default
	# timezone offset instead of the user settings.
	#
	# Since this value will be saved into the parser cache, served
	# to other users, and potentially even used inside links and such,
	# it needs to be consistent for all visitors.
	$this->mRevisionTimestamp = $wgContLang->userAdjust( $rawTimestamp, '' );

	wfProfileOut( __METHOD__ );

	return $this->mRevisionTimestamp;
}
/**
 * Get the name of the user that edited the current revision.
 *
 * The result is cached once it has been determined.
 *
 * @return Mixed User name, or the unset cached value when it cannot be determined
 */
function getRevisionUser() {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revision = $this->getRevisionObject();
	if ( $revision ) {
		$this->mRevisionUser = $revision->getUserText();
		return $this->mRevisionUser;
	}

	# if this template is subst: the revision id will be blank,
	# so just use the current user's name
	if ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}

/**
 * Set the default sort key, and record it as the 'defaultsort' property
 * of the parser output.
 *
 * @param string $sort
 */
public function setDefaultSort( $sort ) {
	$this->mDefaultSort = $sort;
	$this->mOutput->setProperty( 'defaultsort', $sort );
}

/**
 * Get the default sort key.
 *
 * @return string The stored sort key, or the empty string when it is false
 */
public function getDefaultSort() {
	return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
}

/**
 * Get the stored default sort key without any fallback.
 *
 * @return Mixed The stored value; getDefaultSort() treats false as "not set"
 */
public function getCustomDefaultSort() {
	$stored = $this->mDefaultSort;
	return $stored;
}

/**
 * Try to guess the section anchor name based on a wikitext fragment.
 *
 * @param string $text Heading wikitext
 * @return string '#' followed by the escaped anchor
 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$name = Sanitizer::normalizeSectionNameWhitespace( $this->stripSectionName( $text ) );

	return '#' . Sanitizer::escapeId( $name, 'noninitial' );
}

/**
 * Same as guessSectionNameFromWikiText(), but escapes the anchor with the
 * additional 'legacy' option.
 *
 * @param string $text Heading wikitext
 * @return string '#' followed by the escaped anchor
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$name = Sanitizer::normalizeSectionNameWhitespace( $this->stripSectionName( $text ) );

	return '#' . Sanitizer::escapeId( $name, array( 'noninitial', 'legacy' ) );
}
/**
 * Strip link markup, quote markup and HTML tags from a section name.
 *
 * @param string $text Heading wikitext
 * @return string
 */
public function stripSectionName( $text ) {
	# Strip internal link markup
	$stripped = preg_replace( '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2', $text );
	$stripped = preg_replace( '/\[\[:?([^[]+)\|?\]\]/', '$1', $stripped );

	# Strip external link markup
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [http://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$stripped = preg_replace( $externalLink, '$2', $stripped );

	# Parse wikitext quotes (italics & bold)
	$stripped = $this->doQuotes( $stripped );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $stripped );
}

/**
 * Test helper: start a parse, expand variables, unstrip markers and
 * remove disallowed HTML tags.
 *
 * @param string $text
 * @param $title Title
 * @param $options ParserOptions
 * @param int $outputType One of the OT_xxx constants
 * @return string
 */
function testSrvus( $text, Title $title, ParserOptions $options, $outputType = self::OT_HTML ) {
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::removeHTMLtags( $unstripped );
}

/**
 * Test helper: run the pre-save transform with the user from $options.
 *
 * @param string $text
 * @param $title Title
 * @param $options ParserOptions
 * @return string
 */
function testPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();
	return $this->preSaveTransform( $text, $title, $user, $options );
}

/**
 * Test helper: testSrvus() with the OT_PREPROCESS output type.
 *
 * @param string $text
 * @param $title Title
 * @param $options ParserOptions
 * @return string
 */
function testPreprocess( $text, Title $title, ParserOptions $options ) {
	return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );
}

/**
 * Call a callback on every stretch of text between strip markers.
 *
 * Markers (from the unique prefix up to and including MARKER_SUFFIX) are
 * copied to the output untouched; the callback's return values replace the
 * text in between. A marker prefix with no terminating suffix is copied
 * verbatim together with everything after it.
 *
 * @param string $s Text to process
 * @param $callback Mixed Callback taking a string and returning a string
 * @return string
 */
function markerSkipCallback( $s, $callback ) {
	$result = '';
	$pos = 0;
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );

	while ( $pos < $length ) {
		$start = strpos( $s, $this->mUniqPrefix, $pos );
		if ( $start === false ) {
			# No more markers: the rest is ordinary text
			$result .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		# Ordinary text in front of the marker
		$result .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$end = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $end === false ) {
			# Unterminated marker: keep the remainder as it is
			$result .= substr( $s, $start );
			break;
		}

		$end += $suffixLength;
		$result .= substr( $s, $start, $end - $start );
		$pos = $end;
	}

	return $result;
}
/**
 * Remove any strip markers found in the given text.
 *
 * @param string $text
 * @return string
 */
function killMarkers( $text ) {
	$stripState = $this->mStripState;
	return $stripState->killMarkers( $text );
}

/**
 * Bundle half-parsed text with the strip state and link holders it
 * refers to, for later use with unserializeHalfParsedText().
 *
 * @param string $text
 * @return Array With keys 'text', 'version', 'stripState' and 'linkHolders'
 */
function serializeHalfParsedText( $text ) {
	wfProfileIn( __METHOD__ );

	$stripState = $this->mStripState->getSubState( $text );
	$linkHolders = $this->mLinkHolders->getSubArray( $text );
	$bundle = array(
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $stripState,
		'linkHolders' => $linkHolders
	);

	wfProfileOut( __METHOD__ );
	return $bundle;
}

/**
 * Load the state bundled by serializeHalfParsedText() into this parser:
 * the strip state is merged first, then the link holders.
 *
 * @param array $data Bundle from serializeHalfParsedText()
 * @return string The bundled text as returned by the two merge steps
 * @throws MWException If the bundle has no version or a different one
 */
function unserializeHalfParsedText( $data ) {
	$versionMatches = isset( $data['version'] )
		&& $data['version'] == self::HALF_PARSED_VERSION;
	if ( !$versionMatches ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	# First, extract the strip state.
	$merged = $this->mStripState->merge( $data['stripState'], array( $data['text'] ) );

	# Now renumber links
	$merged = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $merged );

	# Should be good to go.
	return $merged[0];
}

/**
 * Check whether a bundle carries the current half-parsed version number.
 *
 * @param array $data
 * @return bool
 */
function isValidHalfParsedText( $data ) {
	if ( !isset( $data['version'] ) ) {
		return false;
	}
	return $data['version'] == self::HALF_PARSED_VERSION;
}
/**
 * Parse the value of an image width parameter.
 *
 * Accepted forms are "<width>x<height>" and "<width>", each optionally
 * followed by "px"; missing digits count as 0.
 *
 * @param string $value
 * @return Array Empty for an empty or unrecognised value, otherwise
 *   'width' and, for the two-dimensional form, 'height' as integers
 */
public function parseWidthParam( $value ) {
	if ( $value === '' ) {
		return array();
	}

	# (bug 13500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$dims = array();
	if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $dims ) ) {
		return array(
			'width' => intval( $dims[1] ),
			'height' => intval( $dims[2] ),
		);
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		return array( 'width' => intval( $value ) );
	}

	return array();
}