MediaWiki  REL1_24
Parser.php
Go to the documentation of this file.
00001 <?php
00067 class Parser {
          # Version string of this parser implementation.
          # NOTE(review): the consumers of this value are not visible in this chunk.
00073     const VERSION = '1.6.4';
00074 
          # Version number of the "half-parsed" text format.
          # NOTE(review): presumably bumped whenever that format changes -- confirm.
00079     const HALF_PARSED_VERSION = 2;
00080 
00081     # Flags for Parser::setFunctionHook
00082     # Also available as global constants from Defines.php
00083     const SFH_NO_HASH = 1;
00084     const SFH_OBJECT_ARGS = 2;
00085 
00086     # Constants needed for external link processing
00087     # Everything except bracket, space, or control characters
00088     # \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
00089     # as well as U+3000 IDEOGRAPHIC SPACE (bug 19052)
00090     const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
00091     const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
00092         \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
00093 
00094     # State constants for the definition list colon extraction
00095     const COLON_STATE_TEXT = 0;
00096     const COLON_STATE_TAG = 1;
00097     const COLON_STATE_TAGSTART = 2;
00098     const COLON_STATE_CLOSETAG = 3;
00099     const COLON_STATE_TAGSLASH = 4;
00100     const COLON_STATE_COMMENT = 5;
00101     const COLON_STATE_COMMENTDASH = 6;
00102     const COLON_STATE_COMMENTDASHDASH = 7;
00103 
00104     # Flags for preprocessToDom
00105     const PTD_FOR_INCLUSION = 1;
00106 
00107     # Allowed values for $this->mOutputType
00108     # Parameter to startExternalParse().
00109     const OT_HTML = 1; # like parse()
00110     const OT_WIKI = 2; # like preSaveTransform()
00111     const OT_PREPROCESS = 3; # like preprocess()
          # Same numeric value as OT_PREPROCESS, so the two are indistinguishable at runtime
00112     const OT_MSG = 3;
00113     const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
00114 
00115     # Marker suffix needs to be accessible statically.
00116     const MARKER_SUFFIX = "-QINU\x7f";
00117 
00118     # Markers used for wrapping the table of contents
00119     const TOC_START = '<mw:toc>';
00120     const TOC_END = '</mw:toc>';
00121 
00122     # Persistent:
00123     public $mTagHooks = array();
00124     public $mTransparentTagHooks = array();
00125     public $mFunctionHooks = array();
00126     public $mFunctionSynonyms = array( 0 => array(), 1 => array() );
00127     public $mFunctionTagHooks = array();
00128     public $mStripList = array();
00129     public $mDefaultStripList = array();
00130     public $mVarCache = array();
00131     public $mImageParams = array();
00132     public $mImageParamsMagicArray = array();
00133     public $mMarkerIndex = 0;
00134     public $mFirstCall = true;
00135 
00136     # Initialised by initialiseVariables()
00137 
00141     public $mVariables;
00142 
00146     public $mSubstWords;
00147     public $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor
00148 
00149     # Cleared with clearState():
00150 
          # ParserOutput instance; a fresh one is created by clearState()
00153     public $mOutput;
00154     public $mAutonumber, $mDTopen;
00155 
          # StripState instance built from $mUniqPrefix; recreated by clearState()
00159     public $mStripState;
00160 
00161     public $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
          # LinkHolderArray instance; recreated by clearState()
00165     public $mLinkHolders;
00166 
00167     public $mLinkID;
00168     public $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
00169     public $mDefaultSort;
00170     public $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
00171     public $mExpensiveFunctionCount; # number of expensive parser function calls
00172     public $mShowToc, $mForceTocPosition;
00173 
00177     public $mUser; # User object; only used when doing pre-save transform
00178 
00179     # Temporary
00180     # These are variables reset at least once per parse regardless of $clearState
00181 
          # Options for the current parse.
          # NOTE(review): presumably a ParserOptions assigned by startParse(), which is
          # not visible in this chunk -- confirm.
00185     public $mOptions;
00186 
00190     public $mTitle;        # Title context, used for self-link rendering and similar things
00191     public $mOutputType;   # Output type, one of the OT_xxx constants
00192     public $ot;            # Shortcut alias, see setOutputType()
00193     public $mRevisionObject; # The revision object of the specified revision ID
00194     public $mRevisionId;   # ID to display in {{REVISIONID}} tags
00195     public $mRevisionTimestamp; # The timestamp of the specified revision ID
00196     public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
00197     public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
00198     public $mRevIdForTs;   # The revision ID which was used to fetch the timestamp
00199     public $mInputSize = false; # For {{PAGESIZE}} on current page.
00200 
          # Prefix for strip markers: "\x7fUNIQ" followed by a random string,
          # regenerated on every clearState()
00204     public $mUniqPrefix;
00205 
          # Reset to an empty array by clearState().
          # NOTE(review): presumably tracks the language links already emitted -- confirm.
00211     public $mLangLinkLanguages;
00212 
          # Forced back to false in __clone().
          # NOTE(review): presumably the re-entrancy flag maintained by lock() -- confirm.
00217     public $mInParse = false;
00218 
00222     public function __construct( $conf = array() ) {
00223         $this->mConf = $conf;
00224         $this->mUrlProtocols = wfUrlProtocols();
00225         $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
00226             self::EXT_LINK_URL_CLASS . '+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
00227         if ( isset( $conf['preprocessorClass'] ) ) {
00228             $this->mPreprocessorClass = $conf['preprocessorClass'];
00229         } elseif ( defined( 'HPHP_VERSION' ) ) {
00230             # Preprocessor_Hash is much faster than Preprocessor_DOM under HipHop
00231             $this->mPreprocessorClass = 'Preprocessor_Hash';
00232         } elseif ( extension_loaded( 'domxml' ) ) {
00233             # PECL extension that conflicts with the core DOM extension (bug 13770)
00234             wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
00235             $this->mPreprocessorClass = 'Preprocessor_Hash';
00236         } elseif ( extension_loaded( 'dom' ) ) {
00237             $this->mPreprocessorClass = 'Preprocessor_DOM';
00238         } else {
00239             $this->mPreprocessorClass = 'Preprocessor_Hash';
00240         }
00241         wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
00242     }
00243 
00247     public function __destruct() {
00248         if ( isset( $this->mLinkHolders ) ) {
00249             unset( $this->mLinkHolders );
00250         }
00251         foreach ( $this as $name => $value ) {
00252             unset( $this->$name );
00253         }
00254     }
00255 
00259     public function __clone() {
00260         $this->mInParse = false;
00261         wfRunHooks( 'ParserCloned', array( $this ) );
00262     }
00263 
00267     public function firstCallInit() {
00268         if ( !$this->mFirstCall ) {
00269             return;
00270         }
00271         $this->mFirstCall = false;
00272 
00273         wfProfileIn( __METHOD__ );
00274 
00275         CoreParserFunctions::register( $this );
00276         CoreTagHooks::register( $this );
00277         $this->initialiseVariables();
00278 
00279         wfRunHooks( 'ParserFirstCallInit', array( &$this ) );
00280         wfProfileOut( __METHOD__ );
00281     }
00282 
00288     public function clearState() {
00289         wfProfileIn( __METHOD__ );
00290         if ( $this->mFirstCall ) {
00291             $this->firstCallInit();
00292         }
00293         $this->mOutput = new ParserOutput;
00294         $this->mOptions->registerWatcher( array( $this->mOutput, 'recordOption' ) );
00295         $this->mAutonumber = 0;
00296         $this->mLastSection = '';
00297         $this->mDTopen = false;
00298         $this->mIncludeCount = array();
00299         $this->mArgStack = false;
00300         $this->mInPre = false;
00301         $this->mLinkHolders = new LinkHolderArray( $this );
00302         $this->mLinkID = 0;
00303         $this->mRevisionObject = $this->mRevisionTimestamp =
00304             $this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null;
00305         $this->mVarCache = array();
00306         $this->mUser = null;
00307         $this->mLangLinkLanguages = array();
00308 
00319         $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
00320         $this->mStripState = new StripState( $this->mUniqPrefix );
00321 
00322         # Clear these on every parse, bug 4549
00323         $this->mTplRedirCache = $this->mTplDomCache = array();
00324 
00325         $this->mShowToc = true;
00326         $this->mForceTocPosition = false;
00327         $this->mIncludeSizes = array(
00328             'post-expand' => 0,
00329             'arg' => 0,
00330         );
00331         $this->mPPNodeCount = 0;
00332         $this->mGeneratedPPNodeCount = 0;
00333         $this->mHighestExpansionDepth = 0;
00334         $this->mDefaultSort = false;
00335         $this->mHeadings = array();
00336         $this->mDoubleUnderscores = array();
00337         $this->mExpensiveFunctionCount = 0;
00338 
00339         # Fix cloning
00340         if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
00341             $this->mPreprocessor = null;
00342         }
00343 
00344         wfRunHooks( 'ParserClearState', array( &$this ) );
00345         wfProfileOut( __METHOD__ );
00346     }
00347 
          /**
           * Convert wikitext to HTML and return the populated output object.
           *
           * The text is run through internalParse(), the special-character fixes,
           * doBlockLevels(), link holder replacement, optional language conversion,
           * strip-marker expansion, character reference normalisation and either
           * MWTidy or a small set of regex nesting fixes. When the limit report is
           * enabled, an HTML comment with the report is appended to the text.
           *
           * @param string $text Wikitext to parse
           * @param Title $title Title passed on to startParse() and used for title conversion
           * @param ParserOptions $options Options passed on to startParse()
           * @param bool $linestart Passed through to doBlockLevels()
           * @param bool $clearState Passed through to startParse(); when true the parser
           *   is also locked for the duration of the call and any occurrence of the
           *   strip marker prefix is removed from the input
           * @param int|null $revid When not null, used as the revision ID for this parse;
           *   the previous revision fields are restored before returning
           * @return ParserOutput The object held in $this->mOutput
           */
00360     public function parse( $text, Title $title, ParserOptions $options,
00361         $linestart = true, $clearState = true, $revid = null
00362     ) {
00368         global $wgUseTidy, $wgAlwaysUseTidy, $wgShowHostnames;
00369         $fname = __METHOD__ . '-' . wfGetCaller();
00370         wfProfileIn( __METHOD__ );
00371         wfProfileIn( $fname );
00372 
              # $magicScopeVariable is never read again.
              # NOTE(review): presumably lock() returns a scoped object that releases
              # the lock when this variable goes out of scope -- confirm against lock().
00373         if ( $clearState ) {
00374             $magicScopeVariable = $this->lock();
00375         }
00376 
00377         $this->startParse( $title, $options, self::OT_HTML, $clearState );
00378 
00379         $this->mInputSize = strlen( $text );
00380         if ( $this->mOptions->getEnableLimitReport() ) {
00381             $this->mOutput->resetParseStartTime();
00382         }
00383 
00384         # Remove the strip marker tag prefix from the input, if present.
00385         if ( $clearState ) {
00386             $text = str_replace( $this->mUniqPrefix, '', $text );
00387         }
00388 
              # Remember the current revision fields so they can be restored at the end
00389         $oldRevisionId = $this->mRevisionId;
00390         $oldRevisionObject = $this->mRevisionObject;
00391         $oldRevisionTimestamp = $this->mRevisionTimestamp;
00392         $oldRevisionUser = $this->mRevisionUser;
00393         $oldRevisionSize = $this->mRevisionSize;
00394         if ( $revid !== null ) {
00395             $this->mRevisionId = $revid;
00396             $this->mRevisionObject = null;
00397             $this->mRevisionTimestamp = null;
00398             $this->mRevisionUser = null;
00399             $this->mRevisionSize = null;
00400         }
00401 
00402         wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
00403         # No more strip!
00404         wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
00405         $text = $this->internalParse( $text );
00406         wfRunHooks( 'ParserAfterParse', array( &$this, &$text, &$this->mStripState ) );
00407 
00408         $text = $this->mStripState->unstripGeneral( $text );
00409 
00410         # Clean up special characters, only run once, next-to-last before doBlockLevels
00411         $fixtags = array(
00412             # french spaces, last one Guillemet-left
00413             # only if there is something before the space
00414             '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
00415             # french spaces, Guillemet-right
00416             '/(\\302\\253) /' => '\\1&#160;',
00417             '/&#160;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
00418         );
00419         $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );
00420 
00421         $text = $this->doBlockLevels( $text, $linestart );
00422 
00423         $this->replaceLinkHolders( $text );
00424 
              # Content conversion: skipped when disabled via the options, via the
              # 'nocontentconvert' double underscore, or for interface messages
00432         if ( !( $options->getDisableContentConversion()
00433             || isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
00434         ) {
00435             if ( !$this->mOptions->getInterfaceMessage() ) {
00436                 # The position of the convert() call should not be changed. it
00437                 # assumes that the links are all replaced and the only thing left
00438                 # is the <nowiki> mark.
00439                 $text = $this->getConverterLanguage()->convert( $text );
00440             }
00441         }
00442 
              # Title conversion: skipped when disabled via the options or double
              # underscores, or when a display title has already been set
00450         if ( !( $options->getDisableTitleConversion()
00451             || isset( $this->mDoubleUnderscores['nocontentconvert'] )
00452             || isset( $this->mDoubleUnderscores['notitleconvert'] )
00453             || $this->mOutput->getDisplayTitle() !== false )
00454         ) {
00455             $convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
00456             if ( $convruletitle ) {
00457                 $this->mOutput->setTitleText( $convruletitle );
00458             } else {
00459                 $titleText = $this->getConverterLanguage()->convertTitle( $title );
00460                 $this->mOutput->setTitleText( $titleText );
00461             }
00462         }
00463 
00464         $text = $this->mStripState->unstripNoWiki( $text );
00465 
00466         wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );
00467 
00468         $text = $this->replaceTransparentTags( $text );
00469         $text = $this->mStripState->unstripGeneral( $text );
00470 
00471         $text = Sanitizer::normalizeCharReferences( $text );
00472 
00473         if ( ( $wgUseTidy && $this->mOptions->getTidy() ) || $wgAlwaysUseTidy ) {
00474             $text = MWTidy::tidy( $text );
00475         } else {
00476             # attempt to sanitize at least some nesting problems
00477             # (bug #2702 and quite a few others)
00478             $tidyregs = array(
00479                 # ''Something [http://www.cool.com cool''] -->
00480                 # <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
00481                 '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
00482                 '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
00483                 # fix up an anchor inside another anchor, only
00484                 # at least for a single single nested link (bug 3695)
00485                 '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
00486                 '\\1\\2</a>\\3</a>\\1\\4</a>',
00487                 # fix div inside inline elements- doBlockLevels won't wrap a line which
00488                 # contains a div, so fix it up here; replace
00489                 # div with escaped text
00490                 '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
00491                 '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
00492                 # remove empty italic or bold tag pairs, some
00493                 # introduced by rules above
00494                 '/<([bi])><\/\\1>/' => '',
00495             );
00496 
00497             $text = preg_replace(
00498                 array_keys( $tidyregs ),
00499                 array_values( $tidyregs ),
00500                 $text );
00501         }
00502 
00503         if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
00504             $this->limitationWarn( 'expensive-parserfunction',
00505                 $this->mExpensiveFunctionCount,
00506                 $this->mOptions->getExpensiveParserFunctionLimit()
00507             );
00508         }
00509 
00510         wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );
00511 
00512         # Information on include size limits, for the benefit of users who try to skirt them
00513         if ( $this->mOptions->getEnableLimitReport() ) {
00514             $max = $this->mOptions->getMaxIncludeSize();
00515 
                  # CPU time is only recorded when a value is available (not null)
00516             $cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
00517             if ( $cpuTime !== null ) {
00518                 $this->mOutput->setLimitReportData( 'limitreport-cputime',
00519                     sprintf( "%.3f", $cpuTime )
00520                 );
00521             }
00522 
00523             $wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
00524             $this->mOutput->setLimitReportData( 'limitreport-walltime',
00525                 sprintf( "%.3f", $wallTime )
00526             );
00527 
                  # The remaining entries are array( current value, configured limit ) pairs
00528             $this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
00529                 array( $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() )
00530             );
00531             $this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
00532                 array( $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() )
00533             );
00534             $this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
00535                 array( $this->mIncludeSizes['post-expand'], $max )
00536             );
00537             $this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
00538                 array( $this->mIncludeSizes['arg'], $max )
00539             );
00540             $this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
00541                 array( $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() )
00542             );
00543             $this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
00544                 array( $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() )
00545             );
00546             wfRunHooks( 'ParserLimitReportPrepare', array( $this, $this->mOutput ) );
00547 
00548             $limitReport = "NewPP limit report\n";
00549             if ( $wgShowHostnames ) {
00550                 $limitReport .= 'Parsed by ' . wfHostname() . "\n";
00551             }
                  # Each entry is formatted with English messages unless a
                  # 'ParserLimitReportFormat' hook handler returns false for it
00552             foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
00553                 if ( wfRunHooks( 'ParserLimitReportFormat',
00554                     array( $key, &$value, &$limitReport, false, false )
00555                 ) ) {
00556                     $keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
00557                     $valueMsg = wfMessage( array( "$key-value-text", "$key-value" ) )
00558                         ->inLanguage( 'en' )->useDatabase( false );
00559                     if ( !$valueMsg->exists() ) {
00560                         $valueMsg = new RawMessage( '$1' );
00561                     }
00562                     if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
00563                         $valueMsg->params( $value );
00564                         $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
00565                     }
00566                 }
00567             }
00568             // Since we're not really outputting HTML, decode the entities and
00569             // then re-encode the things that need hiding inside HTML comments.
00570             $limitReport = htmlspecialchars_decode( $limitReport );
00571             wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) );
00572 
00573             // Sanitize for comment. Note '‐' in the replacement is U+2010,
00574             // which looks much like the problematic '-'.
00575             $limitReport = str_replace( array( '-', '&' ), array( '‐', '&amp;' ), $limitReport );
00576             $text .= "\n<!-- \n$limitReport-->\n";
00577 
                  # Log pages whose generated node count exceeds a tenth of the limit
00578             if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
00579                 wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
00580                     $this->mTitle->getPrefixedDBkey() );
00581             }
00582         }
00583         $this->mOutput->setText( $text );
00584 
              # Restore the revision fields saved near the top of the method
00585         $this->mRevisionId = $oldRevisionId;
00586         $this->mRevisionObject = $oldRevisionObject;
00587         $this->mRevisionTimestamp = $oldRevisionTimestamp;
00588         $this->mRevisionUser = $oldRevisionUser;
00589         $this->mRevisionSize = $oldRevisionSize;
00590         $this->mInputSize = false;
00591         wfProfileOut( $fname );
00592         wfProfileOut( __METHOD__ );
00593 
00594         return $this->mOutput;
00595     }
00596 
00608     public function recursiveTagParse( $text, $frame = false ) {
00609         wfProfileIn( __METHOD__ );
00610         wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
00611         wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
00612         $text = $this->internalParse( $text, false, $frame );
00613         wfProfileOut( __METHOD__ );
00614         return $text;
00615     }
00616 
00628     public function preprocess( $text, Title $title = null, ParserOptions $options, $revid = null, $frame = false ) {
00629         wfProfileIn( __METHOD__ );
00630         $magicScopeVariable = $this->lock();
00631         $this->startParse( $title, $options, self::OT_PREPROCESS, true );
00632         if ( $revid !== null ) {
00633             $this->mRevisionId = $revid;
00634         }
00635         wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
00636         wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
00637         $text = $this->replaceVariables( $text, $frame );
00638         $text = $this->mStripState->unstripBoth( $text );
00639         wfProfileOut( __METHOD__ );
00640         return $text;
00641     }
00642 
00652     public function recursivePreprocess( $text, $frame = false ) {
00653         wfProfileIn( __METHOD__ );
00654         $text = $this->replaceVariables( $text, $frame );
00655         $text = $this->mStripState->unstripBoth( $text );
00656         wfProfileOut( __METHOD__ );
00657         return $text;
00658     }
00659 
00673     public function getPreloadText( $text, Title $title, ParserOptions $options, $params = array() ) {
00674         $msg = new RawMessage( $text );
00675         $text = $msg->params( $params )->plain();
00676 
00677         # Parser (re)initialisation
00678         $magicScopeVariable = $this->lock();
00679         $this->startParse( $title, $options, self::OT_PLAIN, true );
00680 
00681         $flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
00682         $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
00683         $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
00684         $text = $this->mStripState->unstripBoth( $text );
00685         return $text;
00686     }
00687 
00693     public static function getRandomString() {
00694         return wfRandomString( 16 );
00695     }
00696 
00703     public function setUser( $user ) {
00704         $this->mUser = $user;
00705     }
00706 
00712     public function uniqPrefix() {
00713         if ( !isset( $this->mUniqPrefix ) ) {
00714             # @todo FIXME: This is probably *horribly wrong*
00715             # LanguageConverter seems to want $wgParser's uniqPrefix, however
00716             # if this is called for a parser cache hit, the parser may not
00717             # have ever been initialized in the first place.
00718             # Not really sure what the heck is supposed to be going on here.
00719             return '';
00720             # throw new MWException( "Accessing uninitialized mUniqPrefix" );
00721         }
00722         return $this->mUniqPrefix;
00723     }
00724 
00730     public function setTitle( $t ) {
00731         if ( !$t ) {
00732             $t = Title::newFromText( 'NO TITLE' );
00733         }
00734 
00735         if ( $t->hasFragment() ) {
00736             # Strip the fragment to avoid various odd effects
00737             $this->mTitle = clone $t;
00738             $this->mTitle->setFragment( '' );
00739         } else {
00740             $this->mTitle = $t;
00741         }
00742     }
00743 
00749     public function getTitle() {
00750         return $this->mTitle;
00751     }
00752 
00759     public function Title( $x = null ) {
00760         return wfSetVar( $this->mTitle, $x );
00761     }
00762 
00768     public function setOutputType( $ot ) {
00769         $this->mOutputType = $ot;
00770         # Shortcut alias
00771         $this->ot = array(
00772             'html' => $ot == self::OT_HTML,
00773             'wiki' => $ot == self::OT_WIKI,
00774             'pre' => $ot == self::OT_PREPROCESS,
00775             'plain' => $ot == self::OT_PLAIN,
00776         );
00777     }
00778 
00785     public function OutputType( $x = null ) {
00786         return wfSetVar( $this->mOutputType, $x );
00787     }
00788 
00794     public function getOutput() {
00795         return $this->mOutput;
00796     }
00797 
00803     public function getOptions() {
00804         return $this->mOptions;
00805     }
00806 
00813     public function Options( $x = null ) {
00814         return wfSetVar( $this->mOptions, $x );
00815     }
00816 
00820     public function nextLinkID() {
00821         return $this->mLinkID++;
00822     }
00823 
00827     public function setLinkID( $id ) {
00828         $this->mLinkID = $id;
00829     }
00830 
00835     public function getFunctionLang() {
00836         return $this->getTargetLanguage();
00837     }
00838 
00848     public function getTargetLanguage() {
00849         $target = $this->mOptions->getTargetLanguage();
00850 
00851         if ( $target !== null ) {
00852             return $target;
00853         } elseif ( $this->mOptions->getInterfaceMessage() ) {
00854             return $this->mOptions->getUserLangObj();
00855         } elseif ( is_null( $this->mTitle ) ) {
00856             throw new MWException( __METHOD__ . ': $this->mTitle is null' );
00857         }
00858 
00859         return $this->mTitle->getPageLanguage();
00860     }
00861 
00866     public function getConverterLanguage() {
00867         return $this->getTargetLanguage();
00868     }
00869 
00876     public function getUser() {
00877         if ( !is_null( $this->mUser ) ) {
00878             return $this->mUser;
00879         }
00880         return $this->mOptions->getUser();
00881     }
00882 
00888     public function getPreprocessor() {
00889         if ( !isset( $this->mPreprocessor ) ) {
00890             $class = $this->mPreprocessorClass;
00891             $this->mPreprocessor = new $class( $this );
00892         }
00893         return $this->mPreprocessor;
00894     }
00895 
00916     public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) {
00917         static $n = 1;
00918         $stripped = '';
00919         $matches = array();
00920 
00921         $taglist = implode( '|', $elements );
00922         $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";
00923 
00924         while ( $text != '' ) {
00925             $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
00926             $stripped .= $p[0];
00927             if ( count( $p ) < 5 ) {
00928                 break;
00929             }
00930             if ( count( $p ) > 5 ) {
00931                 # comment
00932                 $element = $p[4];
00933                 $attributes = '';
00934                 $close = '';
00935                 $inside = $p[5];
00936             } else {
00937                 # tag
00938                 $element = $p[1];
00939                 $attributes = $p[2];
00940                 $close = $p[3];
00941                 $inside = $p[4];
00942             }
00943 
00944             $marker = "$uniq_prefix-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
00945             $stripped .= $marker;
00946 
00947             if ( $close === '/>' ) {
00948                 # Empty element tag, <tag />
00949                 $content = null;
00950                 $text = $inside;
00951                 $tail = null;
00952             } else {
00953                 if ( $element === '!--' ) {
00954                     $end = '/(-->)/';
00955                 } else {
00956                     $end = "/(<\\/$element\\s*>)/i";
00957                 }
00958                 $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
00959                 $content = $q[0];
00960                 if ( count( $q ) < 3 ) {
00961                     # No end tag -- let it run out to the end of the text.
00962                     $tail = '';
00963                     $text = '';
00964                 } else {
00965                     $tail = $q[1];
00966                     $text = $q[2];
00967                 }
00968             }
00969 
00970             $matches[$marker] = array( $element,
00971                 $content,
00972                 Sanitizer::decodeTagAttributes( $attributes ),
00973                 "<$element$attributes$close$content$tail" );
00974         }
00975         return $stripped;
00976     }
00977 
00983     public function getStripList() {
00984         return $this->mStripList;
00985     }
00986 
00996     public function insertStripItem( $text ) {
00997         $rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
00998         $this->mMarkerIndex++;
00999         $this->mStripState->addGeneral( $rnd, $text );
01000         return $rnd;
01001     }
01002 
01010     public function doTableStuff( $text ) {
01011         wfProfileIn( __METHOD__ );
01012 
01013         $lines = StringUtils::explode( "\n", $text );
01014         $out = '';
01015         $td_history = array(); # Is currently a td tag open?
01016         $last_tag_history = array(); # Save history of last lag activated (td, th or caption)
01017         $tr_history = array(); # Is currently a tr tag open?
01018         $tr_attributes = array(); # history of tr attributes
01019         $has_opened_tr = array(); # Did this table open a <tr> element?
01020         $indent_level = 0; # indent level of the table
01021 
01022         foreach ( $lines as $outLine ) {
01023             $line = trim( $outLine );
01024 
01025             if ( $line === '' ) { # empty line, go to next line
01026                 $out .= $outLine . "\n";
01027                 continue;
01028             }
01029 
01030             $first_character = $line[0];
01031             $matches = array();
01032 
01033             if ( preg_match( '/^(:*)\{\|(.*)$/', $line, $matches ) ) {
01034                 # First check if we are starting a new table
01035                 $indent_level = strlen( $matches[1] );
01036 
01037                 $attributes = $this->mStripState->unstripBoth( $matches[2] );
01038                 $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
01039 
01040                 $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
01041                 array_push( $td_history, false );
01042                 array_push( $last_tag_history, '' );
01043                 array_push( $tr_history, false );
01044                 array_push( $tr_attributes, '' );
01045                 array_push( $has_opened_tr, false );
01046             } elseif ( count( $td_history ) == 0 ) {
01047                 # Don't do any of the following
01048                 $out .= $outLine . "\n";
01049                 continue;
01050             } elseif ( substr( $line, 0, 2 ) === '|}' ) {
01051                 # We are ending a table
01052                 $line = '</table>' . substr( $line, 2 );
01053                 $last_tag = array_pop( $last_tag_history );
01054 
01055                 if ( !array_pop( $has_opened_tr ) ) {
01056                     $line = "<tr><td></td></tr>{$line}";
01057                 }
01058 
01059                 if ( array_pop( $tr_history ) ) {
01060                     $line = "</tr>{$line}";
01061                 }
01062 
01063                 if ( array_pop( $td_history ) ) {
01064                     $line = "</{$last_tag}>{$line}";
01065                 }
01066                 array_pop( $tr_attributes );
01067                 $outLine = $line . str_repeat( '</dd></dl>', $indent_level );
01068             } elseif ( substr( $line, 0, 2 ) === '|-' ) {
01069                 # Now we have a table row
01070                 $line = preg_replace( '#^\|-+#', '', $line );
01071 
01072                 # Whats after the tag is now only attributes
01073                 $attributes = $this->mStripState->unstripBoth( $line );
01074                 $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
01075                 array_pop( $tr_attributes );
01076                 array_push( $tr_attributes, $attributes );
01077 
01078                 $line = '';
01079                 $last_tag = array_pop( $last_tag_history );
01080                 array_pop( $has_opened_tr );
01081                 array_push( $has_opened_tr, true );
01082 
01083                 if ( array_pop( $tr_history ) ) {
01084                     $line = '</tr>';
01085                 }
01086 
01087                 if ( array_pop( $td_history ) ) {
01088                     $line = "</{$last_tag}>{$line}";
01089                 }
01090 
01091                 $outLine = $line;
01092                 array_push( $tr_history, false );
01093                 array_push( $td_history, false );
01094                 array_push( $last_tag_history, '' );
01095             } elseif ( $first_character === '|'
01096                 || $first_character === '!'
01097                 || substr( $line, 0, 2 ) === '|+'
01098             ) {
01099                 # This might be cell elements, td, th or captions
01100                 if ( substr( $line, 0, 2 ) === '|+' ) {
01101                     $first_character = '+';
01102                     $line = substr( $line, 1 );
01103                 }
01104 
01105                 $line = substr( $line, 1 );
01106 
01107                 if ( $first_character === '!' ) {
01108                     $line = str_replace( '!!', '||', $line );
01109                 }
01110 
01111                 # Split up multiple cells on the same line.
01112                 # FIXME : This can result in improper nesting of tags processed
01113                 # by earlier parser steps, but should avoid splitting up eg
01114                 # attribute values containing literal "||".
01115                 $cells = StringUtils::explodeMarkup( '||', $line );
01116 
01117                 $outLine = '';
01118 
01119                 # Loop through each table cell
01120                 foreach ( $cells as $cell ) {
01121                     $previous = '';
01122                     if ( $first_character !== '+' ) {
01123                         $tr_after = array_pop( $tr_attributes );
01124                         if ( !array_pop( $tr_history ) ) {
01125                             $previous = "<tr{$tr_after}>\n";
01126                         }
01127                         array_push( $tr_history, true );
01128                         array_push( $tr_attributes, '' );
01129                         array_pop( $has_opened_tr );
01130                         array_push( $has_opened_tr, true );
01131                     }
01132 
01133                     $last_tag = array_pop( $last_tag_history );
01134 
01135                     if ( array_pop( $td_history ) ) {
01136                         $previous = "</{$last_tag}>\n{$previous}";
01137                     }
01138 
01139                     if ( $first_character === '|' ) {
01140                         $last_tag = 'td';
01141                     } elseif ( $first_character === '!' ) {
01142                         $last_tag = 'th';
01143                     } elseif ( $first_character === '+' ) {
01144                         $last_tag = 'caption';
01145                     } else {
01146                         $last_tag = '';
01147                     }
01148 
01149                     array_push( $last_tag_history, $last_tag );
01150 
01151                     # A cell could contain both parameters and data
01152                     $cell_data = explode( '|', $cell, 2 );
01153 
01154                     # Bug 553: Note that a '|' inside an invalid link should not
01155                     # be mistaken as delimiting cell parameters
01156                     if ( strpos( $cell_data[0], '[[' ) !== false ) {
01157                         $cell = "{$previous}<{$last_tag}>{$cell}";
01158                     } elseif ( count( $cell_data ) == 1 ) {
01159                         $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
01160                     } else {
01161                         $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
01162                         $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
01163                         $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
01164                     }
01165 
01166                     $outLine .= $cell;
01167                     array_push( $td_history, true );
01168                 }
01169             }
01170             $out .= $outLine . "\n";
01171         }
01172 
01173         # Closing open td, tr && table
01174         while ( count( $td_history ) > 0 ) {
01175             if ( array_pop( $td_history ) ) {
01176                 $out .= "</td>\n";
01177             }
01178             if ( array_pop( $tr_history ) ) {
01179                 $out .= "</tr>\n";
01180             }
01181             if ( !array_pop( $has_opened_tr ) ) {
01182                 $out .= "<tr><td></td></tr>\n";
01183             }
01184 
01185             $out .= "</table>\n";
01186         }
01187 
01188         # Remove trailing line-ending (b/c)
01189         if ( substr( $out, -1 ) === "\n" ) {
01190             $out = substr( $out, 0, -1 );
01191         }
01192 
01193         # special case: don't return empty table
01194         if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
01195             $out = '';
01196         }
01197 
01198         wfProfileOut( __METHOD__ );
01199 
01200         return $out;
01201     }
01202 
01215     public function internalParse( $text, $isMain = true, $frame = false ) {
01216         wfProfileIn( __METHOD__ );
01217 
01218         $origText = $text;
01219 
01220         # Hook to suspend the parser in this state
01221         if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) {
01222             wfProfileOut( __METHOD__ );
01223             return $text;
01224         }
01225 
01226         # if $frame is provided, then use $frame for replacing any variables
01227         if ( $frame ) {
01228             # use frame depth to infer how include/noinclude tags should be handled
01229             # depth=0 means this is the top-level document; otherwise it's an included document
01230             if ( !$frame->depth ) {
01231                 $flag = 0;
01232             } else {
01233                 $flag = Parser::PTD_FOR_INCLUSION;
01234             }
01235             $dom = $this->preprocessToDom( $text, $flag );
01236             $text = $frame->expand( $dom );
01237         } else {
01238             # if $frame is not provided, then use old-style replaceVariables
01239             $text = $this->replaceVariables( $text );
01240         }
01241 
01242         wfRunHooks( 'InternalParseBeforeSanitize', array( &$this, &$text, &$this->mStripState ) );
01243         $text = Sanitizer::removeHTMLtags(
01244             $text,
01245             array( &$this, 'attributeStripCallback' ),
01246             false,
01247             array_keys( $this->mTransparentTagHooks )
01248         );
01249         wfRunHooks( 'InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ) );
01250 
01251         # Tables need to come after variable replacement for things to work
01252         # properly; putting them before other transformations should keep
01253         # exciting things like link expansions from showing up in surprising
01254         # places.
01255         $text = $this->doTableStuff( $text );
01256 
01257         $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
01258 
01259         $text = $this->doDoubleUnderscore( $text );
01260 
01261         $text = $this->doHeadings( $text );
01262         $text = $this->replaceInternalLinks( $text );
01263         $text = $this->doAllQuotes( $text );
01264         $text = $this->replaceExternalLinks( $text );
01265 
01266         # replaceInternalLinks may sometimes leave behind
01267         # absolute URLs, which have to be masked to hide them from replaceExternalLinks
01268         $text = str_replace( $this->mUniqPrefix . 'NOPARSE', '', $text );
01269 
01270         $text = $this->doMagicLinks( $text );
01271         $text = $this->formatHeadings( $text, $origText, $isMain );
01272 
01273         wfProfileOut( __METHOD__ );
01274         return $text;
01275     }
01276 
01288     public function doMagicLinks( $text ) {
01289         wfProfileIn( __METHOD__ );
01290         $prots = wfUrlProtocolsWithoutProtRel();
01291         $urlChar = self::EXT_LINK_URL_CLASS;
01292         $text = preg_replace_callback(
01293             '!(?:                           # Start cases
01294                 (<a[ \t\r\n>].*?</a>) |     # m[1]: Skip link text
01295                 (<.*?>) |                   # m[2]: Skip stuff inside HTML elements' . "
01296                 (\\b(?i:$prots)$urlChar+) |  # m[3]: Free external links" . '
01297                 (?:RFC|PMID)\s+([0-9]+) |   # m[4]: RFC or PMID, capture number
01298                 ISBN\s+(\b                  # m[5]: ISBN, capture number
01299                     (?: 97[89] [\ \-]? )?   # optional 13-digit ISBN prefix
01300                     (?: [0-9]  [\ \-]? ){9} # 9 digits with opt. delimiters
01301                     [0-9Xx]                 # check digit
01302                     \b)
01303             )!xu', array( &$this, 'magicLinkCallback' ), $text );
01304         wfProfileOut( __METHOD__ );
01305         return $text;
01306     }
01307 
01313     public function magicLinkCallback( $m ) {
01314         if ( isset( $m[1] ) && $m[1] !== '' ) {
01315             # Skip anchor
01316             return $m[0];
01317         } elseif ( isset( $m[2] ) && $m[2] !== '' ) {
01318             # Skip HTML element
01319             return $m[0];
01320         } elseif ( isset( $m[3] ) && $m[3] !== '' ) {
01321             # Free external link
01322             return $this->makeFreeExternalLink( $m[0] );
01323         } elseif ( isset( $m[4] ) && $m[4] !== '' ) {
01324             # RFC or PMID
01325             if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
01326                 $keyword = 'RFC';
01327                 $urlmsg = 'rfcurl';
01328                 $cssClass = 'mw-magiclink-rfc';
01329                 $id = $m[4];
01330             } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
01331                 $keyword = 'PMID';
01332                 $urlmsg = 'pubmedurl';
01333                 $cssClass = 'mw-magiclink-pmid';
01334                 $id = $m[4];
01335             } else {
01336                 throw new MWException( __METHOD__ . ': unrecognised match type "' .
01337                     substr( $m[0], 0, 20 ) . '"' );
01338             }
01339             $url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
01340             return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass );
01341         } elseif ( isset( $m[5] ) && $m[5] !== '' ) {
01342             # ISBN
01343             $isbn = $m[5];
01344             $num = strtr( $isbn, array(
01345                 '-' => '',
01346                 ' ' => '',
01347                 'x' => 'X',
01348             ));
01349             $titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
01350             return '<a href="' .
01351                 htmlspecialchars( $titleObj->getLocalURL() ) .
01352                 "\" class=\"internal mw-magiclink-isbn\">ISBN $isbn</a>";
01353         } else {
01354             return $m[0];
01355         }
01356     }
01357 
01366     public function makeFreeExternalLink( $url ) {
01367         wfProfileIn( __METHOD__ );
01368 
01369         $trail = '';
01370 
01371         # The characters '<' and '>' (which were escaped by
01372         # removeHTMLtags()) should not be included in
01373         # URLs, per RFC 2396.
01374         $m2 = array();
01375         if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
01376             $trail = substr( $url, $m2[0][1] ) . $trail;
01377             $url = substr( $url, 0, $m2[0][1] );
01378         }
01379 
01380         # Move trailing punctuation to $trail
01381         $sep = ',;\.:!?';
01382         # If there is no left bracket, then consider right brackets fair game too
01383         if ( strpos( $url, '(' ) === false ) {
01384             $sep .= ')';
01385         }
01386 
01387         $numSepChars = strspn( strrev( $url ), $sep );
01388         if ( $numSepChars ) {
01389             $trail = substr( $url, -$numSepChars ) . $trail;
01390             $url = substr( $url, 0, -$numSepChars );
01391         }
01392 
01393         $url = Sanitizer::cleanUrl( $url );
01394 
01395         # Is this an external image?
01396         $text = $this->maybeMakeExternalImage( $url );
01397         if ( $text === false ) {
01398             # Not an image, make a link
01399             $text = Linker::makeExternalLink( $url,
01400                 $this->getConverterLanguage()->markNoConversion( $url, true ),
01401                 true, 'free',
01402                 $this->getExternalLinkAttribs( $url ) );
01403             # Register it in the output object...
01404             # Replace unnecessary URL escape codes with their equivalent characters
01405             $pasteurized = self::normalizeLinkUrl( $url );
01406             $this->mOutput->addExternalLink( $pasteurized );
01407         }
01408         wfProfileOut( __METHOD__ );
01409         return $text . $trail;
01410     }
01411 
01421     public function doHeadings( $text ) {
01422         wfProfileIn( __METHOD__ );
01423         for ( $i = 6; $i >= 1; --$i ) {
01424             $h = str_repeat( '=', $i );
01425             $text = preg_replace( "/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text );
01426         }
01427         wfProfileOut( __METHOD__ );
01428         return $text;
01429     }
01430 
01439     public function doAllQuotes( $text ) {
01440         wfProfileIn( __METHOD__ );
01441         $outtext = '';
01442         $lines = StringUtils::explode( "\n", $text );
01443         foreach ( $lines as $line ) {
01444             $outtext .= $this->doQuotes( $line ) . "\n";
01445         }
01446         $outtext = substr( $outtext, 0, -1 );
01447         wfProfileOut( __METHOD__ );
01448         return $outtext;
01449     }
01450 
01458     public function doQuotes( $text ) {
01459         $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
01460         $countarr = count( $arr );
01461         if ( $countarr == 1 ) {
01462             return $text;
01463         }
01464 
01465         // First, do some preliminary work. This may shift some apostrophes from
01466         // being mark-up to being text. It also counts the number of occurrences
01467         // of bold and italics mark-ups.
01468         $numbold = 0;
01469         $numitalics = 0;
01470         for ( $i = 1; $i < $countarr; $i += 2 ) {
01471             $thislen = strlen( $arr[$i] );
01472             // If there are ever four apostrophes, assume the first is supposed to
01473             // be text, and the remaining three constitute mark-up for bold text.
01474             // (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
01475             if ( $thislen == 4 ) {
01476                 $arr[$i - 1] .= "'";
01477                 $arr[$i] = "'''";
01478                 $thislen = 3;
01479             } elseif ( $thislen > 5 ) {
01480                 // If there are more than 5 apostrophes in a row, assume they're all
01481                 // text except for the last 5.
01482                 // (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
01483                 $arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
01484                 $arr[$i] = "'''''";
01485                 $thislen = 5;
01486             }
01487             // Count the number of occurrences of bold and italics mark-ups.
01488             if ( $thislen == 2 ) {
01489                 $numitalics++;
01490             } elseif ( $thislen == 3 ) {
01491                 $numbold++;
01492             } elseif ( $thislen == 5 ) {
01493                 $numitalics++;
01494                 $numbold++;
01495             }
01496         }
01497 
01498         // If there is an odd number of both bold and italics, it is likely
01499         // that one of the bold ones was meant to be an apostrophe followed
01500         // by italics. Which one we cannot know for certain, but it is more
01501         // likely to be one that has a single-letter word before it.
01502         if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
01503             $firstsingleletterword = -1;
01504             $firstmultiletterword = -1;
01505             $firstspace = -1;
01506             for ( $i = 1; $i < $countarr; $i += 2 ) {
01507                 if ( strlen( $arr[$i] ) == 3 ) {
01508                     $x1 = substr( $arr[$i - 1], -1 );
01509                     $x2 = substr( $arr[$i - 1], -2, 1 );
01510                     if ( $x1 === ' ' ) {
01511                         if ( $firstspace == -1 ) {
01512                             $firstspace = $i;
01513                         }
01514                     } elseif ( $x2 === ' ' ) {
01515                         if ( $firstsingleletterword == -1 ) {
01516                             $firstsingleletterword = $i;
01517                             // if $firstsingleletterword is set, we don't
01518                             // look at the other options, so we can bail early.
01519                             break;
01520                         }
01521                     } else {
01522                         if ( $firstmultiletterword == -1 ) {
01523                             $firstmultiletterword = $i;
01524                         }
01525                     }
01526                 }
01527             }
01528 
01529             // If there is a single-letter word, use it!
01530             if ( $firstsingleletterword > -1 ) {
01531                 $arr[$firstsingleletterword] = "''";
01532                 $arr[$firstsingleletterword - 1] .= "'";
01533             } elseif ( $firstmultiletterword > -1 ) {
01534                 // If not, but there's a multi-letter word, use that one.
01535                 $arr[$firstmultiletterword] = "''";
01536                 $arr[$firstmultiletterword - 1] .= "'";
01537             } elseif ( $firstspace > -1 ) {
01538                 // ... otherwise use the first one that has neither.
01539                 // (notice that it is possible for all three to be -1 if, for example,
01540                 // there is only one pentuple-apostrophe in the line)
01541                 $arr[$firstspace] = "''";
01542                 $arr[$firstspace - 1] .= "'";
01543             }
01544         }
01545 
01546         // Now let's actually convert our apostrophic mush to HTML!
01547         $output = '';
01548         $buffer = '';
01549         $state = '';
01550         $i = 0;
01551         foreach ( $arr as $r ) {
01552             if ( ( $i % 2 ) == 0 ) {
01553                 if ( $state === 'both' ) {
01554                     $buffer .= $r;
01555                 } else {
01556                     $output .= $r;
01557                 }
01558             } else {
01559                 $thislen = strlen( $r );
01560                 if ( $thislen == 2 ) {
01561                     if ( $state === 'i' ) {
01562                         $output .= '</i>';
01563                         $state = '';
01564                     } elseif ( $state === 'bi' ) {
01565                         $output .= '</i>';
01566                         $state = 'b';
01567                     } elseif ( $state === 'ib' ) {
01568                         $output .= '</b></i><b>';
01569                         $state = 'b';
01570                     } elseif ( $state === 'both' ) {
01571                         $output .= '<b><i>' . $buffer . '</i>';
01572                         $state = 'b';
01573                     } else { // $state can be 'b' or ''
01574                         $output .= '<i>';
01575                         $state .= 'i';
01576                     }
01577                 } elseif ( $thislen == 3 ) {
01578                     if ( $state === 'b' ) {
01579                         $output .= '</b>';
01580                         $state = '';
01581                     } elseif ( $state === 'bi' ) {
01582                         $output .= '</i></b><i>';
01583                         $state = 'i';
01584                     } elseif ( $state === 'ib' ) {
01585                         $output .= '</b>';
01586                         $state = 'i';
01587                     } elseif ( $state === 'both' ) {
01588                         $output .= '<i><b>' . $buffer . '</b>';
01589                         $state = 'i';
01590                     } else { // $state can be 'i' or ''
01591                         $output .= '<b>';
01592                         $state .= 'b';
01593                     }
01594                 } elseif ( $thislen == 5 ) {
01595                     if ( $state === 'b' ) {
01596                         $output .= '</b><i>';
01597                         $state = 'i';
01598                     } elseif ( $state === 'i' ) {
01599                         $output .= '</i><b>';
01600                         $state = 'b';
01601                     } elseif ( $state === 'bi' ) {
01602                         $output .= '</i></b>';
01603                         $state = '';
01604                     } elseif ( $state === 'ib' ) {
01605                         $output .= '</b></i>';
01606                         $state = '';
01607                     } elseif ( $state === 'both' ) {
01608                         $output .= '<i><b>' . $buffer . '</b></i>';
01609                         $state = '';
01610                     } else { // ($state == '')
01611                         $buffer = '';
01612                         $state = 'both';
01613                     }
01614                 }
01615             }
01616             $i++;
01617         }
01618         // Now close all remaining tags.  Notice that the order is important.
01619         if ( $state === 'b' || $state === 'ib' ) {
01620             $output .= '</b>';
01621         }
01622         if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
01623             $output .= '</i>';
01624         }
01625         if ( $state === 'bi' ) {
01626             $output .= '</b>';
01627         }
01628         // There might be lonely ''''', so make sure we have a buffer
01629         if ( $state === 'both' && $buffer ) {
01630             $output .= '<b><i>' . $buffer . '</i></b>';
01631         }
01632         return $output;
01633     }
01634 
01648     public function replaceExternalLinks( $text ) {
01649         wfProfileIn( __METHOD__ );
01650 
01651         $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
01652         if ( $bits === false ) {
01653             wfProfileOut( __METHOD__ );
01654             throw new MWException( "PCRE needs to be compiled with "
01655                 . "--enable-unicode-properties in order for MediaWiki to function" );
01656         }
01657         $s = array_shift( $bits );
01658 
01659         $i = 0;
01660         while ( $i < count( $bits ) ) {
01661             $url = $bits[$i++];
01662             $i++; // protocol
01663             $text = $bits[$i++];
01664             $trail = $bits[$i++];
01665 
01666             # The characters '<' and '>' (which were escaped by
01667             # removeHTMLtags()) should not be included in
01668             # URLs, per RFC 2396.
01669             $m2 = array();
01670             if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
01671                 $text = substr( $url, $m2[0][1] ) . ' ' . $text;
01672                 $url = substr( $url, 0, $m2[0][1] );
01673             }
01674 
01675             # If the link text is an image URL, replace it with an <img> tag
01676             # This happened by accident in the original parser, but some people used it extensively
01677             $img = $this->maybeMakeExternalImage( $text );
01678             if ( $img !== false ) {
01679                 $text = $img;
01680             }
01681 
01682             $dtrail = '';
01683 
01684             # Set linktype for CSS - if URL==text, link is essentially free
01685             $linktype = ( $text === $url ) ? 'free' : 'text';
01686 
01687             # No link text, e.g. [http://domain.tld/some.link]
01688             if ( $text == '' ) {
01689                 # Autonumber
01690                 $langObj = $this->getTargetLanguage();
01691                 $text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
01692                 $linktype = 'autonumber';
01693             } else {
01694                 # Have link text, e.g. [http://domain.tld/some.link text]s
01695                 # Check for trail
01696                 list( $dtrail, $trail ) = Linker::splitTrail( $trail );
01697             }
01698 
01699             $text = $this->getConverterLanguage()->markNoConversion( $text );
01700 
01701             $url = Sanitizer::cleanUrl( $url );
01702 
01703             # Use the encoded URL
01704             # This means that users can paste URLs directly into the text
01705             # Funny characters like ö aren't valid in URLs anyway
01706             # This was changed in August 2004
01707             $s .= Linker::makeExternalLink( $url, $text, false, $linktype,
01708                 $this->getExternalLinkAttribs( $url ) ) . $dtrail . $trail;
01709 
01710             # Register link in the output object.
01711             # Replace unnecessary URL escape codes with the referenced character
01712             # This prevents spammers from hiding links from the filters
01713             $pasteurized = self::normalizeLinkUrl( $url );
01714             $this->mOutput->addExternalLink( $pasteurized );
01715         }
01716 
01717         wfProfileOut( __METHOD__ );
01718         return $s;
01719     }
01720 
01730     public static function getExternalLinkRel( $url = false, $title = null ) {
01731         global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;
01732         $ns = $title ? $title->getNamespace() : false;
01733         if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
01734             && !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
01735         ) {
01736             return 'nofollow';
01737         }
01738         return null;
01739     }
01740 
01751     public function getExternalLinkAttribs( $url = false ) {
01752         $attribs = array();
01753         $attribs['rel'] = self::getExternalLinkRel( $url, $this->mTitle );
01754 
01755         if ( $this->mOptions->getExternalLinkTarget() ) {
01756             $attribs['target'] = $this->mOptions->getExternalLinkTarget();
01757         }
01758         return $attribs;
01759     }
01760 
01768     public static function replaceUnusualEscapes( $url ) {
01769         wfDeprecated( __METHOD__, '1.24' );
01770         return self::normalizeLinkUrl( $url );
01771     }
01772 
01782     public static function normalizeLinkUrl( $url ) {
01783         # First, make sure unsafe characters are encoded
01784         $url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
01785             function ( $m ) {
01786                 return rawurlencode( $m[0] );
01787             },
01788             $url
01789         );
01790 
01791         $ret = '';
01792         $end = strlen( $url );
01793 
01794         # Fragment part - 'fragment'
01795         $start = strpos( $url, '#' );
01796         if ( $start !== false && $start < $end ) {
01797             $ret = self::normalizeUrlComponent(
01798                 substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
01799             $end = $start;
01800         }
01801 
01802         # Query part - 'query' minus &=+;
01803         $start = strpos( $url, '?' );
01804         if ( $start !== false && $start < $end ) {
01805             $ret = self::normalizeUrlComponent(
01806                 substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
01807             $end = $start;
01808         }
01809 
01810         # Scheme and path part - 'pchar'
01811         # (we assume no userinfo or encoded colons in the host)
01812         $ret = self::normalizeUrlComponent(
01813             substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;
01814 
01815         return $ret;
01816     }
01817 
01818     private static function normalizeUrlComponent( $component, $unsafe ) {
01819         $callback = function ( $matches ) use ( $unsafe ) {
01820             $char = urldecode( $matches[0] );
01821             $ord = ord( $char );
01822             if ( $ord > 32 && $ord < 127 && strpos( $unsafe, $char ) === false ) {
01823                 # Unescape it
01824                 return $char;
01825             } else {
01826                 # Leave it escaped, but use uppercase for a-f
01827                 return strtoupper( $matches[0] );
01828             }
01829         };
01830         return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $callback, $component );
01831     }
01832 
01841     private function maybeMakeExternalImage( $url ) {
              # Decide whether $url may be rendered as an external image and, if so,
              # return what Linker::makeExternalImage() produces for it. Three gates are
              # tried in order: the global allow-external-images option, the configured
              # allowed URL prefix(es), and the 'external_image_whitelist' message.
              #
              # @param string $url
              # @return string|bool Linker::makeExternalImage() result, or false if not allowed
01842         $allowedPrefixes = $this->mOptions->getAllowExternalImagesFrom();
01843         $prefixMatched = false;
01844         if ( !empty( $allowedPrefixes ) ) {
                  # The option is either one prefix or a list of them; treat both alike
01845             $candidates = is_array( $allowedPrefixes ) ? $allowedPrefixes : array( $allowedPrefixes );
01846             foreach ( $candidates as $candidate ) {
01847                 if ( strpos( $url, $candidate ) === 0 ) {
01848                     $prefixMatched = true;
01849                     break;
01850                 }
01851             }
01852         }
01853 
01854         $html = false;
01855         if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatched )
01856             && preg_match( self::EXT_IMAGE_REGEX, $url )
01857         ) {
01858             $html = Linker::makeExternalImage( $url );
01859         }
              # The whitelist is only consulted when nothing was produced above, the
              # whitelist is enabled, and the URL has the shape of an image URL
01860         if ( $html
01861             || !$this->mOptions->getEnableImageWhitelist()
01862             || !preg_match( self::EXT_IMAGE_REGEX, $url )
01863         ) {
01864             return $html;
01865         }
01866 
01867         $rules = explode(
01868             "\n",
01869             wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
01870         );
01871         foreach ( $rules as $rule ) {
                  # Blank lines and lines starting with '#' are not rules
01872             if ( $rule === '' || strpos( $rule, '#' ) === 0 ) {
01873                 continue;
01874             }
                  # Each rule is a regex fragment: escape the delimiter, match case-insensitively
01875             $pattern = '/' . str_replace( '/', '\\/', $rule ) . '/i';
01876             if ( preg_match( $pattern, $url ) ) {
01877                 return Linker::makeExternalImage( $url );
01878             }
01879         }
01880         return $html;
01881     }
01890 
01900     public function replaceInternalLinks( $s ) {
              # Process the internal links in $s with replaceInternalLinks2() and merge
              # the LinkHolderArray it returns into $this->mLinkHolders.
              #
              # @param string $s Text to process
              # @return string $s as rewritten (by reference) by replaceInternalLinks2()
01901         $this->mLinkHolders->merge( $this->replaceInternalLinks2( $s ) );
01902         return $s;
01903     }
01904 
01913     public function replaceInternalLinks2( &$s ) {
              # Walk through every "[[" in $s and rewrite the link that follows it.
              # $s is modified in place through the reference. Language links, images,
              # categories, self-links and media links are handled directly in the loop;
              # other links are replaced by the text returned from makeKnownLinkHolder()
              # or LinkHolderArray::makeHolder().
              #
              # @param string &$s Text to process; rewritten in place
              # @return LinkHolderArray The holder array created for this call
              # @throws MWException If $this->mTitle is null
01914         global $wgExtraInterlanguageLinkPrefixes;
01915         wfProfileIn( __METHOD__ );
01916 
01917         wfProfileIn( __METHOD__ . '-setup' );
              # The regexes only depend on Title::legalChars(), so build them once
01918         static $tc = false, $e1, $e1_img;
01919         # the % is needed to support urlencoded titles as well
01920         if ( !$tc ) {
01921             $tc = Title::legalChars() . '#%';
01922             # Match a link having the form [[namespace:link|alternate]]trail
01923             $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
01924             # Match cases where there is no "]]", which might still be images
01925             $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
01926         }
01927 
01928         $holders = new LinkHolderArray( $this );
01929 
01930         # split the entire text string on occurrences of [[
01931         $a = StringUtils::explode( '[[', ' ' . $s );
01932         # get the first element (all text up to first [[), and remove the space we added
01933         $s = $a->current();
01934         $a->next();
01935         $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
01936         $s = substr( $s, 1 );
01937 
01938         $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
01939         $e2 = null;
01940         if ( $useLinkPrefixExtension ) {
01941             # Match the end of a line for a word that's not followed by whitespace,
01942             # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
01943             global $wgContLang;
01944             $charset = $wgContLang->linkPrefixCharset();
01945             $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
01946         }
01947 
01948         if ( is_null( $this->mTitle ) ) {
                  # Close both open profiling sections before bailing out
01949             wfProfileOut( __METHOD__ . '-setup' );
01950             wfProfileOut( __METHOD__ );
01951             throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
01952         }
01953         $nottalk = !$this->mTitle->isTalkPage();
01954 
01955         if ( $useLinkPrefixExtension ) {
01956             $m = array();
01957             if ( preg_match( $e2, $s, $m ) ) {
01958                 $first_prefix = $m[2];
01959             } else {
01960                 $first_prefix = false;
01961             }
01962         } else {
01963             $prefix = '';
01964         }
01965 
01966         $useSubpages = $this->areSubpagesAllowed();
01967         wfProfileOut( __METHOD__ . '-setup' );
01968 
01969         // @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
01970         # Loop for each link
01971         for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
01972             // @codingStandardsIgnoreEnd
01973 
01974             # Check for excessive memory usage
01975             if ( $holders->isBig() ) {
01976                 # Too big
01977                 # Do the existence check, replace the link holders and clear the array
01978                 $holders->replace( $s );
01979                 $holders->clear();
01980             }
01981 
01982             if ( $useLinkPrefixExtension ) {
01983                 wfProfileIn( __METHOD__ . '-prefixhandling' );
01984                 if ( preg_match( $e2, $s, $m ) ) {
01985                     $prefix = $m[2];
01986                     $s = $m[1];
01987                 } else {
01988                     $prefix = '';
01989                 }
01990                 # first link
01991                 if ( $first_prefix ) {
01992                     $prefix = $first_prefix;
01993                     $first_prefix = false;
01994                 }
01995                 wfProfileOut( __METHOD__ . '-prefixhandling' );
01996             }
01997 
01998             $might_be_img = false;
01999 
02000             wfProfileIn( __METHOD__ . "-e1" );
02001             if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
02002                 $text = $m[2];
02003                 # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
02004                 # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks the match,
02005                 # the real problem is with the $e1 regex
02006                 # See bug 1300.
02007                 #
02008                 # Still some problems for cases where the ] is meant to be outside punctuation,
02009                 # and no image is in sight. See bug 2095.
02010                 #
02011                 if ( $text !== ''
02012                     && substr( $m[3], 0, 1 ) === ']'
02013                     && strpos( $text, '[' ) !== false
02014                 ) {
02015                     $text .= ']'; # so that replaceExternalLinks($text) works later
02016                     $m[3] = substr( $m[3], 1 );
02017                 }
02018                 # fix up urlencoded title texts
02019                 if ( strpos( $m[1], '%' ) !== false ) {
02020                     # Should anchors '#' also be rejected?
02021                     $m[1] = str_replace( array( '<', '>' ), array( '&lt;', '&gt;' ), rawurldecode( $m[1] ) );
02022                 }
02023                 $trail = $m[3];
02024             } elseif ( preg_match( $e1_img, $line, $m ) ) {
02025                 # Invalid, but might be an image with a link in its caption
02026                 $might_be_img = true;
02027                 $text = $m[2];
02028                 if ( strpos( $m[1], '%' ) !== false ) {
02029                     $m[1] = rawurldecode( $m[1] );
02030                 }
02031                 $trail = "";
02032             } else { # Invalid form; output directly
02033                 $s .= $prefix . '[[' . $line;
02034                 wfProfileOut( __METHOD__ . "-e1" );
02035                 continue;
02036             }
02037             wfProfileOut( __METHOD__ . "-e1" );
02038             wfProfileIn( __METHOD__ . "-misc" );
02039 
02040             $origLink = $m[1];
02041 
02042             # Don't allow internal links to pages containing
02043             # PROTO: where PROTO is a valid URL protocol; these
02044             # should be external links.
02045             if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
02046                 $s .= $prefix . '[[' . $line;
02047                 wfProfileOut( __METHOD__ . "-misc" );
02048                 continue;
02049             }
02050 
02051             # Make subpage if necessary
02052             if ( $useSubpages ) {
02053                 $link = $this->maybeDoSubpageLink( $origLink, $text );
02054             } else {
02055                 $link = $origLink;
02056             }
02057 
                  # $noforce is false when the link starts with ':'; such links skip the
                  # language-link, image and category handling further down
02058             $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
02059             if ( !$noforce ) {
02060                 # Strip off leading ':'
02061                 $link = substr( $link, 1 );
02062             }
02063 
02064             wfProfileOut( __METHOD__ . "-misc" );
02065             wfProfileIn( __METHOD__ . "-title" );
02066             $nt = Title::newFromText( $this->mStripState->unstripNoWiki( $link ) );
02067             if ( $nt === null ) {
                      # No Title could be built; emit the original markup unchanged
02068                 $s .= $prefix . '[[' . $line;
02069                 wfProfileOut( __METHOD__ . "-title" );
02070                 continue;
02071             }
02072 
02073             $ns = $nt->getNamespace();
02074             $iw = $nt->getInterwiki();
02075             wfProfileOut( __METHOD__ . "-title" );
02076 
02077             if ( $might_be_img ) { # if this is actually an invalid link
02078                 wfProfileIn( __METHOD__ . "-might_be_img" );
02079                 if ( $ns == NS_FILE && $noforce ) { # but might be an image
02080                     $found = false;
02081                     while ( true ) {
02082                         # look at the next 'line' to see if we can close it there
02083                         $a->next();
02084                         $next_line = $a->current();
02085                         if ( $next_line === false || $next_line === null ) {
02086                             break;
02087                         }
02088                         $m = explode( ']]', $next_line, 3 );
02089                         if ( count( $m ) == 3 ) {
02090                             # the first ]] closes the inner link, the second the image
02091                             $found = true;
02092                             $text .= "[[{$m[0]}]]{$m[1]}";
02093                             $trail = $m[2];
02094                             break;
02095                         } elseif ( count( $m ) == 2 ) {
02096                             # if there's exactly one ]] that's fine, we'll keep looking
02097                             $text .= "[[{$m[0]}]]{$m[1]}";
02098                         } else {
02099                             # if $next_line is invalid too, we need look no further
02100                             $text .= '[[' . $next_line;
02101                             break;
02102                         }
02103                     }
02104                     if ( !$found ) {
02105                         # we couldn't find the end of this imageLink, so output it raw
02106                         # but don't ignore what might be perfectly normal links in the text we've examined
02107                         $holders->merge( $this->replaceInternalLinks2( $text ) );
02108                         $s .= "{$prefix}[[$link|$text";
02109                         # note: no $trail, because without an end, there *is* no trail
02110                         wfProfileOut( __METHOD__ . "-might_be_img" );
02111                         continue;
02112                     }
02113                 } else { # it's not an image, so output it raw
02114                     $s .= "{$prefix}[[$link|$text";
02115                     # note: no $trail, because without an end, there *is* no trail
02116                     wfProfileOut( __METHOD__ . "-might_be_img" );
02117                     continue;
02118                 }
02119                 wfProfileOut( __METHOD__ . "-might_be_img" );
02120             }
02121 
                  # Remember whether the link had no alternate text; the image and
                  # category branches below treat that case specially
02122             $wasblank = ( $text == '' );
02123             if ( $wasblank ) {
02124                 $text = $link;
02125             } else {
02126                 # Bug 4598 madness. Handle the quotes only if they come from the alternate part
02127                 # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
02128                 # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
02129                 #    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
02130                 $text = $this->doQuotes( $text );
02131             }
02132 
02133             # Link not escaped by : , create the various objects
02134             if ( $noforce && !$nt->wasLocalInterwiki() ) {
02135                 # Interwikis
02136                 wfProfileIn( __METHOD__ . "-interwiki" );
02137                 if (
02138                     $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
02139                         Language::fetchLanguageName( $iw, null, 'mw' ) ||
02140                         in_array( $iw, $wgExtraInterlanguageLinkPrefixes )
02141                     )
02142                 ) {
02143                     # Bug 24502: filter duplicates
02144                     if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
02145                         $this->mLangLinkLanguages[$iw] = true;
02146                         $this->mOutput->addLanguageLink( $nt->getFullText() );
02147                     }
02148 
02149                     $s = rtrim( $s . $prefix );
02150                     $s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail;
02151                     wfProfileOut( __METHOD__ . "-interwiki" );
02152                     continue;
02153                 }
02154                 wfProfileOut( __METHOD__ . "-interwiki" );
02155 
02156                 if ( $ns == NS_FILE ) {
02157                     wfProfileIn( __METHOD__ . "-image" );
02158                     if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
02159                         if ( $wasblank ) {
02160                             # if no parameters were passed, $text
02161                             # becomes something like "File:Foo.png",
02162                             # which we don't want to pass on to the
02163                             # image generator
02164                             $text = '';
02165                         } else {
02166                             # recursively parse links inside the image caption
02167                             # actually, this will parse them in any other parameters, too,
02168                             # but it might be hard to fix that, and it doesn't matter ATM
02169                             $text = $this->replaceExternalLinks( $text );
02170                             $holders->merge( $this->replaceInternalLinks2( $text ) );
02171                         }
02172                         # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
02173                         $s .= $prefix . $this->armorLinks(
02174                             $this->makeImage( $nt, $text, $holders ) ) . $trail;
02175                     } else {
02176                         $s .= $prefix . $trail;
02177                     }
02178                     wfProfileOut( __METHOD__ . "-image" );
02179                     continue;
02180                 }
02181 
02182                 if ( $ns == NS_CATEGORY ) {
02183                     wfProfileIn( __METHOD__ . "-category" );
02184                     $s = rtrim( $s . "\n" ); # bug 87
02185 
02186                     if ( $wasblank ) {
02187                         $sortkey = $this->getDefaultSort();
02188                     } else {
02189                         $sortkey = $text;
02190                     }
02191                     $sortkey = Sanitizer::decodeCharReferences( $sortkey );
02192                     $sortkey = str_replace( "\n", '', $sortkey );
02193                     $sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey );
02194                     $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
02195 
02199                     $s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;
02200 
02201                     wfProfileOut( __METHOD__ . "-category" );
02202                     continue;
02203                 }
02204             }
02205 
02206             # Self-link checking. For some languages, variants of the title are checked in
02207             # LinkHolderArray::doVariants() to allow batching the existence checks necessary
02208             # for linking to a different variant.
02209             if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
02210                 $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
02211                 continue;
02212             }
02213 
02214             # NS_MEDIA is a pseudo-namespace for linking directly to a file
02215             # @todo FIXME: Should do batch file existence checks, see comment below
02216             if ( $ns == NS_MEDIA ) {
02217                 wfProfileIn( __METHOD__ . "-media" );
02218                 # Give extensions a chance to select the file revision for us
02219                 $options = array();
02220                 $descQuery = false;
02221                 wfRunHooks( 'BeforeParserFetchFileAndTitle',
02222                     array( $this, $nt, &$options, &$descQuery ) );
02223                 # Fetch and register the file (file title may be different via hooks)
02224                 list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
02225                 # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
02226                 $s .= $prefix . $this->armorLinks(
02227                     Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
02228                 wfProfileOut( __METHOD__ . "-media" );
02229                 continue;
02230             }
02231 
02232             wfProfileIn( __METHOD__ . "-always_known" );
02233             # Some titles, such as valid special pages or files in foreign repos, should
02234             # be shown as bluelinks even though they're not included in the page table
02235             #
02236             # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
02237             # batch file existence checks for NS_FILE and NS_MEDIA
02238             if ( $iw == '' && $nt->isAlwaysKnown() ) {
02239                 $this->mOutput->addLink( $nt );
02240                 $s .= $this->makeKnownLinkHolder( $nt, $text, array(), $trail, $prefix );
02241             } else {
02242                 # Links will be added to the output link list after checking
02243                 $s .= $holders->makeHolder( $nt, $text, array(), $trail, $prefix );
02244             }
02245             wfProfileOut( __METHOD__ . "-always_known" );
02246         }
02247         wfProfileOut( __METHOD__ );
02248         return $holders;
02249     }
02250 
02265     public function makeKnownLinkHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) {
              # Build a link through Linker::linkKnown() and pass the result through
              # armorLinks(). The trail is split with Linker::splitTrail(): the first
              # part is appended to the link text, the second follows the returned link.
              #
              # @param Title $nt
              # @param string $text Link text; when empty, the escaped prefixed title is used
              # @param array|string $query Query parameters; a string is converted with wfCgiToArray()
              # @param string $trail
              # @param string $prefix Text placed before $text inside the link
              # @return string
02266         $trailParts = Linker::splitTrail( $trail );
02267         $queryParams = is_string( $query ) ? wfCgiToArray( $query ) : $query;
02268         $label = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;
02269 
02270         $html = Linker::linkKnown( $nt, $prefix . $label . $trailParts[0], array(), $queryParams );
02271         return $this->armorLinks( $html ) . $trailParts[1];
02272     }
02279 
02290     public function armorLinks( $text ) {
              # Insert "{$this->mUniqPrefix}NOPARSE" in front of every URL protocol from
              # $this->mUrlProtocols that starts at a word boundary in $text (matched
              # case-insensitively). Callers in this file use this so that
              # replaceExternalLinks() leaves already-generated markup alone.
              #
              # @param string $text
              # @return string
02291         return preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/',
02292             "{$this->mUniqPrefix}NOPARSE$1", $text );
02293     }
02294 
02299     public function areSubpagesAllowed() {
              # Report whether the namespace of the current title ($this->mTitle) has
              # subpages, as answered by MWNamespace::hasSubpages().
              #
              # @return bool
02300         # Some namespaces don't allow subpages
02301         return MWNamespace::hasSubpages( $this->mTitle->getNamespace() );
02302     }
02303 
02312     public function maybeDoSubpageLink( $target, &$text ) {
              # Delegate to Linker::normalizeSubpageLink() using the current title as
              # context and return its result.
              #
              # @param string $target Link target as written in the text
              # @param string &$text Link text, passed by reference to the Linker call
              #   (NOTE(review): presumably it may be rewritten there; confirm in Linker)
              # @return string
02313         return Linker::normalizeSubpageLink( $this->mTitle, $target, $text );
02314     }
02315 
02322     public function closeParagraph() {
              # Produce the closing tag for the section recorded in $this->mLastSection
              # (empty string when none is recorded), then reset mLastSection and mInPre.
              #
              # @return string
02323         $closingTag = ( $this->mLastSection != '' )
02324             ? '</' . $this->mLastSection . ">\n"
02325             : '';
02326         $this->mLastSection = '';
02327         $this->mInPre = false;
02328         return $closingTag;
02329     }
02331 
02342     public function getCommon( $st1, $st2 ) {
              # Count how many leading bytes two strings have in common.
              #
              # @param string $st1
              # @param string $st2
              # @return int Length of the shared prefix (0 when either string is empty)
02343         $limit = min( strlen( $st1 ), strlen( $st2 ) );
02344         $matched = 0;
02345         while ( $matched < $limit && $st1[$matched] == $st2[$matched] ) {
02346             ++$matched;
02347         }
02348         return $matched;
02349     }
02356 
02366     public function openList( $char ) {
              # Open a new list level for one prefix character. The current paragraph is
              # always closed first; its closing markup precedes the list tags in the
              # result. An unrecognised character yields only an error comment.
              #
              # @param string $char One of '*', '#', ':' or ';'
              # @return string
02367         $openers = array(
02368             '*' => "<ul><li>",
02369             '#' => "<ol><li>",
02370             ':' => "<dl><dd>",
02371             ';' => "<dl><dt>",
02372         );
02373         $closed = $this->closeParagraph();
02374 
02375         if ( !is_string( $char ) || !isset( $openers[$char] ) ) {
02376             return '<!-- ERR 1 -->';
02377         }
02378         if ( $char === ';' ) {
                  # Remember that a <dt> is open so nextItem()/closeList() emit </dt>
02379             $this->mDTopen = true;
02380         }
02381         return $closed . $openers[$char];
02382     }
02384 
02392     public function nextItem( $char ) {
              # Produce the markup that ends the current list item and starts the next
              # one at the same level. For definition lists the closing tag depends on
              # whether a <dt> is open ($this->mDTopen), and mDTopen is updated to
              # reflect the element being opened.
              #
              # @param string $char One of '*', '#', ':' or ';'
              # @return string Markup, or an error comment for any other character
02393         if ( $char === '*' || $char === '#' ) {
02394             return "</li>\n<li>";
02395         }
02396         if ( $char !== ':' && $char !== ';' ) {
02397             return '<!-- ERR 2 -->';
02398         }
02399 
02400         $closing = $this->mDTopen ? "</dt>\n" : "</dd>\n";
02401         $startsTerm = ( $char === ';' );
02402         $this->mDTopen = $startsTerm;
02403         return $closing . ( $startsTerm ? '<dt>' : '<dd>' );
02404     }
02410 
02418     public function closeList( $char ) {
              # Produce the markup that closes one list level. For ':' the closing tag
              # depends on $this->mDTopen, which is cleared when it was set. Note that
              # ';' is not accepted here and yields the error comment.
              #
              # @param string $char One of '*', '#' or ':'
              # @return string Markup, or an error comment for any other character
02419         if ( $char === '*' ) {
02420             return "</li></ul>";
02421         }
02422         if ( $char === '#' ) {
02423             return "</li></ol>";
02424         }
02425         if ( $char !== ':' ) {
02426             return '<!-- ERR 3 -->';
02427         }
02428         if ( !$this->mDTopen ) {
02429             return "</dd></dl>";
02430         }
02431         $this->mDTopen = false;
02432         return "</dt></dl>";
02433     }
02445     public function doBlockLevels( $text, $linestart ) {
              # Turn line-oriented wikitext into block-level HTML: list prefixes
              # (* # : ;) become nested ul/ol/dl markup, lines starting with a space
              # become <pre> sections, and remaining text is wrapped in <p> elements.
              # State is kept in $this->mInPre, $this->mLastSection and $this->mDTopen.
              #
              # @param string $text
              # @param bool $linestart Whether $text starts at the beginning of a line;
              #   when false the first line is copied to the output untouched
              # @return string
02446         wfProfileIn( __METHOD__ );
02447 
02448         # Parsing through the text line by line.  The main thing
02449         # happening here is handling of block-level elements p, pre,
02450         # and making lists from lines starting with * # : etc.
02451         #
02452         $textLines = StringUtils::explode( "\n", $text );
02453 
02454         $lastPrefix = $output = '';
02455         $this->mDTopen = $inBlockElem = false;
02456         $prefixLength = 0;
              # Either false, or paragraph markup ('<p>' / '</p><p>') queued by a blank
              # line and emitted when the following line is processed
02457         $paragraphStack = false;
02458         $inBlockquote = false;
02459 
02460         foreach ( $textLines as $oLine ) {
02461             # Fix up $linestart
02462             if ( !$linestart ) {
02463                 $output .= $oLine;
02464                 $linestart = true;
02465                 continue;
02466             }
02467             # * = ul
02468             # # = ol
02469             # ; = dt
02470             # : = dd
02471 
02472             $lastPrefixLength = strlen( $lastPrefix );
02473             $preCloseMatch = preg_match( '/<\\/pre/i', $oLine );
02474             $preOpenMatch = preg_match( '/<pre/i', $oLine );
02475             # If not in a <pre> element, scan for and figure out what prefixes are there.
02476             if ( !$this->mInPre ) {
02477                 # Multiple prefixes may abut each other for nested lists.
02478                 $prefixLength = strspn( $oLine, '*#:;' );
02479                 $prefix = substr( $oLine, 0, $prefixLength );
02480 
02481                 # Normalise the prefix for comparison with the previous line:
02482                 # ; and : are both from definition-lists, so they're equivalent
02483                 #  for the purposes of determining whether or not we need to open/close
02484                 #  elements.
02485                 $prefix2 = str_replace( ';', ':', $prefix );
02486                 $t = substr( $oLine, $prefixLength );
02487                 $this->mInPre = (bool)$preOpenMatch;
02488             } else {
02489                 # Don't interpret any other prefixes in preformatted text
02490                 $prefixLength = 0;
02491                 $prefix = $prefix2 = '';
02492                 $t = $oLine;
02493             }
02494 
02495             # List generation
02496             if ( $prefixLength && $lastPrefix === $prefix2 ) {
02497                 # Same as the last item, so no need to deal with nesting or opening stuff
02498                 $output .= $this->nextItem( substr( $prefix, -1 ) );
02499                 $paragraphStack = false;
02500 
02501                 if ( substr( $prefix, -1 ) === ';' ) {
02502                     # The one nasty exception: definition lists work like this:
02503                     # ; title : definition text
02504                     # So we check for : in the remainder text to split up the
02505                     # title and definition, without b0rking links.
02506                     $term = $t2 = '';
02507                     if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
02508                         $t = $t2;
02509                         $output .= $term . $this->nextItem( ':' );
02510                     }
02511                 }
02512             } elseif ( $prefixLength || $lastPrefixLength ) {
02513                 # We need to open or close prefixes, or both.
02514 
02515                 # Either open or close a level...
02516                 $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
02517                 $paragraphStack = false;
02518 
02519                 # Close all the prefixes which aren't shared.
02520                 while ( $commonPrefixLength < $lastPrefixLength ) {
02521                     $output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] );
02522                     --$lastPrefixLength;
02523                 }
02524 
02525                 # Continue the current prefix if appropriate.
02526                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
02527                     $output .= $this->nextItem( $prefix[$commonPrefixLength - 1] );
02528                 }
02529 
02530                 # Open prefixes where appropriate.
02531                 if (  $lastPrefix && $prefixLength > $commonPrefixLength ) {
02532                     $output .= "\n";
02533                 }
02534                 while ( $prefixLength > $commonPrefixLength ) {
02535                     $char = substr( $prefix, $commonPrefixLength, 1 );
02536                     $output .= $this->openList( $char );
02537 
02538                     if ( ';' === $char ) {
02539                         # @todo FIXME: This is dupe of code above
02540                         if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
02541                             $t = $t2;
02542                             $output .= $term . $this->nextItem( ':' );
02543                         }
02544                     }
02545                     ++$commonPrefixLength;
02546                 }
02547                 if ( !$prefixLength && $lastPrefix ) {
02548                     $output .= "\n";
02549                 }
02550                 $lastPrefix = $prefix2;
02551             }
02552 
02553             # If we have no prefixes, go to paragraph mode.
02554             if ( 0 == $prefixLength ) {
02555                 wfProfileIn( __METHOD__ . "-paragraph" );
02556                 # No prefix (not in list)--go to paragraph mode
02557                 # XXX: use a stack for nestable elements like span, table and div
02558                 $openmatch = preg_match(
02559                     '/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|'
02560                         . '<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS',
02561                     $t
02562                 );
02563                 $closematch = preg_match(
02564                     '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'
02565                         . '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|'
02566                         . $this->mUniqPrefix
02567                         . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS',
02568                     $t
02569                 );
02570 
02571                 if ( $openmatch or $closematch ) {
02572                     $paragraphStack = false;
02573                     # @todo bug 5718: paragraph closed
02574                     $output .= $this->closeParagraph();
02575                     if ( $preOpenMatch and !$preCloseMatch ) {
02576                         $this->mInPre = true;
02577                     }
                          # Scan every <blockquote>/</blockquote> tag on the line; the last
                          # one seen decides whether we are inside a blockquote afterwards
02578                     $bqOffset = 0;
02579                     while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t, $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset ) ) {
02580                         $inBlockquote = !$bqMatch[1][0]; // is this a close tag?
02581                         $bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] );
02582                     }
02583                     $inBlockElem = !$closematch;
02584                 } elseif ( !$inBlockElem && !$this->mInPre ) {
02585                     if ( ' ' == substr( $t, 0, 1 )
02586                         && ( $this->mLastSection === 'pre' || trim( $t ) != '' )
02587                         && !$inBlockquote
02588                     ) {
02589                         # pre
02590                         if ( $this->mLastSection !== 'pre' ) {
02591                             $paragraphStack = false;
02592                             $output .= $this->closeParagraph() . '<pre>';
02593                             $this->mLastSection = 'pre';
02594                         }
02595                         $t = substr( $t, 1 );
02596                     } else {
02597                         # paragraph
02598                         if ( trim( $t ) === '' ) {
02599                             if ( $paragraphStack ) {
02600                                 $output .= $paragraphStack . '<br />';
02601                                 $paragraphStack = false;
02602                                 $this->mLastSection = 'p';
02603                             } else {
02604                                 if ( $this->mLastSection !== 'p' ) {
02605                                     $output .= $this->closeParagraph();
02606                                     $this->mLastSection = '';
02607                                     $paragraphStack = '<p>';
02608                                 } else {
02609                                     $paragraphStack = '</p><p>';
02610                                 }
02611                             }
02612                         } else {
02613                             if ( $paragraphStack ) {
02614                                 $output .= $paragraphStack;
02615                                 $paragraphStack = false;
02616                                 $this->mLastSection = 'p';
02617                             } elseif ( $this->mLastSection !== 'p' ) {
02618                                 $output .= $this->closeParagraph() . '<p>';
02619                                 $this->mLastSection = 'p';
02620                             }
02621                         }
02622                     }
02623                 }
02624                 wfProfileOut( __METHOD__ . "-paragraph" );
02625             }
02626             # somewhere above we forget to get out of pre block (bug 785)
02627             if ( $preCloseMatch && $this->mInPre ) {
02628                 $this->mInPre = false;
02629             }
                  # The line itself is only emitted when no paragraph markup is pending
02630             if ( $paragraphStack === false ) {
02631                 $output .= $t;
02632                 if ( $prefixLength === 0 ) {
02633                     $output .= "\n";
02634                 }
02635             }
02636         }
              # Close whatever list levels are still open after the last line
02637         while ( $prefixLength ) {
02638             $output .= $this->closeList( $prefix2[$prefixLength - 1] );
02639             --$prefixLength;
02640             if ( !$prefixLength ) {
02641                 $output .= "\n";
02642             }
02643         }
              # Close a paragraph or pre section left open at the end of the text
02644         if ( $this->mLastSection != '' ) {
02645             $output .= '</' . $this->mLastSection . '>';
02646             $this->mLastSection = '';
02647         }
02648 
02649         wfProfileOut( __METHOD__ );
02650         return $output;
02651     }
02652 
          /**
           * Find the first ':' in $str that is not inside a tag or comment and is
           * not nested within an open element, and split the string around it.
           *
           * Two shortcuts are tried first: no ':' at all (fail), or no '<' before
           * the first ':' (split right there). Otherwise a character-by-character
           * state machine walks the string. $stack counts currently open elements:
           * it goes up on the '>' that ends a start tag, down on the '>' that ends
           * a close tag, and is left alone by self-closed tags and comments.
           *
           * $before and $after are assigned only when a colon is found.
           *
           * @param string $str Text to search
           * @param string &$before Receives everything before the ':'
           * @param string &$after Receives everything after the ':'
           * @throws MWException If the state machine reaches an unknown state
           * @return int|bool Offset of the ':' in $str, or false if there is no
           *   usable colon or the tag nesting is invalid
           */
02663     public function findColonNoLinks( $str, &$before, &$after ) {
02664         wfProfileIn( __METHOD__ );
02665 
02666         $pos = strpos( $str, ':' );
02667         if ( $pos === false ) {
02668             # Nothing to find!
02669             wfProfileOut( __METHOD__ );
02670             return false;
02671         }
02672 
02673         $lt = strpos( $str, '<' );
02674         if ( $lt === false || $lt > $pos ) {
02675             # Easy; no tag nesting to worry about
02676             $before = substr( $str, 0, $pos );
02677             $after = substr( $str, $pos + 1 );
02678             wfProfileOut( __METHOD__ );
02679             return $pos;
02680         }
02681 
02682         # Ugly state machine to walk through avoiding tags.
02683         $state = self::COLON_STATE_TEXT;
02684         $stack = 0;
02685         $len = strlen( $str );
02686         for ( $i = 0; $i < $len; $i++ ) {
02687             $c = $str[$i];
02688 
02689             switch ( $state ) {
02690             # (Using the number is a performance hack for common cases)
02691             case 0: # self::COLON_STATE_TEXT:
02692                 switch ( $c ) {
02693                 case "<":
02694                     # Could be either a <start> tag or an </end> tag
02695                     $state = self::COLON_STATE_TAGSTART;
02696                     break;
02697                 case ":":
02698                     if ( $stack === 0 ) {
02699                         # We found it!
02700                         $before = substr( $str, 0, $i );
02701                         $after = substr( $str, $i + 1 );
02702                         wfProfileOut( __METHOD__ );
02703                         return $i;
02704                     }
02705                     # Embedded in a tag; don't break it.
02706                     break;
02707                 default:
02708                     # Skip ahead looking for something interesting
02709                     $colon = strpos( $str, ':', $i );
02710                     if ( $colon === false ) {
02711                         # Nothing else interesting
02712                         wfProfileOut( __METHOD__ );
02713                         return false;
02714                     }
02715                     $lt = strpos( $str, '<', $i );
02716                     if ( $stack === 0 ) {
02717                         if ( $lt === false || $colon < $lt ) {
02718                             # We found it!
02719                             $before = substr( $str, 0, $colon );
02720                             $after = substr( $str, $colon + 1 );
02721                             wfProfileOut( __METHOD__ );
                                  # Report the colon's own offset, as the other success
                                  # paths do; $i is only where this scan resumed.
02722                             return $colon;
02723                         }
02724                     }
02725                     if ( $lt === false ) {
02726                         # Nothing else interesting to find; abort!
02727                         # We're nested, but there's no close tags left. Abort!
                              # (leaves the for loop; the checks after it return false)
02728                         break 2;
02729                     }
02730                     # Skip ahead to next tag start
02731                     $i = $lt;
02732                     $state = self::COLON_STATE_TAGSTART;
02733                 }
02734                 break;
02735             case 1: # self::COLON_STATE_TAG:
02736                 # In a <tag>
02737                 switch ( $c ) {
02738                 case ">":
                      # End of a start tag: one more open element
02739                     $stack++;
02740                     $state = self::COLON_STATE_TEXT;
02741                     break;
02742                 case "/":
02743                     # Slash may be followed by >?
02744                     $state = self::COLON_STATE_TAGSLASH;
02745                     break;
02746                 default:
02747                     # ignore
02748                 }
02749                 break;
02750             case 2: # self::COLON_STATE_TAGSTART:
                  # First character after '<' decides what kind of construct this is
02751                 switch ( $c ) {
02752                 case "/":
02753                     $state = self::COLON_STATE_CLOSETAG;
02754                     break;
02755                 case "!":
02756                     $state = self::COLON_STATE_COMMENT;
02757                     break;
02758                 case ">":
02759                     # Illegal early close? This shouldn't happen D:
02760                     $state = self::COLON_STATE_TEXT;
02761                     break;
02762                 default:
02763                     $state = self::COLON_STATE_TAG;
02764                 }
02765                 break;
02766             case 3: # self::COLON_STATE_CLOSETAG:
02767                 # In a </tag>
02768                 if ( $c === ">" ) {
02769                     $stack--;
02770                     if ( $stack < 0 ) {
02771                         wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" );
02772                         wfProfileOut( __METHOD__ );
02773                         return false;
02774                     }
02775                     $state = self::COLON_STATE_TEXT;
02776                 }
02777                 break;
02778             case self::COLON_STATE_TAGSLASH:
02779                 if ( $c === ">" ) {
02780                     # Yes, a self-closed tag <blah/>
                      # (the nesting depth is deliberately left unchanged)
02781                     $state = self::COLON_STATE_TEXT;
02782                 } else {
02783                     # Probably we're jumping the gun, and this is an attribute
02784                     $state = self::COLON_STATE_TAG;
02785                 }
02786                 break;
02787             case 5: # self::COLON_STATE_COMMENT:
                  # Inside a comment: wait for a '-' that may start the closing "-->"
02788                 if ( $c === "-" ) {
02789                     $state = self::COLON_STATE_COMMENTDASH;
02790                 }
02791                 break;
02792             case self::COLON_STATE_COMMENTDASH:
02793                 if ( $c === "-" ) {
02794                     $state = self::COLON_STATE_COMMENTDASHDASH;
02795                 } else {
02796                     $state = self::COLON_STATE_COMMENT;
02797                 }
02798                 break;
02799             case self::COLON_STATE_COMMENTDASHDASH:
02800                 if ( $c === ">" ) {
02801                     $state = self::COLON_STATE_TEXT;
02802                 } else {
02803                     $state = self::COLON_STATE_COMMENT;
02804                 }
02805                 break;
02806             default:
02807                 wfProfileOut( __METHOD__ );
02808                 throw new MWException( "State machine error in " . __METHOD__ );
02809             }
02810         }
              # Ran out of input (or aborted above) without finding a usable colon
02811         if ( $stack > 0 ) {
02812             wfDebug( __METHOD__ . ": Invalid input; not enough close tags (stack $stack, state $state)\n" );
02813             wfProfileOut( __METHOD__ );
02814             return false;
02815         }
02816         wfProfileOut( __METHOD__ );
02817         return false;
02818     }
02819 
          /**
           * Return the value of a built-in variable identified by $index
           * (for example 'pagename', 'currentmonth' or 'revisionid').
           *
           * Values computed by the cases that end in "break" are stored in
           * $this->mVarCache under $index and are served from there on later
           * calls, as long as the 'ParserGetVariableValueVarCache' hook does not
           * return a false value. The cases that "return" directly ('articlepath',
           * 'sitename', 'server', 'servername', 'scriptpath', 'stylepath',
           * 'directionmark', 'contentlanguage') and the default case skip that
           * cache write.
           *
           * IDs not handled by the switch are passed to the
           * 'ParserGetVariableValueSwitch' hook, and whatever it leaves in $ret
           * (null by default) is returned.
           *
           * @param string $index Variable ID, compared against the case labels below
           * @param bool|PPFrame $frame Only forwarded to the
           *   'ParserGetVariableValueSwitch' hook; not used otherwise in this method
           * @throws MWException If $this->mTitle is null
           * @return mixed The variable's value; null for an unknown ID that no hook
           *   handler filled in, and for 'pageid' when the article ID is 0
           */
02831     public function getVariableValue( $index, $frame = false ) {
02832         global $wgContLang, $wgSitename, $wgServer, $wgServerName;
02833         global $wgArticlePath, $wgScriptPath, $wgStylePath;
02834 
02835         if ( is_null( $this->mTitle ) ) {
02836             // If no title set, bad things are going to happen
02837             // later. Title should always be set since this
02838             // should only be called in the middle of a parse
02839             // operation (but the unit-tests do funky stuff)
02840             throw new MWException( __METHOD__ . ' Should only be '
02841                 . ' called while parsing (no title set)' );
02842         }
02843 
              # Serve a previously computed value unless a hook handler vetoes the
              # cache by returning a false value. Handlers get the cache by reference.
02848         if ( wfRunHooks( 'ParserGetVariableValueVarCache', array( &$this, &$this->mVarCache ) ) ) {
02849             if ( isset( $this->mVarCache[$index] ) ) {
02850                 return $this->mVarCache[$index];
02851             }
02852         }
02853 
              # Timestamp from the parser options, converted to UNIX format; hook
              # handlers receive it by reference.
02854         $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
02855         wfRunHooks( 'ParserGetVariableValueTs', array( &$this, &$ts ) );
02856 
02857         $pageLang = $this->getFunctionLang();
02858 
02859         switch ( $index ) {
02860             case '!':
02861                 $value = '|';
02862                 break;
02863             case 'currentmonth':
02864                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) );
02865                 break;
02866             case 'currentmonth1':
02867                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) );
02868                 break;
02869             case 'currentmonthname':
02870                 $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
02871                 break;
02872             case 'currentmonthnamegen':
02873                 $value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
02874                 break;
02875             case 'currentmonthabbrev':
02876                 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
02877                 break;
02878             case 'currentday':
02879                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) );
02880                 break;
02881             case 'currentday2':
02882                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) );
02883                 break;
02884             case 'localmonth':
02885                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) );
02886                 break;
02887             case 'localmonth1':
02888                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
02889                 break;
02890             case 'localmonthname':
02891                 $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
02892                 break;
02893             case 'localmonthnamegen':
02894                 $value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
02895                 break;
02896             case 'localmonthabbrev':
02897                 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
02898                 break;
02899             case 'localday':
02900                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) );
02901                 break;
02902             case 'localday2':
02903                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) );
02904                 break;
                  # Page-name variables: every value is passed through wfEscapeWikiText()
02905             case 'pagename':
02906                 $value = wfEscapeWikiText( $this->mTitle->getText() );
02907                 break;
02908             case 'pagenamee':
02909                 $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
02910                 break;
02911             case 'fullpagename':
02912                 $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
02913                 break;
02914             case 'fullpagenamee':
02915                 $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
02916                 break;
02917             case 'subpagename':
02918                 $value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
02919                 break;
02920             case 'subpagenamee':
02921                 $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
02922                 break;
02923             case 'rootpagename':
02924                 $value = wfEscapeWikiText( $this->mTitle->getRootText() );
02925                 break;
02926             case 'rootpagenamee':
02927                 $value = wfEscapeWikiText( wfUrlEncode( str_replace(
02928                     ' ',
02929                     '_',
02930                     $this->mTitle->getRootText()
02931                 ) ) );
02932                 break;
02933             case 'basepagename':
02934                 $value = wfEscapeWikiText( $this->mTitle->getBaseText() );
02935                 break;
02936             case 'basepagenamee':
02937                 $value = wfEscapeWikiText( wfUrlEncode( str_replace(
02938                     ' ',
02939                     '_',
02940                     $this->mTitle->getBaseText()
02941                 ) ) );
02942                 break;
02943             case 'talkpagename':
                      # Empty string when the title cannot have a talk page
02944                 if ( $this->mTitle->canTalk() ) {
02945                     $talkPage = $this->mTitle->getTalkPage();
02946                     $value = wfEscapeWikiText( $talkPage->getPrefixedText() );
02947                 } else {
02948                     $value = '';
02949                 }
02950                 break;
02951             case 'talkpagenamee':
02952                 if ( $this->mTitle->canTalk() ) {
02953                     $talkPage = $this->mTitle->getTalkPage();
02954                     $value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
02955                 } else {
02956                     $value = '';
02957                 }
02958                 break;
02959             case 'subjectpagename':
02960                 $subjPage = $this->mTitle->getSubjectPage();
02961                 $value = wfEscapeWikiText( $subjPage->getPrefixedText() );
02962                 break;
02963             case 'subjectpagenamee':
02964                 $subjPage = $this->mTitle->getSubjectPage();
02965                 $value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
02966                 break;
02967             case 'pageid': // requested in bug 23427
02968                 $pageid = $this->getTitle()->getArticleID();
02969                 if ( $pageid == 0 ) {
02970                     # 0 means the page doesn't exist in the database,
02971                     # which means the user is previewing a new page.
02972                     # The vary-revision flag must be set, because the magic word
02973                     # will have a different value once the page is saved.
02974                     $this->mOutput->setFlag( 'vary-revision' );
02975                     wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
02976                 }
02977                 $value = $pageid ? $pageid : null;
02978                 break;
02979             case 'revisionid':
02980                 # Let the edit saving system know we should parse the page
02981                 # *after* a revision ID has been assigned.
02982                 $this->mOutput->setFlag( 'vary-revision' );
02983                 wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision...\n" );
02984                 $value = $this->mRevisionId;
02985                 break;
                  # The date parts below are sliced out of getRevisionTimestamp() by
                  # fixed offsets: 0-3 year, 4-5 month, 6-7 day. The intval() variants
                  # drop a leading zero.
02986             case 'revisionday':
02987                 # Let the edit saving system know we should parse the page
02988                 # *after* a revision ID has been assigned. This is for null edits.
02989                 $this->mOutput->setFlag( 'vary-revision' );
02990                 wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" );
02991                 $value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
02992                 break;
02993             case 'revisionday2':
02994                 # Let the edit saving system know we should parse the page
02995                 # *after* a revision ID has been assigned. This is for null edits.
02996                 $this->mOutput->setFlag( 'vary-revision' );
02997                 wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" );
02998                 $value = substr( $this->getRevisionTimestamp(), 6, 2 );
02999                 break;
03000             case 'revisionmonth':
03001                 # Let the edit saving system know we should parse the page
03002                 # *after* a revision ID has been assigned. This is for null edits.
03003                 $this->mOutput->setFlag( 'vary-revision' );
03004                 wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" );
03005                 $value = substr( $this->getRevisionTimestamp(), 4, 2 );
03006                 break;
03007             case 'revisionmonth1':
03008                 # Let the edit saving system know we should parse the page
03009                 # *after* a revision ID has been assigned. This is for null edits.
03010                 $this->mOutput->setFlag( 'vary-revision' );
03011                 wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" );
03012                 $value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
03013                 break;
03014             case 'revisionyear':
03015                 # Let the edit saving system know we should parse the page
03016                 # *after* a revision ID has been assigned. This is for null edits.
03017                 $this->mOutput->setFlag( 'vary-revision' );
03018                 wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" );
03019                 $value = substr( $this->getRevisionTimestamp(), 0, 4 );
03020                 break;
03021             case 'revisiontimestamp':
03022                 # Let the edit saving system know we should parse the page
03023                 # *after* a revision ID has been assigned. This is for null edits.
03024                 $this->mOutput->setFlag( 'vary-revision' );
03025                 wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
03026                 $value = $this->getRevisionTimestamp();
03027                 break;
03028             case 'revisionuser':
03029                 # Let the edit saving system know we should parse the page
03030                 # *after* a revision ID has been assigned. This is for null edits.
03031                 $this->mOutput->setFlag( 'vary-revision' );
03032                 wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" );
03033                 $value = $this->getRevisionUser();
03034                 break;
03035             case 'revisionsize':
03036                 # Let the edit saving system know we should parse the page
03037                 # *after* a revision ID has been assigned. This is for null edits.
03038                 $this->mOutput->setFlag( 'vary-revision' );
03039                 wfDebug( __METHOD__ . ": {{REVISIONSIZE}} used, setting vary-revision...\n" );
03040                 $value = $this->getRevisionSize();
03041                 break;
03042             case 'namespace':
03043                 $value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
03044                 break;
03045             case 'namespacee':
03046                 $value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
03047                 break;
03048             case 'namespacenumber':
03049                 $value = $this->mTitle->getNamespace();
03050                 break;
03051             case 'talkspace':
03052                 $value = $this->mTitle->canTalk()
03053                     ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
03054                     : '';
03055                 break;
03056             case 'talkspacee':
03057                 $value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
03058                 break;
03059             case 'subjectspace':
03060                 $value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
03061                 break;
03062             case 'subjectspacee':
03063                 $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
03064                 break;
03065             case 'currentdayname':
                      # format( 'w' ) is shifted by one before the lookup
                      # NOTE(review): presumably getWeekdayName() is 1-based - confirm
03066                 $value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
03067                 break;
03068             case 'currentyear':
03069                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
03070                 break;
03071             case 'currenttime':
03072                 $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
03073                 break;
03074             case 'currenthour':
03075                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
03076                 break;
03077             case 'currentweek':
03078                 # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
03079                 # int to remove the padding
03080                 $value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
03081                 break;
03082             case 'currentdow':
03083                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
03084                 break;
03085             case 'localdayname':
03086                 $value = $pageLang->getWeekdayName(
03087                     (int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
03088                 );
03089                 break;
03090             case 'localyear':
03091                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
03092                 break;
03093             case 'localtime':
03094                 $value = $pageLang->time(
03095                     MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
03096                     false,
03097                     false
03098                 );
03099                 break;
03100             case 'localhour':
03101                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
03102                 break;
03103             case 'localweek':
03104                 # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
03105                 # int to remove the padding
03106                 $value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
03107                 break;
03108             case 'localdow':
03109                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
03110                 break;
                  # Site statistics, formatted with the page language's number format
03111             case 'numberofarticles':
03112                 $value = $pageLang->formatNum( SiteStats::articles() );
03113                 break;
03114             case 'numberoffiles':
03115                 $value = $pageLang->formatNum( SiteStats::images() );
03116                 break;
03117             case 'numberofusers':
03118                 $value = $pageLang->formatNum( SiteStats::users() );
03119                 break;
03120             case 'numberofactiveusers':
03121                 $value = $pageLang->formatNum( SiteStats::activeUsers() );
03122                 break;
03123             case 'numberofpages':
03124                 $value = $pageLang->formatNum( SiteStats::pages() );
03125                 break;
03126             case 'numberofadmins':
03127                 $value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
03128                 break;
03129             case 'numberofedits':
03130                 $value = $pageLang->formatNum( SiteStats::edits() );
03131                 break;
03132             case 'numberofviews':
03133                 global $wgDisableCounters;
                      # Empty string when $wgDisableCounters is set
03134                 $value = !$wgDisableCounters ? $pageLang->formatNum( SiteStats::views() ) : '';
03135                 break;
03136             case 'currenttimestamp':
03137                 $value = wfTimestamp( TS_MW, $ts );
03138                 break;
03139             case 'localtimestamp':
03140                 $value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
03141                 break;
03142             case 'currentversion':
03143                 $value = SpecialVersion::getVersion();
03144                 break;
                  # The following cases return immediately, so their values are not
                  # written to $this->mVarCache by this method.
03145             case 'articlepath':
03146                 return $wgArticlePath;
03147             case 'sitename':
03148                 return $wgSitename;
03149             case 'server':
03150                 return $wgServer;
03151             case 'servername':
03152                 return $wgServerName;
03153             case 'scriptpath':
03154                 return $wgScriptPath;
03155             case 'stylepath':
03156                 return $wgStylePath;
03157             case 'directionmark':
03158                 return $pageLang->getDirMark();
03159             case 'contentlanguage':
03160                 global $wgLanguageCode;
03161                 return $wgLanguageCode;
03162             case 'cascadingsources':
03163                 $value = CoreParserFunctions::cascadingsources( $this );
03164                 break;
03165             default:
                      # Unknown ID: hook handlers receive $ret (and the cache, the ID
                      # and the frame) by reference; $ret stays null if none sets it.
03166                 $ret = null;
03167                 wfRunHooks(
03168                     'ParserGetVariableValueSwitch',
03169                     array( &$this, &$this->mVarCache, &$index, &$ret, &$frame )
03170                 );
03171 
03172                 return $ret;
03173         }
03174 
              # Remember the computed value; skipped when $index is falsy
03175         if ( $index ) {
03176             $this->mVarCache[$index] = $value;
03177         }
03178 
03179         return $value;
03180     }
03181 
          /**
           * Build the two MagicWordArray lookups kept on the parser:
           * $this->mVariables from MagicWord::getVariableIDs() and
           * $this->mSubstWords from MagicWord::getSubstIDs().
           */
03187     public function initialiseVariables() {
03188         wfProfileIn( __METHOD__ );
03189 
              # Fetch both ID lists first, then wrap each one in a matcher object
03190         $idsOfVariables = MagicWord::getVariableIDs();
03191         $idsOfSubstWords = MagicWord::getSubstIDs();
03192         $this->mVariables = new MagicWordArray( $idsOfVariables );
03193         $this->mSubstWords = new MagicWordArray( $idsOfSubstWords );
03194 
03195         wfProfileOut( __METHOD__ );
03196     }
03196 
          /**
           * Run $text through the parser's preprocessor and return the result of
           * its preprocessToObj() call.
           *
           * @param string $text Text to preprocess
           * @param int $flags Passed through unchanged to preprocessToObj()
           *   (see the PTD_* class constants, e.g. self::PTD_FOR_INCLUSION)
           * @return mixed Whatever the preprocessor's preprocessToObj() returns
           */
03219     public function preprocessToDom( $text, $flags = 0 ) {
03220         return $this->getPreprocessor()->preprocessToObj( $text, $flags );
03222     }
03223 
          /**
           * Split a string into its leading whitespace, its trimmed content and its
           * trailing whitespace, using the default character set of ltrim()/rtrim().
           *
           * Concatenating the three returned parts gives back the original string.
           * All three parts are always strings, including for empty input.
           *
           * @param string $s
           * @return array( leading whitespace, trimmed content, trailing whitespace )
           */
03231     public static function splitWhitespace( $s ) {
03232         $ltrimmed = ltrim( $s );
03233         $trimmed = rtrim( $ltrimmed );
03234 
03235         $leadLen = strlen( $s ) - strlen( $ltrimmed );
03236         $trailLen = strlen( $ltrimmed ) - strlen( $trimmed );
03237 
              # Only call substr() when there is something to cut: on older PHP
              # versions substr( '', 0, 0 ) yields false rather than '', and a
              # negative start of -0 would select the whole string.
03238         $w1 = $leadLen > 0 ? substr( $s, 0, $leadLen ) : '';
03239         $w2 = $trailLen > 0 ? substr( $ltrimmed, -$trailLen ) : '';
03240 
03241         return array( $w1, $trimmed, $w2 );
03242     }
03243 
          /**
           * Preprocess $text into a DOM and expand it within a frame.
           *
           * Empty text, and text longer than the max include size from the parser
           * options, is returned unchanged.
           *
           * @param string $text Text to expand
           * @param bool|PPFrame|mixed $frame A PPFrame to expand in; false to use a
           *   new frame from the preprocessor; any other value is handed to the
           *   preprocessor's newCustomFrame() (and a debug line is logged)
           * @param bool $argsOnly When true, expansion is done with the
           *   PPFrame::NO_TEMPLATES flag; otherwise with no flags
           * @return string The result of $frame->expand(), or the unchanged input
           */
03264     public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
03265         # Bail out on empty input and on input over the inclusion size limit
03266         $length = strlen( $text );
03267         if ( $length < 1 || $length > $this->mOptions->getMaxIncludeSize() ) {
03268             return $text;
03269         }
03270         wfProfileIn( __METHOD__ );
03271 
03272         if ( !( $frame instanceof PPFrame ) ) {
03273             if ( $frame === false ) {
03274                 $frame = $this->getPreprocessor()->newFrame();
03275             } else {
03276                 wfDebug( __METHOD__ . " called using plain parameters instead of "
03277                     . "a PPFrame instance. Creating custom frame.\n" );
03278                 $frame = $this->getPreprocessor()->newCustomFrame( $frame );
03279             }
03280         }
03281 
03282         $dom = $this->preprocessToDom( $text );
03283         $expandFlags = 0;
03284         if ( $argsOnly ) {
03285             $expandFlags = PPFrame::NO_TEMPLATES;
03286         }
03287         $expanded = $frame->expand( $dom, $expandFlags );
03288 
03289         wfProfileOut( __METHOD__ );
03290         return $expanded;
03291     }
03286 
          /**
           * Turn a flat list of argument strings into an associative array.
           *
           * An argument without '=' is stored as-is (not trimmed) under the next
           * positional key, counting from 1. An argument containing '=' is split at
           * the first '='; the trimmed left side becomes the key and the trimmed
           * right side the value. A later entry overwrites an earlier one that
           * resolves to the same key.
           *
           * @param array $args List of strings
           * @return array Map of argument name or position to value
           */
03294     public static function createAssocArgs( $args ) {
03295         $assocArgs = array();
03296         $index = 1;
03297         foreach ( $args as $arg ) {
03298             $eqpos = strpos( $arg, '=' );
03299             if ( $eqpos === false ) {
03300                 $assocArgs[$index++] = $arg;
03301                 continue;
03302             }
                  # trim() always returns a string, so the name and the value need
                  # no further false checks before being stored.
03303             $name = trim( substr( $arg, 0, $eqpos ) );
03304             $assocArgs[$name] = trim( substr( $arg, $eqpos + 1 ) );
03311         }
03312 
03313         return $assocArgs;
03314     }
03315 
          /**
           * Record that a parser limitation was hit: add the warning message
           * "<limitationType>-warning" to the parser output and add the tracking
           * category "<limitationType>-category".
           *
           * @param string $limitationType Prefix of the message and category keys
           * @param string|int $current Passed to the message as a numeric parameter
           * @param string|int $max Passed to the message as a numeric parameter
           */
03340     public function limitationWarn( $limitationType, $current = '', $max = '' ) {
03341         # Passing $current and $max is harmless even when the message ignores them
03342         $message = wfMessage( "$limitationType-warning" )->numParams( $current, $max );
03343         $message = $message->inLanguage( $this->mOptions->getUserLangObj() );
03344         $this->mOutput->addWarning( $message->text() );
03345         $this->addTrackingCategory( "$limitationType-category" );
03346     }
03347 
03360     public function braceSubstitution( $piece, $frame ) {
03361         wfProfileIn( __METHOD__ );
03362         wfProfileIn( __METHOD__ . '-setup' );
03363 
03364         // Flags
03365 
03366         // $text has been filled
03367         $found = false;
03368         // wiki markup in $text should be escaped
03369         $nowiki = false;
03370         // $text is HTML, armour it against wikitext transformation
03371         $isHTML = false;
03372         // Force interwiki transclusion to be done in raw mode not rendered
03373         $forceRawInterwiki = false;
03374         // $text is a DOM node needing expansion in a child frame
03375         $isChildObj = false;
03376         // $text is a DOM node needing expansion in the current frame
03377         $isLocalObj = false;
03378 
03379         # Title object, where $text came from
03380         $title = false;
03381 
03382         # $part1 is the bit before the first |, and must contain only title characters.
03383         # Various prefixes will be stripped from it later.
03384         $titleWithSpaces = $frame->expand( $piece['title'] );
03385         $part1 = trim( $titleWithSpaces );
03386         $titleText = false;
03387 
03388         # Original title text preserved for various purposes
03389         $originalTitle = $part1;
03390 
03391         # $args is a list of argument nodes, starting from index 0, not including $part1
03392         # @todo FIXME: If piece['parts'] is null then the call to getLength()
03393         # below won't work b/c this $args isn't an object
03394         $args = ( null == $piece['parts'] ) ? array() : $piece['parts'];
03395         wfProfileOut( __METHOD__ . '-setup' );
03396 
03397         $titleProfileIn = null; // profile templates
03398 
03399         # SUBST
03400         wfProfileIn( __METHOD__ . '-modifiers' );
03401         if ( !$found ) {
03402 
03403             $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
03404 
03405             # Possibilities for substMatch: "subst", "safesubst" or FALSE
03406             # Decide whether to expand template or keep wikitext as-is.
03407             if ( $this->ot['wiki'] ) {
03408                 if ( $substMatch === false ) {
03409                     $literal = true;  # literal when in PST with no prefix
03410                 } else {
03411                     $literal = false; # expand when in PST with subst: or safesubst:
03412                 }
03413             } else {
03414                 if ( $substMatch == 'subst' ) {
03415                     $literal = true;  # literal when not in PST with plain subst:
03416                 } else {
03417                     $literal = false; # expand when not in PST with safesubst: or no prefix
03418                 }
03419             }
03420             if ( $literal ) {
03421                 $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
03422                 $isLocalObj = true;
03423                 $found = true;
03424             }
03425         }
03426 
03427         # Variables
03428         if ( !$found && $args->getLength() == 0 ) {
03429             $id = $this->mVariables->matchStartToEnd( $part1 );
03430             if ( $id !== false ) {
03431                 $text = $this->getVariableValue( $id, $frame );
03432                 if ( MagicWord::getCacheTTL( $id ) > -1 ) {
03433                     $this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) );
03434                 }
03435                 $found = true;
03436             }
03437         }
03438 
03439         # MSG, MSGNW and RAW
03440         if ( !$found ) {
03441             # Check for MSGNW:
03442             $mwMsgnw = MagicWord::get( 'msgnw' );
03443             if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
03444                 $nowiki = true;
03445             } else {
03446                 # Remove obsolete MSG:
03447                 $mwMsg = MagicWord::get( 'msg' );
03448                 $mwMsg->matchStartAndRemove( $part1 );
03449             }
03450 
03451             # Check for RAW:
03452             $mwRaw = MagicWord::get( 'raw' );
03453             if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
03454                 $forceRawInterwiki = true;
03455             }
03456         }
03457         wfProfileOut( __METHOD__ . '-modifiers' );
03458 
03459         # Parser functions
03460         if ( !$found ) {
03461             wfProfileIn( __METHOD__ . '-pfunc' );
03462 
03463             $colonPos = strpos( $part1, ':' );
03464             if ( $colonPos !== false ) {
03465                 $func = substr( $part1, 0, $colonPos );
03466                 $funcArgs = array( trim( substr( $part1, $colonPos + 1 ) ) );
03467                 for ( $i = 0; $i < $args->getLength(); $i++ ) {
03468                     $funcArgs[] = $args->item( $i );
03469                 }
03470                 try {
03471                     $result = $this->callParserFunction( $frame, $func, $funcArgs );
03472                 } catch ( Exception $ex ) {
03473                     wfProfileOut( __METHOD__ . '-pfunc' );
03474                     wfProfileOut( __METHOD__ );
03475                     throw $ex;
03476                 }
03477 
03478                 # The interface for parser functions allows for extracting
03479                 # flags into the local scope. Extract any forwarded flags
03480                 # here.
03481                 extract( $result );
03482             }
03483             wfProfileOut( __METHOD__ . '-pfunc' );
03484         }
03485 
03486         # Finish mangling title and then check for loops.
03487         # Set $title to a Title object and $titleText to the PDBK
03488         if ( !$found ) {
03489             $ns = NS_TEMPLATE;
03490             # Split the title into page and subpage
03491             $subpage = '';
03492             $relative = $this->maybeDoSubpageLink( $part1, $subpage );
03493             if ( $part1 !== $relative ) {
03494                 $part1 = $relative;
03495                 $ns = $this->mTitle->getNamespace();
03496             }
03497             $title = Title::newFromText( $part1, $ns );
03498             if ( $title ) {
03499                 $titleText = $title->getPrefixedText();
03500                 # Check for language variants if the template is not found
03501                 if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
03502                     $this->getConverterLanguage()->findVariantLink( $part1, $title, true );
03503                 }
03504                 # Do recursion depth check
03505                 $limit = $this->mOptions->getMaxTemplateDepth();
03506                 if ( $frame->depth >= $limit ) {
03507                     $found = true;
03508                     $text = '<span class="error">'
03509                         . wfMessage( 'parser-template-recursion-depth-warning' )
03510                             ->numParams( $limit )->inContentLanguage()->text()
03511                         . '</span>';
03512                 }
03513             }
03514         }
03515 
03516         # Load from database
03517         if ( !$found && $title ) {
03518             if ( !Profiler::instance()->isPersistent() ) {
03519                 # Too many unique items can kill profiling DBs/collectors
03520                 $titleProfileIn = __METHOD__ . "-title-" . $title->getPrefixedDBkey();
03521                 wfProfileIn( $titleProfileIn ); // template in
03522             }
03523             wfProfileIn( __METHOD__ . '-loadtpl' );
03524             if ( !$title->isExternal() ) {
03525                 if ( $title->isSpecialPage()
03526                     && $this->mOptions->getAllowSpecialInclusion()
03527                     && $this->ot['html']
03528                 ) {
03529                     // Pass the template arguments as URL parameters.
03530                     // "uselang" will have no effect since the Language object
03531                     // is forced to the one defined in ParserOptions.
03532                     $pageArgs = array();
03533                     $argsLength = $args->getLength();
03534                     for ( $i = 0; $i < $argsLength; $i++ ) {
03535                         $bits = $args->item( $i )->splitArg();
03536                         if ( strval( $bits['index'] ) === '' ) {
03537                             $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
03538                             $value = trim( $frame->expand( $bits['value'] ) );
03539                             $pageArgs[$name] = $value;
03540                         }
03541                     }
03542 
03543                     // Create a new context to execute the special page
03544                     $context = new RequestContext;
03545                     $context->setTitle( $title );
03546                     $context->setRequest( new FauxRequest( $pageArgs ) );
03547                     $context->setUser( $this->getUser() );
03548                     $context->setLanguage( $this->mOptions->getUserLangObj() );
03549                     $ret = SpecialPageFactory::capturePath( $title, $context );
03550                     if ( $ret ) {
03551                         $text = $context->getOutput()->getHTML();
03552                         $this->mOutput->addOutputPageMetadata( $context->getOutput() );
03553                         $found = true;
03554                         $isHTML = true;
03555                         $this->disableCache();
03556                     }
03557                 } elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) {
03558                     $found = false; # access denied
03559                     wfDebug( __METHOD__ . ": template inclusion denied for " .
03560                         $title->getPrefixedDBkey() . "\n" );
03561                 } else {
03562                     list( $text, $title ) = $this->getTemplateDom( $title );
03563                     if ( $text !== false ) {
03564                         $found = true;
03565                         $isChildObj = true;
03566                     }
03567                 }
03568 
03569                 # If the title is valid but undisplayable, make a link to it
03570                 if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
03571                     $text = "[[:$titleText]]";
03572                     $found = true;
03573                 }
03574             } elseif ( $title->isTrans() ) {
03575                 # Interwiki transclusion
03576                 if ( $this->ot['html'] && !$forceRawInterwiki ) {
03577                     $text = $this->interwikiTransclude( $title, 'render' );
03578                     $isHTML = true;
03579                 } else {
03580                     $text = $this->interwikiTransclude( $title, 'raw' );
03581                     # Preprocess it like a template
03582                     $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
03583                     $isChildObj = true;
03584                 }
03585                 $found = true;
03586             }
03587 
03588             # Do infinite loop check
03589             # This has to be done after redirect resolution to avoid infinite loops via redirects
03590             if ( !$frame->loopCheck( $title ) ) {
03591                 $found = true;
03592                 $text = '<span class="error">'
03593                     . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
03594                     . '</span>';
03595                 wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" );
03596             }
03597             wfProfileOut( __METHOD__ . '-loadtpl' );
03598         }
03599 
03600         # If we haven't found text to substitute by now, we're done
03601         # Recover the source wikitext and return it
03602         if ( !$found ) {
03603             $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
03604             if ( $titleProfileIn ) {
03605                 wfProfileOut( $titleProfileIn ); // template out
03606             }
03607             wfProfileOut( __METHOD__ );
03608             return array( 'object' => $text );
03609         }
03610 
03611         # Expand DOM-style return values in a child frame
03612         if ( $isChildObj ) {
03613             # Clean up argument array
03614             $newFrame = $frame->newChild( $args, $title );
03615 
03616             if ( $nowiki ) {
03617                 $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
03618             } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
03619                 # Expansion is eligible for the empty-frame cache
03620                 $text = $newFrame->cachedExpand( $titleText, $text );
03621             } else {
03622                 # Uncached expansion
03623                 $text = $newFrame->expand( $text );
03624             }
03625         }
03626         if ( $isLocalObj && $nowiki ) {
03627             $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
03628             $isLocalObj = false;
03629         }
03630 
03631         if ( $titleProfileIn ) {
03632             wfProfileOut( $titleProfileIn ); // template out
03633         }
03634 
03635         # Replace raw HTML by a placeholder
03636         if ( $isHTML ) {
03637             $text = $this->insertStripItem( $text );
03638         } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
03639             # Escape nowiki-style return values
03640             $text = wfEscapeWikiText( $text );
03641         } elseif ( is_string( $text )
03642             && !$piece['lineStart']
03643             && preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
03644         ) {
03645             # Bug 529: if the template begins with a table or block-level
03646             # element, it should be treated as beginning a new line.
03647             # This behavior is somewhat controversial.
03648             $text = "\n" . $text;
03649         }
03650 
03651         if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
03652             # Error, oversize inclusion
03653             if ( $titleText !== false ) {
03654                 # Make a working, properly escaped link if possible (bug 23588)
03655                 $text = "[[:$titleText]]";
03656             } else {
03657                 # This will probably not be a working link, but at least it may
03658                 # provide some hint of where the problem is
03659                 preg_replace( '/^:/', '', $originalTitle );
03660                 $text = "[[:$originalTitle]]";
03661             }
03662             $text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
03663                 . 'post-expand include size too large -->' );
03664             $this->limitationWarn( 'post-expand-template-inclusion' );
03665         }
03666 
03667         if ( $isLocalObj ) {
03668             $ret = array( 'object' => $text );
03669         } else {
03670             $ret = array( 'text' => $text );
03671         }
03672 
03673         wfProfileOut( __METHOD__ );
03674         return $ret;
03675     }
03676 
/**
 * Call a parser function and return an array with text and flags.
 *
 * The function name is looked up first among the case-sensitive synonyms
 * and then, lower-cased with the content language, among the
 * case-insensitive ones. If no synonym matches, array( 'found' => false )
 * is returned and nothing is called.
 *
 * Otherwise the registered hook is invoked. Hooks registered with
 * SFH_OBJECT_ARGS receive ( $parser, $frame, $args ) with the arguments
 * as nodes; all other hooks receive the parser followed by each argument
 * expanded and trimmed to plain text.
 *
 * The hook may return a string or an array of text (index 0 or 'text')
 * plus flags. The result is normalised to an array that always has
 * 'found' => true. When the hook sets 'noparse' to a false value, 'text'
 * is run through preprocessToDom() and 'isChildObj' is set to true.
 *
 * @param PPFrame $frame The frame used to expand node arguments
 * @param string $function Function name as written in the wikitext
 * @param array $args Arguments; index 0 is the text after the colon
 * @throws MWException If the registered hook is not callable
 * @throws Exception Anything thrown while preparing arguments, running
 *   the hook or preprocessing its output is re-thrown after this
 *   method's profiling sections have been closed
 * @return array
 */
public function callParserFunction( $frame, $function, array $args = array() ) {
    global $wgContLang;

    wfProfileIn( __METHOD__ );

    # Case sensitive functions
    if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
        $function = $this->mFunctionSynonyms[1][$function];
    } else {
        # Case insensitive functions
        $function = $wgContLang->lc( $function );
        if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
            $function = $this->mFunctionSynonyms[0][$function];
        } else {
            wfProfileOut( __METHOD__ );
            return array( 'found' => false );
        }
    }

    $hookProfile = __METHOD__ . '-pfunc-' . $function;
    wfProfileIn( $hookProfile );
    list( $callback, $flags ) = $this->mFunctionHooks[$function];

    # Workaround for PHP bug 35229 and similar
    if ( !is_callable( $callback ) ) {
        wfProfileOut( $hookProfile );
        wfProfileOut( __METHOD__ );
        throw new MWException( "Parser function hook for $function is not callable\n" );
    }

    try {
        $allArgs = array( &$this );
        if ( $flags & SFH_OBJECT_ARGS ) {
            # Convert arguments to PPNodes and collect for appending to $allArgs
            $funcArgs = array();
            foreach ( $args as $k => $v ) {
                if ( $v instanceof PPNode || $k === 0 ) {
                    $funcArgs[] = $v;
                } else {
                    $funcArgs[] = $this->mPreprocessor->newPartNodeArray( array( $k => $v ) )->item( 0 );
                }
            }

            # Add a frame parameter, and pass the arguments as an array
            $allArgs[] = $frame;
            $allArgs[] = $funcArgs;
        } else {
            # Convert arguments to plain text and append to $allArgs
            foreach ( $args as $k => $v ) {
                if ( $v instanceof PPNode ) {
                    $allArgs[] = trim( $frame->expand( $v ) );
                } elseif ( is_int( $k ) && $k >= 0 ) {
                    $allArgs[] = trim( $v );
                } else {
                    $allArgs[] = trim( "$k=$v" );
                }
            }
        }

        $result = call_user_func_array( $callback, $allArgs );

        # The interface for function hooks allows them to return a wikitext
        # string or an array containing the string and any flags. This mungs
        # things around to match what this method should return.
        if ( !is_array( $result ) ) {
            $result = array(
                'found' => true,
                'text' => $result,
            );
        } else {
            if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
                $result['text'] = $result[0];
            }
            unset( $result[0] );
            $result += array(
                'found' => true,
            );
        }

        # Hook output is treated as final unless the hook asks for parsing
        $noparse = isset( $result['noparse'] ) ? $result['noparse'] : true;
        $preprocessFlags = isset( $result['preprocessFlags'] ) ? $result['preprocessFlags'] : 0;

        if ( !$noparse ) {
            $result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
            $result['isChildObj'] = true;
        }
    } catch ( Exception $ex ) {
        # Close the sections opened above so the profiler stack stays
        # balanced; callers only close their own sections on this path.
        wfProfileOut( $hookProfile );
        wfProfileOut( __METHOD__ );
        throw $ex;
    }

    wfProfileOut( $hookProfile );
    wfProfileOut( __METHOD__ );

    return $result;
}
03791 
/**
 * Get the preprocessed DOM of a template, using this parser's caches.
 *
 * Two caches are consulted, both keyed by prefixed DB key:
 * mTplRedirCache maps a requested title to the ( namespace, DB key ) of
 * the title that was actually loaded, and mTplDomCache holds the DOM
 * (or false when no text could be fetched).
 *
 * @param Title $title Title of the template to load
 * @return array ( DOM as returned by preprocessToDom() or false, Title ).
 *   The Title is the one reported by fetchTemplateAndTitle(), or the
 *   cached redirect target on a cache hit.
 */
public function getTemplateDom( $title ) {
    $cacheTitle = $title;
    $titleText = $title->getPrefixedDBkey();

    # Follow a redirect that was already resolved during this parse
    if ( isset( $this->mTplRedirCache[$titleText] ) ) {
        list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
        $title = Title::makeTitle( $ns, $dbk );
        $titleText = $title->getPrefixedDBkey();
    }
    if ( isset( $this->mTplDomCache[$titleText] ) ) {
        return array( $this->mTplDomCache[$titleText], $title );
    }

    # Cache miss, go to the database
    list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

    if ( $text === false ) {
        $this->mTplDomCache[$titleText] = false;
        return array( false, $title );
    }

    $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
    $this->mTplDomCache[$titleText] = $dom;

    # Remember where the requested title ended up, if it differs
    if ( !$title->equals( $cacheTitle ) ) {
        $this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
            array( $title->getNamespace(), $title->getDBkey() );
    }

    return array( $dom, $title );
}
03831 
/**
 * Fetch the unparsed text of a template and register every dependency
 * the template callback reports on the parser output.
 *
 * @param Title $title
 * @return array ( text, Title ) where the Title is the callback's
 *   'finalTitle' when it supplied one, and $title otherwise
 */
public function fetchTemplateAndTitle( $title ) {
    // Defaults to Parser::statelessFetchTemplate()
    $fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );

    $text = $fetched['text'];
    if ( isset( $fetched['finalTitle'] ) ) {
        $finalTitle = $fetched['finalTitle'];
    } else {
        $finalTitle = $title;
    }

    if ( !isset( $fetched['deps'] ) ) {
        return array( $text, $finalTitle );
    }

    foreach ( $fetched['deps'] as $dep ) {
        $this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );
        if ( $dep['title']->equals( $this->getTitle() ) ) {
            // If we transclude ourselves, the final result
            // will change based on the new version of the page
            $this->mOutput->setFlag( 'vary-revision' );
        }
    }

    return array( $text, $finalTitle );
}
03855 
/**
 * Fetch the unparsed text of a template, discarding the final title.
 *
 * @param Title $title
 * @return mixed The first element returned by fetchTemplateAndTitle()
 */
public function fetchTemplate( $title ) {
    list( $text ) = $this->fetchTemplateAndTitle( $title );
    return $text;
}
03865 
/**
 * Static function to get a template, following at most one redirect.
 *
 * For each title visited, the 'BeforeParserFetchTemplateAndtitle' hook
 * may set $skip (stop and return no text) or $id (use that revision
 * instead of the current one). Every title looked at is recorded in the
 * returned 'deps' list.
 *
 * @param Title $title
 * @param Parser|bool $parser Passed through to the hook
 * @return array With keys 'text' (string or false), 'finalTitle' and
 *   'deps' (list of arrays with 'title', 'page_id' and 'rev_id')
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
    global $wgContLang;

    $text = false;
    $skip = false;
    $finalTitle = $title;
    $deps = array();

    # Loop to fetch the article, with up to 1 redirect
    $round = 0;
    while ( $round < 2 && is_object( $title ) ) {
        $round++;

        # Give extensions a chance to select the revision instead
        $id = false; # Assume current
        wfRunHooks( 'BeforeParserFetchTemplateAndtitle',
            array( $parser, $title, &$skip, &$id ) );

        if ( $skip ) {
            $text = false;
            $deps[] = array(
                'title' => $title,
                'page_id' => $title->getArticleID(),
                'rev_id' => null
            );
            break;
        }

        # Get the revision
        if ( $id ) {
            $rev = Revision::newFromId( $id );
        } else {
            $rev = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
        }
        $revId = $rev ? $rev->getId() : 0;

        # If there is no current revision, there is no page
        if ( !$rev && $id === false ) {
            LinkCache::singleton()->addBadLinkObj( $title );
        }

        $deps[] = array(
            'title' => $title,
            'page_id' => $title->getArticleID(),
            'rev_id' => $revId
        );

        if ( $rev ) {
            if ( !$title->equals( $rev->getTitle() ) ) {
                # We fetched a rev from a different title; register it too...
                $deps[] = array(
                    'title' => $rev->getTitle(),
                    'page_id' => $rev->getPage(),
                    'rev_id' => $revId
                );
            }

            $content = $rev->getContent();
            $text = $content ? $content->getWikitextForTransclusion() : null;

            if ( $text === null || $text === false ) {
                $text = false;
                break;
            }
        } elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
            # No revision, but the page may still be backed by a message
            $message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
            if ( !$message->exists() ) {
                $text = false;
                break;
            }
            $content = $message->content();
            $text = $message->plain();
        } else {
            break;
        }

        if ( !$content ) {
            break;
        }

        # Redirect?
        $finalTitle = $title;
        $title = $content->getRedirectTarget();
    }

    return array(
        'text' => $text,
        'finalTitle' => $finalTitle,
        'deps' => $deps
    );
}
03951 
/**
 * Fetch a file and register it as a dependency, discarding the title.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileAndTitle()
 * @return mixed The first element returned by fetchFileAndTitle()
 */
public function fetchFile( $title, $options = array() ) {
    list( $file ) = $this->fetchFileAndTitle( $title, $options );
    return $file;
}
03963 
/**
 * Fetch a file and its title, registering the file as an image
 * dependency on the parser output.
 *
 * The requested title is always registered. When a file was found under
 * a different title, that title is registered too and is the one
 * returned.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array ( file or false, Title )
 */
public function fetchFileAndTitle( $title, $options = array() ) {
    $file = $this->fetchFileNoRegister( $title, $options );

    if ( $file ) {
        $time = $file->getTimestamp();
        $sha1 = $file->getSha1();
    } else {
        $time = false;
        $sha1 = false;
    }

    # Register the file as a dependency...
    $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

    if ( $file ) {
        if ( !$title->equals( $file->getTitle() ) ) {
            # Update fetched file title
            $title = $file->getTitle();
            $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
        }
    }

    return array( $file, $title );
}
03985 
/**
 * Look up a file without registering it as a dependency.
 *
 * @param Title $title
 * @param array $options Recognised keys: 'broken' (force "no file") and
 *   'sha1' (look up by key instead of by name); the whole array is also
 *   passed on to the lookup
 * @return mixed The lookup result, or false when 'broken' is set
 */
protected function fetchFileNoRegister( $title, $options = array() ) {
    if ( isset( $options['broken'] ) ) {
        // broken thumbnail forced by hook
        return false;
    }

    if ( isset( $options['sha1'] ) ) {
        // get by (sha1,timestamp)
        return RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
    }

    // get by (name,timestamp)
    return wfFindFile( $title, $options );
}
04006 
/**
 * Transclude an interwiki link.
 *
 * Returns a content-language message instead of the page when
 * $wgEnableScaryTranscluding is off or the URL is longer than 255 bytes.
 *
 * @param Title $title
 * @param string $action Value of the 'action' URL parameter
 * @return string
 */
public function interwikiTransclude( $title, $action ) {
    global $wgEnableScaryTranscluding;

    if ( $wgEnableScaryTranscluding ) {
        $url = $title->getFullURL( array( 'action' => $action ) );
        if ( strlen( $url ) <= 255 ) {
            return $this->fetchScaryTemplateMaybeFromCache( $url );
        }
        $failure = 'scarytranscludetoolong';
    } else {
        $failure = 'scarytranscludedisabled';
    }

    return wfMessage( $failure )->inContentLanguage()->text();
}
04029 
/**
 * Fetch the contents of a URL, going through the 'transcache' table.
 *
 * A cached row is used when its tc_time is no older than
 * $wgTranscludeCacheExpiry seconds. Otherwise the URL is requested and,
 * on success, the response is written back to the table. Failed
 * requests are not cached; a content-language error message is
 * returned instead.
 *
 * @param string $url
 * @return string
 */
public function fetchScaryTemplateMaybeFromCache( $url ) {
    global $wgTranscludeCacheExpiry;

    $dbr = wfGetDB( DB_SLAVE );
    $oldestAllowed = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
    $cached = $dbr->selectRow(
        'transcache',
        array( 'tc_time', 'tc_contents' ),
        array( 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $oldestAllowed ) )
    );
    if ( $cached ) {
        return $cached->tc_contents;
    }

    $req = MWHttpRequest::factory( $url );
    $status = $req->execute(); // Status object
    if ( !$status->isOK() ) {
        if ( $req->getStatus() != 200 ) {
            // Though we failed to fetch the content, this status is useless.
            return wfMessage( 'scarytranscludefailed-httpstatus' )
                ->params( $url, $req->getStatus() /* HTTP status */ )->inContentLanguage()->text();
        }
        return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
    }

    $text = $req->getContent();

    $dbw = wfGetDB( DB_MASTER );
    $row = array(
        'tc_url' => $url,
        'tc_time' => $dbw->timestamp( time() ),
        'tc_contents' => $text
    );
    $dbw->replace( 'transcache', array( 'tc_url' ), $row );

    return $text;
}
04064 
/**
 * Triple brace replacement -- used for template arguments.
 *
 * Resolution order: the argument's value from the frame; else, in HTML
 * or preprocess mode (or in wiki mode inside a template), the supplied
 * default; else the original {{{...}}} source is reconstructed.
 *
 * @param array $piece With keys 'title' and 'parts'
 * @param PPFrame $frame
 * @return array Either array( 'object' => node ) or array( 'text' => string )
 */
public function argSubstitution( $piece, $frame ) {
    wfProfileIn( __METHOD__ );

    $parts = $piece['parts'];
    $nameWithSpaces = $frame->expand( $piece['title'] );
    $value = $frame->getArgument( trim( $nameWithSpaces ) );
    $unset = ( $value === false );
    $node = false;

    if ( $unset && $parts->getLength() > 0 ) {
        $mayUseDefault = $this->ot['html']
            || $this->ot['pre']
            || ( $this->ot['wiki'] && $frame->isTemplate() );
        if ( $mayUseDefault ) {
            # No match in frame, use the supplied default
            $node = $parts->item( 0 )->getChildren();
        }
    }

    $tooLarge = !$this->incrementIncludeSize( 'arg', strlen( $value ) );
    if ( $tooLarge ) {
        $this->limitationWarn( 'post-expand-template-argument' );
    }

    if ( $unset && $node === false ) {
        # No match anywhere
        $node = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
    }

    if ( $node !== false ) {
        $ret = array( 'object' => $node );
    } else {
        # The size warning is only visible when plain text is returned
        if ( $tooLarge ) {
            $value .= '<!-- WARNING: argument omitted, expansion size too large -->';
        }
        $ret = array( 'text' => $value );
    }

    wfProfileOut( __METHOD__ );
    return $ret;
}
04113 
/**
 * Return the text to be used for a given extension tag.
 *
 * In HTML mode, or for a function tag hook in HTML/preprocess mode, the
 * registered hook is called and its output is used. In every other mode
 * the tag's source is rebuilt. Unless the marker type is 'none', the
 * output is stored in the strip state and a unique marker is returned
 * in its place.
 *
 * A hook may return an array: element 0 is the output and the remaining
 * elements are extract()ed into local scope, which is how a hook
 * overrides $markerType. Local variable names in this method are
 * therefore part of that contract and must not be renamed casually.
 *
 * @param array $params Associative array of parameters:
 *     name       node for the tag name
 *     attr       node for unparsed (but preprocessed) attribute text
 *     inner      node for the contents of the tag
 *     attributes optional associative array of additional attributes
 *     close      optional node for the closing tag text
 * @param PPFrame $frame
 *
 * @throws MWException If a registered hook is not callable or the
 *   marker type is not one of 'none', 'nowiki' or 'general'
 * @return string The hook output or the strip marker standing in for it
 */
public function extensionSubstitution( $params, $frame ) {
    $name = $frame->expand( $params['name'] );
    $attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
    $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
    $marker = "{$this->mUniqPrefix}-$name-"
        . sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

    $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
        ( $this->ot['html'] || $this->ot['pre'] );
    if ( $isFunctionTag ) {
        $markerType = 'none';
    } else {
        $markerType = 'general';
    }
    if ( $this->ot['html'] || $isFunctionTag ) {
        $name = strtolower( $name );
        $attributes = Sanitizer::decodeTagAttributes( $attrText );
        if ( isset( $params['attributes'] ) ) {
            # Attributes parsed from the tag text win over the extra ones
            $attributes = $attributes + $params['attributes'];
        }

        if ( isset( $this->mTagHooks[$name] ) ) {
            # Workaround for PHP bug 35229 and similar
            if ( !is_callable( $this->mTagHooks[$name] ) ) {
                throw new MWException( "Tag hook for $name is not callable\n" );
            }
            $output = call_user_func_array( $this->mTagHooks[$name],
                array( $content, $attributes, $this, $frame ) );
        } elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
            list( $callback, ) = $this->mFunctionTagHooks[$name];
            if ( !is_callable( $callback ) ) {
                throw new MWException( "Tag hook for $name is not callable\n" );
            }

            $output = call_user_func_array( $callback, array( &$this, $frame, $content, $attributes ) );
        } else {
            $output = '<span class="error">Invalid tag extension name: ' .
                htmlspecialchars( $name ) . '</span>';
        }

        if ( is_array( $output ) ) {
            # Extract flags to local scope (to override $markerType)
            $flags = $output;
            $output = $flags[0];
            unset( $flags[0] );
            extract( $flags );
        }
    } else {
        # Not rendering: rebuild the tag source
        if ( is_null( $attrText ) ) {
            $attrText = '';
        }
        if ( isset( $params['attributes'] ) ) {
            foreach ( $params['attributes'] as $attrName => $attrValue ) {
                $attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
                    htmlspecialchars( $attrValue ) . '"';
            }
        }
        if ( $content === null ) {
            $output = "<$name$attrText/>";
        } else {
            # 'close' may be absent; isset() treats a missing key like null
            # without raising an undefined index notice.
            $close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
            $output = "<$name$attrText>$content$close";
        }
    }

    if ( $markerType === 'none' ) {
        return $output;
    } elseif ( $markerType === 'nowiki' ) {
        $this->mStripState->addNoWiki( $marker, $output );
    } elseif ( $markerType === 'general' ) {
        $this->mStripState->addGeneral( $marker, $output );
    } else {
        throw new MWException( __METHOD__ . ': invalid marker type' );
    }
    return $marker;
}
04205 
/**
 * Add $size to the running include-size counter of the given type,
 * unless that would exceed the configured maximum include size.
 *
 * @param string $type Key into mIncludeSizes, e.g. 'arg' or 'post-expand'
 * @param int $size Amount to add
 * @return bool False if the limit would be exceeded (the counter is then
 *   left untouched), true otherwise
 */
public function incrementIncludeSize( $type, $size ) {
    $fits = $this->mIncludeSizes[$type] + $size <= $this->mOptions->getMaxIncludeSize();
    if ( !$fits ) {
        return false;
    }

    $this->mIncludeSizes[$type] += $size;
    return true;
}
04221 
/**
 * Increment the expensive function count.
 *
 * The counter is incremented even when the limit is already exceeded.
 *
 * @return bool False if the count now exceeds the expensive parser
 *   function limit, true otherwise
 */
public function incrementExpensiveFunctionCount() {
    $count = ++$this->mExpensiveFunctionCount;
    return $count <= $this->mOptions->getExpensiveParserFunctionLimit();
}
04231 
/**
 * Strip double-underscore magic words from the text and record their
 * effects on the parser and its output.
 *
 * The first 'toc' word is replaced by a '<!--MWTOC-->' placeholder and
 * any further ones are removed. All other double-underscore words are
 * removed, stored in mDoubleUnderscores and saved as page properties.
 *
 * @param string $text
 * @return string The text with the magic words removed
 */
public function doDoubleUnderscore( $text ) {
    wfProfileIn( __METHOD__ );

    # The position of __TOC__ needs to be recorded
    $tocWord = MagicWord::get( 'toc' );
    if ( $tocWord->match( $text ) ) {
        $this->mShowToc = true;
        $this->mForceTocPosition = true;

        # Set a placeholder. At the end we'll fill it in with the TOC.
        $text = $tocWord->replace( '<!--MWTOC-->', $text, 1 );

        # Only keep the first one.
        $text = $tocWord->replace( '', $text );
    }

    # Now match and remove the rest of them
    $this->mDoubleUnderscores = MagicWord::getDoubleUnderscoreArray()->matchAndRemove( $text );
    $found = $this->mDoubleUnderscores;

    if ( isset( $found['nogallery'] ) ) {
        $this->mOutput->mNoGallery = true;
    }

    # An explicit TOC position takes precedence over the 'notoc' word
    if ( !$this->mForceTocPosition && isset( $found['notoc'] ) ) {
        $this->mShowToc = false;
    }

    if ( isset( $found['hiddencat'] ) ) {
        if ( $this->mTitle->getNamespace() == NS_CATEGORY ) {
            $this->addTrackingCategory( 'hidden-category-category' );
        }
    }

    # (bug 8068) Allow control over whether robots index a page.
    #
    # @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here!  This
    # is not desirable, the last one on the page should win.
    if ( isset( $found['noindex'] ) && $this->mTitle->canUseNoindex() ) {
        $this->mOutput->setIndexPolicy( 'noindex' );
        $this->addTrackingCategory( 'noindex-category' );
    }
    if ( isset( $found['index'] ) && $this->mTitle->canUseNoindex() ) {
        $this->mOutput->setIndexPolicy( 'index' );
        $this->addTrackingCategory( 'index-category' );
    }

    # Cache all double underscores in the database
    foreach ( array_keys( $this->mDoubleUnderscores ) as $key ) {
        $this->mOutput->setProperty( $key, '' );
    }

    wfProfileOut( __METHOD__ );
    return $text;
}
04292 
/**
 * Add the tracking category named by an interface message to the output.
 *
 * Nothing is added when the current title is in NS_SPECIAL, when the
 * message text is exactly "-", or when the message text cannot be turned
 * into a category title.
 *
 * @param string $msg Message key whose content-language text is the category name
 * @return bool Whether the category was added
 */
public function addTrackingCategory( $msg ) {
    if ( $this->mTitle->getNamespace() === NS_SPECIAL ) {
        wfDebug( __METHOD__ . ": Not adding tracking category $msg to special page!\n" );
        return false;
    }

    // Important to parse with correct title (bug 31469)
    $categoryName = wfMessage( $msg )->title( $this->getTitle() )->inContentLanguage()->text();

    # A message consisting of "-" disables the tracking category
    if ( $categoryName === '-' ) {
        return false;
    }

    $categoryTitle = Title::makeTitleSafe( NS_CATEGORY, $categoryName );
    if ( !$categoryTitle ) {
        wfDebug( __METHOD__ . ": [[MediaWiki:$msg]] is not a valid title!\n" );
        return false;
    }

    $this->mOutput->addCategory( $categoryTitle->getDBkey(), $this->getDefaultSort() );
    return true;
}
04329 
/**
 * Rebuild every <hN> heading found in $text (anchor, optional number,
 * edit-section marker), build the table of contents and the raw section
 * list, and return the text with the rebuilt headings and, when
 * applicable, the TOC inserted.
 *
 * Side effects visible in this method: may enable edit section tokens,
 * the new-section flags, the TOC HTML and the 'mediawiki.toc' module on
 * $this->mOutput; stores the section list on the output when $isMain is
 * true; may set $this->mShowToc; temporarily switches the output type to
 * OT_WIKI while the headings are processed.
 *
 * @param string $text Text containing the rendered <hN>...</hN> headings
 * @param string $origText Original text; it is preprocessed to a DOM here
 *   and walked to compute the offset of each section
 * @param bool $isMain When true, the section list is stored on the output
 *   and the TOC may be appended to the lead section
 * @return string
 */
public function formatHeadings( $text, $origText, $isMain = true ) {
    global $wgMaxTocLevel, $wgExperimentalHtmlIds;

    # Inhibit editsection links if requested in the page
    if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
        $maybeShowEditLink = $showEditLink = false;
    } else {
        $maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */
        $showEditLink = $this->mOptions->getEditSection();
    }
    if ( $showEditLink ) {
        $this->mOutput->setEditSectionTokens( true );
    }

    # Get all headlines for numbering them and adding funky stuff like [edit]
    # links - this is for later, but we need the number of headlines right now
    $matches = array();
    $numMatches = preg_match_all(
        '/<H(?P<level>[1-6])(?P<attrib>.*?' . '>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i',
        $text,
        $matches
    );

    # if there are fewer than 4 headlines in the article, do not show TOC
    # unless it's been explicitly enabled.
    $enoughToc = $this->mShowToc &&
        ( ( $numMatches >= 4 ) || $this->mForceTocPosition );

    # Allow user to stipulate that a page should have a "new section"
    # link added via __NEWSECTIONLINK__
    if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
        $this->mOutput->setNewSection( true );
    }

    # Allow user to remove the "new section"
    # link via __NONEWSECTIONLINK__
    if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
        $this->mOutput->hideNewSection( true );
    }

    # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
    # override above conditions and always show TOC above first header
    if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
        $this->mShowToc = true;
        $enoughToc = true;
    }

    # headline counter
    $headlineCount = 0;
    $numVisible = 0;

    # Ugh .. the TOC should have neat indentation levels which can be
    # passed to the skin functions. These are determined here
    $toc = '';
    $full = '';
    $head = array();
    $sublevelCount = array();
    $levelCount = array();
    $level = 0;
    $prevlevel = 0;
    $toclevel = 0;
    $prevtoclevel = 0;
    $markerRegex = "{$this->mUniqPrefix}-h-(\d+)-" . self::MARKER_SUFFIX;
    $baseTitleText = $this->mTitle->getPrefixedDBkey();
    $oldType = $this->mOutputType;
    $this->setOutputType( self::OT_WIKI );
    $frame = $this->getPreprocessor()->newFrame();
    $root = $this->preprocessToDom( $origText );
    $node = $root->getFirstChild();
    $byteOffset = 0;
    $tocraw = array();
    # Maps lower-cased anchor => number of times it has been used so far
    $refers = array();

    foreach ( $matches['header'] as $headline ) {
        $isTemplate = false;
        $titleText = false;
        $sectionIndex = false;
        $numbering = '';
        $markerMatches = array();
        # A leading heading marker identifies the entry in $this->mHeadings
        # that holds the source title and section index of this heading
        if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
            $serial = $markerMatches[1];
            list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
            $isTemplate = ( $titleText != $baseTitleText );
            $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
        }

        if ( $toclevel ) {
            $prevlevel = $level;
        }
        $level = $matches['level'][$headlineCount];

        if ( $level > $prevlevel ) {
            # Increase TOC level
            $toclevel++;
            $sublevelCount[$toclevel] = 0;
            if ( $toclevel < $wgMaxTocLevel ) {
                $prevtoclevel = $toclevel;
                $toc .= Linker::tocIndent();
                $numVisible++;
            }
        } elseif ( $level < $prevlevel && $toclevel > 1 ) {
            # Decrease TOC level, find level to jump to

            for ( $i = $toclevel; $i > 0; $i-- ) {
                if ( $levelCount[$i] == $level ) {
                    # Found last matching level
                    $toclevel = $i;
                    break;
                } elseif ( $levelCount[$i] < $level ) {
                    # Found first matching level below current level
                    $toclevel = $i + 1;
                    break;
                }
            }
            if ( $i == 0 ) {
                $toclevel = 1;
            }
            if ( $toclevel < $wgMaxTocLevel ) {
                if ( $prevtoclevel < $wgMaxTocLevel ) {
                    # Unindent only if the previous toc level was shown :p
                    $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
                    $prevtoclevel = $toclevel;
                } else {
                    $toc .= Linker::tocLineEnd();
                }
            }
        } else {
            # No change in level, end TOC line
            if ( $toclevel < $wgMaxTocLevel ) {
                $toc .= Linker::tocLineEnd();
            }
        }

        $levelCount[$toclevel] = $level;

        # count number of headlines for each level
        $sublevelCount[$toclevel]++;
        $dot = 0;
        for ( $i = 1; $i <= $toclevel; $i++ ) {
            if ( !empty( $sublevelCount[$i] ) ) {
                if ( $dot ) {
                    $numbering .= '.';
                }
                $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
                $dot = 1;
            }
        }

        # The safe header is a version of the header text safe to use for links

        # Remove link placeholders by the link text.
        #     <!--LINK number-->
        # turns into
        #     link text with suffix
        # Do this before unstrip since link text can contain strip markers
        $safeHeadline = $this->replaceLinkHoldersText( $headline );

        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
        $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

        # Strip out HTML (first regex removes any tag not allowed)
        # Allowed tags are:
        # * <sup> and <sub> (bug 8393)
        # * <i> (bug 26375)
        # * <b> (r105284)
        # * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
        #
        # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
        # to allow setting directionality in toc items.
        $tocline = preg_replace(
            array(
                '#<(?!/?(span|sup|sub|i|b)(?: [^>]*)?>).*?' . '>#',
                '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|i|b))(?: .*?)?' . '>#'
            ),
            array( '', '<$1>' ),
            $safeHeadline
        );
        $tocline = trim( $tocline );

        # For the anchor, strip out HTML-y stuff period
        $safeHeadline = preg_replace( '/<.*?' . '>/', '', $safeHeadline );
        $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

        # Save headline for section edit hint before it's escaped
        $headlineHint = $safeHeadline;

        if ( $wgExperimentalHtmlIds ) {
            # For reverse compatibility, provide an id that's
            # HTML4-compatible, like we used to.
            #
            # It may be worth noting, academically, that it's possible for
            # the legacy anchor to conflict with a non-legacy headline
            # anchor on the page.  In this case likely the "correct" thing
            # would be to either drop the legacy anchors or make sure
            # they're numbered first.  However, this would require people
            # to type in section names like "abc_.D7.93.D7.90.D7.A4"
            # manually, so let's not bother worrying about it.
            $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
                array( 'noninitial', 'legacy' ) );
            $safeHeadline = Sanitizer::escapeId( $safeHeadline );

            if ( $legacyHeadline == $safeHeadline ) {
                # No reason to have both (in fact, we can't)
                $legacyHeadline = false;
            }
        } else {
            $legacyHeadline = false;
            $safeHeadline = Sanitizer::escapeId( $safeHeadline,
                'noninitial' );
        }

        # HTML names must be case-insensitively unique (bug 10721).
        # This does not apply to Unicode characters per
        # http://dev.w3.org/html5/spec/infrastructure.html#case-sensitivity-and-string-comparison
        # @todo FIXME: We may be changing them depending on the current locale.
        $arrayKey = strtolower( $safeHeadline );
        if ( $legacyHeadline === false ) {
            $legacyArrayKey = false;
        } else {
            $legacyArrayKey = strtolower( $legacyHeadline );
        }

        # count how many in assoc. array so we can track dupes in anchors
        if ( isset( $refers[$arrayKey] ) ) {
            $refers[$arrayKey]++;
        } else {
            $refers[$arrayKey] = 1;
        }
        # Only count the legacy anchor when there is one. PHP casts both
        # the array key false and the string key "0" to integer 0, so
        # counting under a false key would make a heading whose anchor is
        # "0" look like a duplicate and get a bogus "_N" suffix.
        if ( $legacyArrayKey !== false ) {
            if ( isset( $refers[$legacyArrayKey] ) ) {
                $refers[$legacyArrayKey]++;
            } else {
                $refers[$legacyArrayKey] = 1;
            }
        }

        # Don't number the heading if it is the only one (looks silly)
        if ( count( $matches['header'] ) > 1 && $this->mOptions->getNumberHeadings() ) {
            # the two are different if the line contains a link
            $headline = Html::element(
                'span',
                array( 'class' => 'mw-headline-number' ),
                $numbering
            ) . ' ' . $headline;
        }

        # Create the anchor for linking from the TOC to the section
        $anchor = $safeHeadline;
        $legacyAnchor = $legacyHeadline;
        if ( $refers[$arrayKey] > 1 ) {
            $anchor .= '_' . $refers[$arrayKey];
        }
        if ( $legacyHeadline !== false && $refers[$legacyArrayKey] > 1 ) {
            $legacyAnchor .= '_' . $refers[$legacyArrayKey];
        }
        if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
            $toc .= Linker::tocLine( $anchor, $tocline,
                $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
        }

        # Add the section to the section tree
        # Find the DOM node for this header
        $noOffset = ( $isTemplate || $sectionIndex === false );
        while ( $node && !$noOffset ) {
            if ( $node->getName() === 'h' ) {
                $bits = $node->splitHeading();
                if ( $bits['i'] == $sectionIndex ) {
                    break;
                }
            }
            # Note: the offset is accumulated with mb_strlen()
            $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
                $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
            $node = $node->getNextSibling();
        }
        $tocraw[] = array(
            'toclevel' => $toclevel,
            'level' => $level,
            'line' => $tocline,
            'number' => $numbering,
            'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
            'fromtitle' => $titleText,
            'byteoffset' => ( $noOffset ? null : $byteOffset ),
            'anchor' => $anchor,
        );

        # give headline the correct <h#> tag
        if ( $maybeShowEditLink && $sectionIndex !== false ) {
            // Output edit section links as markers with styles that can be customized by skins
            if ( $isTemplate ) {
                # Put a T flag in the section identifier, to indicate to extractSections()
                # that sections inside <includeonly> should be counted.
                $editsectionPage = $titleText;
                $editsectionSection = "T-$sectionIndex";
                $editsectionContent = null;
            } else {
                $editsectionPage = $this->mTitle->getPrefixedText();
                $editsectionSection = $sectionIndex;
                $editsectionContent = $headlineHint;
            }
            // We use a bit of pseudo-xml for editsection markers. The
            // language converter is run later on. Using a UNIQ style marker
            // leads to the converter screwing up the tokens when it
            // converts stuff. And trying to insert strip tags fails too. At
            // this point all real inputted tags have already been escaped,
            // so we don't have to worry about a user trying to input one of
            // these markers directly. We use a page and section attribute
            // to stop the language converter from converting these
            // important bits of data, but put the headline hint inside a
            // content block because the language converter is supposed to
            // be able to convert that piece of data.
            // Gets replaced with html in ParserOutput::getText
            $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
            $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
            if ( $editsectionContent !== null ) {
                $editlink .= '>' . $editsectionContent . '</mw:editsection>';
            } else {
                $editlink .= '/>';
            }
        } else {
            $editlink = '';
        }
        $head[$headlineCount] = Linker::makeHeadline( $level,
            $matches['attrib'][$headlineCount], $anchor, $headline,
            $editlink, $legacyAnchor );

        $headlineCount++;
    }

    $this->setOutputType( $oldType );

    # Never ever show TOC if no headers
    if ( $numVisible < 1 ) {
        $enoughToc = false;
    }

    if ( $enoughToc ) {
        if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
            $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
        }
        $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
        $this->mOutput->setTOCHTML( $toc );
        $toc = self::TOC_START . $toc . self::TOC_END;
        $this->mOutput->addModules( 'mediawiki.toc' );
    }

    if ( $isMain ) {
        $this->mOutput->setSections( $tocraw );
    }

    # split up and insert constructed headlines
    # The closing tag must be matched exactly as in the preg_match_all()
    # pattern above (optional spaces before ">"), otherwise $blocks and
    # $head would get out of step.
    $blocks = preg_split( '/<H[1-6].*?' . '>[\s\S]*?<\/H[1-6] *>/i', $text );
    $i = 0;

    // build an array of document sections
    $sections = array();
    foreach ( $blocks as $block ) {
        // $head is zero-based, sections aren't.
        if ( empty( $head[$i - 1] ) ) {
            $sections[$i] = $block;
        } else {
            $sections[$i] = $head[$i - 1] . $block;
        }

        # Hook handlers receive the section text by reference
        wfRunHooks( 'ParserSectionCreate', array( $this, $i, &$sections[$i], $showEditLink ) );

        $i++;
    }

    if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
        // append the TOC to the end of the lead section (before the first heading)
        // Top anchor now in skin
        $sections[0] = $sections[0] . $toc . "\n";
    }

    $full .= join( '', $sections );

    if ( $this->mForceTocPosition ) {
        return str_replace( '<!--MWTOC-->', $toc, $full );
    } else {
        return $full;
    }
}
04736 
/**
 * Transform text before it is saved: normalise CRLF line endings to LF,
 * run the second pre-save pass when the options ask for it, and unstrip
 * the result.
 *
 * @param string $text The text to transform
 * @param Title $title The title the parse is started with
 * @param User $user The user set on the parser for the duration of the call
 * @param ParserOptions $options
 * @param bool $clearState Whether to take the parser lock and clear the state
 * @return string The transformed text
 */
public function preSaveTransform( $text, Title $title, User $user,
    ParserOptions $options, $clearState = true
) {
    if ( $clearState ) {
        # Kept in a local so it lives until this method returns
        $scopedLock = $this->lock();
    }
    $this->startParse( $title, $options, self::OT_WIKI, $clearState );
    $this->setUser( $user );

    # Normalise line endings
    $text = str_replace( "\r\n", "\n", $text );

    if ( $options->getPreSaveTransform() ) {
        $text = $this->pstPass2( $text, $user );
    }
    $text = $this->mStripState->unstripBoth( $text );

    # Reset the user again
    $this->setUser( null );

    return $text;
}
04770 
/**
 * Second pass of the pre-save transform: substitute variables, expand
 * signatures (~~~, ~~~~, ~~~~~) and apply the "pipe trick" to links.
 *
 * @param string $text
 * @param User $user User whose signature replaces the tilde sequences
 * @return string The transformed text, with trailing whitespace trimmed
 */
private function pstPass2( $text, $user ) {
    global $wgContLang;

    # Note: This is the timestamp saved as hardcoded wikitext to
    # the database, we use $wgContLang here in order to give
    # everyone the same signature and use the default one rather
    # than the one selected in each user's preferences.
    # (see also bug 12815)
    $ts = $this->mOptions->getTimestamp();
    $timestamp = MWTimestamp::getLocalInstance( $ts );
    $ts = $timestamp->format( 'YmdHis' );
    $tzMsg = $timestamp->format( 'T' );  # might vary on DST changeover!

    # Allow translation of timezones through wiki. format() can return
    # whatever crap the system uses, localised or not, so we cannot
    # ship premade translations.
    $key = 'timezone-' . strtolower( trim( $tzMsg ) );
    $msg = wfMessage( $key )->inContentLanguage();
    if ( $msg->exists() ) {
        $tzMsg = $msg->text();
    }

    $d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

    # Variable replacement
    # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
    $text = $this->replaceVariables( $text );

    # This works almost by chance, as the replaceVariables are done before the getUserSig(),
    # which may corrupt this parser instance via its wfMessage()->text() call.

    # Signatures
    $sigText = $this->getUserSig( $user );
    $text = strtr( $text, array(
        '~~~~~' => $d,
        '~~~~' => "$sigText $d",
        '~~~' => $sigText
    ) );

    # Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
    $tc = '[' . Title::legalChars() . ']';
    $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

    # The double-width punctuation is written as UTF-8 byte escapes so
    # that it cannot be folded into its ASCII look-alike by an editor or
    # a text normaliser. With ASCII parentheses $p4 would be a plain
    # capture group matching every "[[foo|]]" link.
    $fwOpen = "\xEF\xBC\x88"; # U+FF08 FULLWIDTH LEFT PARENTHESIS
    $fwClose = "\xEF\xBC\x89"; # U+FF09 FULLWIDTH RIGHT PARENTHESIS
    $fwComma = "\xEF\xBC\x8C"; # U+FF0C FULLWIDTH COMMA

    // [[ns:page (context)|]]
    $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
    // [[ns:page(context)|]] (double-width brackets, added in r40257)
    $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?$fwOpen$tc+$fwClose)\\|]]/";
    // [[ns:page (context), context|]] (using either single or double-width comma)
    $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |$fwComma)$tc+|)\\|]]/";
    // [[|page]] (reverse pipe trick: add context from page title)
    $p2 = "/\[\[\\|($tc+)]]/";

    # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
    $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
    $text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
    $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

    # Reverse pipe trick: borrow the namespace prefix and the trailing
    # "(context)" or ", context" part from the current page title
    $t = $this->mTitle->getText();
    $m = array();
    if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
        $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
    } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
        $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
    } else {
        # if there's no context, don't bother duplicating the title
        $text = preg_replace( $p2, '[[\\1]]', $text );
    }

    # Trim trailing whitespace
    $text = rtrim( $text );

    return $text;
}
04852 
/**
 * Build the wikitext signature for a user.
 *
 * A valid "fancy" signature is returned after cleanSig(); in every
 * other case the result is the 'signature' or 'signature-anon' message
 * filled in with the escaped user name and nickname.
 *
 * @param User &$user
 * @param string|bool $nickname Nickname to use, or false to read the
 *   user's 'nickname' option
 * @param bool|null $fancySig Whether the nickname is raw signature markup,
 *   or null to read the user's 'fancysig' option
 * @return string
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
    global $wgMaxSigChars;

    $username = $user->getName();

    # Fill in whatever the caller did not supply from the user object
    if ( $nickname === false ) {
        $nickname = $user->getOption( 'nickname' );
    }
    if ( $fancySig === null ) {
        $fancySig = $user->getBoolOption( 'fancysig' );
    }

    # No nickname: fall back to the user name
    if ( $nickname == null ) {
        $nickname = $username;
    }

    if ( mb_strlen( $nickname ) > $wgMaxSigChars ) {
        $nickname = $username;
        wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
    } elseif ( $fancySig !== false ) {
        # Sig. might contain markup; validate this
        if ( $this->validateSig( $nickname ) !== false ) {
            # Validated; clean up (if needed) and return it
            return $this->cleanSig( $nickname, true );
        }

        # Failed to validate; fall back to the default
        $nickname = $username;
        wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
    }

    # Make sure the nickname doesn't get a sig in a sig
    $nickname = self::cleanSigInSig( $nickname );

    # Still here, so build a link to the user page from the message
    $userText = wfEscapeWikiText( $username );
    $nickText = wfEscapeWikiText( $nickname );
    if ( $user->isAnon() ) {
        $msgName = 'signature-anon';
    } else {
        $msgName = 'signature';
    }

    $sigMessage = wfMessage( $msgName, $userText, $nickText );

    return $sigMessage->inContentLanguage()->title( $this->getTitle() )->text();
}
04909 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text
 * @return string|bool The unchanged text when it is well formed, false otherwise
 */
public function validateSig( $text ) {
    if ( Xml::isWellFormedXmlFragment( $text ) ) {
        return $text;
    }

    return false;
}
04919 
/**
 * Clean up signature text: prefix every "{{" that is not already
 * followed by the subst magic word with that word's first synonym,
 * remove tilde sequences, then run the text through the preprocessor.
 *
 * The text is returned untouched when the options disable signature
 * cleaning.
 *
 * @param string $text
 * @param bool $parsing True when called from inside a running parse; when
 *   false a fresh parse is started on $wgTitle and the result is unstripped
 * @return string
 */
public function cleanSig( $text, $parsing = false ) {
    global $wgTitle;

    if ( !$parsing ) {
        # Kept in a local so it lives until this method returns
        $scopedLock = $this->lock();
        $this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
    }

    # Option to disable this feature
    if ( !$this->mOptions->getCleanSignatures() ) {
        return $text;
    }

    # @todo FIXME: Regex doesn't respect extension tags or nowiki
    #  => Move this logic to braceSubstitution()
    $subst = MagicWord::get( 'subst' );
    $unsubstedBraces = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
    $substedBraces = '{{' . $subst->getSynonym( 0 );
    $text = preg_replace( $unsubstedBraces, $substedBraces, $text );

    $text = self::cleanSigInSig( $text );

    $dom = $this->preprocessToDom( $text );
    $text = $this->getPreprocessor()->newFrame()->expand( $dom );

    if ( $parsing ) {
        return $text;
    }

    return $this->mStripState->unstripBoth( $text );
}
04960 
/**
 * Remove every run of three to five tildes from the text, so that a
 * signature cannot itself contain a signature.
 *
 * @param string $text
 * @return string The text without the tilde runs
 */
public static function cleanSigInSig( $text ) {
    return preg_replace( '/~{3,5}/', '', $text );
}
04971 
/**
 * Public entry point for setting up the parser for a parse operation;
 * simply forwards all arguments to startParse().
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType Passed on to setOutputType() by startParse()
 * @param bool $clearState
 */
public function startExternalParse(
    Title $title = null,
    ParserOptions $options,
    $outputType,
    $clearState = true
) {
    $this->startParse( $title, $options, $outputType, $clearState );
}
04986 
/**
 * Set up the parser for a parse operation: store the title, the options
 * and the output type, then optionally clear the parser state.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType
 * @param bool $clearState Whether to call clearState() afterwards
 */
private function startParse(
    Title $title = null,
    ParserOptions $options,
    $outputType,
    $clearState = true
) {
    $this->setTitle( $title );
    $this->mOptions = $options;
    $this->setOutputType( $outputType );

    if ( !$clearState ) {
        return;
    }
    $this->clearState();
}
05003 
/**
 * Preprocess message text, guarding against re-entrant calls.
 *
 * While one call is in progress, any nested call returns its input
 * unchanged. The guard is a function-static flag, so it is shared by
 * every Parser instance.
 *
 * @param string $text The text to preprocess
 * @param ParserOptions $options Options passed on to preprocess()
 * @param Title|null $title Title passed on to preprocess(); falls back to
 *   the global $wgTitle when empty
 * @return string The preprocessed text, or $text unchanged on a nested call
 * @throws Exception Whatever preprocess() throws is rethrown after the
 *   guard has been released
 */
public function transformMsg( $text, $options, $title = null ) {
    static $executing = false;

    # Guard against infinite recursion
    if ( $executing ) {
        return $text;
    }
    $executing = true;

    wfProfileIn( __METHOD__ );
    try {
        if ( !$title ) {
            global $wgTitle;
            $title = $wgTitle;
        }

        $text = $this->preprocess( $text, $title, $options );
    } catch ( Exception $e ) {
        # Release the guard and close the profiling section before
        # propagating, otherwise every later call would see $executing
        # still set and return its input untransformed.
        $executing = false;
        wfProfileOut( __METHOD__ );
        throw $e;
    }

    $executing = false;
    wfProfileOut( __METHOD__ );
    return $text;
}
05033 
/**
 * Register a callback for a tag and make sure the tag is on the strip
 * list.
 *
 * @param string $tag Tag name; it is lower-cased before use
 * @param mixed $callback Value stored in $this->mTagHooks for the tag
 * @throws MWException If the tag name contains "<", ">", CR or LF
 * @return mixed|null The value previously stored for this tag, or null
 */
public function setHook( $tag, $callback ) {
    $tag = strtolower( $tag );

    $m = array();
    if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
        throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
    }

    # Remember what was registered before so it can be handed back
    $previous = null;
    if ( isset( $this->mTagHooks[$tag] ) ) {
        $previous = $this->mTagHooks[$tag];
    }

    $this->mTagHooks[$tag] = $callback;

    $alreadyStripped = in_array( $tag, $this->mStripList );
    if ( !$alreadyStripped ) {
        $this->mStripList[] = $tag;
    }

    return $previous;
}
05071 
/**
 * Register a callback for a transparent tag.
 *
 * Unlike setHook(), the tag is not added to the strip list.
 *
 * @param string $tag Tag name; it is lower-cased before use
 * @param mixed $callback Value stored in $this->mTransparentTagHooks for the tag
 * @throws MWException If the tag name contains "<", ">", CR or LF
 * @return mixed|null The value previously stored for this tag, or null
 */
public function setTransparentTagHook( $tag, $callback ) {
    $tag = strtolower( $tag );
    if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
        # The message names this method (it used to say "setTransparentHook",
        # which does not exist)
        throw new MWException(
            "Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call"
        );
    }
    $oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
    $this->mTransparentTagHooks[$tag] = $callback;

    return $oldVal;
}
05099 
/**
 * Forget all registered tag hooks and function tag hooks, and reset the
 * strip list to the default strip list.
 *
 * Transparent tag hooks are not touched here.
 */
public function clearTagHooks() {
    $this->mStripList = $this->mDefaultStripList;
    $this->mFunctionTagHooks = array();
    $this->mTagHooks = array();
}
05108 
/**
 * Register a parser function callback under a magic word ID and index all
 * of the magic word's synonyms in mFunctionSynonyms.
 *
 * Each synonym is lower-cased through $wgContLang when the magic word is
 * not case-sensitive, gets a leading '#' unless SFH_NO_HASH is set in
 * $flags, and loses one trailing ':' if present.
 *
 * @param string $id Magic word identifier, looked up via MagicWord::get()
 * @param mixed $callback Value stored together with $flags in mFunctionHooks
 * @param int $flags Bit field; only SFH_NO_HASH is examined here
 * @throws MWException If MagicWord::get() yields a falsy value
 * @return mixed|null The callback previously registered for $id, or null
 */
public function setFunctionHook( $id, $callback, $flags = 0 ) {
    global $wgContLang;

    $previous = null;
    if ( isset( $this->mFunctionHooks[$id] ) ) {
        $previous = $this->mFunctionHooks[$id][0];
    }
    $this->mFunctionHooks[$id] = array( $callback, $flags );

    # Add to function cache
    $magicWord = MagicWord::get( $id );
    if ( !$magicWord ) {
        throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
    }

    $names = $magicWord->getSynonyms();
    # 0 or 1; used as the first-level key of mFunctionSynonyms
    $caseKey = intval( $magicWord->isCaseSensitive() );
    $wantsHash = !( $flags & SFH_NO_HASH );

    foreach ( $names as $name ) {
        if ( !$caseKey ) {
            $name = $wgContLang->lc( $name );
        }
        if ( $wantsHash ) {
            $name = '#' . $name;
        }
        # Remove trailing colon
        if ( substr( $name, -1, 1 ) === ':' ) {
            $name = substr( $name, 0, -1 );
        }
        $this->mFunctionSynonyms[$caseKey][$name] = $id;
    }

    return $previous;
}
05184 
/**
 * List the IDs under which function hooks are currently registered.
 *
 * @return array The keys of mFunctionHooks
 */
public function getFunctionHooks() {
    $hookIds = array_keys( $this->mFunctionHooks );
    return $hookIds;
}
05193 
/**
 * Register a function tag hook and make sure the tag is on the strip list.
 *
 * The tag name is lower-cased before use. The callback and flags are
 * stored together as a two-element array in mFunctionTagHooks.
 *
 * @param string $tag Tag name; must not contain '<', '>', CR or LF
 * @param mixed $callback Stored as element 0 of the hook entry
 * @param mixed $flags Stored as element 1 of the hook entry
 * @throws MWException If the tag name contains a forbidden character
 * @return array|null The entry previously stored for this tag, or null
 */
public function setFunctionTagHook( $tag, $callback, $flags ) {
    $tag = strtolower( $tag );

    $bad = array();
    if ( preg_match( '/[<>\r\n]/', $tag, $bad ) ) {
        throw new MWException( "Invalid character {$bad[0]} in setFunctionTagHook('$tag', ...) call" );
    }

    $previous = null;
    if ( isset( $this->mFunctionTagHooks[$tag] ) ) {
        $previous = $this->mFunctionTagHooks[$tag];
    }
    $this->mFunctionTagHooks[$tag] = array( $callback, $flags );

    $alreadyListed = in_array( $tag, $this->mStripList );
    if ( !$alreadyListed ) {
        $this->mStripList[] = $tag;
    }

    return $previous;
}
05219 
/**
 * Delegate to the replace() method of the current link holder collection.
 *
 * @param string &$text Passed on to mLinkHolders->replace()
 * @param int $options Accepted for interface compatibility; not read here
 * @return mixed Whatever mLinkHolders->replace() returns
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
    $holders = $this->mLinkHolders;
    return $holders->replace( $text );
}
05233 
/**
 * Delegate to the replaceText() method of the current link holder
 * collection.
 *
 * @param string $text Passed on to mLinkHolders->replaceText()
 * @return mixed Whatever mLinkHolders->replaceText() returns
 */
public function replaceLinkHoldersText( $text ) {
    $holders = $this->mLinkHolders;
    return $holders->replaceText( $text );
}
05244 
/**
 * Build an image gallery from the body text and attributes of a gallery tag.
 *
 * A gallery object is created through ImageGalleryBase::factory() (falling
 * back to the default mode when the requested one raises MWException) and
 * configured from $params. Every line of $text of the form
 * "Title|options and caption" is then resolved to a file title, its
 * pipe-separated options are matched against the alt/link magic words plus
 * the file handler's own parameters, and the entry is added to the gallery.
 * Anything that is not a recognised, valid option becomes part of the label.
 *
 * Hooks run: BeforeParserrenderImageGallery, BeforeParserFetchFileAndTitle
 * (once per line) and AfterParserFetchFileAndTitle.
 *
 * @param string $text Gallery body, one image per line
 * @param array $params Tag attributes; the keys 'mode', 'showfilename',
 *   'caption', 'perrow', 'widths' and 'heights' are read here, and the whole
 *   array is also given to setAttributes() (after validation) and
 *   setAdditionalOptions()
 * @return mixed The gallery's toHTML() output, which the
 *   AfterParserFetchFileAndTitle hook receives by reference and may change
 */
05258     public function renderImageGallery( $text, $params ) {
05259         wfProfileIn( __METHOD__ );
05260 
05261         $mode = false;
05262         if ( isset( $params['mode'] ) ) {
05263             $mode = $params['mode'];
05264         }
05265 
05266         try {
05267             $ig = ImageGalleryBase::factory( $mode );
05268         } catch ( MWException $e ) {
05269             // If invalid type set, fallback to default.
05270             $ig = ImageGalleryBase::factory( false );
05271         }
05272 
05273         $ig->setContextTitle( $this->mTitle );
05274         $ig->setShowBytes( false );
              # NOTE(review): this call is superseded by the 'showfilename'
              # check a few lines below, which always sets the value again.
05275         $ig->setShowFilename( false );
05276         $ig->setParser( $this );
05277         $ig->setHideBadImages();
05278         $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );
05279 
05280         if ( isset( $params['showfilename'] ) ) {
05281             $ig->setShowFilename( true );
05282         } else {
05283             $ig->setShowFilename( false );
05284         }
05285         if ( isset( $params['caption'] ) ) {
                  # Escape first, then let internal links inside the caption
                  # be turned into link markup.
05286             $caption = $params['caption'];
05287             $caption = htmlspecialchars( $caption );
05288             $caption = $this->replaceInternalLinks( $caption );
05289             $ig->setCaptionHtml( $caption );
05290         }
05291         if ( isset( $params['perrow'] ) ) {
05292             $ig->setPerRow( $params['perrow'] );
05293         }
05294         if ( isset( $params['widths'] ) ) {
05295             $ig->setWidths( $params['widths'] );
05296         }
05297         if ( isset( $params['heights'] ) ) {
05298             $ig->setHeights( $params['heights'] );
05299         }
05300         $ig->setAdditionalOptions( $params );
05301 
05302         wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) );
05303 
05304         $lines = StringUtils::explode( "\n", $text );
05305         foreach ( $lines as $line ) {
05306             # match lines like these:
05307             # Image:someimage.jpg|This is some image
05308             $matches = array();
05309             preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
05310             # Skip empty lines
05311             if ( count( $matches ) == 0 ) {
05312                 continue;
05313             }
05314 
                  # The '%' test looks at the whole line, but only the title
                  # part ($matches[1]) is URL-decoded.
05315             if ( strpos( $matches[0], '%' ) !== false ) {
05316                 $matches[1] = rawurldecode( $matches[1] );
05317             }
05318             $title = Title::newFromText( $matches[1], NS_FILE );
05319             if ( is_null( $title ) ) {
05320                 # Bogus title. Ignore these so we don't bomb out later.
05321                 continue;
05322             }
05323 
05324             # We need to get what handler the file uses, to figure out parameters.
05325             # Note, a hook can override the file name, and choose an entirely different
05326             # file (which potentially could be of a different type and have different handler).
05327             $options = array();
05328             $descQuery = false;
05329             wfRunHooks( 'BeforeParserFetchFileAndTitle',
05330                 array( $this, $title, &$options, &$descQuery ) );
05331             # Don't register it now, as ImageGallery does that later.
05332             $file = $this->fetchFileNoRegister( $title, $options );
05333             $handler = $file ? $file->getHandler() : false;
05334 
05335             wfProfileIn( __METHOD__ . '-getMagicWord' );
                  # Magic word ID => internal parameter name. The two gallery
                  # entries take precedence over handler entries with the same
                  # key because the array union keeps the left-hand value.
05336             $paramMap = array(
05337                 'img_alt' => 'gallery-internal-alt',
05338                 'img_link' => 'gallery-internal-link',
05339             );
05340             if ( $handler ) {
05341                 $paramMap = $paramMap + $handler->getParamMap();
05342                 // We don't want people to specify per-image widths.
05343                 // Additionally the width parameter would need special casing anyhow.
05344                 unset( $paramMap['img_width'] );
05345             }
05346 
05347             $mwArray = new MagicWordArray( array_keys( $paramMap ) );
05348             wfProfileOut( __METHOD__ . '-getMagicWord' );
05349 
05350             $label = '';
05351             $alt = '';
05352             $link = '';
05353             $handlerOptions = array();
05354             if ( isset( $matches[3] ) ) {
05355                 // look for an |alt= definition while trying not to break existing
05356                 // captions with multiple pipes (|) in it, until a more sensible grammar
05357                 // is defined for images in galleries
05358 
05359                 // FIXME: Doing recursiveTagParse at this stage, and the trim before
05360                 // splitting on '|' is a bit odd, and different from makeImage.
05361                 $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
05362                 $parameterMatches = StringUtils::explode( '|', $matches[3] );
05363 
05364                 foreach ( $parameterMatches as $parameterMatch ) {
05365                     list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
05366                     if ( $magicName ) {
05367                         $paramName = $paramMap[$magicName];
05368 
05369                         switch ( $paramName ) {
05370                         case 'gallery-internal-alt':
05371                             $alt = $this->stripAltText( $match, false );
05372                             break;
05373                         case 'gallery-internal-link':
05374                             $linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
05375                             $chars = self::EXT_LINK_URL_CLASS;
05376                             $prots = $this->mUrlProtocols;
05377                             //check to see if link matches an absolute url, if not then it must be a wiki link.
05378                             if ( preg_match( "/^($prots)$chars+$/u", $linkValue ) ) {
05379                                 $link = $linkValue;
05380                             } else {
05381                                 $localLinkTitle = Title::newFromText( $linkValue );
05382                                 if ( $localLinkTitle !== null ) {
05383                                     $link = $localLinkTitle->getLinkURL();
05384                                 }
05385                             }
05386                             break;
05387                         default:
05388                             // Must be a handler specific parameter.
                                  // $handler is set here: names other than the two
                                  // gallery-internal ones only enter $paramMap via
                                  // $handler->getParamMap().
05389                             if ( $handler->validateParam( $paramName, $match ) ) {
05390                                 $handlerOptions[$paramName] = $match;
05391                             } else {
05392                                 // Guess not. Append it to the caption.
05393                                 wfDebug( "$parameterMatch failed parameter validation\n" );
05394                                 $label .= '|' . $parameterMatch;
05395                             }
05396                         }
05397 
05398                     } else {
05399                         // concatenate all other pipes
05400                         $label .= '|' . $parameterMatch;
05401                     }
05402                 }
05403                 // remove the first pipe
05404                 $label = substr( $label, 1 );
05405             }
05406 
05407             $ig->add( $title, $label, $alt, $link, $handlerOptions );
05408         }
05409         $html = $ig->toHTML();
05410         wfRunHooks( 'AfterParserFetchFileAndTitle', array( $this, $ig, &$html ) );
05411         wfProfileOut( __METHOD__ );
05412         return $html;
05413     }
05414 
/**
 * Return the image parameter map and the matching MagicWordArray for a
 * media handler, building and caching both on first use.
 *
 * The cache key is the handler's class name, or '' when $handler is falsy.
 * The map goes from magic word ID ("img_" + name, with '-' replaced by '_')
 * to a pair array( type, name ), where type is 'horizAlign', 'vertAlign',
 * 'frame' or 'handler'. Handler-supplied entries overwrite built-in entries
 * that share the same magic word ID.
 *
 * @param object|bool $handler Media handler, or a falsy value for none
 * @return array array( parameter map, MagicWordArray )
 */
public function getImageParams( $handler ) {
    # Built-in parameter names grouped by type; shared by all calls
    static $internalParamNames = array(
        'horizAlign' => array( 'left', 'right', 'center', 'none' ),
        'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
            'bottom', 'text-bottom' ),
        'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless',
            'upright', 'border', 'link', 'alt', 'class' ),
    );
    static $internalParamMap;

    $classKey = $handler ? get_class( $handler ) : '';

    if ( !isset( $this->mImageParams[$classKey] ) ) {
        # Build the handler-independent part once
        if ( !$internalParamMap ) {
            $internalParamMap = array();
            foreach ( $internalParamNames as $type => $names ) {
                foreach ( $names as $name ) {
                    $magicId = str_replace( '-', '_', "img_$name" );
                    $internalParamMap[$magicId] = array( $type, $name );
                }
            }
        }

        # Start from the built-in map, then layer the handler's own on top
        $combined = $internalParamMap;
        if ( $handler ) {
            foreach ( $handler->getParamMap() as $magic => $paramName ) {
                $combined[$magic] = array( 'handler', $paramName );
            }
        }

        $this->mImageParams[$classKey] = $combined;
        $this->mImageParamsMagicArray[$classKey] = new MagicWordArray( array_keys( $combined ) );
    }

    $cachedMap = $this->mImageParams[$classKey];
    $cachedMagic = $this->mImageParamsMagicArray[$classKey];
    return array( $cachedMap, $cachedMagic );
}
05458 
/**
 * Parse image options text and hand the result to Linker::makeImageLink().
 *
 * The options string is split on '|'. Each part is matched against the
 * magic words returned by getImageParams(); recognised and valid parts are
 * sorted into the 'frame', 'handler', 'horizAlign' and 'vertAlign' groups,
 * and the last part that fails recognition or validation becomes the
 * caption. Alt text and tooltip are then derived from the caption depending
 * on whether the image is framed.
 *
 * Hooks run: BeforeParserFetchFileAndTitle and ParserMakeImageParams.
 *
 * @param Title $title Title of the image; may be replaced by the title
 *   returned from fetchFileAndTitle()
 * @param string $options Pipe-separated option text
 * @param object|bool $holders Passed to stripAltText(); when truthy its
 *   replaceText() is used instead of the parser's own link holders
 * @return mixed Whatever Linker::makeImageLink() returns
 */
05467     public function makeImage( $title, $options, $holders = false ) {
05468         # Check if the options text is of the form "options|alt text"
05469         # Options are:
05470         #  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
05471         #  * left       no resizing, just left align. label is used for alt= only
05472         #  * right      same, but right aligned
05473         #  * none       same, but not aligned
05474         #  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
05475         #  * center     center the image
05476         #  * frame      Keep original image size, no magnify-button.
05477         #  * framed     Same as "frame"
05478         #  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
05479         #  * upright    reduce width for upright images, rounded to full __0 px
05480         #  * border     draw a 1px border around the image
05481         #  * alt        Text for HTML alt attribute (defaults to empty)
05482         #  * class      Set a class for img node
05483         #  * link       Set the target of the image link. Can be external, interwiki, or local
05484         # vertical-align values (no % or length right now):
05485         #  * baseline
05486         #  * sub
05487         #  * super
05488         #  * top
05489         #  * text-top
05490         #  * middle
05491         #  * bottom
05492         #  * text-bottom
05493 
05494         $parts = StringUtils::explode( "|", $options );
05495 
05496         # Give extensions a chance to select the file revision for us
              # From here on $options is reused as the (initially empty) array
              # of fetch options that the hook may fill in.
05497         $options = array();
05498         $descQuery = false;
05499         wfRunHooks( 'BeforeParserFetchFileAndTitle',
05500             array( $this, $title, &$options, &$descQuery ) );
05501         # Fetch and register the file (file title may be different via hooks)
05502         list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );
05503 
05504         # Get parameter map
05505         $handler = $file ? $file->getHandler() : false;
05506 
05507         list( $paramMap, $mwArray ) = $this->getImageParams( $handler );
05508 
05509         if ( !$file ) {
05510             $this->addTrackingCategory( 'broken-file-category' );
05511         }
05512 
05513         # Process the input parameters
05514         $caption = '';
05515         $params = array( 'frame' => array(), 'handler' => array(),
05516             'horizAlign' => array(), 'vertAlign' => array() );
05517         $seenformat = false;
05518         foreach ( $parts as $part ) {
05519             $part = trim( $part );
05520             list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
05521             $validated = false;
05522             if ( isset( $paramMap[$magicName] ) ) {
05523                 list( $type, $paramName ) = $paramMap[$magicName];
05524 
05525                 # Special case; width and height come in one variable together
05526                 if ( $type === 'handler' && $paramName === 'width' ) {
05527                     $parsedWidthParam = $this->parseWidthParam( $value );
05528                     if ( isset( $parsedWidthParam['width'] ) ) {
05529                         $width = $parsedWidthParam['width'];
05530                         if ( $handler->validateParam( 'width', $width ) ) {
05531                             $params[$type]['width'] = $width;
05532                             $validated = true;
05533                         }
05534                     }
05535                     if ( isset( $parsedWidthParam['height'] ) ) {
05536                         $height = $parsedWidthParam['height'];
05537                         if ( $handler->validateParam( 'height', $height ) ) {
05538                             $params[$type]['height'] = $height;
05539                             $validated = true;
05540                         }
05541                     }
05542                     # else no validation -- bug 13436
05543                 } else {
05544                     if ( $type === 'handler' ) {
05545                         # Validate handler parameter
05546                         $validated = $handler->validateParam( $paramName, $value );
05547                     } else {
05548                         # Validate internal parameters
05549                         switch ( $paramName ) {
05550                         case 'manualthumb':
05551                         case 'alt':
05552                         case 'class':
05553                             # @todo FIXME: Possibly check validity here for
05554                             # manualthumb? downstream behavior seems odd with
05555                             # missing manual thumbs.
05556                             $validated = true;
05557                             $value = $this->stripAltText( $value, $holders );
05558                             break;
05559                         case 'link':
                                  # Three outcomes, each stored under its own key:
                                  # empty value => 'no-link', protocol-prefixed URL
                                  # => 'link-url', anything else that parses as a
                                  # title => 'link-title'.
05560                             $chars = self::EXT_LINK_URL_CLASS;
05561                             $prots = $this->mUrlProtocols;
05562                             if ( $value === '' ) {
05563                                 $paramName = 'no-link';
05564                                 $value = true;
05565                                 $validated = true;
05566                             } elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
                                      # Starts with a known protocol: accept only if
                                      # the whole value is a well-formed URL; a
                                      # malformed one is left unvalidated.
05567                                 if ( preg_match( "/^((?i)$prots)$chars+$/u", $value, $m ) ) {
05568                                     $paramName = 'link-url';
05569                                     $this->mOutput->addExternalLink( $value );
05570                                     if ( $this->mOptions->getExternalLinkTarget() ) {
05571                                         $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
05572                                     }
05573                                     $validated = true;
05574                                 }
05575                             } else {
05576                                 $linkTitle = Title::newFromText( $value );
05577                                 if ( $linkTitle ) {
05578                                     $paramName = 'link-title';
05579                                     $value = $linkTitle;
05580                                     $this->mOutput->addLink( $linkTitle );
05581                                     $validated = true;
05582                                 }
05583                             }
05584                             break;
05585                         case 'frameless':
05586                         case 'framed':
05587                         case 'thumbnail':
05588                             // use first appearing option, discard others.
05589                             $validated = ! $seenformat;
05590                             $seenformat = true;
05591                             break;
05592                         default:
05593                             # Most other things appear to be empty or numeric...
05594                             $validated = ( $value === false || is_numeric( trim( $value ) ) );
05595                         }
05596                     }
05597 
05598                     if ( $validated ) {
05599                         $params[$type][$paramName] = $value;
05600                     }
05601                 }
05602             }
                  # Anything unrecognised or invalid is treated as caption text;
                  # a later such part overwrites an earlier one.
05603             if ( !$validated ) {
05604                 $caption = $part;
05605             }
05606         }
05607 
05608         # Process alignment parameters
              # key() yields the key at the array's internal pointer, which for
              # these freshly filled arrays is the first alignment that was set.
05609         if ( $params['horizAlign'] ) {
05610             $params['frame']['align'] = key( $params['horizAlign'] );
05611         }
05612         if ( $params['vertAlign'] ) {
05613             $params['frame']['valign'] = key( $params['vertAlign'] );
05614         }
05615 
05616         $params['frame']['caption'] = $caption;
05617 
05618         # Will the image be presented in a frame, with the caption below?
05619         $imageIsFramed = isset( $params['frame']['frame'] )
05620             || isset( $params['frame']['framed'] )
05621             || isset( $params['frame']['thumbnail'] )
05622             || isset( $params['frame']['manualthumb'] );
05623 
05624         # In the old days, [[Image:Foo|text...]] would set alt text.  Later it
05625         # came to also set the caption, ordinary text after the image -- which
05626         # makes no sense, because that just repeats the text multiple times in
05627         # screen readers.  It *also* came to set the title attribute.
05628         #
05629         # Now that we have an alt attribute, we should not set the alt text to
05630         # equal the caption: that's worse than useless, it just repeats the
05631         # text.  This is the framed/thumbnail case.  If there's no caption, we
05632         # use the unnamed parameter for alt text as well, just for the time be-
05633         # ing, if the unnamed param is set and the alt param is not.
05634         #
05635         # For the future, we need to figure out if we want to tweak this more,
05636         # e.g., introducing a title= parameter for the title; ignoring the un-
05637         # named parameter entirely for images without a caption; adding an ex-
05638         # plicit caption= parameter and preserving the old magic unnamed para-
05639         # meter for BC; ...
05640         if ( $imageIsFramed ) { # Framed image
05641             if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
05642                 # No caption or alt text, add the filename as the alt text so
05643                 # that screen readers at least get some description of the image
05644                 $params['frame']['alt'] = $title->getText();
05645             }
05646             # Do not set $params['frame']['title'] because tooltips don't make sense
05647             # for framed images
05648         } else { # Inline image
05649             if ( !isset( $params['frame']['alt'] ) ) {
05650                 # No alt text, use the "caption" for the alt text
05651                 if ( $caption !== '' ) {
05652                     $params['frame']['alt'] = $this->stripAltText( $caption, $holders );
05653                 } else {
05654                     # No caption, fall back to using the filename for the
05655                     # alt text
05656                     $params['frame']['alt'] = $title->getText();
05657                 }
05658             }
05659             # Use the "caption" for the tooltip text
05660             $params['frame']['title'] = $this->stripAltText( $caption, $holders );
05661         }
05662 
05663         wfRunHooks( 'ParserMakeImageParams', array( $title, $file, &$params, $this ) );
05664 
05665         # Linker does the rest
05666         $time = isset( $options['time'] ) ? $options['time'] : false;
05667         $ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
05668             $time, $descQuery, $this->mOptions->getThumbSize() );
05669 
05670         # Give the handler a chance to modify the parser object
05671         if ( $handler ) {
05672             $handler->parserTransformHook( $this, $file );
05673         }
05674 
05675         return $ret;
05676     }
05677 
/**
 * Turn caption wikitext into plain text usable in alt/title attributes.
 *
 * Link holders are replaced first (through $holders when one is given,
 * otherwise through the parser's own collection), then strip markers are
 * expanded and all tags are removed.
 *
 * @param string $caption Text to clean up
 * @param object|bool $holders Object providing replaceText(), or a falsy
 *   value to use replaceLinkHoldersText()
 * @return string Result of Sanitizer::stripAllTags()
 */
protected function stripAltText( $caption, $holders ) {
    # We can't always use replaceLinkHoldersText() here, because if this
    # function is called from replaceInternalLinks2(), mLinkHolders won't
    # be up-to-date.
    $plain = $holders
        ? $holders->replaceText( $caption )
        : $this->replaceLinkHoldersText( $caption );

    # Expand any placeholders now so that none survives into the
    # attribute to be expanded to HTML later, then drop the tags.
    $plain = $this->mStripState->unstripBoth( $plain );

    return Sanitizer::stripAllTags( $plain );
}
05701 
/**
 * Mark the current parser output as uncacheable.
 *
 * The debug line is written before mOutput is checked, so it is emitted
 * even when the call then fails.
 *
 * @throws MWException If there is no current parser output (mOutput falsy)
 */
public function disableCache() {
    wfDebug( "Parser output marked as uncacheable.\n" );

    $output = $this->mOutput;
    if ( !$output ) {
        $reason = " can only be called when actually parsing something";
        throw new MWException( __METHOD__ . $reason );
    }

    // old style, for compatibility
    $output->setCacheTime( -1 );
    // new style, for consistency
    $output->updateCacheExpiry( 0 );
}
05715 
/**
 * Expand variables in $text and then replace strip markers.
 *
 * $text is taken by reference, so the caller's variable is updated as well
 * as the value being returned.
 *
 * @param string &$text Text to process; overwritten with the result
 * @param mixed $frame Passed through to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
    $text = $this->replaceVariables( $text, $frame );

    $unstripped = $this->mStripState->unstripBoth( $text );
    $text = $unstripped;

    return $unstripped;
}
05729 
/**
 * List every tag name that has a hook registered.
 *
 * @return array Keys of mTransparentTagHooks, then mTagHooks, then
 *   mFunctionTagHooks, concatenated in that order
 */
public function getTags() {
    $transparent = array_keys( $this->mTransparentTagHooks );
    $regular = array_keys( $this->mTagHooks );
    $functional = array_keys( $this->mFunctionTagHooks );

    return array_merge( $transparent, $regular, $functional );
}
05742 
/**
 * Replace transparent tags in $text with the output of their hooks.
 *
 * extractTagsAndParams() swaps each matching tag for a marker; every marker
 * is then mapped to the return value of the registered callback (called
 * with the tag content, its parameters and this parser), or back to the
 * original tag text when no callback is registered under the lower-cased
 * element name.
 *
 * @param string $text Text to process
 * @return string $text with all markers substituted via strtr()
 */
public function replaceTransparentTags( $text ) {
    $found = array();
    $tagNames = array_keys( $this->mTransparentTagHooks );
    $text = self::extractTagsAndParams( $tagNames, $text, $found, $this->mUniqPrefix );

    $substitutions = array();
    foreach ( $found as $marker => $data ) {
        list( $element, $content, $params, $tag ) = $data;
        $hookKey = strtolower( $element );

        if ( !isset( $this->mTransparentTagHooks[$hookKey] ) ) {
            # No hook: put the original tag text back
            $substitutions[$marker] = $tag;
            continue;
        }

        $hookArgs = array( $content, $params, $this );
        $substitutions[$marker] = call_user_func_array(
            $this->mTransparentTagHooks[$hookKey],
            $hookArgs
        );
    }

    return strtr( $text, $substitutions );
}
05774 
/**
 * Shared implementation of getSection() and replaceSection().
 *
 * The text is preprocessed to a DOM whose top-level <h> nodes mark section
 * boundaries. The target section runs from the heading whose index equals
 * the requested one up to (not including) the next heading of the same or
 * a higher level; section 0 is the text before the first heading.
 *
 * @param string $text Page text to look in
 * @param string $sectionId Section index, optionally preceded by
 *   '-'-separated flags; the flag 'T' sets PTD_FOR_INCLUSION for
 *   preprocessing
 * @param string $mode 'get' to return the section's text; 'replace' is the
 *   value the callers in this class use for substitution, and any value
 *   other than 'get' takes the non-'get' branches
 * @param string $newText In 'replace' mode the replacement text; in 'get'
 *   mode the value returned when the section does not exist
 * @return string In 'get' mode the section text (or $newText if not found);
 *   in 'replace' mode the whole text with the section replaced (or $text
 *   unchanged if not found)
 */
05804     private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
05805         global $wgTitle; # not generally used but removes an ugly failure mode
05806 
              # NOTE(review): the return value is never read; presumably the lock
              # is released when this variable goes out of scope -- confirm
              # against lock().
05807         $magicScopeVariable = $this->lock();
05808         $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
05809         $outText = '';
05810         $frame = $this->getPreprocessor()->newFrame();
05811 
05812         # Process section extraction flags
              # The last '-'-separated component is the index; everything before
              # it is a flag.
05813         $flags = 0;
05814         $sectionParts = explode( '-', $sectionId );
05815         $sectionIndex = array_pop( $sectionParts );
05816         foreach ( $sectionParts as $part ) {
05817             if ( $part === 'T' ) {
05818                 $flags |= self::PTD_FOR_INCLUSION;
05819             }
05820         }
05821 
05822         # Check for empty input
05823         if ( strval( $text ) === '' ) {
05824             # Only sections 0 and T-0 exist in an empty document
05825             if ( $sectionIndex == 0 ) {
05826                 if ( $mode === 'get' ) {
05827                     return '';
05828                 } else {
05829                     return $newText;
05830                 }
05831             } else {
05832                 if ( $mode === 'get' ) {
05833                     return $newText;
05834                 } else {
05835                     return $text;
05836                 }
05837             }
05838         }
05839 
05840         # Preprocess the text
05841         $root = $this->preprocessToDom( $text, $flags );
05842 
05843         # <h> nodes indicate section breaks
05844         # They can only occur at the top level, so we can find them by iterating the root's children
05845         $node = $root->getFirstChild();
05846 
05847         # Find the target section
05848         if ( $sectionIndex == 0 ) {
05849             # Section zero doesn't nest, level=big
05850             $targetLevel = 1000;
05851         } else {
                  # Walk forward to the heading with the requested index. In
                  # replace mode everything passed on the way is copied to the
                  # output unchanged.
05852             while ( $node ) {
05853                 if ( $node->getName() === 'h' ) {
05854                     $bits = $node->splitHeading();
05855                     if ( $bits['i'] == $sectionIndex ) {
05856                         $targetLevel = $bits['level'];
05857                         break;
05858                     }
05859                 }
05860                 if ( $mode === 'replace' ) {
05861                     $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
05862                 }
05863                 $node = $node->getNextSibling();
05864             }
05865         }
05866 
              # $targetLevel is only unset when the loop above ran off the end,
              # in which case $node is falsy and we return here.
05867         if ( !$node ) {
05868             # Not found
05869             if ( $mode === 'get' ) {
05870                 return $newText;
05871             } else {
05872                 return $text;
05873             }
05874         }
05875 
05876         # Find the end of the section, including nested sections
05877         do {
05878             if ( $node->getName() === 'h' ) {
05879                 $bits = $node->splitHeading();
05880                 $curLevel = $bits['level'];
                      # Stop at the first other heading that is not nested deeper
                      # than the target.
05881                 if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
05882                     break;
05883                 }
05884             }
05885             if ( $mode === 'get' ) {
05886                 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
05887             }
05888             $node = $node->getNextSibling();
05889         } while ( $node );
05890 
05891         # Write out the remainder (in replace mode only)
05892         if ( $mode === 'replace' ) {
05893             # Output the replacement text
05894             # Add two newlines on -- trailing whitespace in $newText is conventionally
05895             # stripped by the editor, so we need both newlines to restore the paragraph gap
05896             # Only add trailing whitespace if there is newText
05897             if ( $newText != "" ) {
05898                 $outText .= $newText . "\n\n";
05899             }
05900 
05901             while ( $node ) {
05902                 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
05903                 $node = $node->getNextSibling();
05904             }
05905         }
05906 
              # NOTE(review): $outText starts as '' and is only ever appended to,
              # so this check looks like it always passes -- confirm expand()
              # cannot yield a non-string.
05907         if ( is_string( $outText ) ) {
05908             # Re-insert stripped tags
05909             $outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
05910         }
05911 
05912         return $outText;
05913     }
05914 
/**
 * Return the text of one section of a page.
 *
 * Thin wrapper around extractSections() in 'get' mode.
 *
 * @param string $text Page text to look in
 * @param string $sectionId Section identifier as understood by
 *   extractSections()
 * @param string $defaultText Passed as the fourth argument of
 *   extractSections(), which returns it when the section is not found
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
    $sectionText = $this->extractSections( $text, $sectionId, 'get', $defaultText );
    return $sectionText;
}
05932 
/**
 * Return the page text with one section replaced.
 *
 * Thin wrapper around extractSections() in 'replace' mode.
 *
 * @param string $oldText Current page text
 * @param string $sectionId Section identifier as understood by
 *   extractSections()
 * @param string $newText Replacement text for the section
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
    $merged = $this->extractSections( $oldText, $sectionId, 'replace', $newText );
    return $merged;
}
05948 
/**
 * Get the revision ID currently stored on the parser.
 *
 * @return mixed The value of mRevisionId
 */
public function getRevisionId() {
    $revId = $this->mRevisionId;
    return $revId;
}
05957 
/**
 * Get the revision object for mRevisionId, loading it on first use.
 *
 * The object is fetched with Revision::newFromId() and kept in
 * mRevisionObject. Nothing is loaded when mRevisionId is null.
 *
 * @return mixed The cached or freshly loaded revision object; null when
 *   no object is cached and mRevisionId is null
 */
public function getRevisionObject() {
    $needsLoad = is_null( $this->mRevisionObject ) && !is_null( $this->mRevisionId );
    if ( $needsLoad ) {
        $this->mRevisionObject = Revision::newFromId( $this->mRevisionId );
    }

    return $this->mRevisionObject;
}
05975 
/**
 * Get the timestamp of the revision being parsed, computing it on first use.
 *
 * Uses the revision object's timestamp when there is one, otherwise the
 * current time, and adjusts it through $wgContLang->userAdjust(). The
 * result is kept in mRevisionTimestamp for later calls.
 *
 * @return mixed The value stored in mRevisionTimestamp
 */
public function getRevisionTimestamp() {
    if ( !is_null( $this->mRevisionTimestamp ) ) {
        return $this->mRevisionTimestamp;
    }

    wfProfileIn( __METHOD__ );

    global $wgContLang;

    $revision = $this->getRevisionObject();
    if ( $revision ) {
        $rawTimestamp = $revision->getTimestamp();
    } else {
        $rawTimestamp = wfTimestampNow();
    }

    # The cryptic '' timezone parameter tells to use the site-default
    # timezone offset instead of the user settings.
    #
    # Since this value will be saved into the parser cache, served
    # to other users, and potentially even used inside links and such,
    # it needs to be consistent for all visitors.
    $this->mRevisionTimestamp = $wgContLang->userAdjust( $rawTimestamp, '' );

    wfProfileOut( __METHOD__ );

    return $this->mRevisionTimestamp;
}
06002 
06008     public function getRevisionUser() {
06009         if ( is_null( $this->mRevisionUser ) ) {
06010             $revObject = $this->getRevisionObject();
06011 
06012             # if this template is subst: the revision id will be blank,
06013             # so just use the current user's name
06014             if ( $revObject ) {
06015                 $this->mRevisionUser = $revObject->getUserText();
06016             } elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
06017                 $this->mRevisionUser = $this->getUser()->getName();
06018             }
06019         }
06020         return $this->mRevisionUser;
06021     }
06022 
06028     public function getRevisionSize() {
06029         if ( is_null( $this->mRevisionSize ) ) {
06030             $revObject = $this->getRevisionObject();
06031 
06032             # if this variable is subst: the revision id will be blank,
06033             # so just use the parser input size, because the own substituation
06034             # will change the size.
06035             if ( $revObject ) {
06036                 $this->mRevisionSize = $revObject->getSize();
06037             } elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
06038                 $this->mRevisionSize = $this->mInputSize;
06039             }
06040         }
06041         return $this->mRevisionSize;
06042     }
06043 
06049     public function setDefaultSort( $sort ) {
06050         $this->mDefaultSort = $sort;
06051         $this->mOutput->setProperty( 'defaultsort', $sort );
06052     }
06053 
06064     public function getDefaultSort() {
06065         if ( $this->mDefaultSort !== false ) {
06066             return $this->mDefaultSort;
06067         } else {
06068             return '';
06069         }
06070     }
06071 
06078     public function getCustomDefaultSort() {
06079         return $this->mDefaultSort;
06080     }
06081 
06091     public function guessSectionNameFromWikiText( $text ) {
06092         # Strip out wikitext links(they break the anchor)
06093         $text = $this->stripSectionName( $text );
06094         $text = Sanitizer::normalizeSectionNameWhitespace( $text );
06095         return '#' . Sanitizer::escapeId( $text, 'noninitial' );
06096     }
06097 
06106     public function guessLegacySectionNameFromWikiText( $text ) {
06107         # Strip out wikitext links(they break the anchor)
06108         $text = $this->stripSectionName( $text );
06109         $text = Sanitizer::normalizeSectionNameWhitespace( $text );
06110         return '#' . Sanitizer::escapeId( $text, array( 'noninitial', 'legacy' ) );
06111     }
06112 
06127     public function stripSectionName( $text ) {
06128         # Strip internal link markup
06129         $text = preg_replace( '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2', $text );
06130         $text = preg_replace( '/\[\[:?([^[]+)\|?\]\]/', '$1', $text );
06131 
06132         # Strip external link markup
06133         # @todo FIXME: Not tolerant to blank link text
06134         # I.E. [https://www.mediawiki.org] will render as [1] or something depending
06135         # on how many empty links there are on the page - need to figure that out.
06136         $text = preg_replace( '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2', $text );
06137 
06138         # Parse wikitext quotes (italics & bold)
06139         $text = $this->doQuotes( $text );
06140 
06141         # Strip HTML tags
06142         $text = StringUtils::delimiterReplace( '<', '>', '', $text );
06143         return $text;
06144     }
06145 
06156     public function testSrvus( $text, Title $title, ParserOptions $options, $outputType = self::OT_HTML ) {
06157         $magicScopeVariable = $this->lock();
06158         $this->startParse( $title, $options, $outputType, true );
06159 
06160         $text = $this->replaceVariables( $text );
06161         $text = $this->mStripState->unstripBoth( $text );
06162         $text = Sanitizer::removeHTMLtags( $text );
06163         return $text;
06164     }
06165 
06172     public function testPst( $text, Title $title, ParserOptions $options ) {
06173         return $this->preSaveTransform( $text, $title, $options->getUser(), $options );
06174     }
06175 
06182     public function testPreprocess( $text, Title $title, ParserOptions $options ) {
06183         return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );
06184     }
06185 
06202     public function markerSkipCallback( $s, $callback ) {
06203         $i = 0;
06204         $out = '';
06205         while ( $i < strlen( $s ) ) {
06206             $markerStart = strpos( $s, $this->mUniqPrefix, $i );
06207             if ( $markerStart === false ) {
06208                 $out .= call_user_func( $callback, substr( $s, $i ) );
06209                 break;
06210             } else {
06211                 $out .= call_user_func( $callback, substr( $s, $i, $markerStart - $i ) );
06212                 $markerEnd = strpos( $s, self::MARKER_SUFFIX, $markerStart );
06213                 if ( $markerEnd === false ) {
06214                     $out .= substr( $s, $markerStart );
06215                     break;
06216                 } else {
06217                     $markerEnd += strlen( self::MARKER_SUFFIX );
06218                     $out .= substr( $s, $markerStart, $markerEnd - $markerStart );
06219                     $i = $markerEnd;
06220                 }
06221             }
06222         }
06223         return $out;
06224     }
06225 
06232     public function killMarkers( $text ) {
06233         return $this->mStripState->killMarkers( $text );
06234     }
06235 
06252     public function serializeHalfParsedText( $text ) {
06253         wfProfileIn( __METHOD__ );
06254         $data = array(
06255             'text' => $text,
06256             'version' => self::HALF_PARSED_VERSION,
06257             'stripState' => $this->mStripState->getSubState( $text ),
06258             'linkHolders' => $this->mLinkHolders->getSubArray( $text )
06259         );
06260         wfProfileOut( __METHOD__ );
06261         return $data;
06262     }
06263 
06279     public function unserializeHalfParsedText( $data ) {
06280         if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) {
06281             throw new MWException( __METHOD__ . ': invalid version' );
06282         }
06283 
06284         # First, extract the strip state.
06285         $texts = array( $data['text'] );
06286         $texts = $this->mStripState->merge( $data['stripState'], $texts );
06287 
06288         # Now renumber links
06289         $texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );
06290 
06291         # Should be good to go.
06292         return $texts[0];
06293     }
06294 
06304     public function isValidHalfParsedText( $data ) {
06305         return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION;
06306     }
06307 
06316     public function parseWidthParam( $value ) {
06317         $parsedWidthParam = array();
06318         if ( $value === '' ) {
06319             return $parsedWidthParam;
06320         }
06321         $m = array();
06322         # (bug 13500) In both cases (width/height and width only),
06323         # permit trailing "px" for backward compatibility.
06324         if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
06325             $width = intval( $m[1] );
06326             $height = intval( $m[2] );
06327             $parsedWidthParam['width'] = $width;
06328             $parsedWidthParam['height'] = $height;
06329         } elseif ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
06330             $width = intval( $value );
06331             $parsedWidthParam['width'] = $width;
06332         }
06333         return $parsedWidthParam;
06334     }
06335 
06345     protected function lock() {
06346         if ( $this->mInParse ) {
06347             throw new MWException( "Parser state cleared while parsing. "
06348                 . "Did you call Parser::parse recursively?" );
06349         }
06350         $this->mInParse = true;
06351 
06352         $that = $this;
06353         $recursiveCheck = new ScopedCallback( function() use ( $that ) {
06354             $that->mInParse = false;
06355         } );
06356 
06357         return $recursiveCheck;
06358     }
06359 
06370     public static function stripOuterParagraph( $html ) {
06371         $m = array();
06372         if ( preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m ) ) {
06373             if ( strpos( $m[1], '</p>' ) === false ) {
06374                 $html = $m[1];
06375             }
06376         }
06377 
06378         return $html;
06379     }
06380 
06391     public function getFreshParser() {
06392         global $wgParserConf;
06393         if ( $this->mInParse ) {
06394             return new $wgParserConf['class']( $wgParserConf );
06395         } else {
06396             return $this;
06397         }
06398     }
06399 }