MediaWiki  REL1_24
MagicWord.php
Go to the documentation of this file.
00001 <?php
/**
 * A single magic word: an ID, the list of synonyms that spell it, and a flag
 * saying whether matching is case-sensitive.
 *
 * The synonyms are obtained from the content language object (see load()) and
 * compiled lazily into a family of regular expressions (see initRegex()).
 * Instances fetched through MagicWord::get() are cached per ID in
 * self::$mObjects.
 *
 * A synonym may contain the placeholder "$1"; the "variable" regexes replace
 * that placeholder with a non-greedy capture group "(.*?)".
 */
class MagicWord {
    /** ID of this magic word, as passed to the constructor or load() */
    public $mId;

    /** Array of synonyms (strings) for this magic word */
    public $mSynonyms;

    /** Whether matching is case-sensitive; when false the "iu" modifiers are used */
    public $mCaseSensitive;

    /** Regex matching any synonym anywhere in the text; '' until initRegex() runs */
    private $mRegex = '';

    /** Regex matching any synonym at the start of the text */
    private $mRegexStart = '';

    /** Regex matching any synonym against the whole text */
    private $mRegexStartToEnd = '';

    /** Alternation of the quoted synonyms, without delimiters or modifiers */
    private $mBaseRegex = '';

    /** Like $mRegex, with the "$1" placeholder turned into a capture group */
    private $mVariableRegex = '';

    /** Like $mRegexStartToEnd, with the "$1" placeholder turned into a capture group */
    private $mVariableStartToEndRegex = '';

    /** Whether the last replace() or substituteCallback() call changed its input */
    private $mModified = false;

    /** Set to true by pregRemoveAndRecord() when a match has been removed */
    private $mFound = false;

    /** Whether the 'MagicWordwgVariableIDs' hook has already been run */
    public static $mVariableIDsInitialised = false;

    /** IDs of the variable magic words; hooks may extend it, see getVariableIDs() */
    public static $mVariableIDs = array(
        '!',
        'currentmonth',
        'currentmonth1',
        'currentmonthname',
        'currentmonthnamegen',
        'currentmonthabbrev',
        'currentday',
        'currentday2',
        'currentdayname',
        'currentyear',
        'currenttime',
        'currenthour',
        'localmonth',
        'localmonth1',
        'localmonthname',
        'localmonthnamegen',
        'localmonthabbrev',
        'localday',
        'localday2',
        'localdayname',
        'localyear',
        'localtime',
        'localhour',
        'numberofarticles',
        'numberoffiles',
        'numberofedits',
        'articlepath',
        'pageid',
        'sitename',
        'server',
        'servername',
        'scriptpath',
        'stylepath',
        'pagename',
        'pagenamee',
        'fullpagename',
        'fullpagenamee',
        'namespace',
        'namespacee',
        'namespacenumber',
        'currentweek',
        'currentdow',
        'localweek',
        'localdow',
        'revisionid',
        'revisionday',
        'revisionday2',
        'revisionmonth',
        'revisionmonth1',
        'revisionyear',
        'revisiontimestamp',
        'revisionuser',
        'revisionsize',
        'subpagename',
        'subpagenamee',
        'talkspace',
        'talkspacee',
        'subjectspace',
        'subjectspacee',
        'talkpagename',
        'talkpagenamee',
        'subjectpagename',
        'subjectpagenamee',
        'numberofusers',
        'numberofactiveusers',
        'numberofpages',
        'currentversion',
        'rootpagename',
        'rootpagenamee',
        'basepagename',
        'basepagenamee',
        'currenttimestamp',
        'localtimestamp',
        'directionmark',
        'contentlanguage',
        'numberofadmins',
        'numberofviews',
        'cascadingsources',
    );

    /**
     * Array of caching hints for ParserCache, keyed by magic word ID.
     * NOTE(review): the values look like lifetimes in seconds (3600 / 86400);
     * confirm against the consumer of getCacheTTL().
     */
    public static $mCacheTTLs = array(
        'currentmonth' => 86400,
        'currentmonth1' => 86400,
        'currentmonthname' => 86400,
        'currentmonthnamegen' => 86400,
        'currentmonthabbrev' => 86400,
        'currentday' => 3600,
        'currentday2' => 3600,
        'currentdayname' => 3600,
        'currentyear' => 86400,
        'currenttime' => 3600,
        'currenthour' => 3600,
        'localmonth' => 86400,
        'localmonth1' => 86400,
        'localmonthname' => 86400,
        'localmonthnamegen' => 86400,
        'localmonthabbrev' => 86400,
        'localday' => 3600,
        'localday2' => 3600,
        'localdayname' => 3600,
        'localyear' => 86400,
        'localtime' => 3600,
        'localhour' => 3600,
        'numberofarticles' => 3600,
        'numberoffiles' => 3600,
        'numberofedits' => 3600,
        'currentweek' => 3600,
        'currentdow' => 3600,
        'localweek' => 3600,
        'localdow' => 3600,
        'numberofusers' => 3600,
        'numberofactiveusers' => 3600,
        'numberofpages' => 3600,
        'currentversion' => 86400,
        'currenttimestamp' => 3600,
        'localtimestamp' => 3600,
        'pagesinnamespace' => 3600,
        'numberofadmins' => 3600,
        'numberofviews' => 3600,
        'numberingroup' => 3600,
    );

    /** IDs of the double-underscore magic words; hooks may extend it */
    public static $mDoubleUnderscoreIDs = array(
        'notoc',
        'nogallery',
        'forcetoc',
        'toc',
        'noeditsection',
        'newsectionlink',
        'nonewsectionlink',
        'hiddencat',
        'index',
        'noindex',
        'staticredirect',
        'notitleconvert',
        'nocontentconvert',
    );

    /** IDs of the substitution magic words */
    public static $mSubstIDs = array(
        'subst',
        'safesubst',
    );

    /** Cache of MagicWord objects created by get(), keyed by ID */
    public static $mObjects = array();

    /** Lazily built MagicWordArray of the double-underscore IDs, or null */
    public static $mDoubleUnderscoreArray = null;

    /**
     * @param mixed $id ID of the magic word
     * @param string|array $syn Synonym or list of synonyms; cast to array
     * @param bool $cs Whether the synonyms are case-sensitive
     */
    public function __construct( $id = 0, $syn = array(), $cs = false ) {
        $this->mId = $id;
        $this->mSynonyms = (array)$syn;
        $this->mCaseSensitive = $cs;
    }

    /**
     * Fetch the MagicWord object for an ID, creating, loading and caching it
     * on first use.
     *
     * @param string $id
     * @return MagicWord Returned by reference from the static cache
     * @throws MWException Propagated from load() when the ID has no synonyms
     */
    public static function &get( $id ) {
        if ( !isset( self::$mObjects[$id] ) ) {
            $mw = new MagicWord();
            $mw->load( $id );
            // Only cached once load() has succeeded
            self::$mObjects[$id] = $mw;
        }
        return self::$mObjects[$id];
    }

    /**
     * Get the list of variable IDs. The 'MagicWordwgVariableIDs' hook is run
     * once, on the first call, and receives the list by reference.
     *
     * @return array
     */
    public static function getVariableIDs() {
        if ( !self::$mVariableIDsInitialised ) {
            # Get variable IDs
            wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
            self::$mVariableIDsInitialised = true;
        }
        return self::$mVariableIDs;
    }

    /**
     * Get the list of substitution IDs.
     *
     * @return array
     */
    public static function getSubstIDs() {
        return self::$mSubstIDs;
    }

    /**
     * Look up the caching hint for a magic word ID.
     *
     * @param string|int $id
     * @return int The entry from self::$mCacheTTLs, or -1 when the ID has none
     */
    public static function getCacheTTL( $id ) {
        if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
            return self::$mCacheTTLs[$id];
        } else {
            return -1;
        }
    }

    /**
     * Get the MagicWordArray of double-underscore IDs. It is built on the
     * first call, after running the 'GetDoubleUnderscoreIDs' hook with the ID
     * list passed by reference.
     *
     * @return MagicWordArray
     */
    public static function getDoubleUnderscoreArray() {
        if ( is_null( self::$mDoubleUnderscoreArray ) ) {
            wfRunHooks( 'GetDoubleUnderscoreIDs', array( &self::$mDoubleUnderscoreIDs ) );
            self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
        }
        return self::$mDoubleUnderscoreArray;
    }

    /**
     * Empty the cache of MagicWord objects built by get().
     */
    public static function clearCache() {
        self::$mObjects = array();
    }

    /**
     * Initialise this object for the given ID by asking the content language
     * ($wgContLang->getMagic()) to fill it in.
     *
     * @param string $id
     * @throws MWException When no synonyms are set afterwards. A dummy
     *   synonym is assigned before throwing.
     */
    public function load( $id ) {
        global $wgContLang;
        wfProfileIn( __METHOD__ );
        $this->mId = $id;
        $wgContLang->getMagic( $this );
        if ( !$this->mSynonyms ) {
            $this->mSynonyms = array( 'brionmademeputthishere' );
            // Close the profiling section before leaving via the exception
            wfProfileOut( __METHOD__ );
            throw new MWException( "Error: invalid magic word '$id'" );
        }
        wfProfileOut( __METHOD__ );
    }

    /**
     * Build every regex member from the current synonyms and case flag.
     * Called lazily by the getters.
     */
    public function initRegex() {
        // Sort the synonyms by length, descending, so that the longest synonym
        // matches in precedence to the shortest
        $synonyms = $this->mSynonyms;
        usort( $synonyms, array( $this, 'compareStringLength' ) );

        $escSyn = array();
        foreach ( $synonyms as $synonym ) {
            // In case a magic word contains /, like that's going to happen;)
            $escSyn[] = preg_quote( $synonym, '/' );
        }
        $this->mBaseRegex = implode( '|', $escSyn );

        $case = $this->mCaseSensitive ? '' : 'iu';
        $this->mRegex = "/{$this->mBaseRegex}/{$case}";
        $this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
        $this->mRegexStartToEnd = "/^(?:{$this->mBaseRegex})$/{$case}";
        // preg_quote() turned "$1" into "\$1"; swap it for a capture group
        $this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
        $this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
            "/^(?:{$this->mBaseRegex})$/{$case}" );
    }

    /**
     * usort() callback ordering strings by byte length, longest first.
     *
     * @param string $s1
     * @param string $s2
     * @return int 1 if $s1 is shorter, -1 if it is longer, 0 if equal length
     */
    public function compareStringLength( $s1, $s2 ) {
        $l1 = strlen( $s1 );
        $l2 = strlen( $s2 );
        if ( $l1 < $l2 ) {
            return 1;
        } elseif ( $l1 > $l2 ) {
            return -1;
        } else {
            return 0;
        }
    }

    /**
     * Get the regex that matches any synonym anywhere in a string.
     *
     * @return string
     */
    public function getRegex() {
        if ( $this->mRegex == '' ) {
            $this->initRegex();
        }
        return $this->mRegex;
    }

    /**
     * Get the PCRE modifiers used by this word's regexes.
     *
     * @return string '' when case-sensitive, 'iu' otherwise
     */
    public function getRegexCase() {
        if ( $this->mRegex === '' ) {
            $this->initRegex();
        }

        return $this->mCaseSensitive ? '' : 'iu';
    }

    /**
     * Get the regex that matches any synonym at the start of a string.
     *
     * @return string
     */
    public function getRegexStart() {
        if ( $this->mRegex == '' ) {
            $this->initRegex();
        }
        return $this->mRegexStart;
    }

    /**
     * Get the regex that matches any synonym against a whole string.
     *
     * @return string
     */
    public function getRegexStartToEnd() {
        if ( $this->mRegexStartToEnd == '' ) {
            $this->initRegex();
        }
        return $this->mRegexStartToEnd;
    }

    /**
     * Get the synonym alternation without delimiters or modifiers.
     *
     * @return string
     */
    public function getBaseRegex() {
        if ( $this->mRegex == '' ) {
            $this->initRegex();
        }
        return $this->mBaseRegex;
    }

    /**
     * Test whether any synonym occurs anywhere in the text.
     *
     * @param string $text
     * @return bool
     */
    public function match( $text ) {
        return (bool)preg_match( $this->getRegex(), $text );
    }

    /**
     * Test whether the text starts with any synonym.
     *
     * @param string $text
     * @return bool
     */
    public function matchStart( $text ) {
        return (bool)preg_match( $this->getRegexStart(), $text );
    }

    /**
     * Test whether the whole text is one of the synonyms.
     *
     * @param string $text
     * @return bool
     */
    public function matchStartToEnd( $text ) {
        return (bool)preg_match( $this->getRegexStartToEnd(), $text );
    }

    /**
     * Match the whole text against the synonyms and return the value captured
     * by the "$1" placeholder.
     *
     * @param string $text
     * @return string|null The captured variable value; the whole match when
     *   nothing (or only an empty string) was captured; null when the text
     *   does not match or the match is entirely empty.
     */
    public function matchVariableStartToEnd( $text ) {
        $matches = array();
        $matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
        if ( $matchcount == 0 ) {
            return null;
        }

        # multiple matched parts (variable match); some will be empty because of
        # synonyms. The variable will be the second non-empty one so remove any
        # blank elements and re-sort the indices.
        # See also bug 6526
        #
        # Only the empty string counts as blank: a bare array_filter() would
        # also discard a captured "0" and lose a legitimate variable value.
        $nonEmpty = array();
        foreach ( $matches as $part ) {
            if ( $part !== '' ) {
                $nonEmpty[] = $part;
            }
        }

        $count = count( $nonEmpty );
        if ( $count === 0 ) {
            return null;
        } elseif ( $count === 1 ) {
            return $nonEmpty[0];
        } else {
            return $nonEmpty[1];
        }
    }

    /**
     * Remove every occurrence of the magic word from the text.
     *
     * @param string &$text Modified in place. Left untouched when the regex
     *   engine reports an error (e.g. invalid UTF-8 in case-insensitive mode).
     * @return bool Whether at least one occurrence was removed
     */
    public function matchAndRemove( &$text ) {
        $this->mFound = false;
        $stripped = preg_replace_callback(
            $this->getRegex(),
            array( $this, 'pregRemoveAndRecord' ),
            $text
        );

        if ( $stripped === null ) {
            // PCRE error: keep the caller's text instead of replacing it
            // with null, and report that nothing was removed.
            $this->mFound = false;
            return false;
        }
        $text = $stripped;

        return $this->mFound;
    }

    /**
     * Remove the magic word from the start of the text, if present.
     *
     * @param string &$text Modified in place. Left untouched when the regex
     *   engine reports an error.
     * @return bool Whether a leading occurrence was removed
     */
    public function matchStartAndRemove( &$text ) {
        $this->mFound = false;
        $stripped = preg_replace_callback(
            $this->getRegexStart(),
            array( $this, 'pregRemoveAndRecord' ),
            $text
        );

        if ( $stripped === null ) {
            // PCRE error: keep the caller's text and report no removal.
            $this->mFound = false;
            return false;
        }
        $text = $stripped;

        return $this->mFound;
    }

    /**
     * preg_replace_callback() callback used by the *AndRemove methods:
     * records that a match happened and replaces it with nothing.
     *
     * @return string Always ''
     */
    public function pregRemoveAndRecord() {
        $this->mFound = true;
        return '';
    }

    /**
     * Replace occurrences of the magic word with a literal replacement; the
     * replacement is escaped through StringUtils::escapeRegexReplacement().
     * Records in $mModified whether the result differs from the subject.
     *
     * @param string $replacement
     * @param string $subject
     * @param int $limit Passed to preg_replace(); -1 means no limit
     * @return string|null Result of preg_replace()
     */
    public function replace( $replacement, $subject, $limit = -1 ) {
        $res = preg_replace(
            $this->getRegex(),
            StringUtils::escapeRegexReplacement( $replacement ),
            $subject,
            $limit
        );
        $this->mModified = $res !== $subject;
        return $res;
    }

    /**
     * Run a callback over every match of the variable regex (the one in which
     * "$1" is a capture group). Records in $mModified whether the result
     * differs from the input.
     *
     * @param string $text
     * @param callable $callback Passed to preg_replace_callback()
     * @return string|null Result of preg_replace_callback()
     */
    public function substituteCallback( $text, $callback ) {
        $res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
        $this->mModified = $res !== $text;
        return $res;
    }

    /**
     * Get the match-anywhere regex with "$1" turned into a capture group.
     *
     * @return string
     */
    public function getVariableRegex() {
        if ( $this->mVariableRegex == '' ) {
            $this->initRegex();
        }
        return $this->mVariableRegex;
    }

    /**
     * Get the whole-string regex with "$1" turned into a capture group.
     *
     * @return string
     */
    public function getVariableStartToEndRegex() {
        if ( $this->mVariableStartToEndRegex == '' ) {
            $this->initRegex();
        }
        return $this->mVariableStartToEndRegex;
    }

    /**
     * Get one synonym by index. No bounds check is performed.
     *
     * @param int $i
     * @return string
     */
    public function getSynonym( $i ) {
        return $this->mSynonyms[$i];
    }

    /**
     * @return array All synonyms
     */
    public function getSynonyms() {
        return $this->mSynonyms;
    }

    /**
     * Whether the last replace() or substituteCallback() call on this object
     * produced a result different from its input.
     *
     * @return bool
     */
    public function getWasModified() {
        return $this->mModified;
    }

    /**
     * Apply several magic-word replacements to a subject in one
     * preg_replace() call.
     *
     * Unlike replace(), the replacement strings are handed to preg_replace()
     * verbatim, so its backreference syntax is interpreted in them.
     *
     * @param array $magicarr Map of magic word ID => replacement string
     * @param string $subject
     * @param string &$result Receives the output of preg_replace()
     * @return bool Whether $result differs from $subject
     */
    public function replaceMultiple( $magicarr, $subject, &$result ) {
        $search = array();
        $replace = array();
        foreach ( $magicarr as $id => $replacement ) {
            $mw = MagicWord::get( $id );
            $search[] = $mw->getRegex();
            $replace[] = $replacement;
        }

        $result = preg_replace( $search, $replace, $subject );
        return $result !== $subject;
    }

    /**
     * Add an entry to the array for every synonym of this word, keyed by the
     * synonym lower-cased through $wgContLang->lc().
     *
     * @param array &$array
     * @param mixed $value Value stored under each key
     */
    public function addToArray( &$array, $value ) {
        global $wgContLang;
        foreach ( $this->mSynonyms as $syn ) {
            $array[$wgContLang->lc( $syn )] = $value;
        }
    }

    /**
     * @return bool Whether this magic word is case-sensitive
     */
    public function isCaseSensitive() {
        return $this->mCaseSensitive;
    }

    /**
     * @return mixed The ID of this magic word
     */
    public function getId() {
        return $this->mId;
    }
}
00709 
/**
 * A set of magic word IDs that can be matched together: the synonyms of all
 * member words are combined into one hash table and one pair of regexes
 * (case-insensitive and case-sensitive).
 *
 * In every two-element array handled by this class, index 0 refers to the
 * case-insensitive words and index 1 to the case-sensitive ones.
 */
class MagicWordArray {
    /** List of magic word IDs in this set */
    public $names = array();

    /** Lazily built lookup table, see getHash(); null when stale */
    private $hash;

    /** Lazily built regex fragments, see getBaseRegex(); null when stale */
    private $baseRegex;

    /** Lazily built full regexes, see getRegex(); null when stale */
    private $regex;

    /** Not referenced by any method of this class as shown here */
    private $matches;

    /**
     * @param array $names Initial list of magic word IDs
     */
    public function __construct( $names = array() ) {
        $this->names = $names;
    }

    /**
     * Add a magic word ID and invalidate the cached hash and regexes.
     *
     * @param string $name
     */
    public function add( $name ) {
        $this->names[] = $name;
        $this->hash = $this->baseRegex = $this->regex = null;
    }

    /**
     * Add several magic word IDs (array keys are ignored) and invalidate the
     * cached hash and regexes.
     *
     * @param array $names
     */
    public function addArray( $names ) {
        $this->names = array_merge( $this->names, array_values( $names ) );
        $this->hash = $this->baseRegex = $this->regex = null;
    }

    /**
     * Get the synonym lookup table, building it on first use.
     *
     * @return array array( 0 => map of lower-cased synonym => ID for
     *   case-insensitive words, 1 => map of synonym => ID for case-sensitive
     *   words )
     */
    public function getHash() {
        if ( is_null( $this->hash ) ) {
            global $wgContLang;
            $this->hash = array( 0 => array(), 1 => array() );
            foreach ( $this->names as $name ) {
                $magic = MagicWord::get( $name );
                $case = intval( $magic->isCaseSensitive() );
                foreach ( $magic->getSynonyms() as $syn ) {
                    if ( !$case ) {
                        $syn = $wgContLang->lc( $syn );
                    }
                    $this->hash[$case][$syn] = $name;
                }
            }
        }
        return $this->hash;
    }

    /**
     * Get the regex fragments (no delimiters or modifiers), building them on
     * first use. Every synonym becomes a named group "<letters>_<ID>", where
     * <letters> is the synonym index with its digits mapped to a-j.
     *
     * @return array array( 0 => case-insensitive fragment, 1 =>
     *   case-sensitive fragment ); a fragment is '' when it has no synonyms
     */
    public function getBaseRegex() {
        if ( is_null( $this->baseRegex ) ) {
            $this->baseRegex = array( 0 => '', 1 => '' );
            foreach ( $this->names as $name ) {
                $magic = MagicWord::get( $name );
                $case = intval( $magic->isCaseSensitive() );
                foreach ( $magic->getSynonyms() as $i => $syn ) {
                    // Group name must start with a non-digit in PCRE 8.34+
                    $it = strtr( $i, '0123456789', 'abcdefghij' );
                    $group = "(?P<{$it}_{$name}>" . preg_quote( $syn, '/' ) . ')';
                    if ( $this->baseRegex[$case] === '' ) {
                        $this->baseRegex[$case] = $group;
                    } else {
                        $this->baseRegex[$case] .= '|' . $group;
                    }
                }
            }
        }
        return $this->baseRegex;
    }

    /**
     * Get the match-anywhere regexes, building them on first use.
     *
     * @return array Two regex strings; an entry is '' when the corresponding
     *   fragment is empty
     */
    public function getRegex() {
        if ( is_null( $this->regex ) ) {
            $base = $this->getBaseRegex();
            $this->regex = array( '', '' );
            if ( $base[0] !== '' ) {
                $this->regex[0] = "/{$base[0]}/iuS";
            }
            if ( $base[1] !== '' ) {
                $this->regex[1] = "/{$base[1]}/S";
            }
        }
        return $this->regex;
    }

    /**
     * Get the match-anywhere regexes with the quoted "$1" placeholder
     * replaced by a non-greedy capture group.
     *
     * @return array
     */
    public function getVariableRegex() {
        return str_replace( "\\$1", "(.*?)", $this->getRegex() );
    }

    /**
     * Get regexes that match any member word at the start of a string.
     *
     * @return array Two regex strings; '' for an empty fragment
     */
    public function getRegexStart() {
        $base = $this->getBaseRegex();
        $newRegex = array( '', '' );
        if ( $base[0] !== '' ) {
            $newRegex[0] = "/^(?:{$base[0]})/iuS";
        }
        if ( $base[1] !== '' ) {
            $newRegex[1] = "/^(?:{$base[1]})/S";
        }
        return $newRegex;
    }

    /**
     * Get regexes that match a whole string against any member word, with
     * the quoted "$1" placeholder replaced by a non-greedy capture group.
     *
     * @return array Two regex strings; '' for an empty fragment
     */
    public function getVariableStartToEndRegex() {
        $base = $this->getBaseRegex();
        $newRegex = array( '', '' );
        if ( $base[0] !== '' ) {
            $newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
        }
        if ( $base[1] !== '' ) {
            $newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
        }
        return $newRegex;
    }

    /**
     * @return array The list of magic word IDs in this set
     */
    public function getNames() {
        return $this->names;
    }

    /**
     * Interpret a match array produced by one of this object's regexes.
     *
     * The array is scanned for the first non-empty entry other than key 0,
     * which is expected to be a named group "<letters>_<ID>". In a preg match
     * array each named entry is followed by the same group under its numeric
     * key, so the parameter value is taken from the element after that one.
     *
     * Implemented with key()/current()/next() because each() was removed in
     * PHP 8.0; the array's internal pointer is advanced exactly as each() did.
     *
     * @param array $m Match array from preg_match() / preg_match_all()
     * @return array array( magic word ID, parameter value ); the second
     *   element is whatever next() yields: '' or false when there is no
     *   captured parameter
     * @throws MWException When the first non-empty key has no "_" separator
     *   or no non-empty entry exists
     */
    public function parseMatch( $m ) {
        reset( $m );
        while ( ( $key = key( $m ) ) !== null ) {
            $value = current( $m );
            // Step past the current element before any early exit, the same
            // side effect each() had.
            next( $m );
            if ( $key === 0 || $value === '' ) {
                continue;
            }
            $parts = explode( '_', $key, 2 );
            if ( count( $parts ) != 2 ) {
                // This shouldn't happen
                // continue;
                throw new MWException( __METHOD__ . ': bad parameter name' );
            }
            list( /* $synIndex */, $magicName ) = $parts;
            // The pointer now sits on the numeric twin of the named group;
            // advance once more to reach the nested parameter capture.
            $paramValue = next( $m );
            return array( $magicName, $paramValue );
        }
        // This shouldn't happen either
        throw new MWException( __METHOD__ . ': parameter not found' );
    }

    /**
     * Match a whole string against the member words and extract the
     * parameter captured by the "$1" placeholder.
     *
     * @param string $text
     * @return array array( magic word ID, parameter value ) as returned by
     *   parseMatch(), or array( false, false ) when nothing matches
     */
    public function matchVariableStartToEnd( $text ) {
        $regexes = $this->getVariableStartToEndRegex();
        foreach ( $regexes as $regex ) {
            if ( $regex !== '' ) {
                $m = array();
                if ( preg_match( $regex, $text, $m ) ) {
                    return $this->parseMatch( $m );
                }
            }
        }
        return array( false, false );
    }

    /**
     * Look up a whole string in the synonym hash: first verbatim among the
     * case-sensitive words, then lower-cased among the case-insensitive ones.
     *
     * @param string $text
     * @return string|bool The magic word ID, or false when there is no match
     */
    public function matchStartToEnd( $text ) {
        $hash = $this->getHash();
        if ( isset( $hash[1][$text] ) ) {
            return $hash[1][$text];
        }
        global $wgContLang;
        $lc = $wgContLang->lc( $text );
        if ( isset( $hash[0][$lc] ) ) {
            return $hash[0][$lc];
        }
        return false;
    }

    /**
     * Find every member word occurring in the text and strip the matches.
     *
     * @param string &$text Modified in place. A regex pass that fails inside
     *   the regex engine leaves the text as it was before that pass.
     * @return array Map of magic word ID => parameter value (as returned by
     *   parseMatch()) for every word found
     */
    public function matchAndRemove( &$text ) {
        $found = array();
        $regexes = $this->getRegex();
        foreach ( $regexes as $regex ) {
            if ( $regex === '' ) {
                continue;
            }
            $matches = array();
            // preg_match_all() yields false on error and 0 for no matches;
            // in both cases there is nothing to record.
            if ( preg_match_all( $regex, $text, $matches, PREG_SET_ORDER ) ) {
                foreach ( $matches as $m ) {
                    list( $name, $param ) = $this->parseMatch( $m );
                    $found[$name] = $param;
                }
            }
            $stripped = preg_replace( $regex, '', $text );
            if ( $stripped !== null ) {
                // null signals a PCRE error; never overwrite the caller's
                // text with it.
                $text = $stripped;
            }
        }
        return $found;
    }

    /**
     * If the text starts with one of the member words, strip that word from
     * the start and return its ID.
     *
     * @param string &$text Modified in place when a word is found
     * @return string|bool The magic word ID, or false when the text does not
     *   start with any member word
     */
    public function matchStartAndRemove( &$text ) {
        $regexes = $this->getRegexStart();
        foreach ( $regexes as $regex ) {
            if ( $regex === '' ) {
                continue;
            }
            $m = array();
            if ( preg_match( $regex, $text, $m ) ) {
                list( $id, ) = $this->parseMatch( $m );
                if ( strlen( $m[0] ) >= strlen( $text ) ) {
                    // The match consumed everything; avoid substr() at the
                    // end of the string.
                    $text = '';
                } else {
                    $text = substr( $text, strlen( $m[0] ) );
                }
                return $id;
            }
        }
        return false;
    }
}