MediaWiki  REL1_22
MagicWord.php
Go to the documentation of this file.
00001 <?php
/**
 * A single magic word: an ID together with its list of synonyms and a
 * case-sensitivity flag, plus lazily built regular expressions for finding
 * those synonyms in text.
 *
 * Instances are normally obtained through MagicWord::get(), which caches one
 * object per ID in self::$mObjects.
 */
class MagicWord {
    /** Magic word ID, as passed to the constructor or to load() */
    public $mId;
    /** Array of synonym strings for this magic word */
    public $mSynonyms;
    /** Whether matching is case-sensitive */
    public $mCaseSensitive;

    /** Full regex matching any synonym ('' until initRegex() has run) */
    public $mRegex = '';
    /** Like $mRegex, but anchored at the start of the text */
    public $mRegexStart = '';
    /** Alternation of the quoted synonyms, without delimiters or modifiers */
    public $mBaseRegex = '';
    /** $mRegex with the quoted "$1" placeholder turned into a capture group */
    public $mVariableRegex = '';
    /** Like $mVariableRegex, but anchored at both start and end */
    public $mVariableStartToEndRegex = '';
    /** Set by replace() / substituteCallback(): did the last call change the text? */
    public $mModified = false;
    /** Set by pregRemoveAndRecord(): did the last *AndRemove() call match? */
    public $mFound = false;

    /** Whether the 'MagicWordwgVariableIDs' hook has already been run */
    public static $mVariableIDsInitialised = false;

    /** IDs of the variable magic words; extended once by hook in getVariableIDs() */
    public static $mVariableIDs = array(
        'currentmonth',
        'currentmonth1',
        'currentmonthname',
        'currentmonthnamegen',
        'currentmonthabbrev',
        'currentday',
        'currentday2',
        'currentdayname',
        'currentyear',
        'currenttime',
        'currenthour',
        'localmonth',
        'localmonth1',
        'localmonthname',
        'localmonthnamegen',
        'localmonthabbrev',
        'localday',
        'localday2',
        'localdayname',
        'localyear',
        'localtime',
        'localhour',
        'numberofarticles',
        'numberoffiles',
        'numberofedits',
        'articlepath',
        'pageid',
        'sitename',
        'server',
        'servername',
        'scriptpath',
        'stylepath',
        'pagename',
        'pagenamee',
        'fullpagename',
        'fullpagenamee',
        'namespace',
        'namespacee',
        'namespacenumber',
        'currentweek',
        'currentdow',
        'localweek',
        'localdow',
        'revisionid',
        'revisionday',
        'revisionday2',
        'revisionmonth',
        'revisionmonth1',
        'revisionyear',
        'revisiontimestamp',
        'revisionuser',
        'revisionsize',
        'subpagename',
        'subpagenamee',
        'talkspace',
        'talkspacee',
        'subjectspace',
        'subjectspacee',
        'talkpagename',
        'talkpagenamee',
        'subjectpagename',
        'subjectpagenamee',
        'numberofusers',
        'numberofactiveusers',
        'numberofpages',
        'currentversion',
        'rootpagename',
        'rootpagenamee',
        'basepagename',
        'basepagenamee',
        'currenttimestamp',
        'localtimestamp',
        'directionmark',
        'contentlanguage',
        'numberofadmins',
        'numberofviews',
    );

    /* Array of caching hints for ParserCache */
    public static $mCacheTTLs = array(
        'currentmonth' => 86400,
        'currentmonth1' => 86400,
        'currentmonthname' => 86400,
        'currentmonthnamegen' => 86400,
        'currentmonthabbrev' => 86400,
        'currentday' => 3600,
        'currentday2' => 3600,
        'currentdayname' => 3600,
        'currentyear' => 86400,
        'currenttime' => 3600,
        'currenthour' => 3600,
        'localmonth' => 86400,
        'localmonth1' => 86400,
        'localmonthname' => 86400,
        'localmonthnamegen' => 86400,
        'localmonthabbrev' => 86400,
        'localday' => 3600,
        'localday2' => 3600,
        'localdayname' => 3600,
        'localyear' => 86400,
        'localtime' => 3600,
        'localhour' => 3600,
        'numberofarticles' => 3600,
        'numberoffiles' => 3600,
        'numberofedits' => 3600,
        'currentweek' => 3600,
        'currentdow' => 3600,
        'localweek' => 3600,
        'localdow' => 3600,
        'numberofusers' => 3600,
        'numberofactiveusers' => 3600,
        'numberofpages' => 3600,
        'currentversion' => 86400,
        'currenttimestamp' => 3600,
        'localtimestamp' => 3600,
        'pagesinnamespace' => 3600,
        'numberofadmins' => 3600,
        'numberofviews' => 3600,
        'numberingroup' => 3600,
    );

    /** IDs of the double-underscore magic words; extended by hook in getDoubleUnderscoreArray() */
    public static $mDoubleUnderscoreIDs = array(
        'notoc',
        'nogallery',
        'forcetoc',
        'toc',
        'noeditsection',
        'newsectionlink',
        'nonewsectionlink',
        'hiddencat',
        'index',
        'noindex',
        'staticredirect',
        'notitleconvert',
        'nocontentconvert',
    );

    /** IDs returned by getSubstIDs() */
    public static $mSubstIDs = array(
        'subst',
        'safesubst',
    );

    /** Cache of MagicWord objects keyed by ID, filled by get() */
    public static $mObjects = array();
    /** Lazily created MagicWordArray of the double-underscore words */
    public static $mDoubleUnderscoreArray = null;

    /**
     * @param mixed $id Magic word ID
     * @param string|array $syn Synonym or list of synonyms (cast to array)
     * @param bool $cs Whether matching is case-sensitive
     */
    public function __construct( $id = 0, $syn = array(), $cs = false ) {
        $this->mId = $id;
        $this->mSynonyms = (array)$syn;
        $this->mCaseSensitive = $cs;
    }

    /**
     * Return the cached MagicWord for an ID, creating and loading it on the
     * first request.
     *
     * @param string $id Magic word ID
     * @return MagicWord The cached instance (returned by reference)
     * @throws MWException From load(), when the ID has no synonyms
     */
    public static function &get( $id ) {
        if ( !isset( self::$mObjects[$id] ) ) {
            $mw = new MagicWord();
            $mw->load( $id );
            self::$mObjects[$id] = $mw;
        }
        return self::$mObjects[$id];
    }

    /**
     * Get the list of variable IDs. The 'MagicWordwgVariableIDs' hook is run
     * once, on the first call, and may modify the list in place.
     *
     * @return array
     */
    public static function getVariableIDs() {
        if ( !self::$mVariableIDsInitialised ) {
            # Get variable IDs
            wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
            self::$mVariableIDsInitialised = true;
        }
        return self::$mVariableIDs;
    }

    /**
     * Get the list of substitution-modifier IDs.
     *
     * @return array
     */
    public static function getSubstIDs() {
        return self::$mSubstIDs;
    }

    /**
     * Look up the caching hint for a magic word ID.
     *
     * @param string $id Magic word ID
     * @return int The value from self::$mCacheTTLs, or -1 when the ID has no entry
     */
    public static function getCacheTTL( $id ) {
        if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
            return self::$mCacheTTLs[$id];
        } else {
            return -1;
        }
    }

    /**
     * Get the MagicWordArray of double-underscore words. It is built on the
     * first call, after the 'GetDoubleUnderscoreIDs' hook has had a chance to
     * modify the ID list.
     *
     * @return MagicWordArray
     */
    public static function getDoubleUnderscoreArray() {
        if ( is_null( self::$mDoubleUnderscoreArray ) ) {
            wfRunHooks( 'GetDoubleUnderscoreIDs', array( &self::$mDoubleUnderscoreIDs ) );
            self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
        }
        return self::$mDoubleUnderscoreArray;
    }

    /**
     * Empty the per-ID object cache used by get().
     */
    public static function clearCache() {
        self::$mObjects = array();
    }

    /**
     * Initialise this object for the given ID by handing it to
     * $wgContLang->getMagic(), which is expected to fill in the synonyms
     * (NOTE(review): getMagic() is defined elsewhere; confirm what it sets).
     *
     * @param string $id Magic word ID
     * @throws MWException When no synonyms were set for $id
     */
    public function load( $id ) {
        global $wgContLang;
        wfProfileIn( __METHOD__ );
        $this->mId = $id;
        $wgContLang->getMagic( $this );
        if ( !$this->mSynonyms ) {
            // Leave a placeholder synonym behind so the object is not left
            // with an empty list if the exception is caught.
            $this->mSynonyms = array( 'brionmademeputthishere' );
            wfProfileOut( __METHOD__ );
            throw new MWException( "Error: invalid magic word '$id'" );
        }
        wfProfileOut( __METHOD__ );
    }

    /**
     * Build all regex members from the synonym list.
     */
    public function initRegex() {
        // Sort the synonyms by length, descending, so that the longest synonym
        // matches in precedence to the shortest
        $synonyms = $this->mSynonyms;
        usort( $synonyms, array( $this, 'compareStringLength' ) );

        $escSyn = array();
        foreach ( $synonyms as $synonym ) {
            // In case a magic word contains /, like that's going to happen;)
            $escSyn[] = preg_quote( $synonym, '/' );
        }
        $this->mBaseRegex = implode( '|', $escSyn );

        $case = $this->mCaseSensitive ? '' : 'iu';
        $this->mRegex = "/{$this->mBaseRegex}/{$case}";
        $this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
        // preg_quote() turned each "$1" placeholder into "\$1"; swap that for
        // a non-greedy capture group to obtain the variable-matching regexes.
        $this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
        $this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
            "/^(?:{$this->mBaseRegex})$/{$case}" );
    }

    /**
     * usort() comparison callback ordering strings by byte length, longest
     * first.
     *
     * @param string $s1
     * @param string $s2
     * @return int 1 if $s1 is shorter, -1 if $s1 is longer, 0 if equal length
     */
    public function compareStringLength( $s1, $s2 ) {
        $l1 = strlen( $s1 );
        $l2 = strlen( $s2 );
        if ( $l1 < $l2 ) {
            return 1;
        } elseif ( $l1 > $l2 ) {
            return -1;
        } else {
            return 0;
        }
    }

    /**
     * Get the regex matching any synonym, building it on first use.
     *
     * @return string
     */
    public function getRegex() {
        if ( $this->mRegex == '' ) {
            $this->initRegex();
        }
        return $this->mRegex;
    }

    /**
     * Get the regex modifier string that corresponds to the case-sensitivity
     * flag. Also makes sure the regexes have been built.
     *
     * @return string '' when case-sensitive, 'iu' otherwise
     */
    public function getRegexCase() {
        if ( $this->mRegex === '' ) {
            $this->initRegex();
        }

        return $this->mCaseSensitive ? '' : 'iu';
    }

    /**
     * Get the start-anchored regex, building it on first use.
     *
     * @return string
     */
    public function getRegexStart() {
        if ( $this->mRegex == '' ) {
            $this->initRegex();
        }
        return $this->mRegexStart;
    }

    /**
     * Get the bare alternation of quoted synonyms (no delimiters, no
     * modifiers), building it on first use.
     *
     * @return string
     */
    public function getBaseRegex() {
        if ( $this->mRegex == '' ) {
            $this->initRegex();
        }
        return $this->mBaseRegex;
    }

    /**
     * Test whether any synonym occurs anywhere in the text.
     *
     * @param string $text
     * @return bool
     */
    public function match( $text ) {
        return (bool)preg_match( $this->getRegex(), $text );
    }

    /**
     * Test whether the text starts with one of the synonyms.
     *
     * @param string $text
     * @return bool
     */
    public function matchStart( $text ) {
        return (bool)preg_match( $this->getRegexStart(), $text );
    }

    /**
     * Match the whole text against the synonyms, treating "$1" in a synonym
     * as a variable part.
     *
     * @param string $text
     * @return string|null Null when nothing matched; the captured variable
     *  part when one was captured; otherwise the whole matched text
     */
    public function matchVariableStartToEnd( $text ) {
        $matches = array();
        $matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
        if ( $matchcount == 0 ) {
            return null;
        } else {
            # multiple matched parts (variable match); some will be empty because of
            # synonyms. The variable will be the second non-empty one so remove any
            # blank elements and re-sort the indices.
            # See also bug 6526

            // Only drop genuinely empty captures. A bare array_filter() would
            // also discard a captured "0", because PHP treats "0" as falsy.
            $matches = array_values( array_filter( $matches, function ( $part ) {
                return $part !== '';
            } ) );

            if ( count( $matches ) == 1 ) {
                return $matches[0];
            } else {
                return $matches[1];
            }
        }
    }

    /**
     * Remove every occurrence of any synonym from the text.
     *
     * @param string &$text Modified in place
     * @return bool Whether at least one occurrence was found
     */
    public function matchAndRemove( &$text ) {
        $this->mFound = false;
        $text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
        return $this->mFound;
    }

    /**
     * Remove a synonym from the start of the text, if one is there.
     *
     * @param string &$text Modified in place
     * @return bool Whether the text started with a synonym
     */
    public function matchStartAndRemove( &$text ) {
        $this->mFound = false;
        $text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
        return $this->mFound;
    }

    /**
     * preg_replace_callback() callback used by the *AndRemove() methods:
     * records that a match happened and replaces it with nothing.
     *
     * @return string Always ''
     */
    public function pregRemoveAndRecord() {
        $this->mFound = true;
        return '';
    }

    /**
     * Replace occurrences of the synonyms with a literal replacement string.
     * The replacement is passed through StringUtils::escapeRegexReplacement()
     * before being handed to preg_replace().
     *
     * @param string $replacement
     * @param string $subject
     * @param int $limit Maximum number of replacements, -1 for no limit
     * @return string The resulting text; getWasModified() tells whether it changed
     */
    public function replace( $replacement, $subject, $limit = -1 ) {
        $res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
        $this->mModified = $res !== $subject;
        return $res;
    }

    /**
     * Run a callback over every match of the variable regex (synonyms with
     * "$1" turned into a capture group).
     *
     * @param string $text
     * @param callable $callback Passed straight to preg_replace_callback()
     * @return string The resulting text; getWasModified() tells whether it changed
     */
    public function substituteCallback( $text, $callback ) {
        $res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
        $this->mModified = $res !== $text;
        return $res;
    }

    /**
     * Get the variable regex, building it on first use.
     *
     * @return string
     */
    public function getVariableRegex() {
        if ( $this->mVariableRegex == '' ) {
            $this->initRegex();
        }
        return $this->mVariableRegex;
    }

    /**
     * Get the fully anchored variable regex, building it on first use.
     *
     * @return string
     */
    public function getVariableStartToEndRegex() {
        if ( $this->mVariableStartToEndRegex == '' ) {
            $this->initRegex();
        }
        return $this->mVariableStartToEndRegex;
    }

    /**
     * Get one synonym by index. No bounds check is performed.
     *
     * @param int $i Index into the synonym list
     * @return string
     */
    public function getSynonym( $i ) {
        return $this->mSynonyms[$i];
    }

    /**
     * @return array All synonyms
     */
    public function getSynonyms() {
        return $this->mSynonyms;
    }

    /**
     * Tell whether the last replace() or substituteCallback() call on this
     * object changed its input.
     *
     * @return bool
     */
    public function getWasModified() {
        return $this->mModified;
    }

    /**
     * Apply several magic-word replacements in a single preg_replace() call.
     *
     * Unlike replace(), the replacement strings are handed to preg_replace()
     * as given, so backreference syntax inside them is interpreted.
     *
     * @param array $magicarr Map of magic word ID => replacement string
     * @param string $subject Text to search
     * @param string &$result Receives the text after replacement
     * @return bool Whether the result differs from $subject
     */
    public function replaceMultiple( $magicarr, $subject, &$result ) {
        $search = array();
        $replace = array();
        foreach ( $magicarr as $id => $replacement ) {
            $mw = MagicWord::get( $id );
            $search[] = $mw->getRegex();
            $replace[] = $replacement;
        }

        $result = preg_replace( $search, $replace, $subject );
        return $result !== $subject;
    }

    /**
     * Add an entry to $array for every synonym, keyed by the synonym
     * lower-cased with $wgContLang->lc().
     *
     * @param array &$array Modified in place
     * @param mixed $value Value stored under each key
     */
    public function addToArray( &$array, $value ) {
        global $wgContLang;
        foreach ( $this->mSynonyms as $syn ) {
            $array[$wgContLang->lc( $syn )] = $value;
        }
    }

    /**
     * @return bool Whether matching is case-sensitive
     */
    public function isCaseSensitive() {
        return $this->mCaseSensitive;
    }

    /**
     * @return mixed The magic word ID
     */
    public function getId() {
        return $this->mId;
    }
}
00643 
/**
 * A set of magic word IDs that can be matched against text together, using
 * either a lookup hash or combined regexes with one named group per synonym.
 *
 * Index 0 of every two-element array used here holds the case-insensitive
 * words, index 1 the case-sensitive ones.
 */
class MagicWordArray {
    /** List of magic word IDs in this set */
    public $names = array();
    /** Cached result of getHash(), or null when it has to be rebuilt */
    public $hash = null;
    /** Cached result of getBaseRegex(), or null when it has to be rebuilt */
    public $baseRegex = null;
    /** Cached result of getRegex(), or null when it has to be rebuilt */
    public $regex = null;
    /** Not used by any method of this class */
    public $matches;

    /**
     * @param array $names List of magic word IDs
     */
    public function __construct( $names = array() ) {
        $this->names = $names;
    }

    /**
     * Add one magic word ID and invalidate the cached hash and regexes.
     *
     * @param string $name Magic word ID
     */
    public function add( $name ) {
        $this->names[] = $name;
        $this->hash = $this->baseRegex = $this->regex = null;
    }

    /**
     * Add several magic word IDs (array keys are ignored) and invalidate the
     * cached hash and regexes.
     *
     * @param array $names Magic word IDs
     */
    public function addArray( $names ) {
        $this->names = array_merge( $this->names, array_values( $names ) );
        $this->hash = $this->baseRegex = $this->regex = null;
    }

    /**
     * Get a two-element array of lookup tables mapping synonym => magic word
     * ID. Synonyms of case-insensitive words are stored lower-cased with
     * $wgContLang->lc().
     *
     * @return array
     */
    public function getHash() {
        if ( is_null( $this->hash ) ) {
            global $wgContLang;
            $this->hash = array( 0 => array(), 1 => array() );
            foreach ( $this->names as $name ) {
                $magic = MagicWord::get( $name );
                $case = intval( $magic->isCaseSensitive() );
                foreach ( $magic->getSynonyms() as $syn ) {
                    if ( !$case ) {
                        $syn = $wgContLang->lc( $syn );
                    }
                    $this->hash[$case][$syn] = $name;
                }
            }
        }
        return $this->hash;
    }

    /**
     * Get the two base regexes (no delimiters or modifiers). Every synonym
     * becomes one named group called "<index>_<magic word ID>", where the
     * digits of the synonym index are mapped to the letters a-j.
     *
     * @return array
     */
    public function getBaseRegex() {
        if ( is_null( $this->baseRegex ) ) {
            $this->baseRegex = array( 0 => '', 1 => '' );
            foreach ( $this->names as $name ) {
                $magic = MagicWord::get( $name );
                $case = intval( $magic->isCaseSensitive() );
                foreach ( $magic->getSynonyms() as $i => $syn ) {
                    // Group name must start with a non-digit in PCRE 8.34+
                    $it = strtr( $i, '0123456789', 'abcdefghij' );
                    $group = "(?P<{$it}_{$name}>" . preg_quote( $syn, '/' ) . ')';
                    if ( $this->baseRegex[$case] === '' ) {
                        $this->baseRegex[$case] = $group;
                    } else {
                        $this->baseRegex[$case] .= '|' . $group;
                    }
                }
            }
        }
        return $this->baseRegex;
    }

    /**
     * Get the two unanchored regexes. An entry is '' when there are no words
     * of that case-sensitivity.
     *
     * @return array
     */
    public function getRegex() {
        if ( is_null( $this->regex ) ) {
            $base = $this->getBaseRegex();
            $this->regex = array( '', '' );
            if ( $base[0] !== '' ) {
                $this->regex[0] = "/{$base[0]}/iuS";
            }
            if ( $base[1] !== '' ) {
                $this->regex[1] = "/{$base[1]}/S";
            }
        }
        return $this->regex;
    }

    /**
     * Get the unanchored regexes with each quoted "$1" placeholder replaced
     * by a non-greedy capture group.
     *
     * @return array
     */
    public function getVariableRegex() {
        return str_replace( "\\$1", "(.*?)", $this->getRegex() );
    }

    /**
     * Get the two regexes anchored at the start of the text. An entry is ''
     * when there are no words of that case-sensitivity.
     *
     * @return array
     */
    public function getRegexStart() {
        $base = $this->getBaseRegex();
        $newRegex = array( '', '' );
        if ( $base[0] !== '' ) {
            $newRegex[0] = "/^(?:{$base[0]})/iuS";
        }
        if ( $base[1] !== '' ) {
            $newRegex[1] = "/^(?:{$base[1]})/S";
        }
        return $newRegex;
    }

    /**
     * Get the two regexes anchored at both ends, with each quoted "$1"
     * placeholder replaced by a non-greedy capture group.
     *
     * @return array
     */
    public function getVariableStartToEndRegex() {
        $base = $this->getBaseRegex();
        $newRegex = array( '', '' );
        if ( $base[0] !== '' ) {
            $newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
        }
        if ( $base[1] !== '' ) {
            $newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
        }
        return $newRegex;
    }

    /**
     * @return array The magic word IDs in this set
     */
    public function getNames() {
        return $this->names;
    }

    /**
     * Interpret a preg match array produced by one of this object's regexes.
     *
     * Finds the first non-empty named group, takes the magic word ID from its
     * name, and reads the parameter from the array element that follows the
     * group's numeric duplicate.
     *
     * @param array $m Match array from preg_match() / one preg_match_all() set
     * @return array ( magic word ID, parameter value ); the parameter is
     *  false when no element follows the matched group
     * @throws MWException When no usable named group is found
     */
    public function parseMatch( $m ) {
        // Walk the array with its internal pointer (each() no longer exists
        // in PHP 8). After reading an element the pointer is advanced, so on
        // a hit it rests on the group's numeric duplicate and the next()
        // below lands on the element after that.
        reset( $m );
        while ( ( $key = key( $m ) ) !== null ) {
            $value = current( $m );
            next( $m );
            if ( $key === 0 || $value === '' ) {
                continue;
            }
            $parts = explode( '_', $key, 2 );
            if ( count( $parts ) != 2 ) {
                // This shouldn't happen
                // continue;
                throw new MWException( __METHOD__ . ': bad parameter name' );
            }
            list( /* $synIndex */, $magicName ) = $parts;
            $paramValue = next( $m );
            return array( $magicName, $paramValue );
        }
        // This shouldn't happen either
        throw new MWException( __METHOD__ . ': parameter not found' );
    }

    /**
     * Match the whole text against the words in this set, allowing a
     * variable "$1" part.
     *
     * @param string $text
     * @return array ( magic word ID, parameter value ) as returned by
     *  parseMatch(), or ( false, false ) when nothing matched
     */
    public function matchVariableStartToEnd( $text ) {
        $regexes = $this->getVariableStartToEndRegex();
        foreach ( $regexes as $regex ) {
            if ( $regex !== '' ) {
                $m = array();
                if ( preg_match( $regex, $text, $m ) ) {
                    return $this->parseMatch( $m );
                }
            }
        }
        return array( false, false );
    }

    /**
     * Match the whole text against the synonyms by hash lookup, trying the
     * case-sensitive table first and then the lower-cased text against the
     * case-insensitive table.
     *
     * @param string $text
     * @return string|bool The magic word ID, or false when nothing matched
     */
    public function matchStartToEnd( $text ) {
        $hash = $this->getHash();
        if ( isset( $hash[1][$text] ) ) {
            return $hash[1][$text];
        }
        global $wgContLang;
        $lc = $wgContLang->lc( $text );
        if ( isset( $hash[0][$lc] ) ) {
            return $hash[0][$lc];
        }
        return false;
    }

    /**
     * Find every occurrence of the words in this set and strip them from the
     * text.
     *
     * @param string &$text Modified in place
     * @return array Map of magic word ID => parameter value for each word found
     */
    public function matchAndRemove( &$text ) {
        $found = array();
        $regexes = $this->getRegex();
        foreach ( $regexes as $regex ) {
            if ( $regex === '' ) {
                continue;
            }
            $matches = array();
            if ( preg_match_all( $regex, $text, $matches, PREG_SET_ORDER ) ) {
                foreach ( $matches as $m ) {
                    list( $name, $param ) = $this->parseMatch( $m );
                    $found[$name] = $param;
                }
            }
            // preg_replace() returns null on a PCRE error; keep the caller's
            // text in that case instead of overwriting it with null.
            $stripped = preg_replace( $regex, '', $text );
            if ( $stripped !== null ) {
                $text = $stripped;
            }
        }
        return $found;
    }

    /**
     * If the text starts with one of the words in this set, remove that word
     * from the text.
     *
     * @param string &$text Modified in place when a word is found
     * @return string|bool The magic word ID, or false when nothing matched
     */
    public function matchStartAndRemove( &$text ) {
        $regexes = $this->getRegexStart();
        foreach ( $regexes as $regex ) {
            if ( $regex === '' ) {
                continue;
            }
            if ( preg_match( $regex, $text, $m ) ) {
                list( $id, ) = $this->parseMatch( $m );
                // Explicit empty-string case: substr() at the end of the
                // string returns false rather than '' on older PHP versions.
                if ( strlen( $m[0] ) >= strlen( $text ) ) {
                    $text = '';
                } else {
                    $text = substr( $text, strlen( $m[0] ) );
                }
                return $id;
            }
        }
        return false;
    }
}