MediaWiki  REL1_21
MagicWord.php
Go to the documentation of this file.
00001 <?php
/**
 * A magic word: an identifier together with the synonym strings that
 * represent it in text, plus the regular expressions derived from those
 * synonyms.
 *
 * Instances are normally obtained through MagicWord::get(), which caches one
 * object per ID in self::$mObjects. The regexes are built lazily by
 * initRegex() the first time one of the regex getters is called.
 *
 * A synonym may contain the placeholder "$1"; the "variable" regexes replace
 * that placeholder with a non-greedy capture group "(.*?)".
 */
class MagicWord {
	/** Identifier of this magic word (set by the constructor or by load()) */
	public $mId;

	/** Array of synonym strings for this magic word */
	public $mSynonyms;

	/** Whether the synonyms are matched case-sensitively */
	public $mCaseSensitive;

	/** Full regex matching any synonym anywhere; '' until initRegex() runs */
	public $mRegex = '';

	/** Regex matching any synonym at the start of a string */
	public $mRegexStart = '';

	/** Alternation of the quoted synonyms, without delimiters or modifiers */
	public $mBaseRegex = '';

	/** Same as $mRegex, with the "$1" placeholder turned into a capture group */
	public $mVariableRegex = '';

	/** Anchored (start to end) version of the variable regex */
	public $mVariableStartToEndRegex = '';

	/** Whether the last replace()/substituteCallback() changed its input */
	public $mModified = false;

	/** Whether the last matchAndRemove()/matchStartAndRemove() found a match */
	public $mFound = false;

	/** True once the 'MagicWordwgVariableIDs' hook has been run */
	static public $mVariableIDsInitialised = false;

	/** IDs of the variable magic words; extended once by hook in getVariableIDs() */
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'pageid',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'namespacenumber',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
	);

	/* Array of caching hints for ParserCache (magic word ID => TTL value) */
	static public $mCacheTTLs = array (
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	/** IDs of the double-underscore magic words; extended by hook in getDoubleUnderscoreArray() */
	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	/** IDs returned by getSubstIDs() */
	static public $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	/** Cache of loaded MagicWord objects, keyed by ID (see get()/clearCache()) */
	static public $mObjects = array();

	/** Lazily created MagicWordArray built from $mDoubleUnderscoreIDs */
	static public $mDoubleUnderscoreArray = null;

	/**
	 * @param $id mixed Identifier of the magic word
	 * @param $syn array|string Synonym(s); a scalar is cast to a one-element array
	 * @param $cs bool Whether matching is case-sensitive
	 */
	public function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Get the (cached) MagicWord object for an ID, loading it on first use.
	 *
	 * @param $id string Magic word ID
	 * @return MagicWord Returned by reference from the static cache
	 * @throws MWException From load(), when the ID has no synonyms
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			// Only cached when load() succeeded; an exception skips this line.
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get the list of variable IDs. The 'MagicWordwgVariableIDs' hook is run
	 * once, on the first call, so handlers can modify the list.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get the list of subst-type magic word IDs.
	 *
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Look up the caching hint for a magic word.
	 *
	 * @param $id string Magic word ID
	 * @return int The value from $mCacheTTLs, or -1 when the ID has no entry
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get the MagicWordArray of double-underscore words. It is created on the
	 * first call, after running the 'GetDoubleUnderscoreIDs' hook.
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			wfRunHooks( 'GetDoubleUnderscoreIDs', array( &self::$mDoubleUnderscoreIDs ) );
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Empty the cache of loaded MagicWord objects used by get().
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialise this object for the given ID by handing it to
	 * $wgContLang->getMagic().
	 * NOTE(review): getMagic() is presumably what fills in mSynonyms and
	 * mCaseSensitive; it is not visible in this file.
	 *
	 * @param $id string Magic word ID
	 * @throws MWException When no synonyms were set for the ID
	 */
	public function load( $id ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// Placeholder synonym kept from the original code, so the object
			// never has an empty synonym list even on the failure path.
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			// Close the profiling section before leaving via the exception,
			// otherwise the wfProfileIn() above is left unbalanced.
			wfProfileOut( __METHOD__ );
			throw new MWException( "Error: invalid magic word '$id'" );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Build all regex properties from the synonym list.
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// Quote regex metacharacters and the '/' delimiter
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned the "$1" placeholder into "\$1"; replace that
		// with a non-greedy capture group.
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * usort() callback ordering strings by byte length, longest first.
	 *
	 * @param $s1 string
	 * @param $s2 string
	 * @return int 1 if $s1 is shorter, -1 if it is longer, 0 if equal length
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Get the regex matching any synonym anywhere in a string.
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Get the regex modifiers implied by the case-sensitivity setting.
	 *
	 * @return string '' when case-sensitive, 'iu' otherwise
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Get the regex matching any synonym at the start of a string.
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * Get the synonym alternation without delimiters or modifiers.
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Check whether any synonym occurs in the text.
	 *
	 * @param $text string
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Check whether the text starts with any synonym.
	 *
	 * @param $text string
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Match the whole text against the synonyms, with "$1" acting as a
	 * wildcard, and return the text captured by the wildcard.
	 *
	 * @param $text string
	 * @return string|null Null when the text does not match. Otherwise the
	 *   captured value, or the whole match when nothing non-empty was
	 *   captured.
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( !$matchcount ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Only real empty strings are dropped. A plain array_filter() would also
		# discard the string "0", losing a legitimate captured value.
		$parts = array();
		foreach ( $matches as $part ) {
			if ( $part !== '' ) {
				$parts[] = $part;
			}
		}

		if ( count( $parts ) === 1 ) {
			return $parts[0];
		}
		// With nothing left at all (empty overall match) there is no value.
		return isset( $parts[1] ) ? $parts[1] : null;
	}

	/**
	 * Remove every occurrence of the magic word from the text.
	 *
	 * @param &$text string Modified in place
	 * @return bool Whether at least one occurrence was found
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Remove the magic word from the start of the text, if present.
	 *
	 * @param &$text string Modified in place
	 * @return bool Whether the text started with the magic word
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * preg_replace_callback() handler for the *AndRemove methods: records
	 * that a match happened and replaces it with nothing.
	 *
	 * @return string Always ''
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replace occurrences of the magic word with a literal replacement.
	 * The outcome is remembered for getWasModified().
	 *
	 * @param $replacement string Escaped with StringUtils::escapeRegexReplacement()
	 * @param $subject string
	 * @param $limit int Passed to preg_replace(); -1 for no limit
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = ( $res !== $subject );
		return $res;
	}

	/**
	 * Replace occurrences of the magic word (with "$1" as a capture group)
	 * using a callback. The outcome is remembered for getWasModified().
	 *
	 * @param $text string
	 * @param $callback callable Passed to preg_replace_callback()
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = ( $res !== $text );
		return $res;
	}

	/**
	 * Get the regex in which "$1" has become a capture group.
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Get the start-to-end anchored variable regex.
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Get one synonym by its index in the synonym array.
	 *
	 * @param $i int
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array All synonyms
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * @return bool Whether the last replace() or substituteCallback() call
	 *   changed its input
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * Apply several magic word replacements to a subject in one
	 * preg_replace() call.
	 *
	 * The replacement strings are handed to preg_replace() as they are, so
	 * "$n" / "\n" back-references in them are interpreted.
	 *
	 * @param $magicarr array Magic word ID => replacement string
	 * @param $subject string
	 * @param &$result string Receives the text after replacement
	 * @return bool Whether the result differs from the subject
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return $result !== $subject;
	}

	/**
	 * Add every synonym, lower-cased with $wgContLang->lc(), as a key of
	 * the given array, all mapping to the same value.
	 *
	 * @param &$array array Modified in place
	 * @param $value mixed
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return mixed The magic word ID
	 */
	public function getId() {
		return $this->mId;
	}
}
00638 
/**
 * A collection of magic words, identified by their IDs, that can be matched
 * against text together.
 *
 * The lookup hash and the combined regexes are built lazily and reset
 * whenever a name is added. Each of them is a two-element array indexed by
 * case sensitivity: index 0 for case-insensitive words, index 1 for
 * case-sensitive ones.
 */
class MagicWordArray {
	/** Array of magic word IDs in this collection */
	public $names = array();

	/** Lazily built lookup: [case][synonym] => magic word ID (see getHash()) */
	public $hash;

	/** Lazily built alternations of named groups (see getBaseRegex()) */
	public $baseRegex;

	/** Lazily built full regexes (see getRegex()) */
	public $regex;

	/** Not used by any method of this class as visible here */
	public $matches;

	/**
	 * @param $names array Magic word IDs
	 */
	public function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by ID and invalidate the derived caches.
	 *
	 * @param $name string
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add several magic words by ID and invalidate the derived caches.
	 *
	 * @param $names array
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get the two-level hash mapping synonyms to magic word IDs.
	 * Synonyms of case-insensitive words are stored lower-cased with
	 * $wgContLang->lc() under index 0; case-sensitive ones verbatim under
	 * index 1.
	 *
	 * @return array
	 */
	public function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regexes: for each case class, an alternation with one
	 * named group per synonym. Group names have the form
	 * "<synonym index as letters>_<magic word ID>".
	 *
	 * @return array Two strings, indexed 0 (case-insensitive) and 1 (case-sensitive)
	 */
	public function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					// Group name must start with a non-digit in PCRE 8.34+
					$it = strtr( (string)$i, '0123456789', 'abcdefghij' );
					$group = "(?P<{$it}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get the unanchored regexes. An entry is '' when there are no words of
	 * that case class.
	 *
	 * @return array Two strings, indexed 0 (case-insensitive) and 1 (case-sensitive)
	 */
	public function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get the unanchored regexes with the quoted "$1" placeholder replaced
	 * by a non-greedy capture group.
	 *
	 * @return array
	 */
	public function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get regexes anchored at the start of the string.
	 *
	 * @return array Two strings, '' for a case class without words
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get regexes anchored at both ends, with "$1" as a capture group.
	 *
	 * @return array Two strings, '' for a case class without words
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * @return array The magic word IDs in this collection
	 */
	public function getNames() {
		return $this->names;
	}

	/**
	 * Interpret a preg_match() result obtained with one of this class's
	 * regexes: find the first non-empty named group and derive the magic
	 * word ID from the group name.
	 *
	 * The parameter value is the element two positions after the named
	 * group in the match array. For a synonym containing "$1" that is the
	 * capture of the placeholder; it is false when the array ends first.
	 *
	 * @param $m array Match array from preg_match()
	 * @return array ( magic word ID, parameter value )
	 * @throws MWException When no usable group is found
	 */
	public function parseMatch( $m ) {
		// Walk the array with key()/current()/next() instead of each(), which
		// was removed in PHP 8. Like each(), this leaves the internal pointer
		// one element past the entry being examined.
		reset( $m );
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			next( $m );
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			// $parts[0] is the synonym index, which is not needed here
			$magicName = $parts[1];
			// Skip the numeric duplicate of the named group
			$paramValue = next( $m );
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__ . ': parameter not found' );
	}

	/**
	 * Match the whole text against the collection, with "$1" acting as a
	 * wildcard.
	 *
	 * @param $text string
	 * @return array ( magic word ID, parameter value ), or ( false, false )
	 *   when nothing matches
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				return $this->parseMatch( $m );
			}
		}
		return array( false, false );
	}

	/**
	 * Match the whole text against the synonyms by hash lookup: first the
	 * exact text against case-sensitive synonyms, then the lower-cased text
	 * against case-insensitive ones.
	 *
	 * @param $text string
	 * @return string|bool The magic word ID, or false when nothing matches
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Find every magic word of the collection in the text and remove the
	 * matched text.
	 *
	 * @param &$text string Modified in place
	 * @return array Magic word ID => parameter value; a later match of the
	 *   same ID overwrites an earlier one
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$matches = array();
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}

	/**
	 * If the text starts with a magic word of the collection, remove that
	 * match from the text.
	 *
	 * @param &$text string Modified in place when a match is found
	 * @return string|bool The magic word ID, or false when nothing matches
	 */
	public function matchStartAndRemove( &$text ) {
		$regexes = $this->getRegexStart();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				// Explicit empty-string case: substr() at the very end of the
				// string does not return '' on every PHP version.
				if ( strlen( $m[0] ) >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, strlen( $m[0] ) );
				}
				return $id;
			}
		}
		return false;
	}
}