MediaWiki  REL1_20
MagicWord.php
Go to the documentation of this file.
00001 <?php
/**
 * A single localisable "magic word": an identifier plus the list of synonyms
 * that represent it in wikitext, with helpers to build regular expressions
 * from those synonyms and to match, remove or replace them in text.
 *
 * Synonyms may contain the placeholder "$1"; the "variable" regex variants
 * turn that placeholder into a non-greedy capture group.
 */
class MagicWord {
	/** Magic word ID, list of synonyms, and case-sensitivity flag */
	public $mId, $mSynonyms, $mCaseSensitive;
	/** Delimited regex matching any synonym; '' until initRegex() has run */
	public $mRegex = '';
	/** Like $mRegex but anchored at the start of the subject */
	public $mRegexStart = '';
	/** Undelimited alternation of the quoted synonyms */
	public $mBaseRegex = '';
	/** $mRegex with the quoted "$1" placeholder turned into "(.*?)" */
	public $mVariableRegex = '';
	/** Start-and-end anchored regex with "$1" turned into "(.*?)" */
	public $mVariableStartToEndRegex = '';
	/** Whether the last replace()/substituteCallback() changed its subject */
	public $mModified = false;
	/** Scratch flag set by pregRemoveAndRecord() during the *AndRemove() calls */
	public $mFound = false;

	/** True once the MagicWordwgVariableIDs hook has been run */
	public static $mVariableIDsInitialised = false;

	/** IDs of the variable-style magic words */
	public static $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'pageid',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'namespacenumber',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
	);

	/* Array of caching hints for ParserCache */
	public static $mCacheTTLs = array(
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	/** IDs of the double-underscore (behaviour switch) magic words */
	public static $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	/** IDs of the substitution modifier magic words */
	public static $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	/** Cache of loaded MagicWord objects, keyed by ID (see get()) */
	public static $mObjects = array();
	/** Lazily created MagicWordArray of $mDoubleUnderscoreIDs */
	public static $mDoubleUnderscoreArray = null;

	/**
	 * @param $id Magic word identifier
	 * @param $syn String or array of synonyms; always stored as an array
	 * @param $cs Boolean: whether matching is case-sensitive
	 */
	function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Fetch the MagicWord object for an ID, loading and caching it in
	 * self::$mObjects on first use.
	 *
	 * @param $id Magic word identifier
	 * @return MagicWord (returned by reference)
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get the list of variable IDs. The 'MagicWordwgVariableIDs' hook is run
	 * once, on the first call, with the list passed by reference.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get the list of substitution modifier IDs.
	 *
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Look up the cache TTL hint for a magic word ID.
	 *
	 * @param $id Magic word identifier
	 * @return int TTL from self::$mCacheTTLs, or -1 when the ID has no entry
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get the MagicWordArray built from self::$mDoubleUnderscoreIDs,
	 * creating it on first use.
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Drop every object cached by get().
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialise this object for the given ID by handing it to
	 * $wgContLang->getMagic(), which presumably fills in the synonyms and
	 * case flag (not visible here).
	 *
	 * @param $id Magic word identifier
	 * @throws MWException if no synonyms are set afterwards
	 */
	function load( $id ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// Placeholder synonym kept as before, for callers that catch the
			// exception and keep using the object
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			// Close the profiling section before leaving through the exception
			wfProfileOut( __METHOD__ );
			throw new MWException( "Error: invalid magic word '$id'" );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Build all regex member variables from the synonym list.
	 */
	function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// Quote for use inside a '/'-delimited pattern
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned "$1" into "\$1"; swap that for a capture group
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * usort() callback ordering strings by byte length, longest first.
	 *
	 * @param $s1 string
	 * @param $s2 string
	 * @return int 1 if $s1 is shorter, -1 if it is longer, 0 if equal length
	 */
	function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Get the delimited regex matching any synonym anywhere in the subject.
	 *
	 * @return string
	 */
	function getRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Get the regex modifiers implied by the case-sensitivity flag.
	 *
	 * @return string '' when case-sensitive, 'iu' otherwise
	 */
	function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Get the regex matching any synonym at the start of the subject.
	 *
	 * @return string
	 */
	function getRegexStart() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * Get the undelimited alternation of quoted synonyms.
	 *
	 * @return string
	 */
	function getBaseRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Test whether any synonym occurs anywhere in $text.
	 *
	 * @param $text string
	 * @return bool
	 */
	function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Test whether $text starts with any synonym.
	 *
	 * @param $text string
	 * @return bool
	 */
	function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Match the whole of $text against the synonyms, treating "$1" as a
	 * variable part.
	 *
	 * @param $text string
	 * @return string|null The captured variable part if one was captured,
	 *   the full matched text if the match has no non-empty capture, or
	 *   null if $text does not match (or only an empty string matched)
	 */
	function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( $matchcount == 0 ) {
			return null;
		}

		# Multiple matched parts (variable match); some will be empty because
		# of synonyms. The variable will be the second non-empty one, so drop
		# the blank elements and renumber. Only '' is treated as blank: a bare
		# array_filter() would also discard a captured "0".
		# See also bug 6526
		$nonEmpty = array();
		foreach ( $matches as $part ) {
			if ( $part !== '' ) {
				$nonEmpty[] = $part;
			}
		}

		if ( isset( $nonEmpty[1] ) ) {
			return $nonEmpty[1];
		}
		return isset( $nonEmpty[0] ) ? $nonEmpty[0] : null;
	}

	/**
	 * Remove every occurrence of the magic word from $text.
	 *
	 * @param $text string, modified in place
	 * @return bool Whether at least one occurrence was found
	 */
	function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Remove the magic word from the start of $text if it is there.
	 *
	 * @param $text string, modified in place
	 * @return bool Whether the magic word was found at the start
	 */
	function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * preg_replace_callback() callback for the *AndRemove() methods: records
	 * that a match happened and replaces it with nothing.
	 *
	 * @return string Always ''
	 */
	function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replace occurrences of the magic word in $subject with a literal
	 * replacement and record in $mModified whether anything changed.
	 *
	 * @param $replacement string, escaped via StringUtils::escapeRegexReplacement()
	 * @param $subject string
	 * @param $limit int Maximum number of replacements, -1 for no limit
	 * @return string
	 */
	function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = $res !== $subject;
		return $res;
	}

	/**
	 * Run $callback over every match of the variable regex in $text and
	 * record in $mModified whether anything changed.
	 *
	 * @param $text string
	 * @param $callback callable passed to preg_replace_callback()
	 * @return string
	 */
	function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = $res !== $text;
		return $res;
	}

	/**
	 * Get the regex in which "$1" has become a capture group.
	 *
	 * @return string
	 */
	function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Get the start-and-end anchored regex in which "$1" has become a
	 * capture group.
	 *
	 * @return string
	 */
	function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Get one synonym by its index in the synonym list.
	 *
	 * @param $i int
	 * @return string
	 */
	function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array All synonyms
	 */
	function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * @return bool Whether the last replace() or substituteCallback() call
	 *   changed its input
	 */
	function getWasModified() {
		return $this->mModified;
	}

	/**
	 * Replace several magic words in one preg_replace() call.
	 *
	 * @param $magicarr array Map of magic word ID => replacement; the
	 *   replacements are passed to preg_replace() unescaped
	 * @param $subject string
	 * @param $result string Receives the resulting text
	 * @return bool Whether $result differs from $subject
	 */
	function replaceMultiple( $magicarr, $subject, &$result ) {
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return $result !== $subject;
	}

	/**
	 * Add an entry to $array for every synonym, keyed by the synonym
	 * lower-cased with $wgContLang->lc().
	 *
	 * @param $array array, modified in place
	 * @param $value mixed Value stored under each key
	 */
	function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool The case-sensitivity flag
	 */
	function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return mixed The magic word ID
	 */
	function getId() {
		return $this->mId;
	}
}
00638 
/**
 * A set of magic words handled together: builds combined hashes and regular
 * expressions over all synonyms of all member magic words, split into a
 * case-insensitive set (index 0) and a case-sensitive set (index 1).
 */
class MagicWordArray {
	/** IDs of the member magic words */
	public $names = array();
	/** Cached result of getHash(), or null */
	public $hash;
	/** Cached results of getBaseRegex() and getRegex(), or null */
	public $baseRegex, $regex;
	/** Not used by any method visible in this class */
	public $matches;

	/**
	 * @param $names array IDs of the magic words in the set
	 */
	function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add one magic word ID and invalidate the cached hash and regexes.
	 *
	 * @param $name string
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add several magic word IDs and invalidate the cached hash and regexes.
	 *
	 * @param $names array
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a two-element array of synonym => magic word ID maps. Index 0
	 * holds the case-insensitive words with synonyms lower-cased through
	 * $wgContLang->lc(); index 1 holds the case-sensitive words verbatim.
	 *
	 * @return array
	 */
	function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the undelimited base regexes: one alternation of named groups per
	 * case mode (0 = case-insensitive, 1 = case-sensitive). Each synonym
	 * gets a group named "<prefix>_<magic word ID>", which parseMatch()
	 * splits again at the first underscore.
	 *
	 * @return array
	 */
	function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					// Group names must start with a non-digit in PCRE 8.34+,
					// so spell the synonym index with letters. The prefix only
					// has to be unique; parseMatch() discards it.
					$prefix = strtr( (string)$i, '0123456789', 'abcdefghij' );
					$group = "(?P<{$prefix}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get the unanchored regexes, one per case mode. An entry is '' when
	 * there are no magic words for that mode.
	 *
	 * @return array
	 */
	function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get the unanchored regexes with the quoted "$1" placeholder turned
	 * into a "(.*?)" capture group.
	 *
	 * @return array
	 */
	function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get the regexes anchored at the start of the subject, one per case
	 * mode; '' for an empty mode.
	 *
	 * @return array
	 */
	function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get the regexes anchored at both ends with "$1" turned into a capture
	 * group, one per case mode; '' for an empty mode.
	 *
	 * @return array
	 */
	function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * @return array IDs of the member magic words
	 */
	public function getNames() {
		return $this->names;
	}

	/**
	 * Interpret a match array produced by one of this object's regexes.
	 *
	 * The first non-empty entry after element 0 is expected to be a named
	 * group "<prefix>_<magic word ID>". The parameter value is taken from
	 * two positions further on: one step skips the numeric duplicate of the
	 * named group, the next lands on the following group, which is the
	 * "$1" capture when the synonym has one.
	 *
	 * @param $m array Match array from preg_match()
	 * @return array ( magic word ID, parameter value ); the value is false
	 *   when the match array ends before that position
	 * @throws MWException if no usable named group is found
	 */
	function parseMatch( $m ) {
		reset( $m );
		// key()/current()/next() instead of each(), which was removed in
		// PHP 8.0. The pointer is advanced before the checks so it sits where
		// each() would have left it.
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			next( $m );
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			$magicName = $parts[1];
			$paramValue = next( $m );
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__ . ': parameter not found' );
	}

	/**
	 * Match the whole of $text against the variable regexes.
	 *
	 * @param $text string
	 * @return array ( magic word ID, parameter value ), or ( false, false )
	 *   when nothing matches
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				return $this->parseMatch( $m );
			}
		}
		return array( false, false );
	}

	/**
	 * Match the whole of $text against the synonyms by hash lookup: first
	 * verbatim in the case-sensitive set, then lower-cased in the
	 * case-insensitive set.
	 *
	 * @param $text string
	 * @return string|bool The magic word ID, or false when not found
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Find every magic word in $text and remove it.
	 *
	 * @param $text string, modified in place
	 * @return array Map of magic word ID => value reported by parseMatch()
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$matches = array();
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}

	/**
	 * If $text starts with one of the magic words, strip it off.
	 *
	 * @param $text string, modified in place
	 * @return string|bool The magic word ID, or false when the start of
	 *   $text matches none of them
	 */
	public function matchStartAndRemove( &$text ) {
		$regexes = $this->getRegexStart();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				if ( strlen( $m[0] ) >= strlen( $text ) ) {
					// Whole text consumed; avoid substr() returning false
					$text = '';
				} else {
					$text = substr( $text, strlen( $m[0] ) );
				}
				return $id;
			}
		}
		return false;
	}
}