MediaWiki  REL1_19
MagicWord.php
Go to the documentation of this file.
00001 <?php
/**
 * A single magic word: an identifier together with its synonyms and a
 * case-sensitivity flag, plus lazily built regular expressions that find
 * those synonyms in text.
 *
 * Instances are normally obtained through MagicWord::get(), which caches one
 * object per ID in self::$mObjects and fills it in via $wgContLang->getMagic().
 *
 * A synonym may contain the placeholder "$1"; the "variable" regexes turn that
 * placeholder into a lazy capture group "(.*?)".
 */
class MagicWord {
	/** Magic word ID, its list of synonyms, and whether matching is case-sensitive. */
	public $mId, $mSynonyms, $mCaseSensitive;
	/** Regex matching any synonym anywhere; '' until initRegex() has run. */
	public $mRegex = '';
	/** Regex matching any synonym at the start of the text. */
	public $mRegexStart = '';
	/** Undelimited alternation of the quoted synonyms, longest first. */
	public $mBaseRegex = '';
	/** Like $mRegex, with "$1" replaced by a capture group. */
	public $mVariableRegex = '';
	/** Anchored at both ends, with "$1" replaced by a capture group. */
	public $mVariableStartToEndRegex = '';
	/** Whether the last replace()/substituteCallback() changed its input. */
	public $mModified = false;
	/** Scratch flag set by pregRemoveAndRecord() during the *AndRemove() calls. */
	public $mFound = false;

	/** True once the 'MagicWordwgVariableIDs' hook has been run. */
	static public $mVariableIDsInitialised = false;

	/** IDs of the variable-like magic words; extended by hook in getVariableIDs(). */
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
	);

	/**
	 * Array of caching hints for ParserCache, keyed by magic word ID.
	 * NOTE(review): the values look like durations in seconds (3600 / 86400) - confirm.
	 */
	static public $mCacheTTLs = array(
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	/** IDs handed to the MagicWordArray built by getDoubleUnderscoreArray(). */
	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	/** IDs returned by getSubstIDs(). */
	static public $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	/** Cache of MagicWord objects created by get(), keyed by ID. */
	static public $mObjects = array();
	/** Lazily created MagicWordArray for self::$mDoubleUnderscoreIDs. */
	static public $mDoubleUnderscoreArray = null;

	/**
	 * @param $id mixed Magic word ID
	 * @param $syn array|string Synonym or list of synonyms; always stored as an array
	 * @param $cs bool Whether matching is case-sensitive
	 */
	public function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Fetch the cached MagicWord for an ID, creating and loading it on first use.
	 *
	 * @param $id string Magic word ID
	 * @return MagicWord The shared instance stored in self::$mObjects (returned by reference)
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get the list of variable IDs. On the first call the
	 * 'MagicWordwgVariableIDs' hook is run with the list passed by reference,
	 * so handlers can modify it in place.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get the list of substitution modifier IDs.
	 *
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Look up the caching hint for a magic word.
	 *
	 * @param $id string Magic word ID
	 * @return int The entry from self::$mCacheTTLs, or -1 when the ID has none
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get the MagicWordArray of double-underscore words, building it on first use.
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Drop every MagicWord object cached by get().
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialise this object for the given ID by asking the content language
	 * ($wgContLang->getMagic()) to fill it in.
	 *
	 * @param $id string Magic word ID
	 */
	public function load( $id ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// Unknown IDs are logged instead of raising an MWException. The
			// junk synonym keeps the list non-empty; presumably it is chosen
			// so that it never matches real wikitext.
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Build all regex members from the current synonyms and case flag.
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned "$1" into "\$1"; swap that for a lazy capture group.
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * usort() callback ordering strings by byte length, longest first.
	 *
	 * @param $s1 string
	 * @param $s2 string
	 * @return int 1 if $s1 is shorter, -1 if it is longer, 0 if equal length
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Get a regex that matches any synonym anywhere in a string.
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Get the PCRE modifiers used by this word's regexes.
	 *
	 * @return string '' when case-sensitive, 'iu' otherwise
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Get a regex that matches any synonym at the start of a string.
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * Get the synonym alternation without delimiters or modifiers.
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Check whether any synonym occurs anywhere in the text.
	 *
	 * @param $text string
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Check whether the text starts with any synonym.
	 *
	 * @param $text string
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Match the whole text against the synonyms and extract the "$1" part.
	 *
	 * @param $text string
	 * @return string|null Null when nothing matches (or the match is entirely
	 *   empty); the captured "$1" value when there is one; otherwise the whole
	 *   matched string.
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( $matchcount == 0 ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Only empty strings are dropped: a bare array_filter() would also
		# discard the string "0", losing a legitimate "$1" value of "0".
		$parts = array();
		foreach ( $matches as $part ) {
			if ( $part !== '' ) {
				$parts[] = $part;
			}
		}

		if ( count( $parts ) > 1 ) {
			return $parts[1];
		}
		// Either only the whole match is non-empty, or everything was empty.
		return isset( $parts[0] ) ? $parts[0] : null;
	}

	/**
	 * Remove every occurrence of any synonym from the text.
	 *
	 * @param $text string Text to modify in place
	 * @return bool Whether at least one occurrence was found
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		// Objects are passed by handle, so the callback needs no reference to $this.
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Remove a synonym from the start of the text, if one is there.
	 *
	 * @param $text string Text to modify in place
	 * @return bool Whether a leading synonym was found
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * preg_replace_callback() handler for the *AndRemove() methods: records
	 * that a match happened and replaces it with nothing.
	 *
	 * @return string Always ''
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replace occurrences of any synonym with a literal replacement string.
	 * The replacement is escaped with StringUtils::escapeRegexReplacement()
	 * before being handed to preg_replace().
	 *
	 * @param $replacement string
	 * @param $subject string
	 * @param $limit int Passed through to preg_replace(); -1 means no limit
	 * @return string Result of preg_replace(); getWasModified() reports whether it differs
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = $res !== $subject;
		return $res;
	}

	/**
	 * Run a callback over every match of the variable regex (the one in which
	 * "$1" is a capture group).
	 *
	 * @param $text string
	 * @param $callback callable Passed to preg_replace_callback()
	 * @return string Result of preg_replace_callback(); getWasModified() reports whether it differs
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = $res !== $text;
		return $res;
	}

	/**
	 * Get the regex matching any synonym, with "$1" as a capture group.
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Get the regex matching a whole string, with "$1" as a capture group.
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Get one synonym by index. No bounds check is performed.
	 *
	 * @param $i int
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array All synonyms
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * @return bool Whether the last replace() or substituteCallback() call
	 *   produced a result different from its input
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * Apply several magic word replacements to a subject in one preg_replace() call.
	 *
	 * Unlike replace(), the replacement strings are NOT escaped, so preg
	 * back-references inside them are interpreted by preg_replace().
	 *
	 * @param $magicarr array Map of magic word ID => replacement string
	 * @param $subject string
	 * @param $result string Receives the result of preg_replace()
	 * @return bool Whether the result differs from $subject
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return $result !== $subject;
	}

	/**
	 * Add an entry to the array for every synonym of this word, keyed by the
	 * synonym lower-cased with $wgContLang->lc().
	 *
	 * @param $array array Array to fill, modified in place
	 * @param $value mixed Value stored under each key
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return mixed The magic word ID
	 */
	public function getId() {
		return $this->mId;
	}
}
00620 
/**
 * A set of magic words that can be matched together.
 *
 * The words are combined into at most two regexes, one for the
 * case-insensitive words (index 0) and one for the case-sensitive ones
 * (index 1). Each synonym becomes a named group "<index>_<magic word ID>" so
 * that a match can be traced back to its magic word by parseMatch().
 */
class MagicWordArray {
	/** List of magic word IDs in this set. */
	public $names = array();
	/** Lazily built lookup: [caseSensitive][synonym] => ID; null when stale. */
	public $hash;
	/** Lazily built regex arrays indexed 0 / 1 as described on the class; null when stale. */
	public $baseRegex, $regex;
	/** Not used by any method of this class. */
	public $matches;

	/**
	 * @param $names array List of magic word IDs
	 */
	public function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by ID and invalidate the cached hash and regexes.
	 *
	 * @param $name string Magic word ID
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add several magic words by ID and invalidate the cached hash and regexes.
	 *
	 * @param $names array Magic word IDs; array keys are ignored
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get the synonym lookup table, building it on first use.
	 *
	 * @return array Two maps of synonym => magic word ID: index 0 holds the
	 *   case-insensitive words keyed by $wgContLang->lc( synonym ), index 1
	 *   the case-sensitive words keyed by the synonym as-is.
	 */
	public function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the undelimited alternations of named groups, building them on first use.
	 *
	 * @return array Index 0: case-insensitive words, index 1: case-sensitive
	 *   words; an entry is '' when there are no words of that kind.
	 */
	public function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					// Group name must start with a non-digit in PCRE 8.34+
					$it = strtr( $i, '0123456789', 'abcdefghij' );
					$group = "(?P<{$it}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get unanchored regexes matching any synonym, building them on first use.
	 *
	 * @return array Index 0: case-insensitive regex, index 1: case-sensitive
	 *   regex; an entry is '' when there are no words of that kind.
	 */
	public function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get the unanchored regexes with "$1" turned into a capture group.
	 *
	 * @return array Same layout as getRegex()
	 */
	public function getVariableRegex() {
		// preg_quote() turned "$1" into "\$1"; str_replace() handles the array element-wise.
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get regexes matching any synonym at the start of a string.
	 *
	 * @return array Same layout as getRegex()
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get regexes matching a whole string, with "$1" turned into a capture group.
	 *
	 * @return array Same layout as getRegex()
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * Work out which magic word a preg match array belongs to.
	 *
	 * The first entry (other than index 0) with a non-empty value must be one
	 * of the named groups produced by getBaseRegex(). The parameter is the
	 * element two positions after it, i.e. past the numeric duplicate that
	 * preg adds for every named group.
	 *
	 * @param $m array Match array from preg_match(), or one set from
	 *   preg_match_all() with PREG_SET_ORDER
	 * @return array ( magic word ID, parameter value ); the parameter is false
	 *   when the match array has no element in that position
	 * @throws MWException When the first non-empty key is not of the form
	 *   "<prefix>_<ID>", or when every entry after index 0 is empty
	 */
	public function parseMatch( $m ) {
		// each() was removed in PHP 8.0; key()/current()/next() walk the
		// internal pointer in exactly the same way.
		reset( $m );
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			next( $m );
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			list( /* $synIndex */, $magicName ) = $parts;
			// The pointer now sits on the numeric duplicate of the named
			// group; one more step reaches the captured parameter, if any.
			$paramValue = next( $m );
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__ . ': parameter not found' );
	}

	/**
	 * Match the whole text against the variable regexes.
	 *
	 * @param $text string
	 * @return array ( magic word ID, parameter value ) as returned by
	 *   parseMatch(), or ( false, false ) when nothing matches
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex !== '' ) {
				$m = array();
				if ( preg_match( $regex, $text, $m ) ) {
					return $this->parseMatch( $m );
				}
			}
		}
		return array( false, false );
	}

	/**
	 * Match the whole text against the plain synonyms using the hash table.
	 * The case-sensitive words are tried first, then the case-insensitive
	 * ones against $wgContLang->lc( $text ).
	 *
	 * @param $text string
	 * @return string|bool The magic word ID, or false when nothing matches
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Find every occurrence of any synonym in the text and remove it.
	 *
	 * @param $text string Text to modify in place
	 * @return array Map of magic word ID => parameter value (see parseMatch())
	 *   for each word found; a later occurrence of the same word overwrites
	 *   an earlier one
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}

	/**
	 * If the text starts with a synonym, strip it and report which word it was.
	 *
	 * @param $text string Text to modify in place
	 * @return string|bool The magic word ID, or false when the text does not
	 *   start with any synonym
	 */
	public function matchStartAndRemove( &$text ) {
		$regexes = $this->getRegexStart();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				// Handle the fully consumed case explicitly so $text is
				// always a string, whatever substr() returns at end-of-string.
				if ( strlen( $m[0] ) >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, strlen( $m[0] ) );
				}
				return $id;
			}
		}
		return false;
	}
}