MediaWiki  REL1_23
MagicWord.php
Go to the documentation of this file.
00001 <?php
/**
 * A single localisable "magic word": an ID, the list of synonyms that spell
 * it, and a case-sensitivity flag, together with lazily built PCRE patterns
 * for finding those synonyms in text.
 *
 * Instances are normally obtained through MagicWord::get(), which keeps one
 * object per ID in the static $mObjects registry.
 */
class MagicWord {
    /** Identifier of this magic word (constructor default: 0) */
    public $mId;
    /** Array of synonym strings for this magic word */
    public $mSynonyms;
    /** True when the synonyms must match case-sensitively */
    public $mCaseSensitive;
    /** Pattern matching any synonym anywhere in a string; '' until built */
    public $mRegex = '';
    /** Pattern matching any synonym at the start of a string; '' until built */
    public $mRegexStart = '';
    /** Pattern matching a string that consists of exactly one synonym; '' until built */
    public $mRegexStartToEnd = '';
    /** Delimiter-less alternation of the quoted synonyms; '' until built */
    public $mBaseRegex = '';
    /** Like $mRegex, with the quoted "$1" placeholder turned into a capture group */
    public $mVariableRegex = '';
    /** Like $mRegexStartToEnd, with the quoted "$1" placeholder turned into a capture group */
    public $mVariableStartToEndRegex = '';
    /** Whether the last replace()/substituteCallback() changed its input */
    public $mModified = false;
    /** Scratch flag set by pregRemoveAndRecord() during the *AndRemove() methods */
    public $mFound = false;

    /** True once the 'MagicWordwgVariableIDs' hook has been run */
    public static $mVariableIDsInitialised = false;

    /** IDs of the variable magic words known to this class */
    public static $mVariableIDs = array(
        'currentmonth',
        'currentmonth1',
        'currentmonthname',
        'currentmonthnamegen',
        'currentmonthabbrev',
        'currentday',
        'currentday2',
        'currentdayname',
        'currentyear',
        'currenttime',
        'currenthour',
        'localmonth',
        'localmonth1',
        'localmonthname',
        'localmonthnamegen',
        'localmonthabbrev',
        'localday',
        'localday2',
        'localdayname',
        'localyear',
        'localtime',
        'localhour',
        'numberofarticles',
        'numberoffiles',
        'numberofedits',
        'articlepath',
        'pageid',
        'sitename',
        'server',
        'servername',
        'scriptpath',
        'stylepath',
        'pagename',
        'pagenamee',
        'fullpagename',
        'fullpagenamee',
        'namespace',
        'namespacee',
        'namespacenumber',
        'currentweek',
        'currentdow',
        'localweek',
        'localdow',
        'revisionid',
        'revisionday',
        'revisionday2',
        'revisionmonth',
        'revisionmonth1',
        'revisionyear',
        'revisiontimestamp',
        'revisionuser',
        'revisionsize',
        'subpagename',
        'subpagenamee',
        'talkspace',
        'talkspacee',
        'subjectspace',
        'subjectspacee',
        'talkpagename',
        'talkpagenamee',
        'subjectpagename',
        'subjectpagenamee',
        'numberofusers',
        'numberofactiveusers',
        'numberofpages',
        'currentversion',
        'rootpagename',
        'rootpagenamee',
        'basepagename',
        'basepagenamee',
        'currenttimestamp',
        'localtimestamp',
        'directionmark',
        'contentlanguage',
        'numberofadmins',
        'numberofviews',
        'cascadingsources',
    );

    /** Array of caching hints for ParserCache, keyed by magic word ID */
    public static $mCacheTTLs = array(
        'currentmonth' => 86400,
        'currentmonth1' => 86400,
        'currentmonthname' => 86400,
        'currentmonthnamegen' => 86400,
        'currentmonthabbrev' => 86400,
        'currentday' => 3600,
        'currentday2' => 3600,
        'currentdayname' => 3600,
        'currentyear' => 86400,
        'currenttime' => 3600,
        'currenthour' => 3600,
        'localmonth' => 86400,
        'localmonth1' => 86400,
        'localmonthname' => 86400,
        'localmonthnamegen' => 86400,
        'localmonthabbrev' => 86400,
        'localday' => 3600,
        'localday2' => 3600,
        'localdayname' => 3600,
        'localyear' => 86400,
        'localtime' => 3600,
        'localhour' => 3600,
        'numberofarticles' => 3600,
        'numberoffiles' => 3600,
        'numberofedits' => 3600,
        'currentweek' => 3600,
        'currentdow' => 3600,
        'localweek' => 3600,
        'localdow' => 3600,
        'numberofusers' => 3600,
        'numberofactiveusers' => 3600,
        'numberofpages' => 3600,
        'currentversion' => 86400,
        'currenttimestamp' => 3600,
        'localtimestamp' => 3600,
        'pagesinnamespace' => 3600,
        'numberofadmins' => 3600,
        'numberofviews' => 3600,
        'numberingroup' => 3600,
    );

    /** IDs of the double-underscore magic words known to this class */
    public static $mDoubleUnderscoreIDs = array(
        'notoc',
        'nogallery',
        'forcetoc',
        'toc',
        'noeditsection',
        'newsectionlink',
        'nonewsectionlink',
        'hiddencat',
        'index',
        'noindex',
        'staticredirect',
        'notitleconvert',
        'nocontentconvert',
    );

    /** IDs of the substitution modifiers */
    public static $mSubstIDs = array(
        'subst',
        'safesubst',
    );

    /** Registry of loaded MagicWord objects, keyed by ID (filled by get()) */
    public static $mObjects = array();

    /** MagicWordArray of the double-underscore words; null until first requested */
    public static $mDoubleUnderscoreArray = null;

    /**
     * @param mixed $id Magic word ID
     * @param array|string $syn Synonym or list of synonyms; always stored as an array
     * @param bool $cs Whether the synonyms are case-sensitive
     */
    public function __construct( $id = 0, $syn = array(), $cs = false ) {
        $this->mId = $id;
        $this->mSynonyms = (array)$syn;
        $this->mCaseSensitive = $cs;
    }

    /**
     * Fetch the shared MagicWord object for an ID, creating and loading it on
     * first use.
     *
     * @param string $id Magic word ID
     * @return MagicWord Returned by reference to the registry entry
     */
    public static function &get( $id ) {
        if ( !isset( self::$mObjects[$id] ) ) {
            $word = new self();
            $word->load( $id );
            self::$mObjects[$id] = $word;
        }
        return self::$mObjects[$id];
    }

    /**
     * Get the list of variable IDs. The 'MagicWordwgVariableIDs' hook is run
     * once, on the first call, with a reference to the list.
     *
     * @return array
     */
    public static function getVariableIDs() {
        if ( !self::$mVariableIDsInitialised ) {
            # Let hook handlers extend the list before it is first handed out
            wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
            self::$mVariableIDsInitialised = true;
        }
        return self::$mVariableIDs;
    }

    /**
     * Get the list of substitution modifier IDs.
     *
     * @return array
     */
    public static function getSubstIDs() {
        return self::$mSubstIDs;
    }

    /**
     * Look up the caching hint for a magic word.
     *
     * @param string $id Magic word ID
     * @return int The value from $mCacheTTLs, or -1 when the ID has no entry
     */
    public static function getCacheTTL( $id ) {
        return array_key_exists( $id, self::$mCacheTTLs )
            ? self::$mCacheTTLs[$id]
            : -1;
    }

    /**
     * Get the MagicWordArray of double-underscore words. It is built on the
     * first call, after running the 'GetDoubleUnderscoreIDs' hook with a
     * reference to the ID list, and reused afterwards.
     *
     * @return MagicWordArray
     */
    public static function getDoubleUnderscoreArray() {
        if ( self::$mDoubleUnderscoreArray === null ) {
            wfRunHooks( 'GetDoubleUnderscoreIDs', array( &self::$mDoubleUnderscoreIDs ) );
            self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
        }
        return self::$mDoubleUnderscoreArray;
    }

    /**
     * Empty the registry of loaded MagicWord objects. Only $mObjects is
     * reset; the other static members are left as they are.
     */
    public static function clearCache() {
        self::$mObjects = array();
    }

    /**
     * Initialise this object for the given ID by handing it to
     * $wgContLang->getMagic(), which is expected to fill in the synonyms.
     *
     * @param string $id Magic word ID
     * @throws MWException When no synonyms are set after the getMagic() call
     */
    public function load( $id ) {
        global $wgContLang;
        wfProfileIn( __METHOD__ );
        $this->mId = $id;
        $wgContLang->getMagic( $this );
        if ( !$this->mSynonyms ) {
            // Leave a placeholder synonym on the object before reporting the error
            $this->mSynonyms = array( 'brionmademeputthishere' );
            wfProfileOut( __METHOD__ );
            throw new MWException( "Error: invalid magic word '$id'" );
        }
        wfProfileOut( __METHOD__ );
    }

    /**
     * Build every regex member from the current synonyms and case flag.
     */
    public function initRegex() {
        // Longest synonym first, so that it wins over a shorter one sharing
        // the same prefix inside the alternation
        $synonyms = $this->mSynonyms;
        usort( $synonyms, array( $this, 'compareStringLength' ) );

        $quoted = array();
        foreach ( $synonyms as $synonym ) {
            // '/' is the pattern delimiter, so it is quoted as well
            $quoted[] = preg_quote( $synonym, '/' );
        }
        $this->mBaseRegex = implode( '|', $quoted );

        $case = $this->mCaseSensitive ? '' : 'iu';
        $this->mRegex = "/{$this->mBaseRegex}/{$case}";
        $this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
        $this->mRegexStartToEnd = "/^(?:{$this->mBaseRegex})$/{$case}";
        // preg_quote() turned the "$1" placeholder into "\$1"; swap that for a
        // lazy capture group in the variable flavours of the patterns
        $this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
        $this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
            $this->mRegexStartToEnd );
    }

    /**
     * usort() callback ordering strings by byte length, longest first.
     *
     * @param string $s1
     * @param string $s2
     * @return int 1 if $s1 is shorter, -1 if $s1 is longer, 0 if equal length
     */
    public function compareStringLength( $s1, $s2 ) {
        $len1 = strlen( $s1 );
        $len2 = strlen( $s2 );
        if ( $len1 === $len2 ) {
            return 0;
        }
        return ( $len1 < $len2 ) ? 1 : -1;
    }

    /**
     * Get the pattern that matches any synonym anywhere in a string.
     *
     * @return string
     */
    public function getRegex() {
        if ( $this->mRegex == '' ) {
            $this->initRegex();
        }
        return $this->mRegex;
    }

    /**
     * Get the PCRE modifiers implied by the case flag. Also builds the
     * patterns if that has not happened yet.
     *
     * @return string '' when case-sensitive, 'iu' otherwise
     */
    public function getRegexCase() {
        if ( $this->mRegex === '' ) {
            $this->initRegex();
        }

        return $this->mCaseSensitive ? '' : 'iu';
    }

    /**
     * Get the pattern that matches a synonym at the start of a string.
     *
     * @return string
     */
    public function getRegexStart() {
        if ( $this->mRegex == '' ) {
            $this->initRegex();
        }
        return $this->mRegexStart;
    }

    /**
     * Get the pattern that matches a string consisting of exactly one synonym.
     *
     * @return string
     */
    public function getRegexStartToEnd() {
        if ( $this->mRegexStartToEnd == '' ) {
            $this->initRegex();
        }
        return $this->mRegexStartToEnd;
    }

    /**
     * Get the alternation of quoted synonyms, without delimiters or modifiers.
     *
     * @return string
     */
    public function getBaseRegex() {
        if ( $this->mRegex == '' ) {
            $this->initRegex();
        }
        return $this->mBaseRegex;
    }

    /**
     * Tell whether any synonym occurs in the text.
     *
     * @param string $text
     * @return bool
     */
    public function match( $text ) {
        return (bool)preg_match( $this->getRegex(), $text );
    }

    /**
     * Tell whether the text starts with a synonym.
     *
     * @param string $text
     * @return bool
     */
    public function matchStart( $text ) {
        return (bool)preg_match( $this->getRegexStart(), $text );
    }

    /**
     * Tell whether the whole text is a synonym.
     *
     * @param string $text
     * @return bool
     */
    public function matchStartToEnd( $text ) {
        return (bool)preg_match( $this->getRegexStartToEnd(), $text );
    }

    /**
     * Match the whole text against the variable pattern and return the value
     * captured for the "$1" placeholder.
     *
     * @param string $text
     * @return string|null Null when the text does not match (or nothing
     *   non-empty was matched); the whole match when the matching synonym has
     *   no non-empty capture; otherwise the captured value
     */
    public function matchVariableStartToEnd( $text ) {
        $matches = array();
        $matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
        if ( !$matchcount ) {
            return null;
        }

        # multiple matched parts (variable match); some will be empty because of
        # synonyms. The variable will be the second non-empty one, so drop the
        # empty elements and renumber the rest.
        # See also bug 6526
        #
        # Only genuinely empty strings are dropped: a plain array_filter()
        # would also discard a captured "0" and make the whole match be
        # returned in its place.
        $nonEmpty = array();
        foreach ( $matches as $part ) {
            if ( (string)$part !== '' ) {
                $nonEmpty[] = $part;
            }
        }

        if ( isset( $nonEmpty[1] ) ) {
            return $nonEmpty[1];
        }
        // Either a lone whole-string match, or (for an empty match) nothing at all
        return isset( $nonEmpty[0] ) ? $nonEmpty[0] : null;
    }

    /**
     * Remove every occurrence of the synonyms from the text.
     *
     * @param string &$text Modified in place
     * @return bool Whether at least one occurrence was found
     */
    public function matchAndRemove( &$text ) {
        $this->mFound = false;
        $text = preg_replace_callback(
            $this->getRegex(),
            array( $this, 'pregRemoveAndRecord' ),
            $text
        );
        return $this->mFound;
    }

    /**
     * Remove a synonym from the start of the text.
     *
     * @param string &$text Modified in place
     * @return bool Whether a synonym was found at the start
     */
    public function matchStartAndRemove( &$text ) {
        $this->mFound = false;
        $text = preg_replace_callback(
            $this->getRegexStart(),
            array( $this, 'pregRemoveAndRecord' ),
            $text
        );
        return $this->mFound;
    }

    /**
     * preg_replace_callback() handler for the *AndRemove() methods: records
     * that a match happened and replaces it with nothing.
     *
     * @return string Always ''
     */
    public function pregRemoveAndRecord() {
        $this->mFound = true;
        return '';
    }

    /**
     * Replace occurrences of the synonyms with a literal replacement. The
     * replacement is passed through StringUtils::escapeRegexReplacement()
     * first. Afterwards getWasModified() tells whether anything changed.
     *
     * @param string $replacement
     * @param string $subject
     * @param int $limit Passed to preg_replace(); -1 means no limit
     * @return string
     */
    public function replace( $replacement, $subject, $limit = -1 ) {
        $escaped = StringUtils::escapeRegexReplacement( $replacement );
        $res = preg_replace( $this->getRegex(), $escaped, $subject, $limit );
        $this->mModified = $res !== $subject;
        return $res;
    }

    /**
     * Run a callback over every match of the variable pattern. Afterwards
     * getWasModified() tells whether anything changed.
     *
     * @param string $text
     * @param callable $callback Passed to preg_replace_callback()
     * @return string
     */
    public function substituteCallback( $text, $callback ) {
        $res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
        $this->mModified = $res !== $text;
        return $res;
    }

    /**
     * Get the anywhere-in-string pattern with the "$1" placeholder capturing.
     *
     * @return string
     */
    public function getVariableRegex() {
        if ( $this->mVariableRegex == '' ) {
            $this->initRegex();
        }
        return $this->mVariableRegex;
    }

    /**
     * Get the whole-string pattern with the "$1" placeholder capturing.
     *
     * @return string
     */
    public function getVariableStartToEndRegex() {
        if ( $this->mVariableStartToEndRegex == '' ) {
            $this->initRegex();
        }
        return $this->mVariableStartToEndRegex;
    }

    /**
     * Get one synonym by its index in the synonym list. No bounds check is
     * made on the index.
     *
     * @param int $i
     * @return string
     */
    public function getSynonym( $i ) {
        return $this->mSynonyms[$i];
    }

    /**
     * @return array All synonyms
     */
    public function getSynonyms() {
        return $this->mSynonyms;
    }

    /**
     * Tell whether the last call to replace() or substituteCallback()
     * changed its input.
     *
     * @return bool
     */
    public function getWasModified() {
        return $this->mModified;
    }

    /**
     * Apply several magic word replacements to one subject in a single
     * preg_replace() call.
     *
     * Unlike replace(), the replacement strings are passed to preg_replace()
     * as they are, without escaping.
     *
     * @param array $magicarr Map of magic word ID => replacement string
     * @param string $subject
     * @param string &$result Receives the replaced text
     * @return bool Whether the result differs from the subject
     */
    public function replaceMultiple( $magicarr, $subject, &$result ) {
        $search = array();
        $replace = array();
        foreach ( $magicarr as $id => $replacement ) {
            $search[] = self::get( $id )->getRegex();
            $replace[] = $replacement;
        }

        $result = preg_replace( $search, $replace, $subject );
        return $result !== $subject;
    }

    /**
     * Add an entry to the array for every synonym, keyed by the synonym
     * lowercased with $wgContLang->lc(), all pointing at the same value.
     *
     * @param array &$array Modified in place
     * @param mixed $value
     */
    public function addToArray( &$array, $value ) {
        global $wgContLang;
        foreach ( $this->mSynonyms as $synonym ) {
            $array[$wgContLang->lc( $synonym )] = $value;
        }
    }

    /**
     * @return bool Whether the synonyms are case-sensitive
     */
    public function isCaseSensitive() {
        return $this->mCaseSensitive;
    }

    /**
     * @return mixed The magic word ID
     */
    public function getId() {
        return $this->mId;
    }
}
00671 
/**
 * A set of magic words, identified by ID, that can be matched against text
 * together.
 *
 * The derived lookup structures (hash and regexes) are built lazily and come
 * in pairs indexed by case-sensitivity: index 0 holds the case-insensitive
 * words, index 1 the case-sensitive ones.
 */
class MagicWordArray {
    /** Array of magic word IDs in this set */
    public $names = array();
    /** Lookup table built by getHash(); null until built */
    public $hash;
    /** Pair of alternations built by getBaseRegex(); null until built */
    public $baseRegex;
    /** Pair of full patterns built by getRegex(); null until built */
    public $regex;
    /** Not used by any method of this class */
    public $matches;

    /**
     * @param array $names Magic word IDs
     */
    public function __construct( $names = array() ) {
        $this->names = $names;
    }

    /**
     * Add a magic word by ID and discard the derived lookup structures.
     *
     * @param string $name Magic word ID
     */
    public function add( $name ) {
        $this->names[] = $name;
        $this->hash = $this->baseRegex = $this->regex = null;
    }

    /**
     * Add several magic words by ID and discard the derived lookup
     * structures. The keys of the given array are ignored.
     *
     * @param array $names Magic word IDs
     */
    public function addArray( $names ) {
        $this->names = array_merge( $this->names, array_values( $names ) );
        $this->hash = $this->baseRegex = $this->regex = null;
    }

    /**
     * Get a two-part table mapping synonym => magic word ID. Part 0 holds
     * the case-insensitive words, keyed by the synonym lowercased with
     * $wgContLang->lc(); part 1 holds the case-sensitive ones, keyed by the
     * synonym as is.
     *
     * @return array
     */
    public function getHash() {
        if ( $this->hash === null ) {
            global $wgContLang;
            $this->hash = array( 0 => array(), 1 => array() );
            foreach ( $this->names as $name ) {
                $magic = MagicWord::get( $name );
                $case = intval( $magic->isCaseSensitive() );
                foreach ( $magic->getSynonyms() as $syn ) {
                    $key = $case ? $syn : $wgContLang->lc( $syn );
                    $this->hash[$case][$key] = $name;
                }
            }
        }
        return $this->hash;
    }

    /**
     * Get the pair of delimiter-less alternations, one named group per
     * synonym. A group is named "<index>_<id>", where <index> is the
     * synonym's position with its digits mapped to the letters a-j.
     *
     * @return array Index 0: case-insensitive, index 1: case-sensitive;
     *   '' where there are no words of that kind
     */
    public function getBaseRegex() {
        if ( $this->baseRegex === null ) {
            $this->baseRegex = array( 0 => '', 1 => '' );
            foreach ( $this->names as $name ) {
                $magic = MagicWord::get( $name );
                $case = intval( $magic->isCaseSensitive() );
                foreach ( $magic->getSynonyms() as $i => $syn ) {
                    // Group name must start with a non-digit in PCRE 8.34+
                    $it = strtr( $i, '0123456789', 'abcdefghij' );
                    $group = "(?P<{$it}_{$name}>" . preg_quote( $syn, '/' ) . ')';
                    $separator = ( $this->baseRegex[$case] === '' ) ? '' : '|';
                    $this->baseRegex[$case] .= $separator . $group;
                }
            }
        }
        return $this->baseRegex;
    }

    /**
     * Get the pair of patterns matching any synonym anywhere in a string.
     *
     * @return array Index 0: case-insensitive, index 1: case-sensitive;
     *   '' where there are no words of that kind
     */
    public function getRegex() {
        if ( $this->regex === null ) {
            $base = $this->getBaseRegex();
            $this->regex = array( '', '' );
            if ( $base[0] !== '' ) {
                $this->regex[0] = "/{$base[0]}/iuS";
            }
            if ( $base[1] !== '' ) {
                $this->regex[1] = "/{$base[1]}/S";
            }
        }
        return $this->regex;
    }

    /**
     * Get the pair from getRegex() with the quoted "$1" placeholder replaced
     * by a lazy capture group.
     *
     * @return array
     */
    public function getVariableRegex() {
        return str_replace( "\\$1", "(.*?)", $this->getRegex() );
    }

    /**
     * Get the pair of patterns matching a synonym at the start of a string.
     *
     * @return array Index 0: case-insensitive, index 1: case-sensitive;
     *   '' where there are no words of that kind
     */
    public function getRegexStart() {
        $base = $this->getBaseRegex();
        $anchored = array( '', '' );
        if ( $base[0] !== '' ) {
            $anchored[0] = "/^(?:{$base[0]})/iuS";
        }
        if ( $base[1] !== '' ) {
            $anchored[1] = "/^(?:{$base[1]})/S";
        }
        return $anchored;
    }

    /**
     * Get the pair of patterns matching a string that is exactly one
     * synonym, with the quoted "$1" placeholder replaced by a lazy capture
     * group.
     *
     * @return array Index 0: case-insensitive, index 1: case-sensitive;
     *   '' where there are no words of that kind
     */
    public function getVariableStartToEndRegex() {
        $base = $this->getBaseRegex();
        $anchored = array( '', '' );
        if ( $base[0] !== '' ) {
            $anchored[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
        }
        if ( $base[1] !== '' ) {
            $anchored[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
        }
        return $anchored;
    }

    /**
     * @return array The magic word IDs in this set
     */
    public function getNames() {
        return $this->names;
    }

    /**
     * Interpret a match array produced by one of this object's patterns.
     *
     * The first entry, other than the whole match at key 0, that has a
     * non-empty value identifies the synonym that matched. The entry two
     * positions further on (past the numeric duplicate that PCRE adds for a
     * named group) is returned as the parameter value.
     *
     * @param array $m Match array from preg_match() or preg_match_all()
     * @return array ( magic word ID, parameter value ); the value is false
     *   when the match array has no entry at that position
     * @throws MWException When the identifying key is not of the form
     *   "<prefix>_<id>", or when no non-empty entry exists
     */
    public function parseMatch( $m ) {
        reset( $m );
        // Walk with the internal array pointer so that the next() call
        // further down continues from where this loop stopped. each() used to
        // do this job but no longer exists as of PHP 8.0.
        while ( ( $key = key( $m ) ) !== null ) {
            $value = current( $m );
            next( $m );
            if ( $key === 0 || $value === '' ) {
                continue;
            }
            $parts = explode( '_', $key, 2 );
            if ( count( $parts ) != 2 ) {
                // This shouldn't happen
                throw new MWException( __METHOD__ . ': bad parameter name' );
            }
            $magicName = $parts[1];
            // The pointer now sits on the numeric duplicate of the named
            // group; the entry after it is the parameter
            $paramValue = next( $m );
            return array( $magicName, $paramValue );
        }
        // This shouldn't happen either
        throw new MWException( __METHOD__ . ': parameter not found' );
    }

    /**
     * Match the whole text against the variable patterns, trying the
     * case-insensitive pattern first.
     *
     * @param string $text
     * @return array ( magic word ID, parameter value ) as returned by
     *   parseMatch(), or ( false, false ) when nothing matches
     */
    public function matchVariableStartToEnd( $text ) {
        foreach ( $this->getVariableStartToEndRegex() as $regex ) {
            if ( $regex === '' ) {
                continue;
            }
            $m = array();
            if ( preg_match( $regex, $text, $m ) ) {
                return $this->parseMatch( $m );
            }
        }
        return array( false, false );
    }

    /**
     * Match the whole text against the synonyms using the hash table. The
     * case-sensitive table is tried first with the text as given, then the
     * case-insensitive table with the text lowercased by $wgContLang->lc().
     *
     * @param string $text
     * @return string|bool The magic word ID, or false when nothing matches
     */
    public function matchStartToEnd( $text ) {
        $hash = $this->getHash();
        if ( isset( $hash[1][$text] ) ) {
            return $hash[1][$text];
        }
        global $wgContLang;
        $lc = $wgContLang->lc( $text );
        return isset( $hash[0][$lc] ) ? $hash[0][$lc] : false;
    }

    /**
     * Find every occurrence of the magic words in the text and remove them.
     *
     * @param string &$text Modified in place
     * @return array Map of magic word ID => parameter value; a later
     *   occurrence of the same ID overwrites an earlier one
     */
    public function matchAndRemove( &$text ) {
        $found = array();
        foreach ( $this->getRegex() as $regex ) {
            if ( $regex === '' ) {
                continue;
            }
            $matches = array();
            preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
            foreach ( $matches as $m ) {
                list( $name, $param ) = $this->parseMatch( $m );
                $found[$name] = $param;
            }
            $text = preg_replace( $regex, '', $text );
        }
        return $found;
    }

    /**
     * If the text starts with one of the magic words, remove it from the
     * text. The case-insensitive pattern is tried first.
     *
     * @param string &$text Modified in place when a word is found
     * @return string|bool The ID of the removed magic word, or false when
     *   the text starts with none
     */
    public function matchStartAndRemove( &$text ) {
        foreach ( $this->getRegexStart() as $regex ) {
            if ( $regex === '' ) {
                continue;
            }
            $m = array();
            if ( !preg_match( $regex, $text, $m ) ) {
                continue;
            }
            $parsed = $this->parseMatch( $m );
            $matchedLength = strlen( $m[0] );
            // substr() with a start at the end of the string does not return
            // '' on every PHP version, so that case is handled explicitly
            $text = ( $matchedLength >= strlen( $text ) )
                ? ''
                : substr( $text, $matchedLength );
            return $parsed[0];
        }
        return false;
    }
}