[ Index ]

PHP Cross Reference of moodle-2.8

title

Body

[close]

/lib/ -> lexer.php (source)

   1  <?php
   2  
   3  /**
   4   * PHP lexer code snarfed from the CVS tree for the lamplib project at
   5   * http://sourceforge.net/projects/lamplib
   6   * This project is administered by Markus Baker, Harry Fuecks and Matt
   7   * Mitchell, and the project code is in the public domain.
   8   * 
   9   * Thanks, guys!
  10   *
  11   * @package   moodlecore
  12   * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
  13   * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
  14   */
  15  
  16      /** LEXER_ENTER = 1 */
  17      define("LEXER_ENTER", 1);
  18      /** LEXER_MATCHED = 2 */
  19      define("LEXER_MATCHED", 2);
  20      /** LEXER_UNMATCHED = 3 */
  21      define("LEXER_UNMATCHED", 3);
  22      /** LEXER_EXIT = 4 */
  23      define("LEXER_EXIT", 4);
  24      /** LEXER_SPECIAL = 5 */
  25      define("LEXER_SPECIAL", 5);
  26      
  27      /**
  28       * Compounded regular expression. Any of
  29       * the contained patterns could match and
  30       * when one does it's label is returned.
  31       * @package   moodlecore
  32       * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
  33       * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
  34       */
  35      class ParallelRegex {
  36          var $_patterns;
  37          var $_labels;
  38          var $_regex;
  39          var $_case;
  40          
  41          /**
  42           *    Constructor. Starts with no patterns.
  43           *    @param bool $case    True for case sensitive, false
  44           *                    for insensitive.
  45           *    @access public
  46           */
  47          function ParallelRegex($case) {
  48              $this->_case = $case;
  49              $this->_patterns = array();
  50              $this->_labels = array();
  51              $this->_regex = null;
  52          }
  53          
  54          /**
  55           *    Adds a pattern with an optional label.
  56           *    @param string $pattern      Perl style regex, but ( and )
  57           *                         lose the usual meaning.
  58           *    @param string $label        Label of regex to be returned
  59           *                         on a match.
  60           *    @access public
  61           */
  62          function addPattern($pattern, $label = true) {
  63              $count = count($this->_patterns);
  64              $this->_patterns[$count] = $pattern;
  65              $this->_labels[$count] = $label;
  66              $this->_regex = null;
  67          }
  68          
  69          /**
  70           *    Attempts to match all patterns at once against
  71           *    a string.
  72           *    @param string $subject      String to match against.
  73           *    @param string $match        First matched portion of
  74           *                         subject.
  75           *    @return bool             True on success.
  76           *    @access public
  77           */
  78          function match($subject, &$match) {
  79              if (count($this->_patterns) == 0) {
  80                  return false;
  81              }
  82              if (!preg_match($this->_getCompoundedRegex(), $subject, $matches)) {
  83                  $match = "";
  84                  return false;
  85              }
  86              $match = $matches[0];
  87              for ($i = 1; $i < count($matches); $i++) {
  88                  if ($matches[$i]) {
  89                      return $this->_labels[$i - 1];
  90                  }
  91              }
  92              return true;
  93          }
  94          
  95          /**
  96           *    Compounds the patterns into a single
  97           *    regular expression separated with the
  98           *    "or" operator. Caches the regex.
  99           *    Will automatically escape (, ) and / tokens.
 100           *    @access private
 101           */
 102          function _getCompoundedRegex() {
 103              if ($this->_regex == null) {
 104                  for ($i = 0; $i < count($this->_patterns); $i++) {
 105                      $this->_patterns[$i] = '(' . str_replace(
 106                              array('/', '(', ')'),
 107                              array('\/', '\(', '\)'),
 108                              $this->_patterns[$i]) . ')';
 109                  }
 110                  $this->_regex = "/" . implode("|", $this->_patterns) . "/" . $this->_getPerlMatchingFlags();
 111              }
 112              return $this->_regex;
 113          }
 114          
 115          /**
 116           *    Accessor for perl regex mode flags to use.
 117           *    @return string       Flags as string.
 118           *    @access private
 119           */
 120          function _getPerlMatchingFlags() {
 121              return ($this->_case ? "msS" : "msSi");
 122          }
 123      }
 124      
 125      /**
 126       * States for a stack machine.
 127       *
 128       * @package   moodlecore
 129       * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
 130       * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
 131       */
 132      class StateStack {
 133          var $_stack;
 134          
 135          /**
 136           *    Constructor. Starts in named state.
 137           *    @param string $start        Starting state name.
 138           *    @access public
 139           */
 140          function StateStack($start) {
 141              $this->_stack = array($start);
 142          }
 143          
 144          /**
 145           *    Accessor for current state.
 146           *    @return string State as string.
 147           *    @access public
 148           */
 149          function getCurrent() {
 150              return $this->_stack[count($this->_stack) - 1];
 151          }
 152          
 153          /**
 154           *    Adds a state to the stack and sets it
 155           *    to be the current state.
 156           *    @param string $state        New state.
 157           *    @access public
 158           */
 159          function enter($state) {
 160              array_push($this->_stack, $state);
 161          }
 162          
 163          /**
 164           *    Leaves the current state and reverts
 165           *    to the previous one.
 166           *    @return bool     False if we drop off
 167           *                the bottom of the list.
 168           *    @access public
 169           */
 170          function leave() {
 171              if (count($this->_stack) == 1) {
 172                  return false;
 173              }
 174              array_pop($this->_stack);
 175              return true;
 176          }
 177      }
 178      
 179      /**
 180       * Accepts text and breaks it into tokens.
 181       * Some optimisation to make the sure the
 182       * content is only scanned by the PHP regex
 183       * parser once. Lexer modes must not start
 184       * with leading underscores.
 185       *
 186       * @package   moodlecore
 187       * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
 188       * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
 189       */
 190      class Lexer {
 191          var $_regexes;
 192          var $_parser;
 193          var $_mode;
 194          var $_mode_handlers;
 195          var $_case;
 196          
 197          /**
 198           *    Sets up the lexer in case insensitive matching
 199           *    by default.
 200           *    @param object $parser     Handling strategy by
 201           *                       reference.
 202           *    @param string $start      Starting handler.
 203           *    @param bool $case       True for case sensitive.
 204           *    @access public
 205           */
 206          function Lexer(&$parser, $start = "accept", $case = false) {
 207              $this->_case = $case;
 208              $this->_regexes = array();
 209              $this->_parser = &$parser;
 210              $this->_mode = new StateStack($start);
 211              $this->_mode_handlers = array();
 212          }
 213          
 214          /**
 215           *    Adds a token search pattern for a particular
 216           *    parsing mode. The pattern does not change the
 217           *    current mode.
 218           *    @param string $pattern      Perl style regex, but ( and )
 219           *                         lose the usual meaning.
 220           *    @param string $mode         Should only apply this
 221           *                         pattern when dealing with
 222           *                         this type of input.
 223           *    @access public
 224           */
 225          function addPattern($pattern, $mode = "accept") {
 226              if (!isset($this->_regexes[$mode])) {
 227                  $this->_regexes[$mode] = new ParallelRegex($this->_case);
 228              }
 229              $this->_regexes[$mode]->addPattern($pattern);
 230          }
 231          
 232          /**
 233           *    Adds a pattern that will enter a new parsing
 234           *    mode. Useful for entering parenthesis, strings,
 235           *    tags, etc.
 236           *    @param string $pattern      Perl style regex, but ( and )
 237           *                         lose the usual meaning.
 238           *    @param string $mode         Should only apply this
 239           *                         pattern when dealing with
 240           *                         this type of input.
 241           *    @param string $new_mode     Change parsing to this new
 242           *                         nested mode.
 243           *    @access public
 244           */
 245          function addEntryPattern($pattern, $mode, $new_mode) {
 246              if (!isset($this->_regexes[$mode])) {
 247                  $this->_regexes[$mode] = new ParallelRegex($this->_case);
 248              }
 249              $this->_regexes[$mode]->addPattern($pattern, $new_mode);
 250          }
 251          
 252          /**
 253           *    Adds a pattern that will exit the current mode
 254           *    and re-enter the previous one.
 255           *    @param string $pattern      Perl style regex, but ( and )
 256           *                         lose the usual meaning.
 257           *    @param string $mode         Mode to leave.
 258           *    @access public
 259           */
 260          function addExitPattern($pattern, $mode) {
 261              if (!isset($this->_regexes[$mode])) {
 262                  $this->_regexes[$mode] = new ParallelRegex($this->_case);
 263              }
 264              $this->_regexes[$mode]->addPattern($pattern, "__exit");
 265          }
 266          
 267          /**
 268           *    Adds a pattern that has a special mode.
 269           *    Acts as an entry and exit pattern in one go.
 270           *    @param string $pattern      Perl style regex, but ( and )
 271           *                         lose the usual meaning.
 272           *    @param string $mode         Should only apply this
 273           *                         pattern when dealing with
 274           *                         this type of input.
 275           *    @param string $special      Use this mode for this one token.
 276           *    @access public
 277           */
 278          function addSpecialPattern($pattern, $mode, $special) {
 279              if (!isset($this->_regexes[$mode])) {
 280                  $this->_regexes[$mode] = new ParallelRegex($this->_case);
 281              }
 282              $this->_regexes[$mode]->addPattern($pattern, "_$special");
 283          }
 284          
 285          /**
 286           *    Adds a mapping from a mode to another handler.
 287           *    @param string $mode        Mode to be remapped.
 288           *    @param string $handler     New target handler.
 289           *    @access public
 290           */
 291          function mapHandler($mode, $handler) {
 292              $this->_mode_handlers[$mode] = $handler;
 293          }
 294          
 295          /**
 296           *    Splits the page text into tokens. Will fail
 297           *    if the handlers report an error or if no
 298           *    content is consumed. If successful then each
 299           *    unparsed and parsed token invokes a call to the
 300           *    held listener.
 301           *    @param string $raw        Raw HTML text.
 302           *    @return bool           True on success, else false.
 303           *    @access public
 304           */
 305          function parse($raw) {
 306              if (!isset($this->_parser)) {
 307                  return false;
 308              }
 309              $length = strlen($raw);
 310              while (is_array($parsed = $this->_reduce($raw))) {
 311                  list($unmatched, $matched, $mode) = $parsed;
 312                  if (!$this->_dispatchTokens($unmatched, $matched, $mode)) {
 313                      return false;
 314                  }
 315                  if (strlen($raw) == $length) {
 316                      return false;
 317                  }
 318                  $length = strlen($raw);
 319              }
 320              if (!$parsed) {
 321                  return false;
 322              }
 323              return $this->_invokeParser($raw, LEXER_UNMATCHED);
 324          }
 325          
 326          /**
 327           *    Sends the matched token and any leading unmatched
 328           *    text to the parser changing the lexer to a new
 329           *    mode if one is listed.
 330           *    @param string $unmatched    Unmatched leading portion.
 331           *    @param string $matched      Actual token match.
 332           *    @param string $mode         Mode after match. The "_exit"
 333           *                         mode causes a stack pop. An
 334           *                         false mode causes no change.
 335           *    @return bool              False if there was any error
 336           *                         from the parser.
 337           *    @access private
 338           */
 339          function _dispatchTokens($unmatched, $matched, $mode = false) {
 340              if (!$this->_invokeParser($unmatched, LEXER_UNMATCHED)) {
 341                  return false;
 342              }
 343              if ($mode === "__exit") {
 344                  if (!$this->_invokeParser($matched, LEXER_EXIT)) {
 345                      return false;
 346                  }
 347                  return $this->_mode->leave();
 348              }
 349              if (strncmp($mode, "_", 1) == 0) {
 350                  $mode = substr($mode, 1);
 351                  $this->_mode->enter($mode);
 352                  if (!$this->_invokeParser($matched, LEXER_SPECIAL)) {
 353                      return false;
 354                  }
 355                  return $this->_mode->leave();
 356              }
 357              if (is_string($mode)) {
 358                  $this->_mode->enter($mode);
 359                  return $this->_invokeParser($matched, LEXER_ENTER);
 360              }
 361              return $this->_invokeParser($matched, LEXER_MATCHED);
 362          }
 363          
 364          /**
 365           *    Calls the parser method named after the current
 366           *    mode. Empty content will be ignored.
 367           *    @param string $content        Text parsed.
 368           *    @param string $is_match       Token is recognised rather
 369           *                           than unparsed data.
 370           *    @access private
 371           */
 372          function _invokeParser($content, $is_match) {
 373              if (($content === "") || ($content === false)) {
 374                  return true;
 375              }
 376              $handler = $this->_mode->getCurrent();
 377              if (isset($this->_mode_handlers[$handler])) {
 378                  $handler = $this->_mode_handlers[$handler];
 379              }
 380              return $this->_parser->$handler($content, $is_match);
 381          }
 382          
 383          /**
 384           *    Tries to match a chunk of text and if successful
 385           *    removes the recognised chunk and any leading
 386           *    unparsed data. Empty strings will not be matched.
 387           *    @param string $raw  The subject to parse. This is the
 388           *                        content that will be eaten.
 389           *    @return bool|array  Three item list of unparsed
 390           *                        content followed by the
 391           *                        recognised token and finally the
 392           *                        action the parser is to take.
 393           *                        True if no match, false if there
 394           *                        is a parsing error.
 395           *    @access private
 396           */
 397          function _reduce(&$raw) {
 398              if (!isset($this->_regexes[$this->_mode->getCurrent()])) {
 399                  return false;
 400              }
 401              if ($raw === "") {
 402                  return true;
 403              }
 404              if ($action = $this->_regexes[$this->_mode->getCurrent()]->match($raw, $match)) {
 405                  $count = strpos($raw, $match);
 406                  $unparsed = substr($raw, 0, $count);
 407                  $raw = substr($raw, $count + strlen($match));
 408                  return array($unparsed, $match, $action);
 409              }
 410              return true;
 411          }
 412      }
 413  ?>


Generated: Fri Nov 28 20:29:05 2014 Cross-referenced by PHPXref 0.7.1