[ Index ]

PHP Cross Reference of moodle-2.8

title

Body

[close]

/lib/ -> searchlib.php (source)

   1  <?php
   2  
   3  // This file is part of Moodle - http://moodle.org/
   4  //
   5  // Moodle is free software: you can redistribute it and/or modify
   6  // it under the terms of the GNU General Public License as published by
   7  // the Free Software Foundation, either version 3 of the License, or
   8  // (at your option) any later version.
   9  //
  10  // Moodle is distributed in the hope that it will be useful,
  11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  // GNU General Public License for more details.
  14  //
  15  // You should have received a copy of the GNU General Public License
  16  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  17  
  18  /**
  19   * @package    core
  20   * @subpackage search
  21   * @copyright  1999 onwards Martin Dougiamas  {@link http://moodle.com}
  22   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  23   */
  24  
  25  defined('MOODLE_INTERNAL') || die();
  26  
  27  /** @see lexer.php */
  28  require_once($CFG->libdir.'/lexer.php');
  29  
  30  /** Constants for the various types of tokens */
  31  
  32  define("TOKEN_USER","0");
  33  define("TOKEN_META","1");
  34  define("TOKEN_EXACT","2");
  35  define("TOKEN_NEGATE","3");
  36  define("TOKEN_STRING","4");
  37  define("TOKEN_USERID","5");
  38  define("TOKEN_DATEFROM","6");
  39  define("TOKEN_DATETO","7");
  40  define("TOKEN_INSTANCE","8");
  41  
  42  /**
  43   * Class to hold token/value pairs after they're parsed.
  44   *
  45   * @package   moodlecore
  46   * @copyright 1999 onwards Martin Dougiamas  {@link http://moodle.com}
  47   * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  48   */
  49  class search_token {
  50    private $value;
  51    private $type;
  52  
  53    function search_token($type,$value){
  54      $this->type = $type;
  55      $this->value = $this->sanitize($value);
  56  
  57    }
  58  
  59    // Try to clean up user input to avoid potential security issues.
  60    // Need to think about this some more.
  61  
  62    function sanitize($userstring){
  63      return htmlspecialchars($userstring);
  64    }
  65    function getValue(){
  66      return $this->value;
  67    }
  68    function getType(){
  69      return $this->type;
  70    }
  71  }
  72  
  73  
  74  /**
  75   * This class does the heavy lifting of lexing the search string into tokens.
  76   * Using a full-blown lexer is probably overkill for this application, but
  77   * might be useful for other tasks.
  78   *
  79   * @package   moodlecore
  80   * @copyright 1999 onwards Martin Dougiamas  {@link http://moodle.com}
  81   * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  82   */
  83  class search_lexer extends Lexer{
  84  
  85    function search_lexer(&$parser){
  86  
  87      // Call parent constructor.
  88      $this->Lexer($parser);
  89  
  90      //Set up the state machine and pattern matches for transitions.
  91  
  92      // Patterns to handle strings  of the form datefrom:foo
  93  
  94      // If we see the string datefrom: while in the base accept state, start
  95      // parsing a username and go to the indatefrom state.
  96      $this->addEntryPattern("datefrom:\S+","accept","indatefrom");
  97  
  98      // Snarf everything into the username until we see whitespace, then exit
  99      // back to the base accept state.
 100      $this->addExitPattern("\s","indatefrom");
 101  
 102  
 103      // Patterns to handle strings  of the form dateto:foo
 104  
 105      // If we see the string dateto: while in the base accept state, start
 106      // parsing a username and go to the indateto state.
 107      $this->addEntryPattern("dateto:\S+","accept","indateto");
 108  
 109      // Snarf everything into the username until we see whitespace, then exit
 110      // back to the base accept state.
 111      $this->addExitPattern("\s","indateto");
 112  
 113  
 114      // Patterns to handle strings  of the form instance:foo
 115  
 116      // If we see the string instance: while in the base accept state, start
 117      // parsing for instance number and go to the ininstance state.
 118      $this->addEntryPattern("instance:\S+","accept","ininstance");
 119  
 120      // Snarf everything into the username until we see whitespace, then exit
 121      // back to the base accept state.
 122      $this->addExitPattern("\s","ininstance");
 123  
 124  
 125      // Patterns to handle strings  of the form userid:foo
 126  
 127      // If we see the string userid: while in the base accept state, start
 128      // parsing a username and go to the inuserid state.
 129      $this->addEntryPattern("userid:\S+","accept","inuserid");
 130  
 131      // Snarf everything into the username until we see whitespace, then exit
 132      // back to the base accept state.
 133      $this->addExitPattern("\s","inuserid");
 134  
 135  
 136      // Patterns to handle strings  of the form user:foo
 137  
 138      // If we see the string user: while in the base accept state, start
 139      // parsing a username and go to the inusername state.
 140      $this->addEntryPattern("user:\S+","accept","inusername");
 141  
 142      // Snarf everything into the username until we see whitespace, then exit
 143      // back to the base accept state.
 144      $this->addExitPattern("\s","inusername");
 145  
 146  
 147      // Patterns to handle strings  of the form meta:foo
 148  
 149     // If we see the string meta: while in the base accept state, start
 150      // parsing a username and go to the inmeta state.
 151      $this->addEntryPattern("subject:\S+","accept","inmeta");
 152  
 153      // Snarf everything into the meta token until we see whitespace, then exit
 154      // back to the base accept state.
 155      $this->addExitPattern("\s","inmeta");
 156  
 157  
 158      // Patterns to handle required exact match strings (+foo) .
 159  
 160      // If we see a + sign  while in the base accept state, start
 161      // parsing an exact match string and enter the inrequired state
 162      $this->addEntryPattern("\+\S+","accept","inrequired");
 163      // When we see white space, exit back to accept state.
 164      $this->addExitPattern("\s","inrequired");
 165  
 166      // Handle excluded strings (-foo)
 167  
 168     // If we see a - sign  while in the base accept state, start
 169      // parsing an excluded string and enter the inexcluded state
 170      $this->addEntryPattern("\-\S+","accept","inexcluded");
 171      // When we see white space, exit back to accept state.
 172      $this->addExitPattern("\s","inexcluded");
 173  
 174  
 175      // Patterns to handle quoted strings.
 176  
 177      // If we see a quote  while in the base accept state, start
 178      // parsing a quoted string and enter the inquotedstring state.
 179      // Grab everything until we see the closing quote.
 180  
 181      $this->addEntryPattern("\"[^\"]+","accept","inquotedstring");
 182  
 183      // When we see a closing quote, reenter the base accept state.
 184      $this->addExitPattern("\"","inquotedstring");
 185  
 186      // Patterns to handle ordinary, nonquoted words.
 187  
 188      // When we see non-whitespace, snarf everything into the nonquoted word
 189      // until we see whitespace again.
 190      $this->addEntryPattern("\S+","accept","plainstring");
 191  
 192      // Once we see whitespace, reenter the base accept state.
 193      $this->addExitPattern("\s","plainstring");
 194  
 195    }
 196  }
 197  
 198  
 199  
 200  /**
 201   * This class takes care of sticking the proper token type/value pairs into
 202   * the parsed token  array.
 203   * Most functions in this class should only be called by the lexer, the
 204   * one exception being getParseArray() which returns the result.
 205   *
 206   * @package   moodlecore
 207   * @copyright 1999 onwards Martin Dougiamas  {@link http://moodle.com}
 208   * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 209   */
 210  class search_parser {
 211      private $tokens;
 212  
 213      // This function is called by the code that's interested in the result of the parse operation.
 214      function get_parsed_array(){
 215          return $this->tokens;
 216      }
 217  
 218      /*
 219       * Functions below this are part of the state machine for the parse
 220       * operation and should not be called directly.
 221       */
 222  
 223      // Base state. No output emitted.
 224      function accept() {
 225          return true;
 226      }
 227  
 228      // State for handling datefrom:foo constructs. Potentially emits a token.
 229      function indatefrom($content){
 230          if (strlen($content) < 10) { // State exit or missing parameter.
 231              return true;
 232          }
 233          // Strip off the datefrom: part and add the reminder to the parsed token array
 234          $param = trim(substr($content,9));
 235          $this->tokens[] = new search_token(TOKEN_DATEFROM,$param);
 236          return true;
 237      }
 238  
 239      // State for handling dateto:foo constructs. Potentially emits a token.
 240      function indateto($content){
 241          if (strlen($content) < 8) { // State exit or missing parameter.
 242              return true;
 243          }
 244          // Strip off the dateto: part and add the reminder to the parsed token array
 245          $param = trim(substr($content,7));
 246          $this->tokens[] = new search_token(TOKEN_DATETO,$param);
 247          return true;
 248      }
 249  
 250      // State for handling instance:foo constructs. Potentially emits a token.
 251      function ininstance($content){
 252          if (strlen($content) < 10) { // State exit or missing parameter.
 253              return true;
 254          }
 255          // Strip off the instance: part and add the reminder to the parsed token array
 256          $param = trim(substr($content,9));
 257          $this->tokens[] = new search_token(TOKEN_INSTANCE,$param);
 258          return true;
 259      }
 260  
 261  
 262      // State for handling userid:foo constructs. Potentially emits a token.
 263      function inuserid($content){
 264          if (strlen($content) < 8) { // State exit or missing parameter.
 265              return true;
 266          }
 267          // Strip off the userid: part and add the reminder to the parsed token array
 268          $param = trim(substr($content,7));
 269          $this->tokens[] = new search_token(TOKEN_USERID,$param);
 270          return true;
 271      }
 272  
 273  
 274      // State for handling user:foo constructs. Potentially emits a token.
 275      function inusername($content){
 276          if (strlen($content) < 6) { // State exit or missing parameter.
 277              return true;
 278          }
 279          // Strip off the user: part and add the reminder to the parsed token array
 280          $param = trim(substr($content,5));
 281          $this->tokens[] = new search_token(TOKEN_USER,$param);
 282          return true;
 283      }
 284  
 285  
 286      // State for handling meta:foo constructs. Potentially emits a token.
 287      function inmeta($content){
 288          if (strlen($content) < 9) { // Missing parameter.
 289              return true;
 290          }
 291          // Strip off the meta: part and add the reminder to the parsed token array.
 292          $param = trim(substr($content,8));
 293          $this->tokens[] = new search_token(TOKEN_META,$param);
 294          return true;
 295      }
 296  
 297  
 298      // State entered when we've seen a required string (+foo). Potentially
 299      // emits a token.
 300      function inrequired($content){
 301          if (strlen($content) < 2) { // State exit or missing parameter, don't emit.
 302              return true;
 303          }
 304          // Strip off the + sign and add the reminder to the parsed token array.
 305          $this->tokens[] = new search_token(TOKEN_EXACT,substr($content,1));
 306          return true;
 307      }
 308  
 309      // State entered when we've seen an excluded string (-foo). Potentially
 310      // emits a token.
 311      function inexcluded($content){
 312          if (strlen($content) < 2) { // State exit or missing parameter.
 313              return true;
 314          }
 315          // Strip off the -sign and add the reminder to the parsed token array.
 316          $this->tokens[] = new search_token(TOKEN_NEGATE,substr($content,1));
 317          return true;
 318      }
 319  
 320  
 321      // State entered when we've seen a quoted string. Potentially emits a token.
 322      function inquotedstring($content){
 323          if (strlen($content) < 2) { // State exit or missing parameter.
 324              return true;
 325          }
 326          // Strip off the opening quote and add the reminder to the parsed token array.
 327          $this->tokens[] = new search_token(TOKEN_STRING,substr($content,1));
 328          return true;
 329      }
 330  
 331      // State entered when we've seen an ordinary, non-quoted word. Potentially
 332      // emits a token.
 333      function plainstring($content){
 334          if (trim($content) === '') { // State exit
 335              return true;
 336          }
 337          // Add the string to the parsed token array.
 338          $this->tokens[] = new search_token(TOKEN_STRING,$content);
 339          return true;
 340      }
 341  }
 342  
 343  /**
 344   * Primitive function to generate a SQL string from a parse tree
 345   * using TEXT indexes. If searches aren't suitable to use TEXT
 346   * this function calls the default search_generate_SQL() one.
 347   *
 348   * $parsetree should be a parse tree generated by a
 349   * search_lexer/search_parser combination.
 350   * Other fields are database table names to search.
 351   *
 352   * @global object
 353   * @global object
 354   */
 355  function search_generate_text_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
 356                               $userfirstnamefield, $userlastnamefield, $timefield, $instancefield) {
 357      global $CFG, $DB;
 358      static $p = 0;
 359  
 360  /// First of all, search for reasons to switch to standard SQL generation
 361  /// Only mysql are supported for now
 362      if ($DB->get_dbfamily() != 'mysql') {
 363          return search_generate_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
 364                                     $userfirstnamefield, $userlastnamefield, $timefield, $instancefield);
 365      }
 366  
 367  /// Some languages don't have "word separators" and MySQL FULLTEXT doesn't perform well with them, so
 368  /// switch to standard SQL search generation
 369      if ($DB->get_dbfamily() == 'mysql') {
 370          $nonseparatedlangs = array('ja', 'th', 'zh_cn', 'zh_tw');
 371          if (in_array(current_language(), $nonseparatedlangs)) {
 372              return search_generate_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
 373                                         $userfirstnamefield, $userlastnamefield, $timefield, $instancefield);
 374          }
 375      }
 376  
 377  /// Here we'll acumulate non-textual tokens
 378      $non_text_tokens = array();
 379      $params = array();
 380  
 381      $ntokens = count($parsetree);
 382      if ($ntokens == 0) {
 383          return "";
 384      }
 385  
 386      $SQLString = '';
 387      $text_sql_string = '';
 388  
 389      $datasearch_clause = '';
 390      $metasearch_clause = '';
 391  
 392      foreach ($parsetree as $token) {
 393  
 394          $type = $token->getType();
 395          $value = $token->getValue();
 396  
 397          switch($type){
 398              case TOKEN_STRING:
 399              /// If it's a multiword token, quote it
 400                  if (strstr($value, ' ')) {
 401                      $datasearch_clause .= '"' . $value . '" ';
 402              /// Simple word token, search for it as prefix
 403                  } else {
 404                      $datasearch_clause .= '+' . $value . '* ';
 405                  }
 406                  break;
 407              case TOKEN_EXACT:
 408              /// token must be exactly as requested
 409                  $datasearch_clause .= '+' . $value . ' ';
 410                  break;
 411              case TOKEN_NEGATE:
 412              /// token must not exist as prefix
 413                  $datasearch_clause .= '-' . $value . '* ';
 414                  break;
 415              case TOKEN_META:
 416              /// token in metafield, search for it as prefix
 417                  $metasearch_clause .= '+' . $value . '* ';
 418                  break;
 419              case TOKEN_USER:
 420              case TOKEN_USERID:
 421              case TOKEN_INSTANCE:
 422              case TOKEN_DATETO:
 423              case TOKEN_DATEFROM:
 424              /// delegate to standard search
 425                  $non_text_tokens[] = $token;
 426                  break;
 427              default:
 428                  return '';
 429          }
 430      }
 431  
 432  /// Call to standard search for pending tokens
 433      if (!empty($non_text_tokens)) {
 434          list($SQLString, $sparams) = search_generate_SQL($non_text_tokens, $datafield, $metafield, $mainidfield, $useridfield,
 435                                           $userfirstnamefield, $userlastnamefield, $timefield, $instancefield);
 436          $params = array_merge($params, $sparams);
 437      }
 438  /// Build the final SQL clause
 439      if (!empty($datasearch_clause)) {
 440      /// Must have $datafield to search within
 441          if (!empty($datafield)) {
 442              $text_sql_string .= 'MATCH (' . $datafield;
 443          /// And optionally $metafield
 444              if (!empty($metafield)) {
 445                  $text_sql_string .= ', ' . $metafield;
 446              }
 447          /// Begin with the AGAINST clause
 448              $text_sql_string .= ') AGAINST (';
 449          /// Add the search terms
 450              $text_sql_string .= ':sgt'.$p;
 451              $params['sgt'.$p++] = trim($datasearch_clause);
 452          /// Close AGAINST clause
 453              $text_sql_string .= " IN BOOLEAN MODE)";
 454          }
 455      }
 456  /// Now add the metasearch_clause
 457      if (!empty($metasearch_clause)) {
 458      /// Must have $metafield to search within
 459          if (!empty($metafield)) {
 460          /// AND operator if needed
 461              if (!empty($text_sql_string)) {
 462                  $text_sql_string .= ' AND ';
 463              }
 464              $text_sql_string .= 'MATCH (' . $metafield;
 465          /// Begin with the AGAINST clause
 466              $text_sql_string .= ') AGAINST (';
 467          /// Add the search terms
 468              $text_sql_string .= ':sgt'.$p;
 469              $params['sgt'.$p++] = trim($metasearch_clause);
 470          /// Close AGAINST clause
 471              $text_sql_string .= " IN BOOLEAN MODE)";
 472          }
 473      }
 474  /// Finally add the non-text conditions
 475      if (!empty($SQLString)) {
 476      /// AND operator if needed
 477          if (!empty($text_sql_string)) {
 478              $text_sql_string .= ' AND ';
 479          }
 480          $text_sql_string .= $SQLString;
 481      }
 482  
 483      return array($text_sql_string, $params);
 484  }
 485  
 486  /**
 487   * Primitive function to generate a SQL string from a parse tree.
 488   * Parameters:
 489   *
 490   * $parsetree should be a parse tree generated by a
 491   * search_lexer/search_parser combination.
 492   * Other fields are database table names to search.
 493   *
 494   * @global object
 495   * @global object
 496   */
 497  function search_generate_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
 498                               $userfirstnamefield, $userlastnamefield, $timefield, $instancefield) {
 499      global $CFG, $DB;
 500      static $p = 0;
 501  
 502      if ($DB->sql_regex_supported()) {
 503          $REGEXP    = $DB->sql_regex(true);
 504          $NOTREGEXP = $DB->sql_regex(false);
 505      }
 506  
 507      $params = array();
 508  
 509      $ntokens = count($parsetree);
 510      if ($ntokens == 0) {
 511          return "";
 512      }
 513  
 514      $SQLString = '';
 515  
 516      for ($i=0; $i<$ntokens; $i++){
 517          if ($i > 0) {// We have more than one clause, need to tack on AND
 518              $SQLString .= ' AND ';
 519          }
 520  
 521          $type = $parsetree[$i]->getType();
 522          $value = $parsetree[$i]->getValue();
 523  
 524      /// Under Oracle and MSSQL, transform TOKEN searches into STRING searches and trim +- chars
 525          if (!$DB->sql_regex_supported()) {
 526              $value = trim($value, '+-');
 527              if ($type == TOKEN_EXACT) {
 528                  $type = TOKEN_STRING;
 529              }
 530          }
 531  
 532          $name1 = 'sq'.$p++;
 533          $name2 = 'sq'.$p++;
 534  
 535          switch($type){
 536              case TOKEN_STRING:
 537                  $SQLString .= "((".$DB->sql_like($datafield, ":$name1", false).") OR (".$DB->sql_like($metafield, ":$name2", false)."))";
 538                  $params[$name1] =  "%$value%";
 539                  $params[$name2] =  "%$value%";
 540                  break;
 541              case TOKEN_EXACT:
 542                  $SQLString .= "(($datafield $REGEXP :$name1) OR ($metafield $REGEXP :$name2))";
 543                  $params[$name1] =  "[[:<:]]".$value."[[:>:]]";
 544                  $params[$name2] =  "[[:<:]]".$value."[[:>:]]";
 545                  break;
 546              case TOKEN_META:
 547                  if ($metafield != '') {
 548                      $SQLString .= "(".$DB->sql_like($metafield, ":$name1", false).")";
 549                      $params[$name1] =  "%$value%";
 550                  }
 551                  break;
 552              case TOKEN_USER:
 553                  $SQLString .= "(($mainidfield = $useridfield) AND ((".$DB->sql_like($userfirstnamefield, ":$name1", false).") OR (".$DB->sql_like($userlastnamefield, ":$name2", false).")))";
 554                  $params[$name1] =  "%$value%";
 555                  $params[$name2] =  "%$value%";
 556                  break;
 557              case TOKEN_USERID:
 558                  $SQLString .= "($useridfield = :$name1)";
 559                  $params[$name1] =  $value;
 560                  break;
 561              case TOKEN_INSTANCE:
 562                  $SQLString .= "($instancefield = :$name1)";
 563                  $params[$name1] =  $value;
 564                  break;
 565              case TOKEN_DATETO:
 566                  $SQLString .= "($timefield <= :$name1)";
 567                  $params[$name1] =  $value;
 568                  break;
 569              case TOKEN_DATEFROM:
 570                  $SQLString .= "($timefield >= :$name1)";
 571                  $params[$name1] =  $value;
 572                  break;
 573              case TOKEN_NEGATE:
 574                  $SQLString .= "(NOT ((".$DB->sql_like($datafield, ":$name1", false).") OR (".$DB->sql_like($metafield, ":$name2", false).")))";
 575                  $params[$name1] =  "%$value%";
 576                  $params[$name2] =  "%$value%";
 577                  break;
 578              default:
 579                  return '';
 580  
 581          }
 582      }
 583      return array($SQLString, $params);
 584  }


Generated: Fri Nov 28 20:29:05 2014 Cross-referenced by PHPXref 0.7.1