[ Index ]

PHP Cross Reference of vtigercrm-6.1.0

title

Body

[close]

/libraries/htmlpurifier/library/HTMLPurifier/Strategy/ -> MakeWellFormed.php (source)

   1  <?php
   2  
   3  /**
   4   * Takes tokens and makes them well-formed (balance end tags, etc.)
   5   */
   6  class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
   7  {
   8  
   9      /**
  10       * Array stream of tokens being processed.
  11       */
  12      protected $tokens;
  13  
  14      /**
  15       * Current index in $tokens.
  16       */
  17      protected $t;
  18  
  19      /**
  20       * Current nesting of elements.
  21       */
  22      protected $stack;
  23  
  24      /**
  25       * Injectors active in this stream processing.
  26       */
  27      protected $injectors;
  28  
  29      /**
  30       * Current instance of HTMLPurifier_Config.
  31       */
  32      protected $config;
  33  
  34      /**
  35       * Current instance of HTMLPurifier_Context.
  36       */
  37      protected $context;
  38  
  39      public function execute($tokens, $config, $context) {
  40  
  41          $definition = $config->getHTMLDefinition();
  42  
  43          // local variables
  44          $generator = new HTMLPurifier_Generator($config, $context);
  45          $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
  46          $e = $context->get('ErrorCollector', true);
  47          $t = false; // token index
  48          $i = false; // injector index
  49          $token      = false; // the current token
  50          $reprocess  = false; // whether or not to reprocess the same token
  51          $stack = array();
  52  
  53          // member variables
  54          $this->stack   =& $stack;
  55          $this->t       =& $t;
  56          $this->tokens  =& $tokens;
  57          $this->config  = $config;
  58          $this->context = $context;
  59  
  60          // context variables
  61          $context->register('CurrentNesting', $stack);
  62          $context->register('InputIndex',     $t);
  63          $context->register('InputTokens',    $tokens);
  64          $context->register('CurrentToken',   $token);
  65  
  66          // -- begin INJECTOR --
  67  
  68          $this->injectors = array();
  69  
  70          $injectors = $config->getBatch('AutoFormat');
  71          $def_injectors = $definition->info_injector;
  72          $custom_injectors = $injectors['Custom'];
  73          unset($injectors['Custom']); // special case
  74          foreach ($injectors as $injector => $b) {
  75              $injector = "HTMLPurifier_Injector_$injector";
  76              if (!$b) continue;
  77              $this->injectors[] = new $injector;
  78          }
  79          foreach ($def_injectors as $injector) {
  80              // assumed to be objects
  81              $this->injectors[] = $injector;
  82          }
  83          foreach ($custom_injectors as $injector) {
  84              if (is_string($injector)) {
  85                  $injector = "HTMLPurifier_Injector_$injector";
  86                  $injector = new $injector;
  87              }
  88              $this->injectors[] = $injector;
  89          }
  90  
  91          // give the injectors references to the definition and context
  92          // variables for performance reasons
  93          foreach ($this->injectors as $ix => $injector) {
  94              $error = $injector->prepare($config, $context);
  95              if (!$error) continue;
  96              array_splice($this->injectors, $ix, 1); // rm the injector
  97              trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
  98          }
  99  
 100          // -- end INJECTOR --
 101  
 102          // a note on punting:
 103          //      In order to reduce code duplication, whenever some code needs
 104          //      to make HTML changes in order to make things "correct", the
 105          //      new HTML gets sent through the purifier, regardless of its
 106          //      status. This means that if we add a start token, because it
 107          //      was totally necessary, we don't have to update nesting; we just
 108          //      punt ($reprocess = true; continue;) and it does that for us.
 109  
 110          // isset is in loop because $tokens size changes during loop exec
 111          for (
 112              $t = 0;
 113              $t == 0 || isset($tokens[$t - 1]);
 114              // only increment if we don't need to reprocess
 115              $reprocess ? $reprocess = false : $t++
 116          ) {
 117  
 118              // check for a rewind
 119              if (is_int($i) && $i >= 0) {
 120                  // possibility: disable rewinding if the current token has a
 121                  // rewind set on it already. This would offer protection from
 122                  // infinite loop, but might hinder some advanced rewinding.
 123                  $rewind_to = $this->injectors[$i]->getRewind();
 124                  if (is_int($rewind_to) && $rewind_to < $t) {
 125                      if ($rewind_to < 0) $rewind_to = 0;
 126                      while ($t > $rewind_to) {
 127                          $t--;
 128                          $prev = $tokens[$t];
 129                          // indicate that other injectors should not process this token,
 130                          // but we need to reprocess it
 131                          unset($prev->skip[$i]);
 132                          $prev->rewind = $i;
 133                          if ($prev instanceof HTMLPurifier_Token_Start) array_pop($this->stack);
 134                          elseif ($prev instanceof HTMLPurifier_Token_End) $this->stack[] = $prev->start;
 135                      }
 136                  }
 137                  $i = false;
 138              }
 139  
 140              // handle case of document end
 141              if (!isset($tokens[$t])) {
 142                  // kill processing if stack is empty
 143                  if (empty($this->stack)) break;
 144  
 145                  // peek
 146                  $top_nesting = array_pop($this->stack);
 147                  $this->stack[] = $top_nesting;
 148  
 149                  // send error
 150                  if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
 151                      $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
 152                  }
 153  
 154                  // append, don't splice, since this is the end
 155                  $tokens[] = new HTMLPurifier_Token_End($top_nesting->name);
 156  
 157                  // punt!
 158                  $reprocess = true;
 159                  continue;
 160              }
 161  
 162              $token = $tokens[$t];
 163  
 164              //echo '<br>'; printTokens($tokens, $t); printTokens($this->stack);
 165  
 166              // quick-check: if it's not a tag, no need to process
 167              if (empty($token->is_tag)) {
 168                  if ($token instanceof HTMLPurifier_Token_Text) {
 169                      foreach ($this->injectors as $i => $injector) {
 170                          if (isset($token->skip[$i])) continue;
 171                          if ($token->rewind !== null && $token->rewind !== $i) continue;
 172                          $injector->handleText($token);
 173                          $this->processToken($token, $i);
 174                          $reprocess = true;
 175                          break;
 176                      }
 177                  }
 178                  // another possibility is a comment
 179                  continue;
 180              }
 181  
 182              if (isset($definition->info[$token->name])) {
 183                  $type = $definition->info[$token->name]->child->type;
 184              } else {
 185                  $type = false; // Type is unknown, treat accordingly
 186              }
 187  
 188              // quick tag checks: anything that's *not* an end tag
 189              $ok = false;
 190              if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
 191                  // claims to be a start tag but is empty
 192                  $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
 193                  $ok = true;
 194              } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
 195                  // claims to be empty but really is a start tag
 196                  $this->swap(new HTMLPurifier_Token_End($token->name));
 197                  $this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr));
 198                  // punt (since we had to modify the input stream in a non-trivial way)
 199                  $reprocess = true;
 200                  continue;
 201              } elseif ($token instanceof HTMLPurifier_Token_Empty) {
 202                  // real empty token
 203                  $ok = true;
 204              } elseif ($token instanceof HTMLPurifier_Token_Start) {
 205                  // start tag
 206  
 207                  // ...unless they also have to close their parent
 208                  if (!empty($this->stack)) {
 209  
 210                      $parent = array_pop($this->stack);
 211                      $this->stack[] = $parent;
 212  
 213                      if (isset($definition->info[$parent->name])) {
 214                          $elements = $definition->info[$parent->name]->child->getAllowedElements($config);
 215                          $autoclose = !isset($elements[$token->name]);
 216                      } else {
 217                          $autoclose = false;
 218                      }
 219  
 220                      $carryover = false;
 221                      if ($autoclose && $definition->info[$parent->name]->formatting) {
 222                          $carryover = true;
 223                      }
 224  
 225                      if ($autoclose) {
 226                          // errors need to be updated
 227                          $new_token = new HTMLPurifier_Token_End($parent->name);
 228                          $new_token->start = $parent;
 229                          if ($carryover) {
 230                              $element = clone $parent;
 231                              $element->armor['MakeWellFormed_TagClosedError'] = true;
 232                              $element->carryover = true;
 233                              $this->processToken(array($new_token, $token, $element));
 234                          } else {
 235                              $this->insertBefore($new_token);
 236                          }
 237                          if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
 238                              if (!$carryover) {
 239                                  $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
 240                              } else {
 241                                  $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
 242                              }
 243                          }
 244                          $reprocess = true;
 245                          continue;
 246                      }
 247  
 248                  }
 249                  $ok = true;
 250              }
 251  
 252              if ($ok) {
 253                  foreach ($this->injectors as $i => $injector) {
 254                      if (isset($token->skip[$i])) continue;
 255                      if ($token->rewind !== null && $token->rewind !== $i) continue;
 256                      $injector->handleElement($token);
 257                      $this->processToken($token, $i);
 258                      $reprocess = true;
 259                      break;
 260                  }
 261                  if (!$reprocess) {
 262                      // ah, nothing interesting happened; do normal processing
 263                      $this->swap($token);
 264                      if ($token instanceof HTMLPurifier_Token_Start) {
 265                          $this->stack[] = $token;
 266                      } elseif ($token instanceof HTMLPurifier_Token_End) {
 267                          throw new HTMLPurifier_Exception('Improper handling of end tag in start code; possible error in MakeWellFormed');
 268                      }
 269                  }
 270                  continue;
 271              }
 272  
 273              // sanity check: we should be dealing with a closing tag
 274              if (!$token instanceof HTMLPurifier_Token_End) {
 275                  throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier');
 276              }
 277  
 278              // make sure that we have something open
 279              if (empty($this->stack)) {
 280                  if ($escape_invalid_tags) {
 281                      if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
 282                      $this->swap(new HTMLPurifier_Token_Text(
 283                          $generator->generateFromToken($token)
 284                      ));
 285                  } else {
 286                      $this->remove();
 287                      if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
 288                  }
 289                  $reprocess = true;
 290                  continue;
 291              }
 292  
 293              // first, check for the simplest case: everything closes neatly.
 294              // Eventually, everything passes through here; if there are problems
 295              // we modify the input stream accordingly and then punt, so that
 296              // the tokens get processed again.
 297              $current_parent = array_pop($this->stack);
 298              if ($current_parent->name == $token->name) {
 299                  $token->start = $current_parent;
 300                  foreach ($this->injectors as $i => $injector) {
 301                      if (isset($token->skip[$i])) continue;
 302                      if ($token->rewind !== null && $token->rewind !== $i) continue;
 303                      $injector->handleEnd($token);
 304                      $this->processToken($token, $i);
 305                      $this->stack[] = $current_parent;
 306                      $reprocess = true;
 307                      break;
 308                  }
 309                  continue;
 310              }
 311  
 312              // okay, so we're trying to close the wrong tag
 313  
 314              // undo the pop previous pop
 315              $this->stack[] = $current_parent;
 316  
 317              // scroll back the entire nest, trying to find our tag.
 318              // (feature could be to specify how far you'd like to go)
 319              $size = count($this->stack);
 320              // -2 because -1 is the last element, but we already checked that
 321              $skipped_tags = false;
 322              for ($j = $size - 2; $j >= 0; $j--) {
 323                  if ($this->stack[$j]->name == $token->name) {
 324                      $skipped_tags = array_slice($this->stack, $j);
 325                      break;
 326                  }
 327              }
 328  
 329              // we didn't find the tag, so remove
 330              if ($skipped_tags === false) {
 331                  if ($escape_invalid_tags) {
 332                      $this->swap(new HTMLPurifier_Token_Text(
 333                          $generator->generateFromToken($token)
 334                      ));
 335                      if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
 336                  } else {
 337                      $this->remove();
 338                      if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
 339                  }
 340                  $reprocess = true;
 341                  continue;
 342              }
 343  
 344              // do errors, in REVERSE $j order: a,b,c with </a></b></c>
 345              $c = count($skipped_tags);
 346              if ($e) {
 347                  for ($j = $c - 1; $j > 0; $j--) {
 348                      // notice we exclude $j == 0, i.e. the current ending tag, from
 349                      // the errors...
 350                      if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
 351                          $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
 352                      }
 353                  }
 354              }
 355  
 356              // insert tags, in FORWARD $j order: c,b,a with </a></b></c>
 357              $replace = array($token);
 358              for ($j = 1; $j < $c; $j++) {
 359                  // ...as well as from the insertions
 360                  $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
 361                  $new_token->start = $skipped_tags[$j];
 362                  array_unshift($replace, $new_token);
 363                  if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
 364                      $element = clone $skipped_tags[$j];
 365                      $element->carryover = true;
 366                      $element->armor['MakeWellFormed_TagClosedError'] = true;
 367                      $replace[] = $element;
 368                  }
 369              }
 370              $this->processToken($replace);
 371              $reprocess = true;
 372              continue;
 373          }
 374  
 375          $context->destroy('CurrentNesting');
 376          $context->destroy('InputTokens');
 377          $context->destroy('InputIndex');
 378          $context->destroy('CurrentToken');
 379  
 380          unset($this->injectors, $this->stack, $this->tokens, $this->t);
 381          return $tokens;
 382      }
 383  
 384      /**
 385       * Processes arbitrary token values for complicated substitution patterns.
 386       * In general:
 387       *
 388       * If $token is an array, it is a list of tokens to substitute for the
 389       * current token. These tokens then get individually processed. If there
 390       * is a leading integer in the list, that integer determines how many
 391       * tokens from the stream should be removed.
 392       *
 393       * If $token is a regular token, it is swapped with the current token.
 394       *
 395       * If $token is false, the current token is deleted.
 396       *
 397       * If $token is an integer, that number of tokens (with the first token
 398       * being the current one) will be deleted.
 399       *
 400       * @param $token Token substitution value
 401       * @param $injector Injector that performed the substitution; default is if
 402       *        this is not an injector related operation.
 403       */
 404      protected function processToken($token, $injector = -1) {
 405  
 406          // normalize forms of token
 407          if (is_object($token)) $token = array(1, $token);
 408          if (is_int($token))    $token = array($token);
 409          if ($token === false)  $token = array(1);
 410          if (!is_array($token)) throw new HTMLPurifier_Exception('Invalid token type from injector');
 411          if (!is_int($token[0])) array_unshift($token, 1);
 412          if ($token[0] === 0) throw new HTMLPurifier_Exception('Deleting zero tokens is not valid');
 413  
 414          // $token is now an array with the following form:
 415          // array(number nodes to delete, new node 1, new node 2, ...)
 416  
 417          $delete = array_shift($token);
 418          $old = array_splice($this->tokens, $this->t, $delete, $token);
 419  
 420          if ($injector > -1) {
 421              // determine appropriate skips
 422              $oldskip = isset($old[0]) ? $old[0]->skip : array();
 423              foreach ($token as $object) {
 424                  $object->skip = $oldskip;
 425                  $object->skip[$injector] = true;
 426              }
 427          }
 428  
 429      }
 430  
 431      /**
 432       * Inserts a token before the current token. Cursor now points to this token
 433       */
 434      private function insertBefore($token) {
 435          array_splice($this->tokens, $this->t, 0, array($token));
 436      }
 437  
 438      /**
 439       * Removes current token. Cursor now points to new token occupying previously
 440       * occupied space.
 441       */
 442      private function remove() {
 443          array_splice($this->tokens, $this->t, 1);
 444      }
 445  
 446      /**
 447       * Swap current token with new token. Cursor points to new token (no change).
 448       */
 449      private function swap($token) {
 450          $this->tokens[$this->t] = $token;
 451      }
 452  
 453  }
 454  
 455  // vim: et sw=4 sts=4


Generated: Fri Nov 28 20:08:37 2014 Cross-referenced by PHPXref 0.7.1