[ Index ]

PHP Cross Reference of vtigercrm-6.1.0

title

Body

[close]

/libraries/htmlpurifier/library/HTMLPurifier/Strategy/ -> FixNesting.php (source)

   1  <?php
   2  
   3  /**
   4   * Takes a well formed list of tokens and fixes their nesting.
   5   *
   6   * HTML elements dictate which elements are allowed to be their children,
   7   * for example, you can't have a p tag in a span tag.  Other elements have
   8   * much more rigorous definitions: tables, for instance, require a specific
   9   * order for their elements.  There are also constraints not expressible by
  10   * document type definitions, such as the chameleon nature of ins/del
  11   * tags and global child exclusions.
  12   *
  13   * The first major objective of this strategy is to iterate through all the
  14   * nodes (not tokens) of the list of tokens and determine whether or not
  15   * their children conform to the element's definition.  If they do not, the
  16   * child definition may optionally supply an amended list of elements that
  17   * is valid or require that the entire node be deleted (and the previous
  18   * node rescanned).
  19   *
  20   * The second objective is to ensure that explicitly excluded elements of
  21   * an element do not appear in its children.  Code that accomplishes this
  22   * task is pervasive through the strategy, though the two are distinct tasks
  23   * and could, theoretically, be seperated (although it's not recommended).
  24   *
  25   * @note Whether or not unrecognized children are silently dropped or
  26   *       translated into text depends on the child definitions.
  27   *
  28   * @todo Enable nodes to be bubbled out of the structure.
  29   */
  30  
  31  class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
  32  {
  33  
  34      public function execute($tokens, $config, $context) {
  35          //####################################################################//
  36          // Pre-processing
  37  
  38          // get a copy of the HTML definition
  39          $definition = $config->getHTMLDefinition();
  40  
  41          // insert implicit "parent" node, will be removed at end.
  42          // DEFINITION CALL
  43          $parent_name = $definition->info_parent;
  44          array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name));
  45          $tokens[] = new HTMLPurifier_Token_End($parent_name);
  46  
  47          // setup the context variable 'IsInline', for chameleon processing
  48          // is 'false' when we are not inline, 'true' when it must always
  49          // be inline, and an integer when it is inline for a certain
  50          // branch of the document tree
  51          $is_inline = $definition->info_parent_def->descendants_are_inline;
  52          $context->register('IsInline', $is_inline);
  53  
  54          // setup error collector
  55          $e =& $context->get('ErrorCollector', true);
  56  
  57          //####################################################################//
  58          // Loop initialization
  59  
  60          // stack that contains the indexes of all parents,
  61          // $stack[count($stack)-1] being the current parent
  62          $stack = array();
  63  
  64          // stack that contains all elements that are excluded
  65          // it is organized by parent elements, similar to $stack,
  66          // but it is only populated when an element with exclusions is
  67          // processed, i.e. there won't be empty exclusions.
  68          $exclude_stack = array();
  69  
  70          // variable that contains the start token while we are processing
  71          // nodes. This enables error reporting to do its job
  72          $start_token = false;
  73          $context->register('CurrentToken', $start_token);
  74  
  75          //####################################################################//
  76          // Loop
  77  
  78          // iterate through all start nodes. Determining the start node
  79          // is complicated so it has been omitted from the loop construct
  80          for ($i = 0, $size = count($tokens) ; $i < $size; ) {
  81  
  82              //################################################################//
  83              // Gather information on children
  84  
  85              // child token accumulator
  86              $child_tokens = array();
  87  
  88              // scroll to the end of this node, report number, and collect
  89              // all children
  90              for ($j = $i, $depth = 0; ; $j++) {
  91                  if ($tokens[$j] instanceof HTMLPurifier_Token_Start) {
  92                      $depth++;
  93                      // skip token assignment on first iteration, this is the
  94                      // token we currently are on
  95                      if ($depth == 1) continue;
  96                  } elseif ($tokens[$j] instanceof HTMLPurifier_Token_End) {
  97                      $depth--;
  98                      // skip token assignment on last iteration, this is the
  99                      // end token of the token we're currently on
 100                      if ($depth == 0) break;
 101                  }
 102                  $child_tokens[] = $tokens[$j];
 103              }
 104  
 105              // $i is index of start token
 106              // $j is index of end token
 107  
 108              $start_token = $tokens[$i]; // to make token available via CurrentToken
 109  
 110              //################################################################//
 111              // Gather information on parent
 112  
 113              // calculate parent information
 114              if ($count = count($stack)) {
 115                  $parent_index = $stack[$count-1];
 116                  $parent_name  = $tokens[$parent_index]->name;
 117                  if ($parent_index == 0) {
 118                      $parent_def   = $definition->info_parent_def;
 119                  } else {
 120                      $parent_def   = $definition->info[$parent_name];
 121                  }
 122              } else {
 123                  // processing as if the parent were the "root" node
 124                  // unknown info, it won't be used anyway, in the future,
 125                  // we may want to enforce one element only (this is
 126                  // necessary for HTML Purifier to clean entire documents
 127                  $parent_index = $parent_name = $parent_def = null;
 128              }
 129  
 130              // calculate context
 131              if ($is_inline === false) {
 132                  // check if conditions make it inline
 133                  if (!empty($parent_def) && $parent_def->descendants_are_inline) {
 134                      $is_inline = $count - 1;
 135                  }
 136              } else {
 137                  // check if we're out of inline
 138                  if ($count === $is_inline) {
 139                      $is_inline = false;
 140                  }
 141              }
 142  
 143              //################################################################//
 144              // Determine whether element is explicitly excluded SGML-style
 145  
 146              // determine whether or not element is excluded by checking all
 147              // parent exclusions. The array should not be very large, two
 148              // elements at most.
 149              $excluded = false;
 150              if (!empty($exclude_stack)) {
 151                  foreach ($exclude_stack as $lookup) {
 152                      if (isset($lookup[$tokens[$i]->name])) {
 153                          $excluded = true;
 154                          // no need to continue processing
 155                          break;
 156                      }
 157                  }
 158              }
 159  
 160              //################################################################//
 161              // Perform child validation
 162  
 163              if ($excluded) {
 164                  // there is an exclusion, remove the entire node
 165                  $result = false;
 166                  $excludes = array(); // not used, but good to initialize anyway
 167              } else {
 168                  // DEFINITION CALL
 169                  if ($i === 0) {
 170                      // special processing for the first node
 171                      $def = $definition->info_parent_def;
 172                  } else {
 173                      $def = $definition->info[$tokens[$i]->name];
 174  
 175                  }
 176  
 177                  if (!empty($def->child)) {
 178                      // have DTD child def validate children
 179                      $result = $def->child->validateChildren(
 180                          $child_tokens, $config, $context);
 181                  } else {
 182                      // weird, no child definition, get rid of everything
 183                      $result = false;
 184                  }
 185  
 186                  // determine whether or not this element has any exclusions
 187                  $excludes = $def->excludes;
 188              }
 189  
 190              // $result is now a bool or array
 191  
 192              //################################################################//
 193              // Process result by interpreting $result
 194  
 195              if ($result === true || $child_tokens === $result) {
 196                  // leave the node as is
 197  
 198                  // register start token as a parental node start
 199                  $stack[] = $i;
 200  
 201                  // register exclusions if there are any
 202                  if (!empty($excludes)) $exclude_stack[] = $excludes;
 203  
 204                  // move cursor to next possible start node
 205                  $i++;
 206  
 207              } elseif($result === false) {
 208                  // remove entire node
 209  
 210                  if ($e) {
 211                      if ($excluded) {
 212                          $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded');
 213                      } else {
 214                          $e->send(E_ERROR, 'Strategy_FixNesting: Node removed');
 215                      }
 216                  }
 217  
 218                  // calculate length of inner tokens and current tokens
 219                  $length = $j - $i + 1;
 220  
 221                  // perform removal
 222                  array_splice($tokens, $i, $length);
 223  
 224                  // update size
 225                  $size -= $length;
 226  
 227                  // there is no start token to register,
 228                  // current node is now the next possible start node
 229                  // unless it turns out that we need to do a double-check
 230  
 231                  // this is a rought heuristic that covers 100% of HTML's
 232                  // cases and 99% of all other cases. A child definition
 233                  // that would be tricked by this would be something like:
 234                  // ( | a b c) where it's all or nothing. Fortunately,
 235                  // our current implementation claims that that case would
 236                  // not allow empty, even if it did
 237                  if (!$parent_def->child->allow_empty) {
 238                      // we need to do a double-check
 239                      $i = $parent_index;
 240                      array_pop($stack);
 241                  }
 242  
 243                  // PROJECTED OPTIMIZATION: Process all children elements before
 244                  // reprocessing parent node.
 245  
 246              } else {
 247                  // replace node with $result
 248  
 249                  // calculate length of inner tokens
 250                  $length = $j - $i - 1;
 251  
 252                  if ($e) {
 253                      if (empty($result) && $length) {
 254                          $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed');
 255                      } else {
 256                          $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized');
 257                      }
 258                  }
 259  
 260                  // perform replacement
 261                  array_splice($tokens, $i + 1, $length, $result);
 262  
 263                  // update size
 264                  $size -= $length;
 265                  $size += count($result);
 266  
 267                  // register start token as a parental node start
 268                  $stack[] = $i;
 269  
 270                  // register exclusions if there are any
 271                  if (!empty($excludes)) $exclude_stack[] = $excludes;
 272  
 273                  // move cursor to next possible start node
 274                  $i++;
 275  
 276              }
 277  
 278              //################################################################//
 279              // Scroll to next start node
 280  
 281              // We assume, at this point, that $i is the index of the token
 282              // that is the first possible new start point for a node.
 283  
 284              // Test if the token indeed is a start tag, if not, move forward
 285              // and test again.
 286              $size = count($tokens);
 287              while ($i < $size and !$tokens[$i] instanceof HTMLPurifier_Token_Start) {
 288                  if ($tokens[$i] instanceof HTMLPurifier_Token_End) {
 289                      // pop a token index off the stack if we ended a node
 290                      array_pop($stack);
 291                      // pop an exclusion lookup off exclusion stack if
 292                      // we ended node and that node had exclusions
 293                      if ($i == 0 || $i == $size - 1) {
 294                          // use specialized var if it's the super-parent
 295                          $s_excludes = $definition->info_parent_def->excludes;
 296                      } else {
 297                          $s_excludes = $definition->info[$tokens[$i]->name]->excludes;
 298                      }
 299                      if ($s_excludes) {
 300                          array_pop($exclude_stack);
 301                      }
 302                  }
 303                  $i++;
 304              }
 305  
 306          }
 307  
 308          //####################################################################//
 309          // Post-processing
 310  
 311          // remove implicit parent tokens at the beginning and end
 312          array_shift($tokens);
 313          array_pop($tokens);
 314  
 315          // remove context variables
 316          $context->destroy('IsInline');
 317          $context->destroy('CurrentToken');
 318  
 319          //####################################################################//
 320          // Return
 321  
 322          return $tokens;
 323  
 324      }
 325  
 326  }
 327  
 328  // vim: et sw=4 sts=4


Generated: Fri Nov 28 20:08:37 2014 Cross-referenced by PHPXref 0.7.1