[ Index ] |
PHP Cross Reference of vtigercrm-6.1.0 |
[Summary view] [Print] [Text view]
1 <?php 2 3 /** 4 * Takes a well formed list of tokens and fixes their nesting. 5 * 6 * HTML elements dictate which elements are allowed to be their children, 7 * for example, you can't have a p tag in a span tag. Other elements have 8 * much more rigorous definitions: tables, for instance, require a specific 9 * order for their elements. There are also constraints not expressible by 10 * document type definitions, such as the chameleon nature of ins/del 11 * tags and global child exclusions. 12 * 13 * The first major objective of this strategy is to iterate through all the 14 * nodes (not tokens) of the list of tokens and determine whether or not 15 * their children conform to the element's definition. If they do not, the 16 * child definition may optionally supply an amended list of elements that 17 * is valid or require that the entire node be deleted (and the previous 18 * node rescanned). 19 * 20 * The second objective is to ensure that explicitly excluded elements of 21 * an element do not appear in its children. Code that accomplishes this 22 * task is pervasive through the strategy, though the two are distinct tasks 23 * and could, theoretically, be seperated (although it's not recommended). 24 * 25 * @note Whether or not unrecognized children are silently dropped or 26 * translated into text depends on the child definitions. 27 * 28 * @todo Enable nodes to be bubbled out of the structure. 29 */ 30 31 class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy 32 { 33 34 public function execute($tokens, $config, $context) { 35 //####################################################################// 36 // Pre-processing 37 38 // get a copy of the HTML definition 39 $definition = $config->getHTMLDefinition(); 40 41 // insert implicit "parent" node, will be removed at end. 42 // DEFINITION CALL 43 $parent_name = $definition->info_parent; 44 array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name)); 45 $tokens[] = new HTMLPurifier_Token_End($parent_name); 46 47 // setup the context variable 'IsInline', for chameleon processing 48 // is 'false' when we are not inline, 'true' when it must always 49 // be inline, and an integer when it is inline for a certain 50 // branch of the document tree 51 $is_inline = $definition->info_parent_def->descendants_are_inline; 52 $context->register('IsInline', $is_inline); 53 54 // setup error collector 55 $e =& $context->get('ErrorCollector', true); 56 57 //####################################################################// 58 // Loop initialization 59 60 // stack that contains the indexes of all parents, 61 // $stack[count($stack)-1] being the current parent 62 $stack = array(); 63 64 // stack that contains all elements that are excluded 65 // it is organized by parent elements, similar to $stack, 66 // but it is only populated when an element with exclusions is 67 // processed, i.e. there won't be empty exclusions. 68 $exclude_stack = array(); 69 70 // variable that contains the start token while we are processing 71 // nodes. This enables error reporting to do its job 72 $start_token = false; 73 $context->register('CurrentToken', $start_token); 74 75 //####################################################################// 76 // Loop 77 78 // iterate through all start nodes. Determining the start node 79 // is complicated so it has been omitted from the loop construct 80 for ($i = 0, $size = count($tokens) ; $i < $size; ) { 81 82 //################################################################// 83 // Gather information on children 84 85 // child token accumulator 86 $child_tokens = array(); 87 88 // scroll to the end of this node, report number, and collect 89 // all children 90 for ($j = $i, $depth = 0; ; $j++) { 91 if ($tokens[$j] instanceof HTMLPurifier_Token_Start) { 92 $depth++; 93 // skip token assignment on first iteration, this is the 94 // token we currently are on 95 if ($depth == 1) continue; 96 } elseif ($tokens[$j] instanceof HTMLPurifier_Token_End) { 97 $depth--; 98 // skip token assignment on last iteration, this is the 99 // end token of the token we're currently on 100 if ($depth == 0) break; 101 } 102 $child_tokens[] = $tokens[$j]; 103 } 104 105 // $i is index of start token 106 // $j is index of end token 107 108 $start_token = $tokens[$i]; // to make token available via CurrentToken 109 110 //################################################################// 111 // Gather information on parent 112 113 // calculate parent information 114 if ($count = count($stack)) { 115 $parent_index = $stack[$count-1]; 116 $parent_name = $tokens[$parent_index]->name; 117 if ($parent_index == 0) { 118 $parent_def = $definition->info_parent_def; 119 } else { 120 $parent_def = $definition->info[$parent_name]; 121 } 122 } else { 123 // processing as if the parent were the "root" node 124 // unknown info, it won't be used anyway, in the future, 125 // we may want to enforce one element only (this is 126 // necessary for HTML Purifier to clean entire documents 127 $parent_index = $parent_name = $parent_def = null; 128 } 129 130 // calculate context 131 if ($is_inline === false) { 132 // check if conditions make it inline 133 if (!empty($parent_def) && $parent_def->descendants_are_inline) { 134 $is_inline = $count - 1; 135 } 136 } else { 137 // check if we're out of inline 138 if ($count === $is_inline) { 139 $is_inline = false; 140 } 141 } 142 143 //################################################################// 144 // Determine whether element is explicitly excluded SGML-style 145 146 // determine whether or not element is excluded by checking all 147 // parent exclusions. The array should not be very large, two 148 // elements at most. 149 $excluded = false; 150 if (!empty($exclude_stack)) { 151 foreach ($exclude_stack as $lookup) { 152 if (isset($lookup[$tokens[$i]->name])) { 153 $excluded = true; 154 // no need to continue processing 155 break; 156 } 157 } 158 } 159 160 //################################################################// 161 // Perform child validation 162 163 if ($excluded) { 164 // there is an exclusion, remove the entire node 165 $result = false; 166 $excludes = array(); // not used, but good to initialize anyway 167 } else { 168 // DEFINITION CALL 169 if ($i === 0) { 170 // special processing for the first node 171 $def = $definition->info_parent_def; 172 } else { 173 $def = $definition->info[$tokens[$i]->name]; 174 175 } 176 177 if (!empty($def->child)) { 178 // have DTD child def validate children 179 $result = $def->child->validateChildren( 180 $child_tokens, $config, $context); 181 } else { 182 // weird, no child definition, get rid of everything 183 $result = false; 184 } 185 186 // determine whether or not this element has any exclusions 187 $excludes = $def->excludes; 188 } 189 190 // $result is now a bool or array 191 192 //################################################################// 193 // Process result by interpreting $result 194 195 if ($result === true || $child_tokens === $result) { 196 // leave the node as is 197 198 // register start token as a parental node start 199 $stack[] = $i; 200 201 // register exclusions if there are any 202 if (!empty($excludes)) $exclude_stack[] = $excludes; 203 204 // move cursor to next possible start node 205 $i++; 206 207 } elseif($result === false) { 208 // remove entire node 209 210 if ($e) { 211 if ($excluded) { 212 $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded'); 213 } else { 214 $e->send(E_ERROR, 'Strategy_FixNesting: Node removed'); 215 } 216 } 217 218 // calculate length of inner tokens and current tokens 219 $length = $j - $i + 1; 220 221 // perform removal 222 array_splice($tokens, $i, $length); 223 224 // update size 225 $size -= $length; 226 227 // there is no start token to register, 228 // current node is now the next possible start node 229 // unless it turns out that we need to do a double-check 230 231 // this is a rought heuristic that covers 100% of HTML's 232 // cases and 99% of all other cases. A child definition 233 // that would be tricked by this would be something like: 234 // ( | a b c) where it's all or nothing. Fortunately, 235 // our current implementation claims that that case would 236 // not allow empty, even if it did 237 if (!$parent_def->child->allow_empty) { 238 // we need to do a double-check 239 $i = $parent_index; 240 array_pop($stack); 241 } 242 243 // PROJECTED OPTIMIZATION: Process all children elements before 244 // reprocessing parent node. 245 246 } else { 247 // replace node with $result 248 249 // calculate length of inner tokens 250 $length = $j - $i - 1; 251 252 if ($e) { 253 if (empty($result) && $length) { 254 $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed'); 255 } else { 256 $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized'); 257 } 258 } 259 260 // perform replacement 261 array_splice($tokens, $i + 1, $length, $result); 262 263 // update size 264 $size -= $length; 265 $size += count($result); 266 267 // register start token as a parental node start 268 $stack[] = $i; 269 270 // register exclusions if there are any 271 if (!empty($excludes)) $exclude_stack[] = $excludes; 272 273 // move cursor to next possible start node 274 $i++; 275 276 } 277 278 //################################################################// 279 // Scroll to next start node 280 281 // We assume, at this point, that $i is the index of the token 282 // that is the first possible new start point for a node. 283 284 // Test if the token indeed is a start tag, if not, move forward 285 // and test again. 286 $size = count($tokens); 287 while ($i < $size and !$tokens[$i] instanceof HTMLPurifier_Token_Start) { 288 if ($tokens[$i] instanceof HTMLPurifier_Token_End) { 289 // pop a token index off the stack if we ended a node 290 array_pop($stack); 291 // pop an exclusion lookup off exclusion stack if 292 // we ended node and that node had exclusions 293 if ($i == 0 || $i == $size - 1) { 294 // use specialized var if it's the super-parent 295 $s_excludes = $definition->info_parent_def->excludes; 296 } else { 297 $s_excludes = $definition->info[$tokens[$i]->name]->excludes; 298 } 299 if ($s_excludes) { 300 array_pop($exclude_stack); 301 } 302 } 303 $i++; 304 } 305 306 } 307 308 //####################################################################// 309 // Post-processing 310 311 // remove implicit parent tokens at the beginning and end 312 array_shift($tokens); 313 array_pop($tokens); 314 315 // remove context variables 316 $context->destroy('IsInline'); 317 $context->destroy('CurrentToken'); 318 319 //####################################################################// 320 // Return 321 322 return $tokens; 323 324 } 325 326 } 327 328 // vim: et sw=4 sts=4
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 20:08:37 2014 | Cross-referenced by PHPXref 0.7.1 |