PHPXRef 0.7.1 : vtigercrm-6.1.0 : /libraries/htmlpurifier/library/HTMLPurifier/Injector/AutoParagraph.php source

[Summary view] [Print] [Text view]
   1  <?php
   2  
   3  /**
   4   * Injector that auto paragraphs text in the root node based on
   5   * double-spacing.
   6   * @todo Ensure all states are unit tested, including variations as well.
   7   * @todo Make a graph of the flow control for this Injector.
   8   */
   9  class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
  10  {
  11  
  12      public $name = 'AutoParagraph';
  13      public $needed = array('p');
  14  
  15      private function _pStart() {
  16          $par = new HTMLPurifier_Token_Start('p');
  17          $par->armor['MakeWellFormed_TagClosedError'] = true;
  18          return $par;
  19      }
  20  
  21      public function handleText(&$token) {
  22          $text = $token->data;
  23          // Does the current parent allow <p> tags?
  24          if ($this->allowsElement('p')) {
  25              if (empty($this->currentNesting) || strpos($text, "\n\n") !== false) {
  26                  // Note that we have differing behavior when dealing with text
  27                  // in the anonymous root node, or a node inside the document.
  28                  // If the text as a double-newline, the treatment is the same;
  29                  // if it doesn't, see the next if-block if you're in the document.
  30  
  31                  $i = $nesting = null;
  32                  if (!$this->forwardUntilEndToken($i, $current, $nesting) && $token->is_whitespace) {
  33                      // State 1.1: ...    ^ (whitespace, then document end)
  34                      //               ----
  35                      // This is a degenerate case
  36                  } else {
  37                      // State 1.2: PAR1
  38                      //            ----
  39  
  40                      // State 1.3: PAR1\n\nPAR2
  41                      //            ------------
  42  
  43                      // State 1.4: <div>PAR1\n\nPAR2 (see State 2)
  44                      //                 ------------
  45                      $token = array($this->_pStart());
  46                      $this->_splitText($text, $token);
  47                  }
  48              } else {
  49                  // State 2:   <div>PAR1... (similar to 1.4)
  50                  //                 ----
  51  
  52                  // We're in an element that allows paragraph tags, but we're not
  53                  // sure if we're going to need them.
  54                  if ($this->_pLookAhead()) {
  55                      // State 2.1: <div>PAR1<b>PAR1\n\nPAR2
  56                      //                 ----
  57                      // Note: This will always be the first child, since any
  58                      // previous inline element would have triggered this very
  59                      // same routine, and found the double newline. One possible
  60                      // exception would be a comment.
  61                      $token = array($this->_pStart(), $token);
  62                  } else {
  63                      // State 2.2.1: <div>PAR1<div>
  64                      //                   ----
  65  
  66                      // State 2.2.2: <div>PAR1<b>PAR1</b></div>
  67                      //                   ----
  68                  }
  69              }
  70          // Is the current parent a <p> tag?
  71          } elseif (
  72              !empty($this->currentNesting) &&
  73              $this->currentNesting[count($this->currentNesting)-1]->name == 'p'
  74          ) {
  75              // State 3.1: ...<p>PAR1
  76              //                  ----
  77  
  78              // State 3.2: ...<p>PAR1\n\nPAR2
  79              //                  ------------
  80              $token = array();
  81              $this->_splitText($text, $token);
  82          // Abort!
  83          } else {
  84              // State 4.1: ...<b>PAR1
  85              //                  ----
  86  
  87              // State 4.2: ...<b>PAR1\n\nPAR2
  88              //                  ------------
  89          }
  90      }
  91  
  92      public function handleElement(&$token) {
  93          // We don't have to check if we're already in a <p> tag for block
  94          // tokens, because the tag would have been autoclosed by MakeWellFormed.
  95          if ($this->allowsElement('p')) {
  96              if (!empty($this->currentNesting)) {
  97                  if ($this->_isInline($token)) {
  98                      // State 1: <div>...<b>
  99                      //                  ---
 100  
 101                      // Check if this token is adjacent to the parent token
 102                      // (seek backwards until token isn't whitespace)
 103                      $i = null;
 104                      $this->backward($i, $prev);
 105  
 106                      if (!$prev instanceof HTMLPurifier_Token_Start) {
 107                          // Token wasn't adjacent
 108  
 109                          if (
 110                              $prev instanceof HTMLPurifier_Token_Text &&
 111                              substr($prev->data, -2) === "\n\n"
 112                          ) {
 113                              // State 1.1.4: <div><p>PAR1</p>\n\n<b>
 114                              //                                  ---
 115  
 116                              // Quite frankly, this should be handled by splitText
 117                              $token = array($this->_pStart(), $token);
 118                          } else {
 119                              // State 1.1.1: <div><p>PAR1</p><b>
 120                              //                              ---
 121  
 122                              // State 1.1.2: <div><br /><b>
 123                              //                         ---
 124  
 125                              // State 1.1.3: <div>PAR<b>
 126                              //                      ---
 127                          }
 128  
 129                      } else {
 130                          // State 1.2.1: <div><b>
 131                          //                   ---
 132  
 133                          // Lookahead to see if <p> is needed.
 134                          if ($this->_pLookAhead()) {
 135                              // State 1.3.1: <div><b>PAR1\n\nPAR2
 136                              //                   ---
 137                              $token = array($this->_pStart(), $token);
 138                          } else {
 139                              // State 1.3.2: <div><b>PAR1</b></div>
 140                              //                   ---
 141  
 142                              // State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div>
 143                              //                   ---
 144                          }
 145                      }
 146                  } else {
 147                      // State 2.3: ...<div>
 148                      //               -----
 149                  }
 150              } else {
 151                  if ($this->_isInline($token)) {
 152                      // State 3.1: <b>
 153                      //            ---
 154                      // This is where the {p} tag is inserted, not reflected in
 155                      // inputTokens yet, however.
 156                      $token = array($this->_pStart(), $token);
 157                  } else {
 158                      // State 3.2: <div>
 159                      //            -----
 160                  }
 161  
 162                  $i = null;
 163                  if ($this->backward($i, $prev)) {
 164                      if (
 165                          !$prev instanceof HTMLPurifier_Token_Text
 166                      ) {
 167                          // State 3.1.1: ...</p>{p}<b>
 168                          //                        ---
 169  
 170                          // State 3.2.1: ...</p><div>
 171                          //                     -----
 172  
 173                          if (!is_array($token)) $token = array($token);
 174                          array_unshift($token, new HTMLPurifier_Token_Text("\n\n"));
 175                      } else {
 176                          // State 3.1.2: ...</p>\n\n{p}<b>
 177                          //                            ---
 178  
 179                          // State 3.2.2: ...</p>\n\n<div>
 180                          //                         -----
 181  
 182                          // Note: PAR<ELEM> cannot occur because PAR would have been
 183                          // wrapped in <p> tags.
 184                      }
 185                  }
 186              }
 187          } else {
 188              // State 2.2: <ul><li>
 189              //                ----
 190  
 191              // State 2.4: <p><b>
 192              //               ---
 193          }
 194      }
 195  
 196      /**
 197       * Splits up a text in paragraph tokens and appends them
 198       * to the result stream that will replace the original
 199       * @param $data String text data that will be processed
 200       *    into paragraphs
 201       * @param $result Reference to array of tokens that the
 202       *    tags will be appended onto
 203       * @param $config Instance of HTMLPurifier_Config
 204       * @param $context Instance of HTMLPurifier_Context
 205       */
 206      private function _splitText($data, &$result) {
 207          $raw_paragraphs = explode("\n\n", $data);
 208          $paragraphs  = array(); // without empty paragraphs
 209          $needs_start = false;
 210          $needs_end   = false;
 211  
 212          $c = count($raw_paragraphs);
 213          if ($c == 1) {
 214              // There were no double-newlines, abort quickly. In theory this
 215              // should never happen.
 216              $result[] = new HTMLPurifier_Token_Text($data);
 217              return;
 218          }
 219          for ($i = 0; $i < $c; $i++) {
 220              $par = $raw_paragraphs[$i];
 221              if (trim($par) !== '') {
 222                  $paragraphs[] = $par;
 223              } else {
 224                  if ($i == 0) {
 225                      // Double newline at the front
 226                      if (empty($result)) {
 227                          // The empty result indicates that the AutoParagraph
 228                          // injector did not add any start paragraph tokens.
 229                          // This means that we have been in a paragraph for
 230                          // a while, and the newline means we should start a new one.
 231                          $result[] = new HTMLPurifier_Token_End('p');
 232                          $result[] = new HTMLPurifier_Token_Text("\n\n");
 233                          // However, the start token should only be added if
 234                          // there is more processing to be done (i.e. there are
 235                          // real paragraphs in here). If there are none, the
 236                          // next start paragraph tag will be handled by the
 237                          // next call to the injector
 238                          $needs_start = true;
 239                      } else {
 240                          // We just started a new paragraph!
 241                          // Reinstate a double-newline for presentation's sake, since
 242                          // it was in the source code.
 243                          array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
 244                      }
 245                  } elseif ($i + 1 == $c) {
 246                      // Double newline at the end
 247                      // There should be a trailing </p> when we're finally done.
 248                      $needs_end = true;
 249                  }
 250              }
 251          }
 252  
 253          // Check if this was just a giant blob of whitespace. Move this earlier,
 254          // perhaps?
 255          if (empty($paragraphs)) {
 256              return;
 257          }
 258  
 259          // Add the start tag indicated by \n\n at the beginning of $data
 260          if ($needs_start) {
 261              $result[] = $this->_pStart();
 262          }
 263  
 264          // Append the paragraphs onto the result
 265          foreach ($paragraphs as $par) {
 266              $result[] = new HTMLPurifier_Token_Text($par);
 267              $result[] = new HTMLPurifier_Token_End('p');
 268              $result[] = new HTMLPurifier_Token_Text("\n\n");
 269              $result[] = $this->_pStart();
 270          }
 271  
 272          // Remove trailing start token; Injector will handle this later if
 273          // it was indeed needed. This prevents from needing to do a lookahead,
 274          // at the cost of a lookbehind later.
 275          array_pop($result);
 276  
 277          // If there is no need for an end tag, remove all of it and let
 278          // MakeWellFormed close it later.
 279          if (!$needs_end) {
 280              array_pop($result); // removes \n\n
 281              array_pop($result); // removes </p>
 282          }
 283  
 284      }
 285  
 286      /**
 287       * Returns true if passed token is inline (and, ergo, allowed in
 288       * paragraph tags)
 289       */
 290      private function _isInline($token) {
 291          return isset($this->htmlDefinition->info['p']->child->elements[$token->name]);
 292      }
 293  
 294      /**
 295       * Looks ahead in the token list and determines whether or not we need
 296       * to insert a <p> tag.
 297       */
 298      private function _pLookAhead() {
 299          $this->current($i, $current);
 300          if ($current instanceof HTMLPurifier_Token_Start) $nesting = 1;
 301          else $nesting = 0;
 302          $ok = false;
 303          while ($this->forwardUntilEndToken($i, $current, $nesting)) {
 304              $result = $this->_checkNeedsP($current);
 305              if ($result !== null) {
 306                  $ok = $result;
 307                  break;
 308              }
 309          }
 310          return $ok;
 311      }
 312  
 313      /**
 314       * Determines if a particular token requires an earlier inline token
 315       * to get a paragraph. This should be used with _forwardUntilEndToken
 316       */
 317      private function _checkNeedsP($current) {
 318          if ($current instanceof HTMLPurifier_Token_Start){
 319              if (!$this->_isInline($current)) {
 320                  // <div>PAR1<div>
 321                  //      ----
 322                  // Terminate early, since we hit a block element
 323                  return false;
 324              }
 325          } elseif ($current instanceof HTMLPurifier_Token_Text) {
 326              if (strpos($current->data, "\n\n") !== false) {
 327                  // <div>PAR1<b>PAR1\n\nPAR2
 328                  //      ----
 329                  return true;
 330              } else {
 331                  // <div>PAR1<b>PAR1...
 332                  //      ----
 333              }
 334          }
 335          return null;
 336      }
 337  
 338  }
 339  
 340  // vim: et sw=4 sts=4
PHP Cross Reference of vtigercrm-6.1.0

/libraries/htmlpurifier/library/HTMLPurifier/Injector/ -> AutoParagraph.php (source)