[ Index ]

PHP Cross Reference of vtigercrm-6.1.0

title

Body

[close]

/libraries/tcpdf/ -> tcpdf_parser.php (source)

   1  <?php
   2  //============================================================+
   3  // File name   : tcpdf_parser.php
   4  // Version     : 1.0.000
   5  // Begin       : 2011-05-23
   6  // Last Update : 2012-01-28
   7  // Author      : Nicola Asuni - Tecnick.com LTD - Manor Coach House, Church Hill, Aldershot, Hants, GU12 4RQ, UK - www.tecnick.com - [email protected]
   8  // License     : http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT GNU-LGPLv3
   9  // -------------------------------------------------------------------
  10  // Copyright (C) 2011-2012  Nicola Asuni - Tecnick.com LTD
  11  //
  12  // This file is part of TCPDF software library.
  13  //
  14  // TCPDF is free software: you can redistribute it and/or modify it
  15  // under the terms of the GNU Lesser General Public License as
  16  // published by the Free Software Foundation, either version 3 of the
  17  // License, or (at your option) any later version.
  18  //
  19  // TCPDF is distributed in the hope that it will be useful, but
  20  // WITHOUT ANY WARRANTY; without even the implied warranty of
  21  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  22  // See the GNU Lesser General Public License for more details.
  23  //
  24  // You should have received a copy of the License
  25  // along with TCPDF. If not, see
  26  // <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>.
  27  //
  28  // See LICENSE.TXT file for more information.
  29  // -------------------------------------------------------------------
  30  //
  31  // Description : This is a PHP class for parsing PDF documents.
  32  //
  33  //============================================================+
  34  
  35  /**
  36   * @file
  37   * This is a PHP class for parsing PDF documents.<br>
  38   * @package com.tecnick.tcpdf
  39   * @author Nicola Asuni
  40   * @version 1.0.000
  41   */
  42  
  43  // include class for decoding filters
  44  require_once(dirname(__FILE__).'/tcpdf_filters.php');
  45  
  46  /**
  47   * @class TCPDF_PARSER
  48   * This is a PHP class for parsing PDF documents.<br>
  49   * @package com.tecnick.tcpdf
  50   * @brief This is a PHP class for parsing PDF documents.
  51   * @version 1.0.000
  52   * @author Nicola Asuni - [email protected]
  53   */
  class TCPDF_PARSER {

      /**
       * Raw content of the PDF document (emptied once the constructor has parsed it).
       * @private
       */
      private $pdfdata = '';

      /**
       * XREF data: 'xref' maps "[object number]_[generation number]" to a byte offset,
       * 'trailer' holds the values read from the most recent trailer dictionary.
       * @protected
       */
      protected $xref = array();

      /**
       * Array of parsed PDF objects, indexed like the 'xref' map.
       * @protected
       */
      protected $objects = array();

      /**
       * Class object for decoding filters.
       * @private
       */
      private $FilterDecoders;

  // -----------------------------------------------------------------------------

      /**
       * Parse a PDF document and store the resulting xref data and objects.
       * @param $data (string) PDF data to parse.
       * @public
       * @since 1.0.000 (2011-05-24)
       */
      public function __construct($data) {
          if (empty($data)) {
              $this->Error('Empty PDF data.');
              // only reached when a subclass overrides Error() without halting
              return;
          }
          $this->pdfdata = $data;
          // initialize class for decoding filters
          $this->FilterDecoders = new TCPDF_FILTERS();
          // get xref and trailer data
          $this->xref = $this->getXrefData();
          // parse all document objects
          $this->objects = array();
          $offsets = isset($this->xref['xref']) ? $this->xref['xref'] : array();
          foreach ($offsets as $obj => $offset) {
              // an object may already have been parsed while resolving a reference
              if (!isset($this->objects[$obj])) {
                  $this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
              }
          }
          // release some memory
          $this->pdfdata = '';
      }

      /**
       * Return an array of parsed PDF document objects.
       * @return (array) Two-element array: xref data and the array of parsed objects.
       * @public
       * @since 1.0.000 (2011-06-26)
       */
      public function getParsedData() {
          return array($this->xref, $this->objects);
      }

      /**
       * Get xref (cross-reference table) and trailer data from PDF document data.
       * Starting from the given section (or from the last startxref when $offset is 0),
       * the chain of /Prev links is followed; entries and trailer values that were already
       * collected are never overwritten, so the most recent section wins.
       * @param $offset (int) xref offset (if known).
       * @param $xref (array) previous xref array (if any).
       * @return Array containing xref and trailer data.
       * @protected
       * @since 1.0.000 (2011-05-24)
       */
      protected function getXrefData($offset=0, $xref=array()) {
          if (!isset($xref['xref'])) {
              $xref['xref'] = array();
          }
          // sections already parsed, used to stop on circular or invalid /Prev links
          $visited = array();
          $next = intval($offset);
          while ($next !== false) {
              $startxref = $this->findXrefStart($next);
              if (($startxref === false) || isset($visited[$startxref])) {
                  break;
              }
              $visited[$startxref] = true;
              list($xref, $next) = $this->parseXrefSection($startxref, $xref);
          }
          return $xref;
      }

      /**
       * Locate the xref section to parse.
       * @param $offset (int) Offset of an xref section (e.g. a /Prev value), or the position from which to search for startxref.
       * @return (int|false) Offset of the 'xref' keyword, or false if none could be found.
       * @private
       */
      private function findXrefStart($offset) {
          $pdflen = strlen($this->pdfdata);
          if (($offset > 0) && ($offset < $pdflen) && (strpos($this->pdfdata, 'xref', $offset) === $offset)) {
              // the offset already points at an xref section
              return $offset;
          }
          // find last startxref
          if (($offset > $pdflen) || (preg_match_all('/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_SET_ORDER, $offset) == 0)) {
              $this->Error('Unable to find startxref');
              return false;
          }
          $last = array_pop($matches);
          $startxref = intval($last[1]);
          // check xref position (strict comparison: strpos() returns false when not found)
          if (($startxref >= $pdflen) || (strpos($this->pdfdata, 'xref', $startxref) !== $startxref)) {
              $this->Error('Unable to find xref');
              return false;
          }
          return $startxref;
      }

      /**
       * Parse one xref section and the trailer that follows it.
       * @param $startxref (int) Offset of the 'xref' keyword.
       * @param $xref (array) xref data collected so far.
       * @return (array) Two-element array: updated xref data and the /Prev offset (false if the trailer has none).
       * @private
       */
      private function parseXrefSection($startxref, $xref) {
          // the trailer is located first so that the entry scan below can stop in front of it
          if (preg_match('/trailer[\s]*<<(.*)>>[\s]*[\r\n]+startxref[\s]*[\r\n]+/isU', $this->pdfdata, $trailer, PREG_OFFSET_CAPTURE, $startxref) == 0) {
              $this->Error('Unable to find trailer');
              return array($xref, false);
          }
          $trailer_pos = $trailer[0][1];
          $trailer_data = $trailer[1][0];
          // extract xref data (object indexes and offsets); skip the "xref" keyword and one EOL char
          $offset = $startxref + 5;
          // initialize object number
          $obj_num = 0;
          while (preg_match('/^([0-9]+)[\s]([0-9]+)[\s]?([nf]?)/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
              if ($matches[0][1] >= $trailer_pos) {
                  // the match belongs to data that follows this section
                  break;
              }
              $offset = (strlen($matches[0][0]) + $matches[0][1]);
              if ($matches[3][0] == 'n') {
                  // create unique object index: [object number]_[generation number]
                  $index = $obj_num.'_'.intval($matches[2][0]);
                  // keep the entry of the most recent section if the object already exists
                  if (!isset($xref['xref'][$index])) {
                      // store object offset position
                      $xref['xref'][$index] = intval($matches[1][0]);
                  }
                  ++$obj_num;
                  // skip the end-of-line marker of the entry
                  $offset += 2;
              } elseif ($matches[3][0] == 'f') {
                  // free entry: only advance the object number
                  ++$obj_num;
                  $offset += 2;
              } else {
                  // subsection header: first object number (index)
                  $obj_num = intval($matches[1][0]);
              }
          }
          if (!isset($xref['trailer'])) {
              // get only the last updated version
              $xref['trailer'] = array();
              // parse trailer_data
              if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $m) > 0) {
                  $xref['trailer']['size'] = intval($m[1]);
              }
              if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $m) > 0) {
                  $xref['trailer']['root'] = intval($m[1]).'_'.intval($m[2]);
              }
              if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $m) > 0) {
                  $xref['trailer']['encrypt'] = intval($m[1]).'_'.intval($m[2]);
              }
              if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $m) > 0) {
                  $xref['trailer']['info'] = intval($m[1]).'_'.intval($m[2]);
              }
              if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $m) > 0) {
                  $xref['trailer']['id'] = array();
                  $xref['trailer']['id'][0] = $m[1];
                  $xref['trailer']['id'][1] = $m[2];
              }
          }
          $prev = false;
          if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $m) > 0) {
              // offset of the previous xref section
              $prev = intval($m[1]);
          }
          return array($xref, $prev);
      }

      /**
       * Get object type, raw value and offset to next object.
       * Returned types: '/' name, '(' literal string, '<' hexadecimal string, '[' array,
       * '<<' dictionary (plus the closing delimiters ')', ']', '>', '>>'), 'null', 'boolean',
       * 'numeric', 'stream', 'endstream', 'objref', 'obj', 'endobj'. The type is an empty
       * string when the end of the data is reached or nothing could be recognised.
       * @param $offset (int) Object offset.
       * @return array containing object type, raw value and offset to next object
       * @protected
       * @since 1.0.000 (2011-06-20)
       */
      protected function getRawObject($offset=0) {
          $objtype = ''; // object type to be returned
          $objval = ''; // object value to be returned
          $pdflen = strlen($this->pdfdata);
          if ($offset >= $pdflen) {
              // end of data
              return array($objtype, $objval, $offset);
          }
          // skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
          $offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
          if ($offset >= $pdflen) {
              // only white space was left
              return array($objtype, $objval, $offset);
          }
          // get first char
          $char = $this->pdfdata[$offset];
          // get object type
          switch ($char) {
              case '%': { // \x25 PERCENT SIGN
                  // skip comment and search for next token
                  $next = strcspn($this->pdfdata, "\r\n", $offset);
                  if ($next > 0) {
                      $offset += $next;
                      return $this->getRawObject($offset);
                  }
                  break;
              }
              case '/': { // \x2F SOLIDUS
                  // name object
                  $objtype = $char;
                  ++$offset;
                  if (preg_match('/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/', substr($this->pdfdata, $offset, 256), $matches) == 1) {
                      $objval = $matches[1]; // unescaped value
                      $offset += strlen($objval);
                  }
                  break;
              }
              case '(':   // \x28 LEFT PARENTHESIS
              case ')': { // \x29 RIGHT PARENTHESIS
                  // literal string object
                  $objtype = $char;
                  ++$offset;
                  if ($char == '(') {
                      $strpos = $offset;
                      $open_bracket = 1;
                      while (($open_bracket > 0) && isset($this->pdfdata[$strpos])) {
                          switch ($this->pdfdata[$strpos]) {
                              case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
                                  // skip next character
                                  ++$strpos;
                                  break;
                              }
                              case '(': { // LEFT PARENTHESIS (28h)
                                  ++$open_bracket;
                                  break;
                              }
                              case ')': { // RIGHT PARENTHESIS (29h)
                                  --$open_bracket;
                                  break;
                              }
                          }
                          ++$strpos;
                      }
                      // the closing parenthesis is only dropped when the string was terminated
                      $length = $strpos - $offset - (($open_bracket == 0) ? 1 : 0);
                      $objval = (string) substr($this->pdfdata, $offset, $length);
                      $offset = $strpos;
                  }
                  break;
              }
              case '[':   // \x5B LEFT SQUARE BRACKET
              case ']': { // \x5D RIGHT SQUARE BRACKET
                  // array object
                  $objtype = $char;
                  ++$offset;
                  if ($char == '[') {
                      // get array content
                      list($objval, $offset) = $this->getRawContainer($offset, ']');
                  }
                  break;
              }
              case '<':   // \x3C LESS-THAN SIGN
              case '>': { // \x3E GREATER-THAN SIGN
                  if (isset($this->pdfdata[($offset + 1)]) && ($this->pdfdata[($offset + 1)] == $char)) {
                      // dictionary object
                      $objtype = $char.$char;
                      $offset += 2;
                      if ($char == '<') {
                          // get dictionary content (flat list of keys and values)
                          list($objval, $offset) = $this->getRawContainer($offset, '>>');
                      }
                  } else {
                      // hexadecimal string object
                      $objtype = $char;
                      ++$offset;
                      $end = ($char == '<') ? strpos($this->pdfdata, '>', $offset) : false;
                      if ($end !== false) {
                          $raw = (string) substr($this->pdfdata, $offset, ($end - $offset));
                          // white space is allowed between the hexadecimal digits and carries no meaning
                          if (strspn($raw, "0123456789ABCDEFabcdef\x00\x09\x0a\x0c\x0d\x20") == strlen($raw)) {
                              $objval = str_replace(array("\x00", "\x09", "\x0a", "\x0c", "\x0d", "\x20"), '', $raw);
                              $offset = $end + 1;
                          }
                      }
                  }
                  break;
              }
              default: {
                  if (substr($this->pdfdata, $offset, 6) == 'endobj') {
                      // indirect object
                      $objtype = 'endobj';
                      $offset += 6;
                  } elseif (substr($this->pdfdata, $offset, 4) == 'null') {
                      // null object
                      $objtype = 'null';
                      $offset += 4;
                      $objval = 'null';
                  } elseif (substr($this->pdfdata, $offset, 4) == 'true') {
                      // boolean true object
                      $objtype = 'boolean';
                      $offset += 4;
                      $objval = 'true';
                  } elseif (substr($this->pdfdata, $offset, 5) == 'false') {
                      // boolean false object
                      $objtype = 'boolean';
                      $offset += 5;
                      $objval = 'false';
                  } elseif (substr($this->pdfdata, $offset, 6) == 'stream') {
                      // start stream object
                      $objtype = 'stream';
                      $offset += 6;
                      if (preg_match('/^[\r\n]+(.*)[\r\n]*endstream/isU', substr($this->pdfdata, $offset), $matches) == 1) {
                          $objval = $matches[1];
                          $offset += strlen($matches[0]);
                      }
                  } elseif (substr($this->pdfdata, $offset, 9) == 'endstream') {
                      // end stream object
                      $objtype = 'endstream';
                      $offset += 9;
                  } elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
                      // indirect object reference (the type name must match the one tested by getObjectVal)
                      $objtype = 'objref';
                      $offset += strlen($matches[0]);
                      $objval = intval($matches[1]).'_'.intval($matches[2]);
                  } elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
                      // object start
                      $objtype = 'obj';
                      $objval = intval($matches[1]).'_'.intval($matches[2]);
                      $offset += strlen($matches[0]);
                  } elseif (($numlen = strspn($this->pdfdata, '+-.0123456789', $offset)) > 0) {
                      // numeric object
                      $objtype = 'numeric';
                      $objval = substr($this->pdfdata, $offset, $numlen);
                      $offset += $numlen;
                  }
                  break;
              }
          }
          return array($objtype, $objval, $offset);
      }

      /**
       * Read raw objects until the given closing delimiter is found.
       * Reading also stops when getRawObject() reports an empty type (end of data or
       * unrecognised content), because in that case the offset can not advance any further.
       * @param $offset (int) Offset of the first element.
       * @param $closing (string) Type of the closing delimiter (']' or '>>').
       * @return (array) Two-element array: list of raw elements (without the delimiter) and offset after the last element read.
       * @private
       */
      private function getRawContainer($offset, $closing) {
          $items = array();
          while (true) {
              $element = $this->getRawObject($offset);
              $offset = $element[2];
              if (($element[0] === $closing) || ($element[0] === '')) {
                  break;
              }
              $items[] = $element;
          }
          return array($items, $offset);
      }

      /**
       * Get content of indirect object.
       * @param $obj_ref (string) Object number and generation number separated by underscore character.
       * @param $offset (int) Object offset.
       * @param $decoding (boolean) If true decode streams.
       * @return array containing object data (list of raw elements); a single null element triple if the object is not found at $offset.
       * @protected
       * @since 1.0.000 (2011-05-24)
       */
      protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
          $obj = explode('_', $obj_ref);
          if (count($obj) != 2) {
              $this->Error('Invalid object reference: '.$obj_ref);
              return;
          }
          $objref = $obj[0].' '.$obj[1].' obj';
          // the object header must be located exactly at the given offset
          if (substr($this->pdfdata, $offset, strlen($objref)) !== $objref) {
              // an indirect reference to an undefined object shall be considered a reference to the null object
              return array('null', 'null', $offset);
          }
          // starting position of object content
          $offset += strlen($objref);
          // get array of object content
          $objdata = array();
          $i = 0; // object main index
          while (true) {
              // get element
              $element = $this->getRawObject($offset);
              $offset = $element[2];
              if (($element[0] === 'endobj') || ($element[0] === '')) {
                  // closing delimiter, or truncated/unrecognised data
                  break;
              }
              // decode stream using stream's dictionary information
              if ($decoding && ($element[0] == 'stream') && (isset($objdata[($i - 1)][0])) && ($objdata[($i - 1)][0] == '<<')) {
                  $element[3] = $this->decodeStream($objdata[($i - 1)][1], $element[1]);
              }
              $objdata[$i] = $element;
              ++$i;
          }
          // return raw object content
          return $objdata;
      }

      /**
       * Get the content of object, resolving indirect object reference if necessary.
       * @param $obj (array) Raw element as returned by getRawObject().
       * @return array containing object data: the content of the referenced object (as returned by getIndirectObject()) for a known reference, the element itself otherwise.
       * @protected
       * @since 1.0.000 (2011-06-26)
       */
      protected function getObjectVal($obj) {
          if ($obj[0] == 'objref') {
              // reference to indirect object
              if (isset($this->objects[$obj[1]])) {
                  // this object has been already parsed
                  return $this->objects[$obj[1]];
              } elseif (isset($this->xref['xref'][$obj[1]])) {
                  // parse new object
                  $this->objects[$obj[1]] = $this->getIndirectObject($obj[1], $this->xref['xref'][$obj[1]], false);
                  return $this->objects[$obj[1]];
              }
          }
          return $obj;
      }

      /**
       * Resolve a raw element to a single element triple.
       * @param $element (array) Raw element, possibly an indirect reference.
       * @return (array) The element itself, or the first element of the referenced object.
       * @private
       */
      private function resolveElement($element) {
          $resolved = $this->getObjectVal($element);
          if (!is_array($resolved) || !isset($resolved[0])) {
              // empty or invalid object
              return array('null', 'null', 0);
          }
          if (is_array($resolved[0])) {
              // a resolved indirect object is a list of elements: its value is the first one
              return $resolved[0];
          }
          return $resolved;
      }

      /**
       * Decode the specified stream.
       * @param $sdic (array) Stream's dictionary array.
       * @param $stream (string) Stream to decode.
       * @return array containing decoded stream data and remaining filters.
       * @protected
       * @since 1.0.000 (2011-06-22)
       */
      protected function decodeStream($sdic, $stream) {
          // get stream length and filters
          $slength = strlen($stream);
          $filters = array();
          foreach ($sdic as $k => $v) {
              // only name elements followed by a value are of interest
              if (($v[0] != '/') || !isset($sdic[($k + 1)])) {
                  continue;
              }
              if ($v[1] == 'Length') {
                  // get declared stream length (it may be stored in an indirect object)
                  $lenobj = $this->resolveElement($sdic[($k + 1)]);
                  if ($lenobj[0] == 'numeric') {
                      $declength = intval($lenobj[1]);
                      if (($declength >= 0) && ($declength < $slength)) {
                          $stream = substr($stream, 0, $declength);
                          $slength = $declength;
                      }
                  }
              } elseif ($v[1] == 'Filter') {
                  // resolve indirect object
                  $objval = $this->resolveElement($sdic[($k + 1)]);
                  if ($objval[0] == '/') {
                      // single filter
                      $filters[] = $objval[1];
                  } elseif ($objval[0] == '[') {
                      // array of filters
                      foreach ($objval[1] as $flt) {
                          if ($flt[0] == '/') {
                              $filters[] = $flt[1];
                          }
                      }
                  }
              }
          }
          // decode the stream
          $remaining_filters = array();
          foreach ($filters as $filter) {
              if (in_array($filter, $this->FilterDecoders->getAvailableFilters())) {
                  $stream = $this->FilterDecoders->decodeFilter($filter, $stream);
              } else {
                  // add missing filter to array
                  $remaining_filters[] = $filter;
              }
          }
          return array($stream, $remaining_filters);
      }

      /**
       * This method is called in case of fatal error; it simply outputs the message and halts the execution.
       * @param $msg (string) The error message
       * @public
       * @since 1.0.000 (2011-05-23)
       */
      public function Error($msg) {
          // exit program and print error
          die('<strong>TCPDF_PARSER ERROR: </strong>'.$msg);
      }

  } // END OF TCPDF_PARSER CLASS
 498  
 499  //============================================================+
 500  // END OF FILE
 501  //============================================================+


Generated: Fri Nov 28 20:08:37 2014 Cross-referenced by PHPXref 0.7.1