[ Index ] |
PHP Cross Reference of vtigercrm-6.1.0 |
[Summary view] [Print] [Text view]
<?php
//============================================================+
// File name   : tcpdf_parser.php
// Version     : 1.0.000
// Begin       : 2011-05-23
// Last Update : 2012-01-28
// Author      : Nicola Asuni - Tecnick.com LTD - Manor Coach House, Church Hill, Aldershot, Hants, GU12 4RQ, UK - www.tecnick.com - info@tecnick.com
// License     : http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT GNU-LGPLv3
// -------------------------------------------------------------------
// Copyright (C) 2011-2012 Nicola Asuni - Tecnick.com LTD
//
// This file is part of TCPDF software library.
//
// TCPDF is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// TCPDF is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the License
// along with TCPDF. If not, see
// <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>.
//
// See LICENSE.TXT file for more information.
// -------------------------------------------------------------------
//
// Description : This is a PHP class for parsing PDF documents.
//
//============================================================+

/**
 * @file
 * This is a PHP class for parsing PDF documents.<br>
 * @package com.tecnick.tcpdf
 * @author Nicola Asuni
 * @version 1.0.000
 */

// include class for decoding filters
require_once(dirname(__FILE__).'/tcpdf_filters.php');

/**
 * @class TCPDF_PARSER
 * This is a PHP class for parsing PDF documents.<br>
 * The constructor reads the cross-reference table and trailer of the supplied
 * PDF data and then parses every in-use object listed in the table.
 * The result is available through getParsedData().
 * @package com.tecnick.tcpdf
 * @brief This is a PHP class for parsing PDF documents.
 * @version 1.0.000
 * @author Nicola Asuni - info@tecnick.com
 */
class TCPDF_PARSER {

	/**
	 * Raw content of the PDF document.
	 * Emptied at the end of the constructor to release memory.
	 * @private
	 */
	private $pdfdata = '';

	/**
	 * XREF data: 'xref' maps "[object number]_[generation number]" to a byte
	 * offset, 'trailer' holds the values extracted from the trailer dictionary.
	 * @protected
	 */
	protected $xref = array();

	/**
	 * Array of PDF objects, indexed by "[object number]_[generation number]".
	 * @protected
	 */
	protected $objects = array();

	/**
	 * Class object for decoding filters.
	 * @private
	 */
	private $FilterDecoders;

	/**
	 * Offsets of the xref sections already visited while following /Prev links.
	 * Used only to stop a cyclic /Prev chain from recursing forever.
	 * @private
	 */
	private $xref_visited = array();

	// -----------------------------------------------------------------------------

	/**
	 * Parse a PDF document and build the array of objects.
	 * @param $data (string) PDF data to parse.
	 * @public
	 * @since 1.0.000 (2011-05-24)
	 */
	public function __construct($data) {
		if (empty($data)) {
			$this->Error('Empty PDF data.');
			// only reached when Error() has been overridden not to halt
			return;
		}
		$this->pdfdata = $data;
		// initialize class for decoding filters
		$this->FilterDecoders = new TCPDF_FILTERS();
		// get xref and trailer data
		$this->xref = $this->getXrefData();
		// parse all document objects
		$this->objects = array();
		foreach ($this->xref['xref'] as $obj => $offset) {
			// an object may already be present if it was resolved through getObjectVal()
			if (!isset($this->objects[$obj])) {
				$this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
			}
		}
		// release some memory
		$this->pdfdata = '';
	}

	/**
	 * Return an array of parsed PDF document objects.
	 * @return (array) Two-element array: the xref/trailer data and the array of parsed objects.
	 * @public
	 * @since 1.0.000 (2011-06-26)
	 */
	public function getParsedData() {
		return array($this->xref, $this->objects);
	}

	/**
	 * Get xref (cross-reference table) and trailer data from PDF document data.
	 * When $offset is 0 the position of the table is taken from the last
	 * "startxref" keyword of the document; otherwise $offset is used as the
	 * position of the "xref" keyword (this is how /Prev sections are reached).
	 * Entries and trailer values that are already present in $xref are never
	 * overwritten, so the most recent section wins.
	 * @param $offset (int) xref offset (if know).
	 * @param $xref (array) previous xref array (if any).
	 * @return Array containing xref and trailer data.
	 * @protected
	 * @since 1.0.000 (2011-05-24)
	 */
	protected function getXrefData($offset=0, $xref=array()) {
		$offset = intval($offset);
		if (!isset($xref['xref'])) {
			// always expose the key, even when the table has no in-use entry
			$xref['xref'] = array();
		}
		if ($offset == 0) {
			// top-level call: forget the sections visited by a previous run
			$this->xref_visited = array();
			// find last startxref
			if (preg_match_all('/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_SET_ORDER) == 0) {
				$this->Error('Unable to find startxref');
				return $xref;
			}
			$matches = array_pop($matches);
			$startxref = intval($matches[1]);
		} else {
			// the caller already knows where the xref section starts
			$startxref = $offset;
		}
		$this->xref_visited[$startxref] = true;
		// check xref position (the bound check avoids an out-of-range offset in strpos)
		if (($startxref >= strlen($this->pdfdata)) OR (strpos($this->pdfdata, 'xref', $startxref) !== $startxref)) {
			$this->Error('Unable to find xref');
			return $xref;
		}
		// extract xref data (object indexes and offsets)
		$offset = $startxref + 5;
		// initialize object number
		$obj_num = 0;
		while (preg_match('/^([0-9]+)[\s]([0-9]+)[\s]?([nf]?)/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
			// the pattern is searched forward from $offset: a match separated from the
			// previous one by anything but white space lies outside this table
			if (trim(substr($this->pdfdata, $offset, ($matches[0][1] - $offset))) !== '') {
				break;
			}
			$offset = (strlen($matches[0][0]) + $matches[0][1]);
			if ($matches[3][0] == 'n') {
				// create unique object index: [object number]_[generation number]
				$index = $obj_num.'_'.intval($matches[2][0]);
				// check if object already exist
				if (!isset($xref['xref'][$index])) {
					// store object offset position
					$xref['xref'][$index] = intval($matches[1][0]);
				}
				++$obj_num;
			} elseif ($matches[3][0] == 'f') {
				// free entry: only advance the object counter
				++$obj_num;
			} else {
				// subsection header: first number is the object number (index)
				$obj_num = intval($matches[1][0]);
			}
		}
		// get trailer data
		if (preg_match('/trailer[\s]*<<(.*)>>[\s]*[\r\n]+startxref[\s]*[\r\n]+/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
			$trailer_data = $matches[1][0];
			if (!isset($xref['trailer'])) {
				// get only the last updated version
				$xref['trailer'] = array();
				// parse trailer_data
				if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['size'] = intval($matches[1]);
				}
				if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['root'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['encrypt'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['info'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['id'] = array();
					$xref['trailer']['id'][0] = $matches[1];
					$xref['trailer']['id'][1] = $matches[2];
				}
			}
			if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
				// get previous xref (skip offset 0 and sections already visited,
				// both of which would otherwise recurse without end)
				$prev = intval($matches[1]);
				if (($prev > 0) AND (!isset($this->xref_visited[$prev]))) {
					$xref = $this->getXrefData($prev, $xref);
				}
			}
		} else {
			$this->Error('Unable to find trailer');
		}
		return $xref;
	}

	/**
	 * Get object type, raw value and offset to next object.
	 * The returned type is one of: '/', '(', ')', '[', ']', '<<', '>>', '<', '>',
	 * 'endobj', 'null', 'boolean', 'stream', 'endstream', 'ojbref', 'ojb',
	 * 'numeric', or an empty string when nothing is recognized at $offset.
	 * The 'ojbref' and 'ojb' spellings are historical and kept unchanged because
	 * they are part of the data returned by getParsedData().
	 * @param $offset (int) Object offset.
	 * @return array containing object type, raw value and offset to next object
	 * @protected
	 * @since 1.0.000 (2011-06-20)
	 */
	protected function getRawObject($offset=0) {
		$objtype = ''; // object type to be returned
		$objval = ''; // object value to be returned
		// skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
		$offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
		if (!isset($this->pdfdata[$offset])) {
			// end of data: nothing left to read
			return array($objtype, $objval, $offset);
		}
		// get first char
		$char = $this->pdfdata[$offset];
		// get object type
		switch ($char) {
			case '%': { // \x25 PERCENT SIGN
				// skip comment and search for next token
				$next = strcspn($this->pdfdata, "\r\n", $offset);
				if ($next > 0) {
					$offset += $next;
					return $this->getRawObject($offset);
				}
				break;
			}
			case '/': { // \x2F SOLIDUS
				// name object
				$objtype = $char;
				++$offset;
				if (preg_match('/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/', substr($this->pdfdata, $offset, 256), $matches) == 1) {
					$objval = $matches[1]; // unescaped value
					$offset += strlen($objval);
				}
				break;
			}
			case '(': // \x28 LEFT PARENTHESIS
			case ')': { // \x29 RIGHT PARENTHESIS
				// literal string object
				$objtype = $char;
				++$offset;
				$strpos = $offset;
				if ($char == '(') {
					// scan up to the matching closing parenthesis
					$open_bracket = 1;
					while ($open_bracket > 0) {
						if (!isset($this->pdfdata[$strpos])) {
							break;
						}
						$ch = $this->pdfdata[$strpos];
						switch ($ch) {
							case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
								// skip next character
								++$strpos;
								break;
							}
							case '(': { // LEFT PARENHESIS (28h)
								++$open_bracket;
								break;
							}
							case ')': { // RIGHT PARENTHESIS (29h)
								--$open_bracket;
								break;
							}
						}
						++$strpos;
					}
					$objval = substr($this->pdfdata, $offset, ($strpos - $offset - 1));
					$offset = $strpos;
				}
				break;
			}
			case '[': // \x5B LEFT SQUARE BRACKET
			case ']': { // \x5D RIGHT SQUARE BRACKET
				// array object
				$objtype = $char;
				++$offset;
				if ($char == '[') {
					// get array content
					$objval = array();
					do {
						$previous = $offset;
						// get element
						$element = $this->getRawObject($offset);
						$offset = $element[2];
						$objval[] = $element;
						// stop on the closing delimiter, or when the parser cannot advance
						// (truncated data or unrecognized token)
					} while (($element[0] != ']') AND ($offset != $previous));
					// remove closing delimiter
					array_pop($objval);
				}
				break;
			}
			case '<': // \x3C LESS-THAN SIGN
			case '>': { // \x3E GREATER-THAN SIGN
				if (isset($this->pdfdata[($offset + 1)]) AND ($this->pdfdata[($offset + 1)] == $char)) {
					// dictionary object
					$objtype = $char.$char;
					$offset += 2;
					if ($char == '<') {
						// get array content
						$objval = array();
						do {
							$previous = $offset;
							// get element
							$element = $this->getRawObject($offset);
							$offset = $element[2];
							$objval[] = $element;
							// stop on the closing delimiter, or when the parser cannot advance
						} while (($element[0] != '>>') AND ($offset != $previous));
						// remove closing delimiter
						array_pop($objval);
					}
				} else {
					// hexadecimal string object
					$objtype = $char;
					++$offset;
					if (($char == '<') AND (preg_match('/^([0-9A-Fa-f]+)[>]/iU', substr($this->pdfdata, $offset), $matches) == 1)) {
						$objval = $matches[1];
						$offset += strlen($matches[0]);
					}
				}
				break;
			}
			default: {
				if (substr($this->pdfdata, $offset, 6) == 'endobj') {
					// indirect object
					$objtype = 'endobj';
					$offset += 6;
				} elseif (substr($this->pdfdata, $offset, 4) == 'null') {
					// null object
					$objtype = 'null';
					$offset += 4;
					$objval = 'null';
				} elseif (substr($this->pdfdata, $offset, 4) == 'true') {
					// boolean true object
					$objtype = 'boolean';
					$offset += 4;
					$objval = 'true';
				} elseif (substr($this->pdfdata, $offset, 5) == 'false') {
					// boolean false object
					$objtype = 'boolean';
					$offset += 5;
					$objval = 'false';
				} elseif (substr($this->pdfdata, $offset, 6) == 'stream') {
					// start stream object
					$objtype = 'stream';
					$offset += 6;
					if (preg_match('/^[\r\n]+(.*)[\r\n]*endstream/isU', substr($this->pdfdata, $offset), $matches) == 1) {
						$objval = $matches[1];
						$offset += strlen($matches[0]);
					}
				} elseif (substr($this->pdfdata, $offset, 9) == 'endstream') {
					// end stream object
					$objtype = 'endstream';
					$offset += 9;
				} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
					// indirect object reference
					$objtype = 'ojbref';
					$offset += strlen($matches[0]);
					$objval = intval($matches[1]).'_'.intval($matches[2]);
				} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
					// object start
					$objtype = 'ojb';
					$objval = intval($matches[1]).'_'.intval($matches[2]);
					$offset += strlen($matches[0]);
				} elseif (($numlen = strspn($this->pdfdata, '+-.0123456789', $offset)) > 0) {
					// numeric object
					$objtype = 'numeric';
					$objval = substr($this->pdfdata, $offset, $numlen);
					$offset += $numlen;
				}
				break;
			}
		}
		return array($objtype, $objval, $offset);
	}

	/**
	 * Get content of indirect object.
	 * The object header "N G obj" must be found exactly at $offset; otherwise
	 * the reference is treated as a reference to the null object.
	 * @param $obj_ref (string) Object number and generation number separated by underscore character.
	 * @param $offset (int) Object offset.
	 * @param $decoding (boolean) If true decode streams.
	 * @return array containing object data.
	 * @protected
	 * @since 1.0.000 (2011-05-24)
	 */
	protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
		$offset = intval($offset);
		$obj = explode('_', $obj_ref);
		if (count($obj) != 2) {
			$this->Error('Invalid object reference: '.$obj_ref);
			return;
		}
		$objref = $obj[0].' '.$obj[1].' obj';
		// strict comparison: "not found" (false) must not be confused with offset 0
		if (($offset > strlen($this->pdfdata)) OR (strpos($this->pdfdata, $objref, $offset) !== $offset)) {
			// an indirect reference to an undefined object shall be considered a reference to the null object
			return array('null', 'null', $offset);
		}
		// starting position of object content
		$offset += strlen($objref);
		// get array of object content
		$objdata = array();
		$i = 0; // object main index
		do {
			$previous = $offset;
			// get element
			$element = $this->getRawObject($offset);
			$offset = $element[2];
			// decode stream using stream's dictionary information
			if ($decoding AND ($element[0] == 'stream') AND (isset($objdata[($i - 1)][0])) AND ($objdata[($i - 1)][0] == '<<')) {
				$element[3] = $this->decodeStream($objdata[($i - 1)][1], $element[1]);
			}
			$objdata[$i] = $element;
			++$i;
			// stop on 'endobj', or when the parser cannot advance (truncated data or unrecognized token)
		} while (($element[0] != 'endobj') AND ($offset != $previous));
		// remove closing delimiter
		array_pop($objdata);
		// return raw object content
		return $objdata;
	}

	/**
	 * Get the content of object, resolving indirect object reference if necessary.
	 * Both the 'ojbref' type produced by getRawObject() and the 'objref'
	 * spelling are accepted as indirect references.
	 * @param $obj (array) Object element as returned by getRawObject().
	 * @return array containing object data; $obj itself when it is not a resolvable reference.
	 * @protected
	 * @since 1.0.000 (2011-06-26)
	 */
	protected function getObjectVal($obj) {
		if (($obj[0] === 'objref') OR ($obj[0] === 'ojbref')) {
			// reference to indirect object
			if (isset($this->objects[$obj[1]])) {
				// this object has been already parsed
				return $this->objects[$obj[1]];
			} elseif (isset($this->xref['xref'][$obj[1]])) {
				// parse new object
				$this->objects[$obj[1]] = $this->getIndirectObject($obj[1], $this->xref['xref'][$obj[1]], false);
				return $this->objects[$obj[1]];
			}
		}
		return $obj;
	}

	/**
	 * Decode the specified stream.
	 * The stream is first cut to the length declared in its dictionary (when
	 * that length is a direct number shorter than the data), then each filter
	 * named by the /Filter entry is applied in order if a decoder is available.
	 * @param $sdic (array) Stream's dictionary array.
	 * @param $stream (string) Stream to decode.
	 * @return array containing decoded stream data and remaining filters.
	 * @protected
	 * @since 1.0.000 (2011-06-22)
	 */
	protected function decodeStream($sdic, $stream) {
		// get stream lenght and filters
		$slength = strlen($stream);
		$filters = array();
		foreach ($sdic as $k => $v) {
			if ($v[0] == '/') {
				if (($v[1] == 'Length') AND (isset($sdic[($k + 1)])) AND ($sdic[($k + 1)][0] == 'numeric')) {
					// get declared stream lenght
					$declength = intval($sdic[($k + 1)][1]);
					if ($declength < $slength) {
						$stream = substr($stream, 0, $declength);
						$slength = $declength;
					}
				} elseif (($v[1] == 'Filter') AND (isset($sdic[($k + 1)]))) {
					// resolve indirect object
					$objval = $this->getObjectVal($sdic[($k + 1)]);
					if (isset($objval[0]) AND is_array($objval[0])) {
						// a resolved indirect object is a list of elements: use its first element
						$objval = $objval[0];
					}
					if (isset($objval[0], $objval[1])) {
						if ($objval[0] == '/') {
							// single filter
							$filters[] = $objval[1];
						} elseif ($objval[0] == '[') {
							// array of filters
							foreach ($objval[1] as $flt) {
								if ($flt[0] == '/') {
									$filters[] = $flt[1];
								}
							}
						}
					}
				}
			}
		}
		// decode the stream
		$remaining_filters = array();
		if (!empty($filters)) {
			// the list of decoders does not change while looping
			$available_filters = $this->FilterDecoders->getAvailableFilters();
			foreach ($filters as $filter) {
				if (in_array($filter, $available_filters)) {
					$stream = $this->FilterDecoders->decodeFilter($filter, $stream);
				} else {
					// add missing filter to array
					$remaining_filters[] = $filter;
				}
			}
		}
		return array($stream, $remaining_filters);
	}

	/**
	 * This method is automatically called in case of fatal error; it simply outputs the message and halts the execution.
	 * @param $msg (string) The error message
	 * @public
	 * @since 1.0.000 (2011-05-23)
	 */
	public function Error($msg) {
		// exit program and print error
		die('<strong>TCPDF_PARSER ERROR: </strong>'.$msg);
	}

} // END OF TCPDF_PARSER CLASS

//============================================================+
// END OF FILE
//============================================================+
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 20:08:37 2014 | Cross-referenced by PHPXref 0.7.1 |