[ Index ] |
PHP Cross Reference of moodle-2.8 |
[Summary view] [Print] [Text view]
<?php
//
//  FPDI - Version 1.4.4
//
//    Copyright 2004-2013 Setasign - Jan Slabon
//
//  Licensed under the Apache License, Version 2.0 (the "License");
//  you may not use this file except in compliance with the License.
//  You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
//  Unless required by applicable law or agreed to in writing, software
//  distributed under the License is distributed on an "AS IS" BASIS,
//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//  See the License for the specific language governing permissions and
//  limitations under the License.
//

// Type tags stored at index 0 of every parsed value array. Each one is only
// defined if it does not exist yet, so including this file after another
// component already declared the same constants is harmless.
if (!defined ('PDF_TYPE_NULL'))
    define ('PDF_TYPE_NULL', 0);
if (!defined ('PDF_TYPE_NUMERIC'))
    define ('PDF_TYPE_NUMERIC', 1);
if (!defined ('PDF_TYPE_TOKEN'))
    define ('PDF_TYPE_TOKEN', 2);
if (!defined ('PDF_TYPE_HEX'))
    define ('PDF_TYPE_HEX', 3);
if (!defined ('PDF_TYPE_STRING'))
    define ('PDF_TYPE_STRING', 4);
if (!defined ('PDF_TYPE_DICTIONARY'))
    define ('PDF_TYPE_DICTIONARY', 5);
if (!defined ('PDF_TYPE_ARRAY'))
    define ('PDF_TYPE_ARRAY', 6);
if (!defined ('PDF_TYPE_OBJDEC'))
    define ('PDF_TYPE_OBJDEC', 7);
if (!defined ('PDF_TYPE_OBJREF'))
    define ('PDF_TYPE_OBJREF', 8);
if (!defined ('PDF_TYPE_OBJECT'))
    define ('PDF_TYPE_OBJECT', 9);
if (!defined ('PDF_TYPE_STREAM'))
    define ('PDF_TYPE_STREAM', 10);
if (!defined ('PDF_TYPE_BOOLEAN'))
    define ('PDF_TYPE_BOOLEAN', 11);
if (!defined ('PDF_TYPE_REAL'))
    define ('PDF_TYPE_REAL', 12);

require_once ('pdf_context.php');

if (!class_exists('pdf_parser', false)) {

    /**
     * Minimal PDF parser: opens a file, reads its cross-reference table(s)
     * and trailer, and resolves/parses objects on demand.
     *
     * Parsed values are arrays whose first element is one of the PDF_TYPE_*
     * constants, followed by the payload (if any).
     */
    class pdf_parser {

        /**
         * Filename
         * @var string
         */
        var $filename;

        /**
         * File resource
         * @var resource
         */
        var $f;

        /**
         * PDF Context
         * @var object pdf_context-Instance
         */
        var $c;

        /**
         * xref-Data
         * @var array
         */
        var $xref;

        /**
         * root-Object
         * @var array
         */
        var $root;

        /**
         * PDF version of the loaded document
         * @var string
         */
        var $pdfVersion;

        /**
         * For reading encrypted documents and xref/objectstreams are in use
         *
         * @var boolean
         */
        var $readPlain = true;

        /**
         * Reference to the object currently being assembled by
         * pdf_resolve_object(). The 'stream' branch of pdf_read_value()
         * reads the stream dictionary's /Length through it.
         *
         * Declared explicitly so the class does not rely on a dynamically
         * created property.
         *
         * @var array|null
         */
        var $actual_obj;

        /**
         * Constructor
         *
         * Opens the file, detects the PDF version, reads the xref data,
         * rejects encrypted documents and finally resolves the /Root object.
         *
         * NOTE(review): this is a PHP 4-style constructor (method named like
         * the class). It is kept under this name because callers/subclasses
         * may invoke it as parent::pdf_parser(); PHP 8 no longer calls such
         * a method automatically on "new".
         *
         * @param string $filename  Source-Filename
         */
        function pdf_parser($filename) {
            $this->filename = $filename;

            $this->f = @fopen($this->filename, 'rb');

            if (!$this->f)
                $this->error(sprintf('Cannot open %s !', $filename));

            $this->getPDFVersion();

            $this->c = new pdf_context($this->f);

            // Read xref-Data
            $this->xref = array();
            $this->pdf_read_xref($this->xref, $this->pdf_find_xref());

            // Check for Encryption
            $this->getEncryption();

            // Read root
            $this->pdf_read_root();
        }

        /**
         * Close the opened file (no-op if it is not an open resource)
         */
        function closeFile() {
            if (isset($this->f) && is_resource($this->f)) {
                fclose($this->f);
                unset($this->f);
            }
        }

        /**
         * Print Error and die
         *
         * @param string $msg  Error-Message
         */
        function error($msg) {
            die('<b>PDF-Parser Error:</b> ' . $msg);
        }

        /**
         * Check Trailer for Encryption
         *
         * Raises an error if the trailer dictionary has an /Encrypt entry.
         */
        function getEncryption() {
            if (isset($this->xref['trailer'][1]['/Encrypt'])) {
                $this->error('File is encrypted!');
            }
        }

        /**
         * Find/Return /Root
         *
         * @return array  The /Root entry of the trailer (an object reference)
         */
        function pdf_find_root() {
            if ($this->xref['trailer'][1]['/Root'][0] != PDF_TYPE_OBJREF) {
                $this->error('Wrong Type of Root-Element! Must be an indirect reference');
            }

            return $this->xref['trailer'][1]['/Root'];
        }

        /**
         * Read the /Root
         *
         * Resolves the trailer's /Root reference and stores it in $this->root.
         */
        function pdf_read_root() {
            // read root
            $this->root = $this->pdf_resolve_object($this->c, $this->pdf_find_root());
        }

        /**
         * Get PDF-Version
         *
         * Looks for the first "digit.digit" pattern in the first 16 bytes of
         * the file and stores it in $this->pdfVersion when found.
         *
         * @return string|null  The stored version (unchanged if none matched)
         */
        function getPDFVersion() {
            fseek($this->f, 0);
            preg_match('/\d\.\d/', fread($this->f, 16), $m);
            if (isset($m[0]))
                $this->pdfVersion = $m[0];
            return $this->pdfVersion;
        }

        /**
         * Find the xref-Table
         *
         * Reads up to the last 1500 bytes of the file and returns the number
         * following the last "startxref" keyword found there.
         *
         * @return integer  Byte offset taken from the startxref entry
         */
        function pdf_find_xref() {
            $toRead = 1500;

            $stat = fseek ($this->f, -$toRead, SEEK_END);
            if ($stat === -1) {
                // File is shorter than $toRead bytes: read it from the start.
                fseek ($this->f, 0);
            }
            $data = fread($this->f, $toRead);

            // Locate the LAST occurrence of the keyword; the offset follows it.
            $keyword = 'startxref';
            $keywordPos = strrpos($data, $keyword);

            if ($keywordPos === false
                || !preg_match('/\s*(\d+).*$/s', substr($data, $keywordPos + strlen($keyword)), $matches)
            ) {
                $this->error('Unable to find pointer to xref table');
            }

            return (int) $matches[1];
        }

        /**
         * Read xref-table
         *
         * Fills $result with 'xref_location', 'max_object', 'xref'
         * (object number => generation => byte offset or null) and
         * 'trailer'. If the trailer has a /Prev entry the referenced older
         * table is read recursively; entries and the trailer that were read
         * first are kept.
         *
         * @param array $result Array of xref-table
         * @param integer $offset of xref-table
         * @return boolean  Always true (failures go through error())
         */
        function pdf_read_xref(&$result, $offset) {
            $o_pos = $offset - min(20, $offset);
            fseek($this->f, $o_pos); // set some bytes backwards to cope with erroneous docs

            $data = fread($this->f, 100);

            $xrefPos = strrpos($data, 'xref');

            if ($xrefPos === false) {
                // No "xref" keyword near the offset: check whether an object
                // starts there instead, to give a more helpful error message.
                fseek($this->f, $offset);
                $c = new pdf_context($this->f);
                $xrefStreamObjDec = $this->pdf_read_value($c);

                if (is_array($xrefStreamObjDec) && isset($xrefStreamObjDec[0]) && $xrefStreamObjDec[0] == PDF_TYPE_OBJDEC) {
                    $this->error(sprintf('This document (%s) probably uses a compression technique which is not supported by the free parser shipped with FPDI.', $this->filename));
                } else {
                    $this->error('Unable to find xref table.');
                }
            }

            if (!isset($result['xref_location'])) {
                $result['xref_location'] = $o_pos + $xrefPos;
                $result['max_object'] = 0;
            }

            $cycles = -1;
            $bytesPerCycle = 100;

            fseek($this->f, $o_pos = $o_pos + $xrefPos + 4); // set the handle directly after the "xref"-keyword
            $data = fread($this->f, $bytesPerCycle);

            // Keep appending chunks until "trailer" shows up. The search
            // offset lags one chunk behind so a keyword split across two
            // chunks is still found.
            while (($trailerPos = strpos($data, 'trailer', max($bytesPerCycle * $cycles++, 0))) === false && !feof($this->f)) {
                $data .= fread($this->f, $bytesPerCycle);
            }

            if ($trailerPos === false) {
                $this->error('Trailer keyword not found after xref table');
            }

            $data = substr($data, 0, $trailerPos);

            // get Line-Ending
            preg_match_all("/(\r\n|\n|\r)/", substr($data, 0, 100), $m); // check the first 100 bytes for linebreaks

            $lineBreaks = $m[0];
            if (count($lineBreaks) > 0 && count(array_unique($lineBreaks)) === 1) {
                // Exactly one kind of line ending: split on it. Use the first
                // match - a second one is not guaranteed to exist.
                $lines = explode($lineBreaks[0], $data);
            } else {
                // Mixed line endings, or none at all in the sampled bytes.
                $lines = preg_split("/(\r\n|\n|\r)/", $data, -1, PREG_SPLIT_NO_EMPTY);
            }

            unset($data, $lineBreaks, $m);

            $linesCount = count($lines);

            $start = 1;

            for ($i = 0; $i < $linesCount; $i++) {
                $line = trim($lines[$i]);
                if ($line) {
                    $pieces = explode(' ', $line);
                    switch (count($pieces)) {
                        case 2:
                            // Subsection header: "<first object> <count>"
                            $start = (int)$pieces[0];
                            $end = $start + (int)$pieces[1];
                            if ($end > $result['max_object'])
                                $result['max_object'] = $end;
                            break;
                        case 3:
                            // Entry: "<offset> <generation> <n|f>"
                            if (!isset($result['xref'][$start]))
                                $result['xref'][$start] = array();

                            // Never overwrite an entry that is already known.
                            if (!array_key_exists($gen = (int) $pieces[1], $result['xref'][$start])) {
                                $result['xref'][$start][$gen] = $pieces[2] == 'n' ? (int) $pieces[0] : null;
                            }
                            $start++;
                            break;
                        default:
                            $this->error('Unexpected data in xref table');
                    }
                }
            }

            unset($lines, $pieces, $line, $start, $end, $gen);

            fseek($this->f, $o_pos + $trailerPos + 7); // 7 = strlen("trailer")

            $c = new pdf_context($this->f);
            $trailer = $this->pdf_read_value($c);

            unset($c);

            if (!isset($result['trailer'])) {
                $result['trailer'] = $trailer;
            }

            if (isset($trailer[1]['/Prev'])) {
                $this->pdf_read_xref($result, $trailer[1]['/Prev'][1]);
            }

            unset($trailer);

            return true;
        }

        /**
         * Reads a value
         *
         * Returns an array whose first element is a PDF_TYPE_* constant, or
         * false if no (complete) value could be read.
         *
         * @param object $c pdf_context
         * @param string $token a token that was already read (optional)
         * @return mixed
         */
        function pdf_read_value(&$c, $token = null) {
            if (is_null($token)) {
                $token = $this->pdf_read_token($c);
            }

            if ($token === false) {
                return false;
            }

            switch ($token) {
                case '<':
                    // This is a hex string.
                    // Read the value, then the terminator

                    $pos = $c->offset;

                    while (1) {

                        $match = strpos ($c->buffer, '>', $pos);

                        // If you can't find it, try
                        // reading more data from the stream

                        if ($match === false) {
                            if (!$c->increase_length()) {
                                return false;
                            } else {
                                continue;
                            }
                        }

                        $result = substr ($c->buffer, $c->offset, $match - $c->offset);
                        $c->offset = $match + 1;

                        return array (PDF_TYPE_HEX, $result);
                    }

                case '<<':
                    // This is a dictionary.

                    $result = array();

                    // Recurse into this function until we reach
                    // the end of the dictionary.
                    while (($key = $this->pdf_read_token($c)) !== '>>') {
                        if ($key === false) {
                            return false;
                        }

                        if (($value = $this->pdf_read_value($c)) === false) {
                            return false;
                        }

                        // Catch missing value
                        if ($value[0] == PDF_TYPE_TOKEN && $value[1] == '>>') {
                            $result[$key] = array(PDF_TYPE_NULL);
                            break;
                        }

                        $result[$key] = $value;
                    }

                    return array (PDF_TYPE_DICTIONARY, $result);

                case '[':
                    // This is an array.

                    $result = array();

                    // Recurse into this function until we reach
                    // the end of the array.
                    while (($token = $this->pdf_read_token($c)) !== ']') {
                        if ($token === false) {
                            return false;
                        }

                        if (($value = $this->pdf_read_value($c, $token)) === false) {
                            return false;
                        }

                        $result[] = $value;
                    }

                    return array (PDF_TYPE_ARRAY, $result);

                case '(' :
                    // This is a string
                    $pos = $c->offset;

                    // Scan for the matching ')' while tracking nested
                    // brackets; a backslash skips the following byte.
                    $openBrackets = 1;
                    do {
                        for (; $openBrackets != 0 && $pos < $c->length; $pos++) {
                            switch (ord($c->buffer[$pos])) {
                                case 0x28: // '('
                                    $openBrackets++;
                                    break;
                                case 0x29: // ')'
                                    $openBrackets--;
                                    break;
                                case 0x5C: // backslash
                                    $pos++;
                            }
                        }
                    } while ($openBrackets != 0 && $c->increase_length());

                    $result = substr($c->buffer, $c->offset, $pos - $c->offset - 1);
                    $c->offset = $pos;

                    return array (PDF_TYPE_STRING, $result);

                case 'stream':
                    // Absolute file position of the first byte after the keyword
                    $o_pos = ftell($c->file) - strlen($c->buffer);
                    $o_offset = $c->offset;

                    $c->reset($startpos = $o_pos + $o_offset);

                    $e = 0; // ensure line breaks in front of the stream
                    if ($c->buffer[0] == chr(10) || $c->buffer[0] == chr(13))
                        $e++;
                    if ($c->buffer[1] == chr(10) && $c->buffer[0] != chr(10))
                        $e++;

                    // /Length comes from the dictionary of the object that is
                    // currently being resolved; it may itself be a reference.
                    if ($this->actual_obj[1][1]['/Length'][0] == PDF_TYPE_OBJREF) {
                        $tmp_c = new pdf_context($this->f);
                        $tmp_length = $this->pdf_resolve_object($tmp_c, $this->actual_obj[1][1]['/Length']);
                        $length = $tmp_length[1][1];
                    } else {
                        $length = $this->actual_obj[1][1]['/Length'][1];
                    }

                    if ($length > 0) {
                        $c->reset($startpos + $e, $length);
                        $v = $c->buffer;
                    } else {
                        $v = '';
                    }
                    $c->reset($startpos + $e + $length + 9); // 9 = strlen("endstream")

                    return array(PDF_TYPE_STREAM, $v);

                default :
                    if (is_numeric ($token)) {
                        // A numeric token. Make sure that
                        // it is not part of something else.
                        if (($tok2 = $this->pdf_read_token ($c)) !== false) {
                            if (is_numeric ($tok2)) {

                                // Two numeric tokens in a row.
                                // In this case, we're probably in
                                // front of either an object reference
                                // or an object specification.
                                // Determine the case and return the data
                                if (($tok3 = $this->pdf_read_token ($c)) !== false) {
                                    switch ($tok3) {
                                        case 'obj':
                                            return array (PDF_TYPE_OBJDEC, (int) $token, (int) $tok2);
                                        case 'R':
                                            return array (PDF_TYPE_OBJREF, (int) $token, (int) $tok2);
                                    }
                                    // If we get to this point, that numeric value up
                                    // there was just a numeric value. Push the extra
                                    // tokens back into the stack and return the value.
                                    array_push ($c->stack, $tok3);
                                }
                            }

                            array_push ($c->stack, $tok2);
                        }

                        if ($token === (string)((int)$token))
                            return array (PDF_TYPE_NUMERIC, (int)$token);
                        else
                            return array (PDF_TYPE_REAL, (float)$token);
                    } elseif ($token == 'true' || $token == 'false') {
                        return array (PDF_TYPE_BOOLEAN, $token == 'true');
                    } elseif ($token == 'null') {
                        return array (PDF_TYPE_NULL);
                    } else {
                        // Just a token. Return it.
                        return array (PDF_TYPE_TOKEN, $token);
                    }
            }
        }

        /**
         * Resolve an object
         *
         * - Non-array input yields false.
         * - Anything that is not an object reference is returned unchanged.
         * - A reference that is listed in the xref data is read from the
         *   file; the context is put back to the file position it had before.
         * - A reference that is NOT listed in the xref data yields null.
         *
         * @param object $c pdf_context
         * @param array $obj_spec The object-data
         * @param boolean $encapsulate When true (default) the result starts
         *        with PDF_TYPE_OBJECT and carries 'obj' and 'gen' entries
         *        holding the object number and generation.
         * @return mixed
         */
        function pdf_resolve_object(&$c, $obj_spec, $encapsulate = true) {
            // Exit if we get invalid data
            if (!is_array($obj_spec)) {
                return false;
            }

            if ($obj_spec[0] != PDF_TYPE_OBJREF) {
                return $obj_spec;
            }

            // This is a reference, resolve it
            if (!isset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]])) {
                // Unknown (or free) object: nothing to resolve.
                return null;
            }

            // Save current file position
            // This is needed if you want to resolve
            // references while you're reading another object
            // (e.g.: if you need to determine the length
            // of a stream)

            $old_pos = ftell($c->file);

            // Reposition the file pointer and
            // load the object header.

            $c->reset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]]);

            $header = $this->pdf_read_value($c);

            if ($header[0] != PDF_TYPE_OBJDEC || $header[1] != $obj_spec[1] || $header[2] != $obj_spec[2]) {
                // The header is not exactly where the xref table says:
                // look for "<num> <gen> obj" in the current buffer.
                $toSearchFor = $obj_spec[1] . ' ' . $obj_spec[2] . ' obj';
                $headerPos = strpos($c->buffer, $toSearchFor);
                if ($headerPos !== false) {
                    $c->offset = $headerPos + strlen($toSearchFor);
                    // reset stack
                    $c->stack = array();
                } else {
                    $this->error("Unable to find object ({$obj_spec[1]}, {$obj_spec[2]}) at expected location");
                }
            }

            // If we're being asked to store all the information
            // about the object, we add the object ID and generation
            // number for later use
            $result = array();
            $this->actual_obj =& $result;
            if ($encapsulate) {
                $result = array (
                    PDF_TYPE_OBJECT,
                    'obj' => $obj_spec[1],
                    'gen' => $obj_spec[2]
                );
            }

            // Now simply read the object data until
            // we encounter an end-of-object marker
            while (1) {
                $value = $this->pdf_read_value($c);
                if ($value === false || count($result) > 4) {
                    // in this case the parser couldn't find an endobj so we break here
                    break;
                }

                if ($value[0] == PDF_TYPE_TOKEN && $value[1] === 'endobj') {
                    break;
                }

                $result[] = $value;
            }

            $c->reset($old_pos);

            // An object whose second value is a stream is tagged as a stream.
            if (isset($result[2][0]) && $result[2][0] == PDF_TYPE_STREAM) {
                $result[0] = PDF_TYPE_STREAM;
            }

            return $result;
        }

        /**
         * Reads a token from the file
         *
         * @param object $c pdf_context
         * @return mixed  The token string, or false if no more content is available
         */
        function pdf_read_token(&$c)
        {
            // If there is a token available
            // on the stack, pop it out and
            // return it.

            if (count($c->stack)) {
                return array_pop($c->stack);
            }

            // Strip away any whitespace

            do {
                if (!$c->ensure_content()) {
                    return false;
                }
                $c->offset += strspn($c->buffer, "\x20\x0A\x0C\x0D\x09\x00", $c->offset);
            } while ($c->offset >= $c->length - 1);

            // Get the first character in the stream

            $char = $c->buffer[$c->offset++];

            switch ($char) {

                case '[':
                case ']':
                case '(':
                case ')':

                    // This is either an array or literal string
                    // delimiter, Return it

                    return $char;

                case '<':
                case '>':

                    // This could either be a hex string or
                    // dictionary delimiter. Determine the
                    // appropriate case and return the token

                    if ($c->buffer[$c->offset] == $char) {
                        if (!$c->ensure_content()) {
                            return false;
                        }
                        $c->offset++;
                        return $char . $char;
                    } else {
                        return $char;
                    }

                case '%':

                    // This is a comment - jump over it!

                    $pos = $c->offset;
                    while (1) {
                        $match = preg_match("/(\r\n|\r|\n)/", $c->buffer, $m, PREG_OFFSET_CAPTURE, $pos);
                        if ($match === 0) {
                            if (!$c->increase_length()) {
                                return false;
                            } else {
                                continue;
                            }
                        }

                        $c->offset = $m[0][1] + strlen($m[0][0]);

                        return $this->pdf_read_token($c);
                    }

                default:

                    // This is "another" type of token (probably
                    // a dictionary entry or a numeric value)
                    // Find the end and return it.

                    if (!$c->ensure_content()) {
                        return false;
                    }

                    while (1) {

                        // Determine the length of the token

                        $pos = strcspn($c->buffer, "\x20%[]<>()/\x0A\x0C\x0D\x09\x00", $c->offset);

                        if ($c->offset + $pos <= $c->length - 1) {
                            break;
                        }

                        // If the script reaches this point,
                        // the token may span beyond the end
                        // of the current buffer. Therefore,
                        // we increase the size of the buffer
                        // and try again--just to be safe.
                        //
                        // If the buffer cannot grow (increase_length()
                        // reports failure, as handled elsewhere in this
                        // class), stop instead of looping forever: the
                        // token then simply runs to the end of the buffer.
                        if (!$c->increase_length()) {
                            break;
                        }
                    }

                    // $c->offset already points past the first character,
                    // hence the -1 / +1 to include it in the token.
                    $result = substr($c->buffer, $c->offset - 1, $pos + 1);

                    $c->offset += $pos;
                    return $result;
            }
        }
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 20:29:05 2014 | Cross-referenced by PHPXref 0.7.1 |