[ Index ]

PHP Cross Reference of moodle-2.8

title

Body

[close]

/mod/assign/feedback/editpdf/fpdi/ -> pdf_parser.php (source)

   1  <?php
   2  //

   3  //  FPDI - Version 1.4.4

   4  //

   5  //    Copyright 2004-2013 Setasign - Jan Slabon

   6  //

   7  //  Licensed under the Apache License, Version 2.0 (the "License");

   8  //  you may not use this file except in compliance with the License.

   9  //  You may obtain a copy of the License at

  10  //

  11  //      http://www.apache.org/licenses/LICENSE-2.0

  12  //

  13  //  Unless required by applicable law or agreed to in writing, software

  14  //  distributed under the License is distributed on an "AS IS" BASIS,

  15  //  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  16  //  See the License for the specific language governing permissions and

  17  //  limitations under the License.

  18  //

  19  
  20  if (!defined ('PDF_TYPE_NULL'))
  21      define ('PDF_TYPE_NULL', 0);
  22  if (!defined ('PDF_TYPE_NUMERIC'))
  23      define ('PDF_TYPE_NUMERIC', 1);
  24  if (!defined ('PDF_TYPE_TOKEN'))
  25      define ('PDF_TYPE_TOKEN', 2);
  26  if (!defined ('PDF_TYPE_HEX'))
  27      define ('PDF_TYPE_HEX', 3);
  28  if (!defined ('PDF_TYPE_STRING'))
  29      define ('PDF_TYPE_STRING', 4);
  30  if (!defined ('PDF_TYPE_DICTIONARY'))
  31      define ('PDF_TYPE_DICTIONARY', 5);
  32  if (!defined ('PDF_TYPE_ARRAY'))
  33      define ('PDF_TYPE_ARRAY', 6);
  34  if (!defined ('PDF_TYPE_OBJDEC'))
  35      define ('PDF_TYPE_OBJDEC', 7);
  36  if (!defined ('PDF_TYPE_OBJREF'))
  37      define ('PDF_TYPE_OBJREF', 8);
  38  if (!defined ('PDF_TYPE_OBJECT'))
  39      define ('PDF_TYPE_OBJECT', 9);
  40  if (!defined ('PDF_TYPE_STREAM'))
  41      define ('PDF_TYPE_STREAM', 10);
  42  if (!defined ('PDF_TYPE_BOOLEAN'))
  43      define ('PDF_TYPE_BOOLEAN', 11);
  44  if (!defined ('PDF_TYPE_REAL'))
  45      define ('PDF_TYPE_REAL', 12);
  46      
  47  require_once ('pdf_context.php');
  48  
  49  if (!class_exists('pdf_parser', false)) {
  50      
  51      class pdf_parser {
  52          
  53          /**

  54           * Filename

  55           * @var string

  56           */
  57          var $filename;
  58          
  59          /**

  60           * File resource

  61           * @var resource

  62           */
  63          var $f;
  64          
  65          /**

  66           * PDF Context

  67           * @var object pdf_context-Instance

  68           */
  69          var $c;
  70          
  71          /**

  72           * xref-Data

  73           * @var array

  74           */
  75          var $xref;
  76      
  77          /**

  78           * root-Object

  79           * @var array

  80           */
  81          var $root;
  82          
  83          /**

  84           * PDF version of the loaded document

  85           * @var string

  86           */
  87          var $pdfVersion;
  88          
  89          /**

  90           * For reading encrypted documents and xref/objectstreams are in use

  91           *

  92           * @var boolean

  93           */
  94          var $readPlain = true;
  95          
  96          /**

  97           * Constructor

  98           *

  99           * @param string $filename  Source-Filename

 100           */
 101      	function pdf_parser($filename) {
 102              $this->filename = $filename;
 103              
 104              $this->f = @fopen($this->filename, 'rb');
 105      
 106              if (!$this->f)
 107                  $this->error(sprintf('Cannot open %s !', $filename));
 108      
 109              $this->getPDFVersion();
 110      
 111              $this->c = new pdf_context($this->f);
 112              
 113              // Read xref-Data

 114              $this->xref = array();
 115              $this->pdf_read_xref($this->xref, $this->pdf_find_xref());
 116              
 117              // Check for Encryption

 118              $this->getEncryption();
 119      
 120              // Read root

 121              $this->pdf_read_root();
 122          }
 123          
 124          /**

 125           * Close the opened file

 126           */
 127          function closeFile() {
 128              if (isset($this->f) && is_resource($this->f)) {
 129                  fclose($this->f);    
 130                  unset($this->f);
 131              }    
 132          }
 133          
 134          /**

 135           * Print Error and die

 136           *

 137           * @param string $msg  Error-Message

 138           */
 139          function error($msg) {
 140              die('<b>PDF-Parser Error:</b> ' . $msg);    
 141          }
 142          
 143          /**

 144           * Check Trailer for Encryption

 145           */
 146          function getEncryption() {
 147              if (isset($this->xref['trailer'][1]['/Encrypt'])) {
 148                  $this->error('File is encrypted!');
 149              }
 150          }
 151          
 152          /**

 153           * Find/Return /Root

 154           *

 155           * @return array

 156           */
 157          function pdf_find_root() {
 158              if ($this->xref['trailer'][1]['/Root'][0] != PDF_TYPE_OBJREF) {
 159                  $this->error('Wrong Type of Root-Element! Must be an indirect reference');
 160              }
 161              
 162              return $this->xref['trailer'][1]['/Root'];
 163          }
 164      
 165          /**

 166           * Read the /Root

 167           */
 168          function pdf_read_root() {
 169              // read root

 170              $this->root = $this->pdf_resolve_object($this->c, $this->pdf_find_root());
 171          }
 172          
 173          /**

 174           * Get PDF-Version

 175           *

 176           * And reset the PDF Version used in FPDI if needed

 177           */
 178          function getPDFVersion() {
 179              fseek($this->f, 0);
 180              preg_match('/\d\.\d/',fread($this->f, 16), $m);
 181              if (isset($m[0]))
 182                  $this->pdfVersion = $m[0];
 183              return $this->pdfVersion;
 184          }
 185          
 186          /**

 187           * Find the xref-Table

 188           */
 189          function pdf_find_xref() {
 190                 $toRead = 1500;
 191                      
 192              $stat = fseek ($this->f, -$toRead, SEEK_END);
 193              if ($stat === -1) {
 194                  fseek ($this->f, 0);
 195              }
 196                 $data = fread($this->f, $toRead);
 197              
 198              $pos = strlen($data) - strpos(strrev($data), strrev('startxref')); 
 199              $data = substr($data, $pos);
 200              
 201              if (!preg_match('/\s*(\d+).*$/s', $data, $matches)) {
 202                  $this->error('Unable to find pointer to xref table');
 203              }
 204      
 205              return (int) $matches[1];
 206          }
 207      
 208          /**

 209           * Read xref-table

 210           *

 211           * @param array $result Array of xref-table

 212           * @param integer $offset of xref-table

 213           */
 214          function pdf_read_xref(&$result, $offset) {
 215              $o_pos = $offset-min(20, $offset);
 216              fseek($this->f, $o_pos); // set some bytes backwards to fetch errorious docs

 217                  
 218              $data = fread($this->f, 100);
 219              
 220              $xrefPos = strrpos($data, 'xref');
 221      
 222              if ($xrefPos === false) {
 223                  fseek($this->f, $offset);
 224                  $c = new pdf_context($this->f);
 225                  $xrefStreamObjDec = $this->pdf_read_value($c);
 226                  
 227                  if (is_array($xrefStreamObjDec) && isset($xrefStreamObjDec[0]) && $xrefStreamObjDec[0] == PDF_TYPE_OBJDEC) {
 228                      $this->error(sprintf('This document (%s) probably uses a compression technique which is not supported by the free parser shipped with FPDI.', $this->filename));
 229                  } else {            
 230                      $this->error('Unable to find xref table.');
 231                  }
 232              }
 233              
 234              if (!isset($result['xref_location'])) {
 235                  $result['xref_location'] = $o_pos + $xrefPos;
 236                  $result['max_object'] = 0;
 237              }
 238      
 239              $cylces = -1;
 240              $bytesPerCycle = 100;
 241              
 242              fseek($this->f, $o_pos = $o_pos + $xrefPos + 4); // set the handle directly after the "xref"-keyword

 243              $data = fread($this->f, $bytesPerCycle);
 244              
 245              while (($trailerPos = strpos($data, 'trailer', max($bytesPerCycle * $cylces++, 0))) === false && !feof($this->f)) {
 246                  $data .= fread($this->f, $bytesPerCycle);
 247              }
 248              
 249              if ($trailerPos === false) {
 250                  $this->error('Trailer keyword not found after xref table');
 251              }
 252              
 253              $data = substr($data, 0, $trailerPos);
 254              
 255              // get Line-Ending

 256              preg_match_all("/(\r\n|\n|\r)/", substr($data, 0, 100), $m); // check the first 100 bytes for linebreaks

 257      
 258              $differentLineEndings = count(array_unique($m[0]));
 259              if ($differentLineEndings > 1) {
 260                  $lines = preg_split("/(\r\n|\n|\r)/", $data, -1, PREG_SPLIT_NO_EMPTY);
 261              } else {
 262                  $lines = explode($m[0][1], $data);
 263              }
 264              
 265              $data = $differentLineEndings = $m = null;
 266              unset($data, $differentLineEndings, $m);
 267              
 268              $linesCount = count($lines);
 269              
 270              $start = 1;
 271              
 272              for ($i = 0; $i < $linesCount; $i++) {
 273                  $line = trim($lines[$i]);
 274                  if ($line) {
 275                      $pieces = explode(' ', $line);
 276                      $c = count($pieces);
 277                      switch($c) {
 278                          case 2:
 279                              $start = (int)$pieces[0];
 280                              $end   = $start + (int)$pieces[1];
 281                              if ($end > $result['max_object'])
 282                                  $result['max_object'] = $end;
 283                              break;
 284                          case 3:
 285                              if (!isset($result['xref'][$start]))
 286                                  $result['xref'][$start] = array();
 287                              
 288                              if (!array_key_exists($gen = (int) $pieces[1], $result['xref'][$start])) {
 289                                  $result['xref'][$start][$gen] = $pieces[2] == 'n' ? (int) $pieces[0] : null;
 290                              }
 291                              $start++;
 292                              break;
 293                          default:
 294                              $this->error('Unexpected data in xref table');
 295                      }
 296                  }
 297              }
 298              
 299              $lines = $pieces = $line = $start = $end = $gen = null;
 300              unset($lines, $pieces, $line, $start, $end, $gen);
 301              
 302              fseek($this->f, $o_pos + $trailerPos + 7);
 303              
 304              $c = new pdf_context($this->f);
 305              $trailer = $this->pdf_read_value($c);
 306              
 307              $c = null;
 308              unset($c);
 309              
 310              if (!isset($result['trailer'])) {
 311                  $result['trailer'] = $trailer;          
 312              }
 313              
 314              if (isset($trailer[1]['/Prev'])) {
 315                  $this->pdf_read_xref($result, $trailer[1]['/Prev'][1]);
 316              } 
 317              
 318              $trailer = null;
 319              unset($trailer);
 320              
 321              return true;
 322          }
 323          
 324          /**

 325           * Reads an Value

 326           *

 327           * @param object $c pdf_context

 328           * @param string $token a Token

 329           * @return mixed

 330           */
 331          function pdf_read_value(&$c, $token = null) {
 332              if (is_null($token)) {
 333                  $token = $this->pdf_read_token($c);
 334              }
 335              
 336              if ($token === false) {
 337                  return false;
 338              }
 339      
 340              switch ($token) {
 341                  case    '<':
 342                      // This is a hex string.

 343                      // Read the value, then the terminator

 344      
 345                      $pos = $c->offset;
 346      
 347                      while(1) {
 348      
 349                          $match = strpos ($c->buffer, '>', $pos);
 350                      
 351                          // If you can't find it, try

 352                          // reading more data from the stream

 353      
 354                          if ($match === false) {
 355                              if (!$c->increase_length()) {
 356                                  return false;
 357                              } else {
 358                                  continue;
 359                              }
 360                          }
 361      
 362                          $result = substr ($c->buffer, $c->offset, $match - $c->offset);
 363                          $c->offset = $match + 1;
 364                          
 365                          return array (PDF_TYPE_HEX, $result);
 366                      }
 367                      
 368                      break;
 369                  case    '<<':
 370                      // This is a dictionary.

 371      
 372                      $result = array();
 373      
 374                      // Recurse into this function until we reach

 375                      // the end of the dictionary.

 376                      while (($key = $this->pdf_read_token($c)) !== '>>') {
 377                          if ($key === false) {
 378                              return false;
 379                          }
 380                          
 381                          if (($value =   $this->pdf_read_value($c)) === false) {
 382                              return false;
 383                          }
 384                          
 385                          // Catch missing value

 386                          if ($value[0] == PDF_TYPE_TOKEN && $value[1] == '>>') {
 387                              $result[$key] = array(PDF_TYPE_NULL);
 388                              break;
 389                          }
 390                          
 391                          $result[$key] = $value;
 392                      }
 393                      
 394                      return array (PDF_TYPE_DICTIONARY, $result);
 395      
 396                  case    '[':
 397                      // This is an array.

 398      
 399                      $result = array();
 400      
 401                      // Recurse into this function until we reach

 402                      // the end of the array.

 403                      while (($token = $this->pdf_read_token($c)) !== ']') {
 404                          if ($token === false) {
 405                              return false;
 406                          }
 407                          
 408                          if (($value = $this->pdf_read_value($c, $token)) === false) {
 409                              return false;
 410                          }
 411                          
 412                          $result[] = $value;
 413                      }
 414                      
 415                      return array (PDF_TYPE_ARRAY, $result);
 416      
 417                  case    '('        :
 418                      // This is a string

 419                      $pos = $c->offset;
 420                      
 421                      $openBrackets = 1;
 422                      do {
 423                          for (; $openBrackets != 0 && $pos < $c->length; $pos++) {
 424                              switch (ord($c->buffer[$pos])) {
 425                                  case 0x28: // '('
 426                                      $openBrackets++;
 427                                      break;
 428                                  case 0x29: // ')'
 429                                      $openBrackets--;
 430                                      break;
 431                                  case 0x5C: // backslash
 432                                      $pos++;
 433                              }
 434                          }
 435                      } while($openBrackets != 0 && $c->increase_length());
 436                      
 437                      $result = substr($c->buffer, $c->offset, $pos - $c->offset - 1);
 438                      $c->offset = $pos;
 439                      
 440                      return array (PDF_TYPE_STRING, $result);
 441      
 442                  case 'stream':
 443                      $o_pos = ftell($c->file)-strlen($c->buffer);
 444                      $o_offset = $c->offset;
 445                      
 446                      $c->reset($startpos = $o_pos + $o_offset);
 447                      
 448                      $e = 0; // ensure line breaks in front of the stream

 449                      if ($c->buffer[0] == chr(10) || $c->buffer[0] == chr(13))
 450                          $e++;
 451                      if ($c->buffer[1] == chr(10) && $c->buffer[0] != chr(10))
 452                          $e++;
 453                      
 454                      if ($this->actual_obj[1][1]['/Length'][0] == PDF_TYPE_OBJREF) {
 455                          $tmp_c = new pdf_context($this->f);
 456                          $tmp_length = $this->pdf_resolve_object($tmp_c, $this->actual_obj[1][1]['/Length']);
 457                          $length = $tmp_length[1][1];
 458                      } else {
 459                          $length = $this->actual_obj[1][1]['/Length'][1];    
 460                      }
 461                          
 462                      if ($length > 0) {
 463                          $c->reset($startpos + $e,$length);
 464                          $v = $c->buffer;
 465                      } else {
 466                          $v = '';   
 467                      }
 468                      $c->reset($startpos + $e + $length + 9); // 9 = strlen("endstream")

 469                      
 470                      return array(PDF_TYPE_STREAM, $v);
 471                      
 472                  default    :
 473                      if (is_numeric ($token)) {
 474                          // A numeric token. Make sure that

 475                          // it is not part of something else.

 476                          if (($tok2 = $this->pdf_read_token ($c)) !== false) {
 477                              if (is_numeric ($tok2)) {
 478      
 479                                  // Two numeric tokens in a row.

 480                                  // In this case, we're probably in

 481                                  // front of either an object reference

 482                                  // or an object specification.

 483                                  // Determine the case and return the data

 484                                  if (($tok3 = $this->pdf_read_token ($c)) !== false) {
 485                                      switch ($tok3) {
 486                                          case 'obj':
 487                                              return array (PDF_TYPE_OBJDEC, (int) $token, (int) $tok2);
 488                                          case 'R':
 489                                              return array (PDF_TYPE_OBJREF, (int) $token, (int) $tok2);
 490                                      }
 491                                      // If we get to this point, that numeric value up

 492                                      // there was just a numeric value. Push the extra

 493                                      // tokens back into the stack and return the value.

 494                                      array_push ($c->stack, $tok3);
 495                                  }
 496                              }
 497      
 498                              array_push ($c->stack, $tok2);
 499                          }
 500      
 501                          if ($token === (string)((int)$token))
 502                              return array (PDF_TYPE_NUMERIC, (int)$token);
 503                          else 
 504                              return array (PDF_TYPE_REAL, (float)$token);
 505                      } elseif ($token == 'true' || $token == 'false') {
 506                          return array (PDF_TYPE_BOOLEAN, $token == 'true');
 507                      } elseif ($token == 'null') {
 508                         return array (PDF_TYPE_NULL);
 509                      } else {
 510                          // Just a token. Return it.

 511                          return array (PDF_TYPE_TOKEN, $token);
 512                      }
 513               }
 514          }
 515          
 516          /**

 517           * Resolve an object

 518           *

 519           * @param object $c pdf_context

 520           * @param array $obj_spec The object-data

 521           * @param boolean $encapsulate Must set to true, cause the parsing and fpdi use this method only without this para

 522           */
 523          function pdf_resolve_object(&$c, $obj_spec, $encapsulate = true) {
 524              // Exit if we get invalid data

 525              if (!is_array($obj_spec)) {
 526                  $ret = false;
 527                  return $ret;
 528              }
 529      
 530              if ($obj_spec[0] == PDF_TYPE_OBJREF) {
 531      
 532                  // This is a reference, resolve it

 533                  if (isset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]])) {
 534      
 535                      // Save current file position

 536                      // This is needed if you want to resolve

 537                      // references while you're reading another object

 538                      // (e.g.: if you need to determine the length

 539                      // of a stream)

 540      
 541                      $old_pos = ftell($c->file);
 542      
 543                      // Reposition the file pointer and

 544                      // load the object header.

 545                      
 546                      $c->reset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]]);
 547      
 548                      $header = $this->pdf_read_value($c);
 549      
 550                      if ($header[0] != PDF_TYPE_OBJDEC || $header[1] != $obj_spec[1] || $header[2] != $obj_spec[2]) {
 551                          $toSearchFor = $obj_spec[1] . ' ' . $obj_spec[2] . ' obj';
 552                          if (preg_match('/' . $toSearchFor . '/', $c->buffer)) {
 553                              $c->offset = strpos($c->buffer, $toSearchFor) + strlen($toSearchFor);
 554                              // reset stack

 555                              $c->stack = array();
 556                          } else {
 557                              $this->error("Unable to find object ({$obj_spec[1]}, {$obj_spec[2]}) at expected location");
 558                          }
 559                      }
 560      
 561                      // If we're being asked to store all the information

 562                      // about the object, we add the object ID and generation

 563                      // number for later use

 564                      $result = array();
 565                      $this->actual_obj =& $result;
 566                      if ($encapsulate) {
 567                          $result = array (
 568                              PDF_TYPE_OBJECT,
 569                              'obj' => $obj_spec[1],
 570                              'gen' => $obj_spec[2]
 571                          );
 572                      } 
 573      
 574                      // Now simply read the object data until

 575                      // we encounter an end-of-object marker

 576                      while(1) {
 577                          $value = $this->pdf_read_value($c);
 578                          if ($value === false || count($result) > 4) {
 579                              // in this case the parser coudn't find an endobj so we break here

 580                              break;
 581                          }
 582      
 583                          if ($value[0] == PDF_TYPE_TOKEN && $value[1] === 'endobj') {
 584                              break;
 585                          }
 586      
 587                          $result[] = $value;
 588                      }
 589      
 590                      $c->reset($old_pos);
 591      
 592                      if (isset($result[2][0]) && $result[2][0] == PDF_TYPE_STREAM) {
 593                          $result[0] = PDF_TYPE_STREAM;
 594                      }
 595      
 596                      return $result;
 597                  }
 598              } else {
 599                  return $obj_spec;
 600              }
 601          }
 602      
 603          
 604          
 605          /**

 606           * Reads a token from the file

 607           *

 608           * @param object $c pdf_context

 609           * @return mixed

 610           */
 611          function pdf_read_token(&$c)
 612          {
 613              // If there is a token available

 614              // on the stack, pop it out and

 615              // return it.

 616      
 617              if (count($c->stack)) {
 618                  return array_pop($c->stack);
 619              }
 620      
 621              // Strip away any whitespace

 622      
 623              do {
 624                  if (!$c->ensure_content()) {
 625                      return false;
 626                  }
 627                  $c->offset += strspn($c->buffer, "\x20\x0A\x0C\x0D\x09\x00", $c->offset);
 628              } while ($c->offset >= $c->length - 1);
 629      
 630              // Get the first character in the stream

 631      
 632              $char = $c->buffer[$c->offset++];
 633      
 634              switch ($char) {
 635      
 636                  case '[':
 637                  case ']':
 638                  case '(':
 639                  case ')':
 640                  
 641                      // This is either an array or literal string

 642                      // delimiter, Return it

 643      
 644                      return $char;
 645      
 646                  case '<':
 647                  case '>':
 648      
 649                      // This could either be a hex string or

 650                      // dictionary delimiter. Determine the

 651                      // appropriate case and return the token

 652      
 653                      if ($c->buffer[$c->offset] == $char) {
 654                          if (!$c->ensure_content()) {
 655                              return false;
 656                          }
 657                          $c->offset++;
 658                          return $char . $char;
 659                      } else {
 660                          return $char;
 661                      }
 662      
 663                  case '%':
 664                      
 665                      // This is a comment - jump over it!

 666                      
 667                      $pos = $c->offset;
 668                      while(1) {
 669                          $match = preg_match("/(\r\n|\r|\n)/", $c->buffer, $m, PREG_OFFSET_CAPTURE, $pos);
 670                          if ($match === 0) {
 671                              if (!$c->increase_length()) {
 672                                  return false;
 673                              } else {
 674                                  continue;
 675                              }
 676                          }
 677      
 678                          $c->offset = $m[0][1]+strlen($m[0][0]);
 679                          
 680                          return $this->pdf_read_token($c);
 681                      }
 682                      
 683                  default:
 684      
 685                      // This is "another" type of token (probably

 686                      // a dictionary entry or a numeric value)

 687                      // Find the end and return it.

 688      
 689                      if (!$c->ensure_content()) {
 690                          return false;
 691                      }
 692      
 693                      while(1) {
 694      
 695                          // Determine the length of the token

 696      
 697                          $pos = strcspn($c->buffer, "\x20%[]<>()/\x0A\x0C\x0D\x09\x00", $c->offset);
 698                          
 699                          if ($c->offset + $pos <= $c->length - 1) {
 700                              break;
 701                          } else {
 702                              // If the script reaches this point,

 703                              // the token may span beyond the end

 704                              // of the current buffer. Therefore,

 705                              // we increase the size of the buffer

 706                              // and try again--just to be safe.

 707      
 708                              $c->increase_length();
 709                          }
 710                      }
 711      
 712                      $result = substr($c->buffer, $c->offset - 1, $pos + 1);
 713      
 714                      $c->offset += $pos;
 715                      return $result;
 716              }
 717          }
 718      }
 719  }


Generated: Fri Nov 28 20:29:05 2014 Cross-referenced by PHPXref 0.7.1