[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/media/ -> DjVu.php (source)

   1  <?php
   2  /**
   3   * Handler for DjVu images.
   4   *
   5   * This program is free software; you can redistribute it and/or modify
   6   * it under the terms of the GNU General Public License as published by
   7   * the Free Software Foundation; either version 2 of the License, or
   8   * (at your option) any later version.
   9   *
  10   * This program is distributed in the hope that it will be useful,
  11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13   * GNU General Public License for more details.
  14   *
  15   * You should have received a copy of the GNU General Public License along
  16   * with this program; if not, write to the Free Software Foundation, Inc.,
  17   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18   * http://www.gnu.org/copyleft/gpl.html
  19   *
  20   * @file
  21   * @ingroup Media
  22   */
  23  
  24  /**
  25   * Handler for DjVu images
  26   *
  27   * @ingroup Media
  28   */
  29  class DjVuHandler extends ImageHandler {
  30      /**
  31       * @return bool
  32       */
  33  	function isEnabled() {
  34          global $wgDjvuRenderer, $wgDjvuDump, $wgDjvuToXML;
  35          if ( !$wgDjvuRenderer || ( !$wgDjvuDump && !$wgDjvuToXML ) ) {
  36              wfDebug( "DjVu is disabled, please set \$wgDjvuRenderer and \$wgDjvuDump\n" );
  37  
  38              return false;
  39          } else {
  40              return true;
  41          }
  42      }
  43  
  44      /**
  45       * @param File $file
  46       * @return bool
  47       */
  48  	function mustRender( $file ) {
  49          return true;
  50      }
  51  
  52      /**
  53       * @param File $file
  54       * @return bool
  55       */
  56  	function isMultiPage( $file ) {
  57          return true;
  58      }
  59  
  60      /**
  61       * @return array
  62       */
  63  	function getParamMap() {
  64          return array(
  65              'img_width' => 'width',
  66              'img_page' => 'page',
  67          );
  68      }
  69  
  70      /**
  71       * @param string $name
  72       * @param mixed $value
  73       * @return bool
  74       */
  75  	function validateParam( $name, $value ) {
  76          if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
  77              // Extra junk on the end of page, probably actually a caption
  78              // e.g. [[File:Foo.djvu|thumb|Page 3 of the document shows foo]]
  79              return false;
  80          }
  81          if ( in_array( $name, array( 'width', 'height', 'page' ) ) ) {
  82              if ( $value <= 0 ) {
  83                  return false;
  84              } else {
  85                  return true;
  86              }
  87          } else {
  88              return false;
  89          }
  90      }
  91  
  92      /**
  93       * @param array $params
  94       * @return bool|string
  95       */
  96  	function makeParamString( $params ) {
  97          $page = isset( $params['page'] ) ? $params['page'] : 1;
  98          if ( !isset( $params['width'] ) ) {
  99              return false;
 100          }
 101  
 102          return "page{$page}-{$params['width']}px";
 103      }
 104  
 105      /**
 106       * @param string $str
 107       * @return array|bool
 108       */
 109  	function parseParamString( $str ) {
 110          $m = false;
 111          if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) {
 112              return array( 'width' => $m[2], 'page' => $m[1] );
 113          } else {
 114              return false;
 115          }
 116      }
 117  
 118      /**
 119       * @param array $params
 120       * @return array
 121       */
 122  	function getScriptParams( $params ) {
 123          return array(
 124              'width' => $params['width'],
 125              'page' => $params['page'],
 126          );
 127      }
 128  
    /**
     * Render one page of a DjVu file to a thumbnail by shelling out to the
     * configured renderer ($wgDjvuRenderer), optionally piped through
     * $wgDjvuPostProcessor, with the output redirected to $dstPath.
     *
     * Error objects are returned (not thrown) when the file has no metadata,
     * the parameters do not normalise, the requested page is beyond the page
     * count, the destination directory cannot be created, no local copy of
     * the source can be obtained, or the external command fails.
     *
     * @param File $image
     * @param string $dstPath Filesystem path the thumbnail is written to
     * @param string $dstUrl URL passed through to the returned ThumbnailImage
     * @param array $params Transform parameters; 'width', 'height' and 'page'
     *   are read after normaliseParams() has run
     * @param int $flags Bitfield; only self::TRANSFORM_LATER is checked here
     * @return MediaTransformError|ThumbnailImage|TransformParameterError
     */
    function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
        global $wgDjvuRenderer, $wgDjvuPostProcessor;

        // Fetch XML and check it, to give a more informative error message than the one which
        // normaliseParams will inevitably give.
        $xml = $image->getMetadata();
        if ( !$xml ) {
            // Parameters are not normalised yet, so fall back to 0 for missing dimensions
            $width = isset( $params['width'] ) ? $params['width'] : 0;
            $height = isset( $params['height'] ) ? $params['height'] : 0;

            return new MediaTransformError( 'thumbnail_error', $width, $height,
                wfMessage( 'djvu_no_xml' )->text() );
        }

        if ( !$this->normaliseParams( $image, $params ) ) {
            return new TransformParameterError( $params );
        }
        $width = $params['width'];
        $height = $params['height'];
        $page = $params['page'];
        if ( $page > $this->pageCount( $image ) ) {
            return new MediaTransformError(
                'thumbnail_error',
                $width,
                $height,
                wfMessage( 'djvu_page_error' )->text()
            );
        }

        // With TRANSFORM_LATER set, return the thumbnail object without running the renderer
        if ( $flags & self::TRANSFORM_LATER ) {
            $params = array(
                'width' => $width,
                'height' => $height,
                'page' => $page
            );

            return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
        }

        if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
            return new MediaTransformError(
                'thumbnail_error',
                $width,
                $height,
                wfMessage( 'thumbnail_dest_directory' )->text()
            );
        }

        // Get local copy source for shell scripts
        // Thumbnail extraction is very inefficient for large files.
        // Provide a way to pool count limit the number of downloaders.
        if ( $image->getSize() >= 1e7 ) { // 10MB
            // The pool key is derived from the file name
            $work = new PoolCounterWorkViaCallback( 'GetLocalFileCopy', sha1( $image->getName() ),
                array(
                    'doWork' => function () use ( $image ) {
                        return $image->getLocalRefPath();
                    }
                )
            );
            $srcPath = $work->execute();
        } else {
            $srcPath = $image->getLocalRefPath();
        }

        if ( $srcPath === false ) { // Failed to get local copy
            wfDebugLog( 'thumbnail',
                sprintf( 'Thumbnail failed on %s: could not get local copy of "%s"',
                    wfHostname(), $image->getName() ) );

            return new MediaTransformError( 'thumbnail_error',
                $params['width'], $params['height'],
                wfMessage( 'filemissing' )->text()
            );
        }

        # Use a subshell (brackets) to aggregate stderr from both pipeline commands
        # before redirecting it to the overall stdout. This works in both Linux and Windows XP.
        // NOTE(review): physicalWidth/physicalHeight are not set in this method;
        // presumably normaliseParams() provides them - confirm.
        $cmd = '(' . wfEscapeShellArg(
            $wgDjvuRenderer,
            "-format=ppm",
            "-page={$page}",
            "-size={$params['physicalWidth']}x{$params['physicalHeight']}",
            $srcPath );
        if ( $wgDjvuPostProcessor ) {
            // The post-processor command is appended as-is, without shell escaping
            $cmd .= " | {$wgDjvuPostProcessor}";
        }
        $cmd .= ' > ' . wfEscapeShellArg( $dstPath ) . ') 2>&1';
        wfProfileIn( 'ddjvu' );
        wfDebug( __METHOD__ . ": $cmd\n" );
        $retval = '';
        $err = wfShellExec( $cmd, $retval );
        wfProfileOut( 'ddjvu' );

        // Treat either a non-zero exit status or a removed output file as failure
        $removed = $this->removeBadFile( $dstPath, $retval );
        if ( $retval != 0 || $removed ) {
            $this->logErrorForExternalProcess( $retval, $err, $cmd );
            return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
        } else {
            $params = array(
                'width' => $width,
                'height' => $height,
                'page' => $page
            );

            return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
        }
    }
 244  
 245      /**
 246       * Cache an instance of DjVuImage in an Image object, return that instance
 247       *
 248       * @param File $image
 249       * @param string $path
 250       * @return DjVuImage
 251       */
 252  	function getDjVuImage( $image, $path ) {
 253          if ( !$image ) {
 254              $deja = new DjVuImage( $path );
 255          } elseif ( !isset( $image->dejaImage ) ) {
 256              $deja = $image->dejaImage = new DjVuImage( $path );
 257          } else {
 258              $deja = $image->dejaImage;
 259          }
 260  
 261          return $deja;
 262      }
 263  
 264      /**
 265       * Get metadata, unserializing it if neccessary.
 266       *
 267       * @param File $file The DjVu file in question
 268       * @return string XML metadata as a string.
 269       */
 270  	private function getUnserializedMetadata( File $file ) {
 271          $metadata = $file->getMetadata();
 272          if ( substr( $metadata, 0, 3 ) === '<?xml' ) {
 273              // Old style. Not serialized but instead just a raw string of XML.
 274              return $metadata;
 275          }
 276  
 277          wfSuppressWarnings();
 278          $unser = unserialize( $metadata );
 279          wfRestoreWarnings();
 280          if ( is_array( $unser ) ) {
 281              if ( isset( $unser['error'] ) ) {
 282                  return false;
 283              } elseif ( isset( $unser['xml'] ) ) {
 284                  return $unser['xml'];
 285              } else {
 286                  // Should never ever reach here.
 287                  throw new MWException( "Error unserializing DjVu metadata." );
 288              }
 289          }
 290  
 291          // unserialize failed. Guess it wasn't really serialized after all,
 292          return $metadata;
 293      }
 294  
    /**
     * Cache a document tree for the DjVu XML metadata
     *
     * The metadata is parsed at most once per File object: the results are
     * stored in the dynamically declared dejaMetaTree and djvuTextTree
     * properties of $image, which are set to false when parsing fails.
     *
     * @param File $image
     * @param bool $gettext When true, return the text tree (the DjVuTxt
     *   child of an mw-djvu document) instead of the metadata tree
     *   (Default: false)
     * @return bool|SimpleXMLElement The requested tree, or false if the
     *   metadata is invalid or the tree is not available
     */
    function getMetaTree( $image, $gettext = false ) {
        // Serve from the per-File cache when the requested tree is already set
        if ( $gettext && isset( $image->djvuTextTree ) ) {
            return $image->djvuTextTree;
        }
        if ( !$gettext && isset( $image->dejaMetaTree ) ) {
            return $image->dejaMetaTree;
        }

        $metadata = $this->getUnserializedMetadata( $image );
        if ( !$this->isMetadataValid( $image, $metadata ) ) {
            wfDebug( "DjVu XML metadata is invalid or missing, should have been fixed in upgradeRow\n" );

            return false;
        }
        wfProfileIn( __METHOD__ );

        wfSuppressWarnings();
        try {
            // Set to false rather than null to avoid further attempts
            $image->dejaMetaTree = false;
            $image->djvuTextTree = false;
            $tree = new SimpleXMLElement( $metadata );
            if ( $tree->getName() == 'mw-djvu' ) {
                // Wrapper document: pick the text and metadata subtrees out of its children
                /** @var SimpleXMLElement $b */
                foreach ( $tree->children() as $b ) {
                    if ( $b->getName() == 'DjVuTxt' ) {
                        // @todo File::djvuTextTree and File::dejaMetaTree are declared
                        // dynamically. Add a public File::$data to facilitate this?
                        $image->djvuTextTree = $b;
                    } elseif ( $b->getName() == 'DjVuXML' ) {
                        $image->dejaMetaTree = $b;
                    }
                }
            } else {
                // Any other root element is used as the metadata tree itself
                $image->dejaMetaTree = $tree;
            }
        } catch ( Exception $e ) {
            // Both cached trees stay false, so parsing is not retried for this File
            wfDebug( "Bogus multipage XML metadata on '{$image->getName()}'\n" );
        }
        wfRestoreWarnings();
        wfProfileOut( __METHOD__ );
        if ( $gettext ) {
            return $image->djvuTextTree;
        } else {
            return $image->dejaMetaTree;
        }
    }
 348  
 349      /**
 350       * @param File $image
 351       * @param string $path
 352       * @return bool|array False on failure
 353       */
 354  	function getImageSize( $image, $path ) {
 355          return $this->getDjVuImage( $image, $path )->getImageSize();
 356      }
 357  
 358  	function getThumbType( $ext, $mime, $params = null ) {
 359          global $wgDjvuOutputExtension;
 360          static $mime;
 361          if ( !isset( $mime ) ) {
 362              $magic = MimeMagic::singleton();
 363              $mime = $magic->guessTypesForExtension( $wgDjvuOutputExtension );
 364          }
 365  
 366          return array( $wgDjvuOutputExtension, $mime );
 367      }
 368  
 369  	function getMetadata( $image, $path ) {
 370          wfDebug( "Getting DjVu metadata for $path\n" );
 371  
 372          $xml = $this->getDjVuImage( $image, $path )->retrieveMetaData();
 373          if ( $xml === false ) {
 374              // Special value so that we don't repetitively try and decode a broken file.
 375              return serialize( array( 'error' => 'Error extracting metadata' ) );
 376          } else {
 377              return serialize( array( 'xml' => $xml ) );
 378          }
 379      }
 380  
 381  	function getMetadataType( $image ) {
 382          return 'djvuxml';
 383      }
 384  
 385  	function isMetadataValid( $image, $metadata ) {
 386          return !empty( $metadata ) && $metadata != serialize( array() );
 387      }
 388  
 389  	function pageCount( $image ) {
 390          $tree = $this->getMetaTree( $image );
 391          if ( !$tree ) {
 392              return false;
 393          }
 394  
 395          return count( $tree->xpath( '//OBJECT' ) );
 396      }
 397  
 398  	function getPageDimensions( $image, $page ) {
 399          $tree = $this->getMetaTree( $image );
 400          if ( !$tree ) {
 401              return false;
 402          }
 403  
 404          $o = $tree->BODY[0]->OBJECT[$page - 1];
 405          if ( $o ) {
 406              return array(
 407                  'width' => intval( $o['width'] ),
 408                  'height' => intval( $o['height'] )
 409              );
 410          } else {
 411              return false;
 412          }
 413      }
 414  
 415      /**
 416       * @param File $image
 417       * @param int $page Page number to get information for
 418       * @return bool|string Page text or false when no text found.
 419       */
 420  	function getPageText( $image, $page ) {
 421          $tree = $this->getMetaTree( $image, true );
 422          if ( !$tree ) {
 423              return false;
 424          }
 425  
 426          $o = $tree->BODY[0]->PAGE[$page - 1];
 427          if ( $o ) {
 428              $txt = $o['value'];
 429  
 430              return $txt;
 431          } else {
 432              return false;
 433          }
 434      }
 435  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1