MediaWiki  REL1_24
DjVu.php
Go to the documentation of this file.
00001 <?php
00029 class DjVuHandler extends ImageHandler {
00033     function isEnabled() {
00034         global $wgDjvuRenderer, $wgDjvuDump, $wgDjvuToXML;
00035         if ( !$wgDjvuRenderer || ( !$wgDjvuDump && !$wgDjvuToXML ) ) {
00036             wfDebug( "DjVu is disabled, please set \$wgDjvuRenderer and \$wgDjvuDump\n" );
00037 
00038             return false;
00039         } else {
00040             return true;
00041         }
00042     }
00043 
00048     function mustRender( $file ) {
00049         return true;
00050     }
00051 
00056     function isMultiPage( $file ) {
00057         return true;
00058     }
00059 
00063     function getParamMap() {
00064         return array(
00065             'img_width' => 'width',
00066             'img_page' => 'page',
00067         );
00068     }
00069 
00075     function validateParam( $name, $value ) {
00076         if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
00077             // Extra junk on the end of page, probably actually a caption
00078             // e.g. [[File:Foo.djvu|thumb|Page 3 of the document shows foo]]
00079             return false;
00080         }
00081         if ( in_array( $name, array( 'width', 'height', 'page' ) ) ) {
00082             if ( $value <= 0 ) {
00083                 return false;
00084             } else {
00085                 return true;
00086             }
00087         } else {
00088             return false;
00089         }
00090     }
00091 
00096     function makeParamString( $params ) {
00097         $page = isset( $params['page'] ) ? $params['page'] : 1;
00098         if ( !isset( $params['width'] ) ) {
00099             return false;
00100         }
00101 
00102         return "page{$page}-{$params['width']}px";
00103     }
00104 
00109     function parseParamString( $str ) {
00110         $m = false;
00111         if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) {
00112             return array( 'width' => $m[2], 'page' => $m[1] );
00113         } else {
00114             return false;
00115         }
00116     }
00117 
00122     function getScriptParams( $params ) {
00123         return array(
00124             'width' => $params['width'],
00125             'page' => $params['page'],
00126         );
00127     }
00128 
00137     function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
00138         global $wgDjvuRenderer, $wgDjvuPostProcessor;
00139 
00140         // Fetch XML and check it, to give a more informative error message than the one which
00141         // normaliseParams will inevitably give.
00142         $xml = $image->getMetadata();
00143         if ( !$xml ) {
00144             $width = isset( $params['width'] ) ? $params['width'] : 0;
00145             $height = isset( $params['height'] ) ? $params['height'] : 0;
00146 
00147             return new MediaTransformError( 'thumbnail_error', $width, $height,
00148                 wfMessage( 'djvu_no_xml' )->text() );
00149         }
00150 
00151         if ( !$this->normaliseParams( $image, $params ) ) {
00152             return new TransformParameterError( $params );
00153         }
00154         $width = $params['width'];
00155         $height = $params['height'];
00156         $page = $params['page'];
00157         if ( $page > $this->pageCount( $image ) ) {
00158             return new MediaTransformError(
00159                 'thumbnail_error',
00160                 $width,
00161                 $height,
00162                 wfMessage( 'djvu_page_error' )->text()
00163             );
00164         }
00165 
00166         if ( $flags & self::TRANSFORM_LATER ) {
00167             $params = array(
00168                 'width' => $width,
00169                 'height' => $height,
00170                 'page' => $page
00171             );
00172 
00173             return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
00174         }
00175 
00176         if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
00177             return new MediaTransformError(
00178                 'thumbnail_error',
00179                 $width,
00180                 $height,
00181                 wfMessage( 'thumbnail_dest_directory' )->text()
00182             );
00183         }
00184 
00185         // Get local copy source for shell scripts
00186         // Thumbnail extraction is very inefficient for large files.
00187         // Provide a way to pool count limit the number of downloaders.
00188         if ( $image->getSize() >= 1e7 ) { // 10MB
00189             $work = new PoolCounterWorkViaCallback( 'GetLocalFileCopy', sha1( $image->getName() ),
00190                 array(
00191                     'doWork' => function () use ( $image ) {
00192                         return $image->getLocalRefPath();
00193                     }
00194                 )
00195             );
00196             $srcPath = $work->execute();
00197         } else {
00198             $srcPath = $image->getLocalRefPath();
00199         }
00200 
00201         if ( $srcPath === false ) { // Failed to get local copy
00202             wfDebugLog( 'thumbnail',
00203                 sprintf( 'Thumbnail failed on %s: could not get local copy of "%s"',
00204                     wfHostname(), $image->getName() ) );
00205 
00206             return new MediaTransformError( 'thumbnail_error',
00207                 $params['width'], $params['height'],
00208                 wfMessage( 'filemissing' )->text()
00209             );
00210         }
00211 
00212         # Use a subshell (brackets) to aggregate stderr from both pipeline commands
00213         # before redirecting it to the overall stdout. This works in both Linux and Windows XP.
00214         $cmd = '(' . wfEscapeShellArg(
00215             $wgDjvuRenderer,
00216             "-format=ppm",
00217             "-page={$page}",
00218             "-size={$params['physicalWidth']}x{$params['physicalHeight']}",
00219             $srcPath );
00220         if ( $wgDjvuPostProcessor ) {
00221             $cmd .= " | {$wgDjvuPostProcessor}";
00222         }
00223         $cmd .= ' > ' . wfEscapeShellArg( $dstPath ) . ') 2>&1';
00224         wfProfileIn( 'ddjvu' );
00225         wfDebug( __METHOD__ . ": $cmd\n" );
00226         $retval = '';
00227         $err = wfShellExec( $cmd, $retval );
00228         wfProfileOut( 'ddjvu' );
00229 
00230         $removed = $this->removeBadFile( $dstPath, $retval );
00231         if ( $retval != 0 || $removed ) {
00232             $this->logErrorForExternalProcess( $retval, $err, $cmd );
00233             return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
00234         } else {
00235             $params = array(
00236                 'width' => $width,
00237                 'height' => $height,
00238                 'page' => $page
00239             );
00240 
00241             return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
00242         }
00243     }
00244 
00252     function getDjVuImage( $image, $path ) {
00253         if ( !$image ) {
00254             $deja = new DjVuImage( $path );
00255         } elseif ( !isset( $image->dejaImage ) ) {
00256             $deja = $image->dejaImage = new DjVuImage( $path );
00257         } else {
00258             $deja = $image->dejaImage;
00259         }
00260 
00261         return $deja;
00262     }
00263 
00270     private function getUnserializedMetadata( File $file ) {
00271         $metadata = $file->getMetadata();
00272         if ( substr( $metadata, 0, 3 ) === '<?xml' ) {
00273             // Old style. Not serialized but instead just a raw string of XML.
00274             return $metadata;
00275         }
00276 
00277         wfSuppressWarnings();
00278         $unser = unserialize( $metadata );
00279         wfRestoreWarnings();
00280         if ( is_array( $unser ) ) {
00281             if ( isset( $unser['error'] ) ) {
00282                 return false;
00283             } elseif ( isset( $unser['xml'] ) ) {
00284                 return $unser['xml'];
00285             } else {
00286                 // Should never ever reach here.
00287                 throw new MWException( "Error unserializing DjVu metadata." );
00288             }
00289         }
00290 
00291         // unserialize failed. Guess it wasn't really serialized after all,
00292         return $metadata;
00293     }
00294 
00301     function getMetaTree( $image, $gettext = false ) {
00302         if ( $gettext && isset( $image->djvuTextTree ) ) {
00303             return $image->djvuTextTree;
00304         }
00305         if ( !$gettext && isset( $image->dejaMetaTree ) ) {
00306             return $image->dejaMetaTree;
00307         }
00308 
00309         $metadata = $this->getUnserializedMetadata( $image );
00310         if ( !$this->isMetadataValid( $image, $metadata ) ) {
00311             wfDebug( "DjVu XML metadata is invalid or missing, should have been fixed in upgradeRow\n" );
00312 
00313             return false;
00314         }
00315         wfProfileIn( __METHOD__ );
00316 
00317         wfSuppressWarnings();
00318         try {
00319             // Set to false rather than null to avoid further attempts
00320             $image->dejaMetaTree = false;
00321             $image->djvuTextTree = false;
00322             $tree = new SimpleXMLElement( $metadata );
00323             if ( $tree->getName() == 'mw-djvu' ) {
00325                 foreach ( $tree->children() as $b ) {
00326                     if ( $b->getName() == 'DjVuTxt' ) {
00327                         // @todo File::djvuTextTree and File::dejaMetaTree are declared
00328                         // dynamically. Add a public File::$data to facilitate this?
00329                         $image->djvuTextTree = $b;
00330                     } elseif ( $b->getName() == 'DjVuXML' ) {
00331                         $image->dejaMetaTree = $b;
00332                     }
00333                 }
00334             } else {
00335                 $image->dejaMetaTree = $tree;
00336             }
00337         } catch ( Exception $e ) {
00338             wfDebug( "Bogus multipage XML metadata on '{$image->getName()}'\n" );
00339         }
00340         wfRestoreWarnings();
00341         wfProfileOut( __METHOD__ );
00342         if ( $gettext ) {
00343             return $image->djvuTextTree;
00344         } else {
00345             return $image->dejaMetaTree;
00346         }
00347     }
00348 
00354     function getImageSize( $image, $path ) {
00355         return $this->getDjVuImage( $image, $path )->getImageSize();
00356     }
00357 
00358     function getThumbType( $ext, $mime, $params = null ) {
00359         global $wgDjvuOutputExtension;
00360         static $mime;
00361         if ( !isset( $mime ) ) {
00362             $magic = MimeMagic::singleton();
00363             $mime = $magic->guessTypesForExtension( $wgDjvuOutputExtension );
00364         }
00365 
00366         return array( $wgDjvuOutputExtension, $mime );
00367     }
00368 
00369     function getMetadata( $image, $path ) {
00370         wfDebug( "Getting DjVu metadata for $path\n" );
00371 
00372         $xml = $this->getDjVuImage( $image, $path )->retrieveMetaData();
00373         if ( $xml === false ) {
00374             // Special value so that we don't repetitively try and decode a broken file.
00375             return serialize( array( 'error' => 'Error extracting metadata' ) );
00376         } else {
00377             return serialize( array( 'xml' => $xml ) );
00378         }
00379     }
00380 
00381     function getMetadataType( $image ) {
00382         return 'djvuxml';
00383     }
00384 
00385     function isMetadataValid( $image, $metadata ) {
00386         return !empty( $metadata ) && $metadata != serialize( array() );
00387     }
00388 
00389     function pageCount( $image ) {
00390         $tree = $this->getMetaTree( $image );
00391         if ( !$tree ) {
00392             return false;
00393         }
00394 
00395         return count( $tree->xpath( '//OBJECT' ) );
00396     }
00397 
00398     function getPageDimensions( $image, $page ) {
00399         $tree = $this->getMetaTree( $image );
00400         if ( !$tree ) {
00401             return false;
00402         }
00403 
00404         $o = $tree->BODY[0]->OBJECT[$page - 1];
00405         if ( $o ) {
00406             return array(
00407                 'width' => intval( $o['width'] ),
00408                 'height' => intval( $o['height'] )
00409             );
00410         } else {
00411             return false;
00412         }
00413     }
00414 
00420     function getPageText( $image, $page ) {
00421         $tree = $this->getMetaTree( $image, true );
00422         if ( !$tree ) {
00423             return false;
00424         }
00425 
00426         $o = $tree->BODY[0]->PAGE[$page - 1];
00427         if ( $o ) {
00428             $txt = $o['value'];
00429 
00430             return $txt;
00431         } else {
00432             return false;
00433         }
00434     }
00435 }