MediaWiki
REL1_24
|
<?php
/**
 * Media handler for DjVu documents.
 *
 * Renders individual pages of a multi-page DjVu file through the external
 * program configured in $wgDjvuRenderer, and exposes the page count, page
 * dimensions and page text found in the file's XML metadata.
 */
class DjVuHandler extends ImageHandler {
	/**
	 * Check whether the configuration needed for DjVu support is present.
	 *
	 * A renderer ($wgDjvuRenderer) plus at least one of $wgDjvuDump or
	 * $wgDjvuToXML must be set.
	 *
	 * @return bool
	 */
	function isEnabled() {
		global $wgDjvuRenderer, $wgDjvuDump, $wgDjvuToXML;
		if ( !$wgDjvuRenderer || ( !$wgDjvuDump && !$wgDjvuToXML ) ) {
			wfDebug( "DjVu is disabled, please set \$wgDjvuRenderer and \$wgDjvuDump\n" );

			return false;
		} else {
			return true;
		}
	}

	/**
	 * @param File $file Unused
	 * @return bool Always true
	 */
	function mustRender( $file ) {
		return true;
	}

	/**
	 * @param File $file Unused
	 * @return bool Always true
	 */
	function isMultiPage( $file ) {
		return true;
	}

	/**
	 * Map magic word IDs to handler parameter names.
	 *
	 * @return array
	 */
	function getParamMap() {
		return array(
			'img_width' => 'width',
			'img_page' => 'page',
		);
	}

	/**
	 * Validate a single handler parameter.
	 *
	 * Only 'width', 'height' and 'page' are accepted, and only with a value
	 * greater than zero. A 'page' value must additionally be a plain integer.
	 *
	 * @param string $name
	 * @param mixed $value
	 * @return bool
	 */
	function validateParam( $name, $value ) {
		if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
			// Extra junk on the end of page, probably actually a caption
			// e.g. [[File:Foo.djvu|thumb|Page 3 of the document shows foo]]
			return false;
		}
		if ( in_array( $name, array( 'width', 'height', 'page' ) ) ) {
			if ( $value <= 0 ) {
				return false;
			} else {
				return true;
			}
		} else {
			return false;
		}
	}

	/**
	 * Build the "page<N>-<W>px" string that identifies a thumbnail.
	 *
	 * @param array $params Must contain 'width'; 'page' defaults to 1
	 * @return string|bool False if 'width' is not set
	 */
	function makeParamString( $params ) {
		$page = isset( $params['page'] ) ? $params['page'] : 1;
		if ( !isset( $params['width'] ) ) {
			return false;
		}

		return "page{$page}-{$params['width']}px";
	}

	/**
	 * Inverse of makeParamString().
	 *
	 * @param string $str
	 * @return array|bool Array with 'width' and 'page', or false if $str
	 *  does not match the expected format
	 */
	function parseParamString( $str ) {
		$m = false;
		if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) {
			return array( 'width' => $m[2], 'page' => $m[1] );
		} else {
			return false;
		}
	}

	/**
	 * Reduce a parameter array to the 'width' and 'page' entries.
	 *
	 * @param array $params
	 * @return array
	 */
	function getScriptParams( $params ) {
		return array(
			'width' => $params['width'],
			'page' => $params['page'],
		);
	}

	/**
	 * Render one page of a DjVu file to a thumbnail.
	 *
	 * @param File $image
	 * @param string $dstPath Destination path of the thumbnail
	 * @param string $dstUrl Destination URL of the thumbnail
	 * @param array $params Transform parameters (width, height, page, ...)
	 * @param int $flags Bitfield; self::TRANSFORM_LATER skips the actual rendering
	 * @return MediaTransformOutput ThumbnailImage on success, otherwise a
	 *  MediaTransformError or TransformParameterError
	 */
	function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
		global $wgDjvuRenderer, $wgDjvuPostProcessor;

		// Fetch XML and check it, to give a more informative error message than the one which
		// normaliseParams will inevitably give.
		$xml = $image->getMetadata();
		if ( !$xml ) {
			$width = isset( $params['width'] ) ? $params['width'] : 0;
			$height = isset( $params['height'] ) ? $params['height'] : 0;

			return new MediaTransformError( 'thumbnail_error', $width, $height,
				wfMessage( 'djvu_no_xml' )->text() );
		}

		if ( !$this->normaliseParams( $image, $params ) ) {
			return new TransformParameterError( $params );
		}
		$width = $params['width'];
		$height = $params['height'];
		$page = $params['page'];
		if ( $page > $this->pageCount( $image ) ) {
			return new MediaTransformError(
				'thumbnail_error',
				$width,
				$height,
				wfMessage( 'djvu_page_error' )->text()
			);
		}

		if ( $flags & self::TRANSFORM_LATER ) {
			$params = array(
				'width' => $width,
				'height' => $height,
				'page' => $page
			);

			return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
		}

		if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
			return new MediaTransformError(
				'thumbnail_error',
				$width,
				$height,
				wfMessage( 'thumbnail_dest_directory' )->text()
			);
		}

		// Get local copy source for shell scripts
		// Thumbnail extraction is very inefficient for large files.
		// Provide a way to pool count limit the number of downloaders.
		if ( $image->getSize() >= 1e7 ) { // 10MB
			$work = new PoolCounterWorkViaCallback( 'GetLocalFileCopy', sha1( $image->getName() ),
				array(
					'doWork' => function () use ( $image ) {
						return $image->getLocalRefPath();
					}
				)
			);
			$srcPath = $work->execute();
		} else {
			$srcPath = $image->getLocalRefPath();
		}

		if ( $srcPath === false ) { // Failed to get local copy
			wfDebugLog( 'thumbnail',
				sprintf( 'Thumbnail failed on %s: could not get local copy of "%s"',
					wfHostname(), $image->getName() ) );

			return new MediaTransformError( 'thumbnail_error',
				$params['width'], $params['height'],
				wfMessage( 'filemissing' )->text()
			);
		}

		# Use a subshell (brackets) to aggregate stderr from both pipeline commands
		# before redirecting it to the overall stdout. This works in both Linux and Windows XP.
		$cmd = '(' . wfEscapeShellArg(
			$wgDjvuRenderer,
			"-format=ppm",
			"-page={$page}",
			"-size={$params['physicalWidth']}x{$params['physicalHeight']}",
			$srcPath );
		if ( $wgDjvuPostProcessor ) {
			$cmd .= " | {$wgDjvuPostProcessor}";
		}
		$cmd .= ' > ' . wfEscapeShellArg( $dstPath ) . ') 2>&1';
		wfProfileIn( 'ddjvu' );
		wfDebug( __METHOD__ . ": $cmd\n" );
		$retval = '';
		$err = wfShellExec( $cmd, $retval );
		wfProfileOut( 'ddjvu' );

		$removed = $this->removeBadFile( $dstPath, $retval );
		if ( $retval != 0 || $removed ) {
			$this->logErrorForExternalProcess( $retval, $err, $cmd );
			return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
		} else {
			$params = array(
				'width' => $width,
				'height' => $height,
				'page' => $page
			);

			return new ThumbnailImage( $image, $dstUrl, $dstPath, $params );
		}
	}

	/**
	 * Get a DjVuImage for the given path, cached on $image->dejaImage
	 * when an image object is supplied.
	 *
	 * @param File|bool $image File object, or a falsy value to skip caching
	 * @param string $path
	 * @return DjVuImage
	 */
	function getDjVuImage( $image, $path ) {
		if ( !$image ) {
			$deja = new DjVuImage( $path );
		} elseif ( !isset( $image->dejaImage ) ) {
			$deja = $image->dejaImage = new DjVuImage( $path );
		} else {
			$deja = $image->dejaImage;
		}

		return $deja;
	}

	/**
	 * Get the XML metadata of a file, unwrapping the serialized form
	 * produced by getMetadata() if necessary.
	 *
	 * @param File $file
	 * @return string|bool XML string (or the raw metadata if it could not be
	 *  unserialized); false if the stored metadata records an error
	 * @throws MWException If the unserialized array has neither an 'error'
	 *  nor an 'xml' key
	 */
	private function getUnserializedMetadata( File $file ) {
		$metadata = $file->getMetadata();
		// The prefix length must match the 5-byte literal; a shorter prefix
		// can never compare equal and would leave this branch unreachable.
		if ( substr( $metadata, 0, 5 ) === '<?xml' ) {
			// Old style. Not serialized but instead just a raw string of XML.
			return $metadata;
		}

		wfSuppressWarnings();
		$unser = unserialize( $metadata );
		wfRestoreWarnings();
		if ( is_array( $unser ) ) {
			if ( isset( $unser['error'] ) ) {
				return false;
			} elseif ( isset( $unser['xml'] ) ) {
				return $unser['xml'];
			} else {
				// Should never ever reach here.
				throw new MWException( "Error unserializing DjVu metadata." );
			}
		}

		// unserialize failed. Guess it wasn't really serialized after all,
		return $metadata;
	}

	/**
	 * Parse the XML metadata and return either the structure tree or the
	 * text tree. Results are cached on the image object as
	 * $image->dejaMetaTree and $image->djvuTextTree.
	 *
	 * @param File $image
	 * @param bool $gettext True to return the text tree, false for the metadata tree
	 * @return SimpleXMLElement|bool False if the metadata is invalid or could
	 *  not be parsed
	 */
	function getMetaTree( $image, $gettext = false ) {
		if ( $gettext && isset( $image->djvuTextTree ) ) {
			return $image->djvuTextTree;
		}
		if ( !$gettext && isset( $image->dejaMetaTree ) ) {
			return $image->dejaMetaTree;
		}

		$metadata = $this->getUnserializedMetadata( $image );
		if ( !$this->isMetadataValid( $image, $metadata ) ) {
			wfDebug( "DjVu XML metadata is invalid or missing, should have been fixed in upgradeRow\n" );

			return false;
		}
		wfProfileIn( __METHOD__ );

		wfSuppressWarnings();
		try {
			// Set to false rather than null to avoid further attempts
			$image->dejaMetaTree = false;
			$image->djvuTextTree = false;
			$tree = new SimpleXMLElement( $metadata );
			if ( $tree->getName() == 'mw-djvu' ) {
				foreach ( $tree->children() as $b ) {
					if ( $b->getName() == 'DjVuTxt' ) {
						// @todo File::djvuTextTree and File::dejaMetaTree are declared
						// dynamically. Add a public File::$data to facilitate this?
						$image->djvuTextTree = $b;
					} elseif ( $b->getName() == 'DjVuXML' ) {
						$image->dejaMetaTree = $b;
					}
				}
			} else {
				$image->dejaMetaTree = $tree;
			}
		} catch ( Exception $e ) {
			wfDebug( "Bogus multipage XML metadata on '{$image->getName()}'\n" );
		}
		wfRestoreWarnings();
		wfProfileOut( __METHOD__ );
		if ( $gettext ) {
			return $image->djvuTextTree;
		} else {
			return $image->dejaMetaTree;
		}
	}

	/**
	 * Get the image size by delegating to DjVuImage::getImageSize().
	 *
	 * @param File|bool $image
	 * @param string $path
	 * @return mixed Whatever DjVuImage::getImageSize() returns
	 */
	function getImageSize( $image, $path ) {
		return $this->getDjVuImage( $image, $path )->getImageSize();
	}

	/**
	 * Get the extension and MIME type of thumbnails, both derived from
	 * $wgDjvuOutputExtension. None of the arguments influence the result.
	 *
	 * @param string $ext Unused
	 * @param string $mime Unused
	 * @param array|null $params Unused
	 * @return array Thumbnail extension and MIME type
	 */
	function getThumbType( $ext, $mime, $params = null ) {
		global $wgDjvuOutputExtension;
		// Per-request cache. Deliberately not named $mime: declaring a static
		// with the parameter's name would rebind and discard the argument.
		static $djvuMime;
		if ( !isset( $djvuMime ) ) {
			$magic = MimeMagic::singleton();
			$djvuMime = $magic->guessTypesForExtension( $wgDjvuOutputExtension );
		}

		return array( $wgDjvuOutputExtension, $djvuMime );
	}

	/**
	 * Extract metadata from the file at $path and return it serialized.
	 *
	 * @param File|bool $image
	 * @param string $path
	 * @return string Serialized array with either an 'xml' or an 'error' key
	 */
	function getMetadata( $image, $path ) {
		wfDebug( "Getting DjVu metadata for $path\n" );

		$xml = $this->getDjVuImage( $image, $path )->retrieveMetaData();
		if ( $xml === false ) {
			// Special value so that we don't repetitively try and decode a broken file.
			return serialize( array( 'error' => 'Error extracting metadata' ) );
		} else {
			return serialize( array( 'xml' => $xml ) );
		}
	}

	/**
	 * @param File $image Unused
	 * @return string Always 'djvuxml'
	 */
	function getMetadataType( $image ) {
		return 'djvuxml';
	}

	/**
	 * Metadata is considered valid when it is non-empty and is not a
	 * serialized empty array.
	 *
	 * @param File $image Unused
	 * @param string|bool $metadata
	 * @return bool
	 */
	function isMetadataValid( $image, $metadata ) {
		return !empty( $metadata ) && $metadata != serialize( array() );
	}

	/**
	 * Count the OBJECT elements in the metadata tree.
	 *
	 * @param File $image
	 * @return int|bool False if no metadata tree is available
	 */
	function pageCount( $image ) {
		$tree = $this->getMetaTree( $image );
		if ( !$tree ) {
			return false;
		}

		return count( $tree->xpath( '//OBJECT' ) );
	}

	/**
	 * Get the dimensions of a page from its OBJECT element.
	 *
	 * @param File $image
	 * @param int $page 1-based page number
	 * @return array|bool Array with 'width' and 'height', or false if the
	 *  tree or the page is unavailable
	 */
	function getPageDimensions( $image, $page ) {
		$tree = $this->getMetaTree( $image );
		if ( !$tree ) {
			return false;
		}

		$o = $tree->BODY[0]->OBJECT[$page - 1];
		if ( $o ) {
			return array(
				'width' => intval( $o['width'] ),
				'height' => intval( $o['height'] )
			);
		} else {
			return false;
		}
	}

	/**
	 * Get the 'value' attribute of a page's PAGE element in the text tree.
	 *
	 * @param File $image
	 * @param int $page 1-based page number
	 * @return SimpleXMLElement|bool The 'value' attribute, or false if the
	 *  tree or the page is unavailable
	 */
	function getPageText( $image, $page ) {
		$tree = $this->getMetaTree( $image, true );
		if ( !$tree ) {
			return false;
		}

		$o = $tree->BODY[0]->PAGE[$page - 1];
		if ( $o ) {
			$txt = $o['value'];

			return $txt;
		} else {
			return false;
		}
	}
}