[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
/**
 * DjVu image handler.
 *
 * Copyright © 2006 Brion Vibber <[email protected]>
 * https://www.mediawiki.org/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Media
 */

/**
 * Support for detecting/validating DjVu image files and getting
 * some basic file metadata (resolution etc)
 *
 * File format docs are available in source package for DjVuLibre:
 * http://djvulibre.djvuzone.org/
 *
 * @ingroup Media
 */
class DjVuImage {
	/**
	 * @const DJVUTXT_MEMORY_LIMIT Memory limit for the DjVu description software
	 */
	const DJVUTXT_MEMORY_LIMIT = 300000;

	/**
	 * @var string Path of the DjVu file this object describes.
	 *
	 * Kept public because it used to be created dynamically by the
	 * constructor, which made it publicly accessible.
	 */
	public $mFilename;

	/**
	 * Constructor
	 *
	 * @param string $filename The DjVu file name.
	 */
	public function __construct( $filename ) {
		$this->mFilename = $filename;
	}

	/**
	 * Check if the given file is indeed a valid DjVu image file
	 * @return bool
	 */
	public function isValid() {
		return $this->getInfo() !== false;
	}

	/**
	 * Return data in the style of getimagesize()
	 * @return array|bool Array or false on failure
	 */
	public function getImageSize() {
		$data = $this->getInfo();
		if ( $data === false ) {
			return false;
		}

		$width = $data['width'];
		$height = $data['height'];

		return array( $width, $height, 'DjVu',
			"width=\"$width\" height=\"$height\"" );
	}

	// ---------

	/**
	 * For debugging; dump the IFF chunk structure to standard output.
	 *
	 * Echoes a one-line diagnostic instead of failing when the file cannot
	 * be opened or is too short to contain the 12-byte IFF header.
	 */
	public function dump() {
		// Test readability first so that a bad path does not emit a PHP warning
		// into the dump output.
		$file = is_readable( $this->mFilename ) ? fopen( $this->mFilename, 'rb' ) : false;
		if ( $file === false ) {
			echo "dump: cannot open file\n";

			return;
		}

		$header = (string)fread( $file, 12 );
		if ( strlen( $header ) < 12 ) {
			echo "dump: file header too short\n";
			fclose( $file );

			return;
		}

		$fields = unpack( 'a4magic/a4chunk/NchunkLength', $header );
		echo "{$fields['chunk']} {$fields['chunkLength']}\n";
		$this->dumpForm( $file, $fields['chunkLength'], 1 );
		fclose( $file );
	}

	/**
	 * Echo the chunks of one FORM container, recursing into nested FORMs.
	 *
	 * @param resource $file Open file handle positioned just after the FORM header
	 * @param int $length Length of the FORM as declared in its header
	 * @param int $indent Nesting depth; each level indents the output by four spaces
	 */
	private function dumpForm( $file, $length, $indent ) {
		$start = ftell( $file );
		$secondary = fread( $file, 4 );
		$pad = str_repeat( ' ', $indent * 4 );
		echo $pad . "($secondary)\n";

		while ( ftell( $file ) - $start < $length ) {
			$chunkHeader = (string)fread( $file, 8 );
			if ( strlen( $chunkHeader ) < 8 ) {
				// End of file, or a cut-off header that cannot be unpacked.
				break;
			}

			$fields = unpack( 'a4chunk/NchunkLength', $chunkHeader );
			$chunk = $fields['chunk'];
			$chunkLength = $fields['chunkLength'];
			echo $pad . "$chunk $chunkLength\n";

			if ( $chunk === 'FORM' ) {
				$this->dumpForm( $file, $chunkLength, $indent + 1 );
			} else {
				fseek( $file, $chunkLength, SEEK_CUR );
				// Parenthesised on purpose: "==" binds tighter than "&".
				if ( ( $chunkLength & 1 ) === 1 ) {
					// Padding byte between chunks
					fseek( $file, 1, SEEK_CUR );
				}
			}
		}
	}

	/**
	 * Read the file header and return the basic image information.
	 *
	 * @return array|bool Array with the keys 'width', 'height', 'version',
	 *   'resolution' and 'gamma', or false if the file cannot be read or is
	 *   not a recognised DjVu file
	 */
	public function getInfo() {
		wfSuppressWarnings();
		$file = fopen( $this->mFilename, 'rb' );
		wfRestoreWarnings();
		if ( $file === false ) {
			wfDebug( __METHOD__ . ": missing or failed file read\n" );

			return false;
		}

		$header = fread( $file, 16 );
		$info = false;

		if ( strlen( $header ) < 16 ) {
			wfDebug( __METHOD__ . ": too short file header\n" );
		} else {
			$fields = unpack( 'a4magic/a4form/NformLength/a4subtype', $header );
			$subtype = $fields['subtype'];
			$formLength = $fields['formLength'];

			if ( $fields['magic'] !== 'AT&T' ) {
				wfDebug( __METHOD__ . ": not a DjVu file\n" );
			} elseif ( $subtype === 'DJVU' ) {
				// Single-page document
				$info = $this->getPageInfo( $file, $formLength );
			} elseif ( $subtype === 'DJVM' ) {
				// Multi-page document
				$info = $this->getMultiPageInfo( $file, $formLength );
			} else {
				// The header has no "formType" field; the subtype is what was not recognised.
				wfDebug( __METHOD__ . ": unrecognized DJVU file type '$subtype'\n" );
			}
		}
		fclose( $file );

		return $info;
	}

	/**
	 * Read an 8-byte chunk header (4-byte identifier, 32-bit big-endian length).
	 *
	 * @param resource $file Open file handle
	 * @return array Two elements: chunk identifier and chunk length, or
	 *   ( false, 0 ) when fewer than 8 bytes could be read
	 */
	private function readChunk( $file ) {
		$header = fread( $file, 8 );
		if ( strlen( $header ) < 8 ) {
			return array( false, 0 );
		}

		$fields = unpack( 'a4chunk/Nlength', $header );

		return array( $fields['chunk'], $fields['length'] );
	}

	/**
	 * Move the file position past a chunk body and its padding byte, if any.
	 *
	 * @param resource $file Open file handle positioned at the chunk body
	 * @param int $chunkLength Number of body bytes to skip
	 */
	private function skipChunk( $file, $chunkLength ) {
		fseek( $file, $chunkLength, SEEK_CUR );

		// Parenthesised on purpose: "==" binds tighter than "&".
		if ( ( $chunkLength & 0x01 ) === 1 && !feof( $file ) ) {
			// padding byte
			fseek( $file, 1, SEEK_CUR );
		}
	}

	/**
	 * Find the first page of a multi-page document and return its information.
	 *
	 * @param resource $file Open file handle positioned after the 16-byte file header
	 * @param int $formLength Length of the enclosing FORM
	 * @return array|bool Page information as returned by getPageInfo(), or
	 *   false if no page was found
	 */
	private function getMultiPageInfo( $file, $formLength ) {
		// For now, we'll just look for the first page in the file
		// and report its information, hoping others are the same size.
		$start = ftell( $file );
		do {
			list( $chunk, $length ) = $this->readChunk( $file );
			if ( !$chunk ) {
				break;
			}

			if ( $chunk === 'FORM' ) {
				$subtype = fread( $file, 4 );
				if ( $subtype === 'DJVU' ) {
					wfDebug( __METHOD__ . ": found first subpage\n" );

					return $this->getPageInfo( $file, $length );
				}
				// The 4 subtype bytes just read are counted in the chunk length.
				$this->skipChunk( $file, $length - 4 );
			} else {
				wfDebug( __METHOD__ . ": skipping '$chunk' chunk\n" );
				$this->skipChunk( $file, $length );
			}
		} while ( $length != 0 && !feof( $file ) && ftell( $file ) - $start < $formLength );

		wfDebug( __METHOD__ . ": multi-page DJVU file contained no pages\n" );

		return false;
	}

	/**
	 * Read the INFO chunk that starts a page and decode it.
	 *
	 * @param resource $file Open file handle positioned at the INFO chunk header
	 * @param int $formLength Length of the enclosing FORM (currently unused)
	 * @return array|bool Array with the keys 'width', 'height', 'version',
	 *   'resolution' and 'gamma', or false if the INFO chunk is missing,
	 *   too short or cut off
	 */
	private function getPageInfo( $file, $formLength ) {
		list( $chunk, $length ) = $this->readChunk( $file );
		if ( $chunk !== 'INFO' ) {
			wfDebug( __METHOD__ . ": expected INFO chunk, got '$chunk'\n" );

			return false;
		}

		if ( $length < 9 ) {
			wfDebug( __METHOD__ . ": INFO should be 9 or 10 bytes, found $length\n" );

			return false;
		}
		$data = fread( $file, $length );
		if ( strlen( $data ) < $length ) {
			wfDebug( __METHOD__ . ": INFO chunk cut off\n" );

			return false;
		}

		// The format consumes 9 bytes; width and height are big-endian,
		// the resolution is little-endian.
		$fields = unpack(
			'nwidth/' .
			'nheight/' .
			'Cminor/' .
			'Cmajor/' .
			'vresolution/' .
			'Cgamma', $data );

		# Newer files have rotation info in byte 10, but we don't use it yet.

		return array(
			'width' => $fields['width'],
			'height' => $fields['height'],
			'version' => "{$fields['major']}.{$fields['minor']}",
			'resolution' => $fields['resolution'],
			'gamma' => $fields['gamma'] / 10.0 );
	}

	/**
	 * Return an XML string describing the DjVu image
	 *
	 * Structure comes from $wgDjvuDump if set, otherwise from $wgDjvuToXML.
	 * If $wgDjvuTxt is set and exits with status 0, its per-page text is
	 * appended and the whole result is wrapped in an <mw-djvu> element.
	 *
	 * @return string|bool|null XML string; false if the djvudump output could
	 *   not be converted; null if no structure tool is configured and no
	 *   text layer was added
	 */
	public function retrieveMetaData() {
		global $wgDjvuToXML, $wgDjvuDump, $wgDjvuTxt;
		wfProfileIn( __METHOD__ );

		if ( isset( $wgDjvuDump ) ) {
			# djvudump is faster as of version 3.5
			# http://sourceforge.net/tracker/index.php?func=detail&aid=1704049&group_id=32953&atid=406583
			wfProfileIn( 'djvudump' );
			$cmd = wfEscapeShellArg( $wgDjvuDump ) . ' ' . wfEscapeShellArg( $this->mFilename );
			$dump = wfShellExec( $cmd );
			$xml = $this->convertDumpToXML( $dump );
			wfProfileOut( 'djvudump' );
		} elseif ( isset( $wgDjvuToXML ) ) {
			wfProfileIn( 'djvutoxml' );
			$cmd = wfEscapeShellArg( $wgDjvuToXML ) . ' --without-anno --without-text ' .
				wfEscapeShellArg( $this->mFilename );
			$xml = wfShellExec( $cmd );
			wfProfileOut( 'djvutoxml' );
		} else {
			$xml = null;
		}
		# Text layer
		if ( isset( $wgDjvuTxt ) ) {
			wfProfileIn( 'djvutxt' );
			$cmd = wfEscapeShellArg( $wgDjvuTxt ) . ' --detail=page ' . wfEscapeShellArg( $this->mFilename );
			wfDebug( __METHOD__ . ": $cmd\n" );
			$retval = '';
			$txt = wfShellExec( $cmd, $retval, array(), array( 'memory' => self::DJVUTXT_MEMORY_LIMIT ) );
			wfProfileOut( 'djvutxt' );
			if ( $retval == 0 ) {
				# Strip some control characters
				$txt = preg_replace( "/[\013\035\037]/", "", $txt );
				$reg = <<<EOR
					/\(page\s[\d-]*\s[\d-]*\s[\d-]*\s[\d-]*\s*"
					((?>    # Text to match is composed of atoms of either:
					  \\\\. # - any escaped character
					  |     # - any character different from " and \
					  [^"\\\\]+
					)*?)
					"\s*\)
					| # Or page can be empty ; in this case, djvutxt dumps ()
					\(\s*()\)/sx
EOR;
				$txt = preg_replace_callback( $reg, array( $this, 'pageTextCallback' ), $txt );
				$txt = "<DjVuTxt>\n<HEAD></HEAD>\n<BODY>\n" . $txt . "</BODY>\n</DjVuTxt>\n";
				// NOTE(review): when $xml is null or false here, the result has a
				// closing </mw-djvu> without an opening tag - confirm whether
				// callers rely on that.
				$xml = preg_replace( "/<DjVuXML>/", "<mw-djvu><DjVuXML>", $xml, 1 );
				$xml = $xml . $txt . '</mw-djvu>';
			}
		}
		wfProfileOut( __METHOD__ );

		return $xml;
	}

	/**
	 * preg_replace_callback() handler turning one page of djvutxt output
	 * into a <PAGE> element.
	 *
	 * @param array $matches Regex matches; element 1 holds the escaped page text
	 * @return string
	 */
	public function pageTextCallback( $matches ) {
		# Get rid of invalid UTF-8, strip control characters
		$val = htmlspecialchars( UtfNormal::cleanUp( stripcslashes( $matches[1] ) ) );
		// "\xEF\xBF\xBD" is U+FFFD REPLACEMENT CHARACTER in UTF-8, written as
		// bytes so that the literal survives re-encoding of this file.
		// NOTE(review): the newline replacement is a plain space in the listing
		// this was taken from; an HTML entity may have been lost in rendering -
		// confirm against the upstream source.
		$val = str_replace( array( "\n", "\xEF\xBF\xBD" ), array( ' ', '' ), $val );

		return '<PAGE value="' . $val . '" />';
	}

	/**
	 * Hack to temporarily work around djvutoxml bug
	 *
	 * Builds a DjVuXML document from the text output of djvudump.
	 *
	 * @param string $dump
	 * @return string|bool XML string, or false if the dump is empty, describes
	 *   an indirect multi-page document, or contains no usable page
	 */
	public function convertDumpToXML( $dump ) {
		if ( strval( $dump ) == '' ) {
			return false;
		}

		$xml = <<<EOT
<?xml version="1.0" ?>
<!DOCTYPE DjVuXML PUBLIC "-//W3C//DTD DjVuXML 1.1//EN" "pubtext/DjVuXML-s.dtd">
<DjVuXML>
<HEAD></HEAD>
<BODY>
EOT;

		$dump = str_replace( "\r", '', $dump );
		// strtok() keeps its position between calls, so parseFormDjvu()
		// continues reading lines from where this method stopped.
		$line = strtok( $dump, "\n" );
		$m = false;
		$good = false;
		if ( preg_match( '/^( *)FORM:DJVU/', $line, $m ) ) {
			# Single-page
			if ( $this->parseFormDjvu( $line, $xml ) ) {
				$good = true;
			} else {
				return false;
			}
		} elseif ( preg_match( '/^( *)FORM:DJVM/', $line, $m ) ) {
			# Multi-page
			$parentLevel = strlen( $m[1] );
			# Find DIRM
			$line = strtok( "\n" );
			while ( $line !== false ) {
				$childLevel = strspn( $line, ' ' );
				if ( $childLevel <= $parentLevel ) {
					# End of chunk
					break;
				}

				if ( preg_match( '/^ *DIRM.*indirect/', $line ) ) {
					wfDebug( "Indirect multi-page DjVu document, bad for server!\n" );

					return false;
				}
				if ( preg_match( '/^ *FORM:DJVU/', $line ) ) {
					# Found page
					if ( $this->parseFormDjvu( $line, $xml ) ) {
						$good = true;
					} else {
						return false;
					}
				}
				$line = strtok( "\n" );
			}
		}
		if ( !$good ) {
			return false;
		}

		$xml .= "</BODY>\n</DjVuXML>\n";

		return $xml;
	}

	/**
	 * Append an <OBJECT> element for one FORM:DJVU section of a djvudump listing.
	 *
	 * Continues the strtok() scan started by convertDumpToXML() and stops at
	 * the INFO line of the section or at the first line that is not indented
	 * deeper than the FORM line.
	 *
	 * @param string $line The "FORM:DJVU" line that opens the section
	 * @param string &$xml XML built so far; the element is appended to it
	 * @return bool True if an INFO line was found, false otherwise
	 */
	public function parseFormDjvu( $line, &$xml ) {
		$parentLevel = strspn( $line, ' ' );
		$line = strtok( "\n" );

		# Find INFO
		while ( $line !== false ) {
			$childLevel = strspn( $line, ' ' );
			if ( $childLevel <= $parentLevel ) {
				# End of chunk
				break;
			}

			if ( preg_match(
				'/^ *INFO *\[\d*\] *DjVu *(\d+)x(\d+), *\w*, *(\d+) *dpi, *gamma=([0-9.-]+)/',
				$line,
				$m
			) ) {
				$xml .= Xml::tags(
					'OBJECT',
					array(
						#'data' => '',
						#'type' => 'image/x.djvu',
						'height' => $m[2],
						'width' => $m[1],
						#'usemap' => '',
					),
					"\n" .
					Xml::element( 'PARAM', array( 'name' => 'DPI', 'value' => $m[3] ) ) . "\n" .
					Xml::element( 'PARAM', array( 'name' => 'GAMMA', 'value' => $m[4] ) ) . "\n"
				) . "\n";

				return true;
			}
			$line = strtok( "\n" );
		}

		# Not found
		return false;
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |