View Javadoc

1   /*
2    * Copyright 2014 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.compression;
17  
18  import io.netty.buffer.ByteBuf;
19  
20  import static io.netty.handler.codec.compression.Bzip2Constants.*;
21  
22  /**
23   * Compresses and writes a single Bzip2 block.<br><br>
24   *
25   * Block encoding consists of the following stages:<br>
26   * 1. Run-Length Encoding[1] - [email protected] #write(int)}<br>
27   * 2. Burrows Wheeler Transform - [email protected] #close(ByteBuf)} (through [email protected] Bzip2DivSufSort})<br>
28   * 3. Write block header - [email protected] #close(ByteBuf)}<br>
29   * 4. Move To Front Transform - [email protected] #close(ByteBuf)} (through [email protected] Bzip2HuffmanStageEncoder})<br>
30   * 5. Run-Length Encoding[2] - [email protected] #close(ByteBuf)}  (through [email protected] Bzip2HuffmanStageEncoder})<br>
31   * 6. Create and write Huffman tables - [email protected] #close(ByteBuf)} (through [email protected] Bzip2HuffmanStageEncoder})<br>
32   * 7. Huffman encode and write data - [email protected] #close(ByteBuf)} (through [email protected] Bzip2HuffmanStageEncoder})
33   */
34  final class Bzip2BlockCompressor {
35      /**
36       * A writer that provides bit-level writes.
37       */
38      private final Bzip2BitWriter writer;
39  
40      /**
41       * CRC builder for the block.
42       */
43      private final Crc32 crc = new Crc32();
44  
45      /**
46       * The RLE'd block data.
47       */
48      private final byte[] block;
49  
50      /**
51       * Current length of the data within the [email protected] #block} array.
52       */
53      private int blockLength;
54  
55      /**
56       * A limit beyond which new data will not be accepted into the block.
57       */
58      private final int blockLengthLimit;
59  
60      /**
61       * The values that are present within the RLE'd block data. For each index, [email protected] true} if that
62       * value is present within the data, otherwise [email protected] false}.
63       */
64      private final boolean[] blockValuesPresent = new boolean[256];
65  
66      /**
67       * The Burrows Wheeler Transformed block data.
68       */
69      private final int[] bwtBlock;
70  
71      /**
72       * The current RLE value being accumulated (undefined when [email protected] #rleLength} is 0).
73       */
74      private int rleCurrentValue = -1;
75  
76      /**
77       * The repeat count of the current RLE value.
78       */
79      private int rleLength;
80  
81      /**
82       * @param writer The [email protected] Bzip2BitWriter} which provides bit-level writes
83       * @param blockSize The declared block size in bytes. Up to this many bytes will be accepted
84       *                  into the block after Run-Length Encoding is applied
85       */
86      Bzip2BlockCompressor(final Bzip2BitWriter writer, final int blockSize) {
87          this.writer = writer;
88  
89          // One extra byte is added to allow for the block wrap applied in close()
90          block = new byte[blockSize + 1];
91          bwtBlock = new int[blockSize + 1];
92          blockLengthLimit = blockSize - 6; // 5 bytes for one RLE run plus one byte - see [email protected] #write(int)}
93      }
94  
95      /**
96       * Write the Huffman symbol to output byte map.
97       */
98      private void writeSymbolMap(ByteBuf out) {
99          Bzip2BitWriter writer = this.writer;
100 
101         final boolean[] blockValuesPresent = this.blockValuesPresent;
102         final boolean[] condensedInUse = new boolean[16];
103 
104         for (int i = 0; i < condensedInUse.length; i++) {
105             for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
106                 if (blockValuesPresent[k]) {
107                     condensedInUse[i] = true;
108                 }
109             }
110         }
111 
112         for (int i = 0; i < condensedInUse.length; i++) {
113             writer.writeBoolean(out, condensedInUse[i]);
114         }
115 
116         for (int i = 0; i < condensedInUse.length; i++) {
117             if (condensedInUse[i]) {
118                 for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
119                     writer.writeBoolean(out, blockValuesPresent[k]);
120                 }
121             }
122         }
123     }
124 
125     /**
126      * Writes an RLE run to the block array, updating the block CRC and present values array as required.
127      * @param value The value to write
128      * @param runLength The run length of the value to write
129      */
130     private void writeRun(final int value, int runLength) {
131         final int blockLength = this.blockLength;
132         final byte[] block = this.block;
133 
134         blockValuesPresent[value] = true;
135         crc.updateCRC(value, runLength);
136 
137         final byte byteValue = (byte) value;
138         switch (runLength) {
139             case 1:
140                 block[blockLength] = byteValue;
141                 this.blockLength = blockLength + 1;
142                 break;
143             case 2:
144                 block[blockLength] = byteValue;
145                 block[blockLength + 1] = byteValue;
146                 this.blockLength = blockLength + 2;
147                 break;
148             case 3:
149                 block[blockLength] = byteValue;
150                 block[blockLength + 1] = byteValue;
151                 block[blockLength + 2] = byteValue;
152                 this.blockLength = blockLength + 3;
153                 break;
154             default:
155                 runLength -= 4;
156                 blockValuesPresent[runLength] = true;
157                 block[blockLength] = byteValue;
158                 block[blockLength + 1] = byteValue;
159                 block[blockLength + 2] = byteValue;
160                 block[blockLength + 3] = byteValue;
161                 block[blockLength + 4] = (byte) runLength;
162                 this.blockLength = blockLength + 5;
163                 break;
164         }
165     }
166 
167     /**
168      * Writes a byte to the block, accumulating to an RLE run where possible.
169      * @param value The byte to write
170      * @return [email protected] true} if the byte was written, or [email protected] false} if the block is already full
171      */
172     boolean write(final int value) {
173         if (blockLength > blockLengthLimit) {
174             return false;
175         }
176         final int rleCurrentValue = this.rleCurrentValue;
177         final int rleLength = this.rleLength;
178 
179         if (rleLength == 0) {
180             this.rleCurrentValue = value;
181             this.rleLength = 1;
182         } else if (rleCurrentValue != value) {
183             // This path commits us to write 6 bytes - one RLE run (5 bytes) plus one extra
184             writeRun(rleCurrentValue & 0xff, rleLength);
185             this.rleCurrentValue = value;
186             this.rleLength = 1;
187         } else {
188             if (rleLength == 254) {
189                 writeRun(rleCurrentValue & 0xff, 255);
190                 this.rleLength = 0;
191             } else {
192                 this.rleLength = rleLength + 1;
193             }
194         }
195         return true;
196     }
197 
198     /**
199      * Writes an array to the block.
200      * @param data The array to write
201      * @param offset The offset within the input data to write from
202      * @param length The number of bytes of input data to write
203      * @return The actual number of input bytes written. May be less than the number requested, or
204      *         zero if the block is already full
205      */
206     int write(final byte[] data, int offset, int length) {
207         int written = 0;
208 
209         while (length-- > 0) {
210             if (!write(data[offset++])) {
211                 break;
212             }
213             written++;
214         }
215         return written;
216     }
217 
218     /**
219      * Compresses and writes out the block.
220      */
221     void close(ByteBuf out) {
222         // If an RLE run is in progress, write it out
223         if (rleLength > 0) {
224             writeRun(rleCurrentValue & 0xff, rleLength);
225         }
226 
227         // Apply a one byte block wrap required by the BWT implementation
228         block[blockLength] = block[0];
229 
230         // Perform the Burrows Wheeler Transform
231         Bzip2DivSufSort divSufSort = new Bzip2DivSufSort(block, bwtBlock, blockLength);
232         int bwtStartPointer = divSufSort.bwt();
233 
234         Bzip2BitWriter writer = this.writer;
235 
236         // Write out the block header
237         writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_1);
238         writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_2);
239         writer.writeInt(out, crc.getCRC());
240         writer.writeBoolean(out, false); // Randomised block flag. We never create randomised blocks
241         writer.writeBits(out, 24, bwtStartPointer);
242 
243         // Write out the symbol map
244         writeSymbolMap(out);
245 
246         // Perform the Move To Front Transform and Run-Length Encoding[2] stages
247         Bzip2MTFAndRLE2StageEncoder mtfEncoder = new Bzip2MTFAndRLE2StageEncoder(bwtBlock, blockLength,
248                                                                                     blockValuesPresent);
249         mtfEncoder.encode();
250 
251         // Perform the Huffman Encoding stage and write out the encoded data
252         Bzip2HuffmanStageEncoder huffmanEncoder = new Bzip2HuffmanStageEncoder(writer,
253                 mtfEncoder.mtfBlock(),
254                 mtfEncoder.mtfLength(),
255                 mtfEncoder.mtfAlphabetSize(),
256                 mtfEncoder.mtfSymbolFrequencies());
257         huffmanEncoder.encode(out);
258     }
259 
260     /**
261      * Gets available size of the current block.
262      * @return Number of available bytes which can be written
263      */
264     int availableSize() {
265         if (blockLength == 0) {
266             return blockLengthLimit + 2;
267         }
268         return blockLengthLimit - blockLength + 1;
269     }
270 
271     /**
272      * Determines if the block is full and ready for compression.
273      * @return [email protected] true} if the block is full, otherwise [email protected] false}
274      */
275     boolean isFull() {
276         return blockLength > blockLengthLimit;
277     }
278 
279     /**
280      * Determines if any bytes have been written to the block.
281      * @return [email protected] true} if one or more bytes has been written to the block, otherwise [email protected] false}
282      */
283     boolean isEmpty() {
284         return blockLength == 0 && rleLength == 0;
285     }
286 
287     /**
288      * Gets the CRC of the completed block. Only valid after calling [email protected] #close(ByteBuf)}.
289      * @return The block's CRC
290      */
291     int crc() {
292         return crc.getCRC();
293     }
294 }