1 /*
2 * Copyright 2014 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty.handler.codec.compression;
17
18 import io.netty.buffer.ByteBuf;
19
20 import static io.netty.handler.codec.compression.Bzip2Constants.*;
21
22 /**
23 * Compresses and writes a single Bzip2 block.<br><br>
24 *
25 * Block encoding consists of the following stages:<br>
26 * 1. Run-Length Encoding[1] - [email protected] #write(int)}<br>
27 * 2. Burrows Wheeler Transform - [email protected] #close(ByteBuf)} (through [email protected] Bzip2DivSufSort})<br>
28 * 3. Write block header - [email protected] #close(ByteBuf)}<br>
29 * 4. Move To Front Transform - [email protected] #close(ByteBuf)} (through [email protected] Bzip2HuffmanStageEncoder})<br>
30 * 5. Run-Length Encoding[2] - [email protected] #close(ByteBuf)} (through [email protected] Bzip2HuffmanStageEncoder})<br>
31 * 6. Create and write Huffman tables - [email protected] #close(ByteBuf)} (through [email protected] Bzip2HuffmanStageEncoder})<br>
32 * 7. Huffman encode and write data - [email protected] #close(ByteBuf)} (through [email protected] Bzip2HuffmanStageEncoder})
33 */
34 final class Bzip2BlockCompressor {
35 /**
36 * A writer that provides bit-level writes.
37 */
38 private final Bzip2BitWriter writer;
39
40 /**
41 * CRC builder for the block.
42 */
43 private final Crc32 crc = new Crc32();
44
45 /**
46 * The RLE'd block data.
47 */
48 private final byte[] block;
49
50 /**
51 * Current length of the data within the [email protected] #block} array.
52 */
53 private int blockLength;
54
55 /**
56 * A limit beyond which new data will not be accepted into the block.
57 */
58 private final int blockLengthLimit;
59
60 /**
61 * The values that are present within the RLE'd block data. For each index, [email protected] true} if that
62 * value is present within the data, otherwise [email protected] false}.
63 */
64 private final boolean[] blockValuesPresent = new boolean[256];
65
66 /**
67 * The Burrows Wheeler Transformed block data.
68 */
69 private final int[] bwtBlock;
70
71 /**
72 * The current RLE value being accumulated (undefined when [email protected] #rleLength} is 0).
73 */
74 private int rleCurrentValue = -1;
75
76 /**
77 * The repeat count of the current RLE value.
78 */
79 private int rleLength;
80
81 /**
82 * @param writer The [email protected] Bzip2BitWriter} which provides bit-level writes
83 * @param blockSize The declared block size in bytes. Up to this many bytes will be accepted
84 * into the block after Run-Length Encoding is applied
85 */
86 Bzip2BlockCompressor(final Bzip2BitWriter writer, final int blockSize) {
87 this.writer = writer;
88
89 // One extra byte is added to allow for the block wrap applied in close()
90 block = new byte[blockSize + 1];
91 bwtBlock = new int[blockSize + 1];
92 blockLengthLimit = blockSize - 6; // 5 bytes for one RLE run plus one byte - see [email protected] #write(int)}
93 }
94
95 /**
96 * Write the Huffman symbol to output byte map.
97 */
98 private void writeSymbolMap(ByteBuf out) {
99 Bzip2BitWriter writer = this.writer;
100
101 final boolean[] blockValuesPresent = this.blockValuesPresent;
102 final boolean[] condensedInUse = new boolean[16];
103
104 for (int i = 0; i < condensedInUse.length; i++) {
105 for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
106 if (blockValuesPresent[k]) {
107 condensedInUse[i] = true;
108 }
109 }
110 }
111
112 for (int i = 0; i < condensedInUse.length; i++) {
113 writer.writeBoolean(out, condensedInUse[i]);
114 }
115
116 for (int i = 0; i < condensedInUse.length; i++) {
117 if (condensedInUse[i]) {
118 for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
119 writer.writeBoolean(out, blockValuesPresent[k]);
120 }
121 }
122 }
123 }
124
125 /**
126 * Writes an RLE run to the block array, updating the block CRC and present values array as required.
127 * @param value The value to write
128 * @param runLength The run length of the value to write
129 */
130 private void writeRun(final int value, int runLength) {
131 final int blockLength = this.blockLength;
132 final byte[] block = this.block;
133
134 blockValuesPresent[value] = true;
135 crc.updateCRC(value, runLength);
136
137 final byte byteValue = (byte) value;
138 switch (runLength) {
139 case 1:
140 block[blockLength] = byteValue;
141 this.blockLength = blockLength + 1;
142 break;
143 case 2:
144 block[blockLength] = byteValue;
145 block[blockLength + 1] = byteValue;
146 this.blockLength = blockLength + 2;
147 break;
148 case 3:
149 block[blockLength] = byteValue;
150 block[blockLength + 1] = byteValue;
151 block[blockLength + 2] = byteValue;
152 this.blockLength = blockLength + 3;
153 break;
154 default:
155 runLength -= 4;
156 blockValuesPresent[runLength] = true;
157 block[blockLength] = byteValue;
158 block[blockLength + 1] = byteValue;
159 block[blockLength + 2] = byteValue;
160 block[blockLength + 3] = byteValue;
161 block[blockLength + 4] = (byte) runLength;
162 this.blockLength = blockLength + 5;
163 break;
164 }
165 }
166
167 /**
168 * Writes a byte to the block, accumulating to an RLE run where possible.
169 * @param value The byte to write
170 * @return [email protected] true} if the byte was written, or [email protected] false} if the block is already full
171 */
172 boolean write(final int value) {
173 if (blockLength > blockLengthLimit) {
174 return false;
175 }
176 final int rleCurrentValue = this.rleCurrentValue;
177 final int rleLength = this.rleLength;
178
179 if (rleLength == 0) {
180 this.rleCurrentValue = value;
181 this.rleLength = 1;
182 } else if (rleCurrentValue != value) {
183 // This path commits us to write 6 bytes - one RLE run (5 bytes) plus one extra
184 writeRun(rleCurrentValue & 0xff, rleLength);
185 this.rleCurrentValue = value;
186 this.rleLength = 1;
187 } else {
188 if (rleLength == 254) {
189 writeRun(rleCurrentValue & 0xff, 255);
190 this.rleLength = 0;
191 } else {
192 this.rleLength = rleLength + 1;
193 }
194 }
195 return true;
196 }
197
198 /**
199 * Writes an array to the block.
200 * @param data The array to write
201 * @param offset The offset within the input data to write from
202 * @param length The number of bytes of input data to write
203 * @return The actual number of input bytes written. May be less than the number requested, or
204 * zero if the block is already full
205 */
206 int write(final byte[] data, int offset, int length) {
207 int written = 0;
208
209 while (length-- > 0) {
210 if (!write(data[offset++])) {
211 break;
212 }
213 written++;
214 }
215 return written;
216 }
217
218 /**
219 * Compresses and writes out the block.
220 */
221 void close(ByteBuf out) {
222 // If an RLE run is in progress, write it out
223 if (rleLength > 0) {
224 writeRun(rleCurrentValue & 0xff, rleLength);
225 }
226
227 // Apply a one byte block wrap required by the BWT implementation
228 block[blockLength] = block[0];
229
230 // Perform the Burrows Wheeler Transform
231 Bzip2DivSufSort divSufSort = new Bzip2DivSufSort(block, bwtBlock, blockLength);
232 int bwtStartPointer = divSufSort.bwt();
233
234 Bzip2BitWriter writer = this.writer;
235
236 // Write out the block header
237 writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_1);
238 writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_2);
239 writer.writeInt(out, crc.getCRC());
240 writer.writeBoolean(out, false); // Randomised block flag. We never create randomised blocks
241 writer.writeBits(out, 24, bwtStartPointer);
242
243 // Write out the symbol map
244 writeSymbolMap(out);
245
246 // Perform the Move To Front Transform and Run-Length Encoding[2] stages
247 Bzip2MTFAndRLE2StageEncoder mtfEncoder = new Bzip2MTFAndRLE2StageEncoder(bwtBlock, blockLength,
248 blockValuesPresent);
249 mtfEncoder.encode();
250
251 // Perform the Huffman Encoding stage and write out the encoded data
252 Bzip2HuffmanStageEncoder huffmanEncoder = new Bzip2HuffmanStageEncoder(writer,
253 mtfEncoder.mtfBlock(),
254 mtfEncoder.mtfLength(),
255 mtfEncoder.mtfAlphabetSize(),
256 mtfEncoder.mtfSymbolFrequencies());
257 huffmanEncoder.encode(out);
258 }
259
260 /**
261 * Gets available size of the current block.
262 * @return Number of available bytes which can be written
263 */
264 int availableSize() {
265 if (blockLength == 0) {
266 return blockLengthLimit + 2;
267 }
268 return blockLengthLimit - blockLength + 1;
269 }
270
271 /**
272 * Determines if the block is full and ready for compression.
273 * @return [email protected] true} if the block is full, otherwise [email protected] false}
274 */
275 boolean isFull() {
276 return blockLength > blockLengthLimit;
277 }
278
279 /**
280 * Determines if any bytes have been written to the block.
281 * @return [email protected] true} if one or more bytes has been written to the block, otherwise [email protected] false}
282 */
283 boolean isEmpty() {
284 return blockLength == 0 && rleLength == 0;
285 }
286
287 /**
288 * Gets the CRC of the completed block. Only valid after calling [email protected] #close(ByteBuf)}.
289 * @return The block's CRC
290 */
291 int crc() {
292 return crc.getCRC();
293 }
294 }