00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 package com.sleepycat.util;
00011
/**
 * Static helpers for converting between Java {@code char} data and a
 * UTF-8 style byte encoding in which every {@code char} is encoded on its
 * own as one, two or three bytes:
 * <ul>
 * <li>{@code 0x0001..0x007F} - one byte;</li>
 * <li>{@code 0x0000} and {@code 0x0080..0x07FF} - two bytes;</li>
 * <li>{@code 0x0800..0xFFFF} - three bytes.</li>
 * </ul>
 * Because the zero character is written as two bytes, an encoded string
 * never contains a zero byte, which is what makes
 * {@link #getZeroTerminatedByteLength} usable.  This is the same byte
 * layout that {@code java.io.DataInput} documents as "modified UTF-8",
 * without a length prefix.
 */
public class UtfOps {

    /** Shared result for encoding the empty string. */
    private static final byte[] EMPTY_BYTES = {};

    /** Shared result for decoding a zero-length byte range. */
    private static final String EMPTY_STRING = "";

    /**
     * Returns the number of bytes between {@code offset} and the first zero
     * byte at or after it.
     *
     * @param bytes the data to scan.
     * @param offset the index of the first byte to examine.
     * @return the number of non-zero bytes preceding the zero terminator.
     * @throws IndexOutOfBoundsException if no zero byte is found before the
     * end of the array.
     */
    public static int getZeroTerminatedByteLength(byte[] bytes, int offset)
        throws IndexOutOfBoundsException {

        int len = 0;
        while (bytes[offset++] != 0) {
            len++;
        }
        return len;
    }

    /**
     * Returns the number of bytes needed to encode all of the given
     * characters.
     *
     * @param chars the characters to measure.
     * @return the encoded length in bytes.
     */
    public static int getByteLength(char[] chars) {

        return getByteLength(chars, 0, chars.length);
    }

    /**
     * Returns the number of bytes needed to encode a range of characters.
     *
     * @param chars the characters to measure.
     * @param offset the index of the first character to measure.
     * @param length the number of characters to measure.
     * @return the encoded length in bytes.
     */
    public static int getByteLength(char[] chars, int offset, int length) {

        int len = 0;
        final int end = offset + length;
        for (int i = offset; i < end; i++) {
            int c = chars[i];
            if ((c >= 0x0001) && (c <= 0x007F)) {
                len++;
            } else if (c > 0x07FF) {
                len += 3;
            } else {
                // 0x0000 and 0x0080..0x07FF both take two bytes.
                len += 2;
            }
        }
        return len;
    }

    /**
     * Returns the number of characters encoded by the whole of the given
     * byte array.
     *
     * @param bytes the encoded data.
     * @return the number of characters.
     * @throws IllegalArgumentException if a byte that cannot start a
     * character sequence is found where a lead byte is expected.
     * @throws IndexOutOfBoundsException if the final character sequence
     * extends past the end of the array.
     */
    public static int getCharLength(byte[] bytes)
        throws IllegalArgumentException, IndexOutOfBoundsException {

        return getCharLength(bytes, 0, bytes.length);
    }

    /**
     * Returns the number of characters encoded by a range of bytes.  Only
     * the lead byte of each sequence is inspected; continuation bytes are
     * not validated here.
     *
     * @param bytes the encoded data.
     * @param offset the index of the first byte to examine.
     * @param length the number of bytes to examine.
     * @return the number of characters.
     * @throws IllegalArgumentException if a byte that cannot start a
     * character sequence is found where a lead byte is expected.
     * @throws IndexOutOfBoundsException if the final character sequence
     * extends past {@code offset + length}, or a lead byte index is outside
     * the array.
     */
    public static int getCharLength(byte[] bytes, int offset, int length)
        throws IllegalArgumentException, IndexOutOfBoundsException {

        int charCount = 0;
        final int end = offset + length;
        while (offset < end) {
            // The high nibble of the lead byte determines the sequence size.
            switch ((bytes[offset] & 0xff) >> 4) {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                offset++;
                break;
            case 12: case 13:
                offset += 2;
                break;
            case 14:
                offset += 3;
                break;
            default:
                throw new IllegalArgumentException();
            }
            // A sequence that starts inside the range but finishes outside
            // it is incomplete; do not count it as a character.
            if (offset > end) {
                throw new IndexOutOfBoundsException(
                    "Incomplete character sequence at end of byte range");
            }
            charCount++;
        }
        return charCount;
    }

    /**
     * Decodes bytes into characters.  The amount of data converted is given
     * either as a number of bytes to consume or as a number of characters
     * to produce, depending on {@code isByteLen}.
     *
     * @param bytes the encoded source data.
     * @param byteOffset the index of the first byte to decode.
     * @param chars the destination array.
     * @param charOffset the index at which the first decoded character is
     * stored.
     * @param len the number of bytes to consume if {@code isByteLen} is
     * true, otherwise the number of characters to produce.
     * @param isByteLen selects the meaning of {@code len}.
     * @return the index of the byte following the last byte consumed.
     * @throws IllegalArgumentException if an invalid lead byte or
     * continuation byte is encountered.
     * @throws IndexOutOfBoundsException if a character sequence extends past
     * the given byte length or the end of {@code bytes}, or if {@code chars}
     * is too small.
     */
    public static int bytesToChars(byte[] bytes, int byteOffset,
                                   char[] chars, int charOffset,
                                   int len, boolean isByteLen)
        throws IllegalArgumentException, IndexOutOfBoundsException {

        int char1, char2, char3;
        // The loop bound is an index into whichever array 'len' measures.
        final int end = len + (isByteLen ? byteOffset : charOffset);
        while ((isByteLen ? byteOffset : charOffset) < end) {
            char1 = bytes[byteOffset++] & 0xff;
            switch (char1 >> 4) {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                chars[charOffset++] = (char) char1;
                break;
            case 12: case 13:
                checkWithinByteRange(isByteLen, byteOffset, end);
                // Sign extension of the byte is harmless: only the low
                // eight bits survive the masks below.
                char2 = bytes[byteOffset++];
                if ((char2 & 0xC0) != 0x80) {
                    throw new IllegalArgumentException();
                }
                chars[charOffset++] = (char) (((char1 & 0x1F) << 6) |
                                              (char2 & 0x3F));
                break;
            case 14:
                checkWithinByteRange(isByteLen, byteOffset + 1, end);
                char2 = bytes[byteOffset++];
                char3 = bytes[byteOffset++];
                if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) {
                    throw new IllegalArgumentException();
                }
                chars[charOffset++] = (char) (((char1 & 0x0F) << 12) |
                                              ((char2 & 0x3F) << 6) |
                                              (char3 & 0x3F));
                break;
            default:
                throw new IllegalArgumentException();
            }
        }
        return byteOffset;
    }

    /**
     * Rejects a continuation byte that lies outside the caller's byte range.
     * Does nothing when the conversion is bounded by a character count.
     */
    private static void checkWithinByteRange(boolean isByteLen,
                                             int lastByteNeeded,
                                             int end) {
        if (isByteLen && lastByteNeeded >= end) {
            throw new IndexOutOfBoundsException(
                "Incomplete character sequence at end of byte range");
        }
    }

    /**
     * Encodes a range of characters into a byte array.  The destination
     * must have room for
     * {@code getByteLength(chars, charOffset, charLength)} bytes starting at
     * {@code byteOffset}; otherwise the array store fails with an
     * {@code IndexOutOfBoundsException}.
     *
     * @param chars the characters to encode.
     * @param charOffset the index of the first character to encode.
     * @param bytes the destination array.
     * @param byteOffset the index at which the first encoded byte is stored.
     * @param charLength the number of characters to encode.
     */
    public static void charsToBytes(char[] chars, int charOffset,
                                    byte[] bytes, int byteOffset,
                                    int charLength) {
        final int end = charOffset + charLength;
        for (int i = charOffset; i < end; i++) {
            int c = chars[i];
            if ((c >= 0x0001) && (c <= 0x007F)) {
                bytes[byteOffset++] = (byte) c;
            } else if (c > 0x07FF) {
                bytes[byteOffset++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
                bytes[byteOffset++] = (byte) (0x80 | ((c >> 6) & 0x3F));
                bytes[byteOffset++] = (byte) (0x80 | (c & 0x3F));
            } else {
                // 0x0000 and 0x0080..0x07FF: two-byte form.
                bytes[byteOffset++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
                bytes[byteOffset++] = (byte) (0x80 | (c & 0x3F));
            }
        }
    }

    /**
     * Decodes a range of bytes into a new string.
     *
     * @param bytes the encoded source data.
     * @param offset the index of the first byte to decode.
     * @param length the number of bytes to decode.
     * @return the decoded string; the empty string if {@code length} is
     * zero.
     * @throws IllegalArgumentException if an invalid lead byte or
     * continuation byte is encountered.
     * @throws IndexOutOfBoundsException if the final character sequence
     * extends past {@code offset + length} or the range is outside the
     * array.
     */
    public static String bytesToString(byte[] bytes, int offset, int length)
        throws IllegalArgumentException, IndexOutOfBoundsException {

        if (length == 0) {
            return EMPTY_STRING;
        }
        int charLen = UtfOps.getCharLength(bytes, offset, length);
        char[] chars = new char[charLen];
        UtfOps.bytesToChars(bytes, offset, chars, 0, length, true);
        return new String(chars, 0, charLen);
    }

    /**
     * Encodes a string into a new byte array sized exactly to the encoded
     * data.
     *
     * @param string the string to encode.
     * @return the encoded bytes; a zero-length array for the empty string.
     */
    public static byte[] stringToBytes(String string) {

        if (string.length() == 0) {
            return EMPTY_BYTES;
        }
        char[] chars = string.toCharArray();
        byte[] bytes = new byte[UtfOps.getByteLength(chars)];
        UtfOps.charsToBytes(chars, 0, bytes, 0, chars.length);
        return bytes;
    }
}