Main Page | Class Hierarchy | Data Structures | Directories | File List | Data Fields | Related Pages

UtfOps.java

00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 2000-2005
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: UtfOps.java,v 12.1 2005/01/31 19:27:35 mark Exp $
00008  */
00009 
00010 package com.sleepycat.util;
00011 
/**
 * Static helpers for converting between Java {@code char} data and a
 * UTF byte encoding.
 *
 * <p>The byte layout produced and consumed here is:</p>
 * <ul>
 * <li>chars 0x0001 to 0x007F: one byte;</li>
 * <li>char 0x0000 and chars 0x0080 to 0x07FF: two bytes
 *     ({@code 110xxxxx 10xxxxxx});</li>
 * <li>chars above 0x07FF: three bytes
 *     ({@code 1110xxxx 10xxxxxx 10xxxxxx}).</li>
 * </ul>
 *
 * <p>Because the zero char is written as two non-zero bytes, an encoded
 * string never contains a zero byte, which is what makes
 * {@link #getZeroTerminatedByteLength} usable.  Each {@code char} is encoded
 * on its own; four-byte sequences are neither produced nor accepted.  This
 * matches the layout that {@code java.io.DataInput} documents as "modified
 * UTF-8".</p>
 */
public class UtfOps {

    /** Shared result returned by {@link #stringToBytes} for an empty string. */
    private static final byte[] EMPTY_BYTES = {};

    /** Shared result returned by {@link #bytesToString} for a zero length. */
    private static final String EMPTY_STRING = "";

    /**
     * Returns the number of bytes before the first zero byte found at or
     * after {@code offset}.
     *
     * @param bytes the data to scan.
     * @param offset the index of the first byte to examine.
     * @return the number of non-zero bytes preceding the terminator.
     * @throws IndexOutOfBoundsException if no zero byte is found before the
     * end of the array.
     */
    public static int getZeroTerminatedByteLength(byte[] bytes, int offset)
        throws IndexOutOfBoundsException {

        int len = 0;
        while (bytes[offset++] != 0) {
            len++;
        }
        return len;
    }

    /**
     * Returns the number of bytes needed to encode all of the given chars.
     *
     * @param chars the chars to be encoded.
     * @return the encoded length in bytes.
     */
    public static int getByteLength(char[] chars) {

        return getByteLength(chars, 0, chars.length);
    }

    /**
     * Returns the number of bytes needed to encode a range of chars.
     *
     * @param chars the array holding the chars to be encoded.
     * @param offset the index of the first char.
     * @param length the number of chars.
     * @return the encoded length in bytes.
     */
    public static int getByteLength(char[] chars, int offset, int length) {

        int len = 0;
        length += offset;
        for (int i = offset; i < length; i++) {
            int c = chars[i];
            if ((c >= 0x0001) && (c <= 0x007F)) {
                len++;
            } else if (c > 0x07FF) {
                len += 3;
            } else {
                // 0x0000 and 0x0080..0x07FF both take two bytes.
                len += 2;
            }
        }
        return len;
    }

    /**
     * Returns the number of chars encoded by the whole of the given array.
     *
     * @param bytes the encoded data.
     * @return the number of chars.
     * @throws IllegalArgumentException if a byte that cannot start a
     * sequence is found where a sequence should start.
     * @throws IndexOutOfBoundsException if the last sequence is incomplete.
     */
    public static int getCharLength(byte[] bytes)
        throws IllegalArgumentException, IndexOutOfBoundsException {

        return getCharLength(bytes, 0, bytes.length);
    }

    /**
     * Returns the number of chars encoded by a range of bytes.
     *
     * <p>Only the first byte of each sequence is inspected; continuation
     * bytes are skipped without being validated.</p>
     *
     * @param bytes the array holding the encoded data.
     * @param offset the index of the first byte.
     * @param length the number of bytes.
     * @return the number of chars.
     * @throws IllegalArgumentException if a byte that cannot start a
     * sequence is found where a sequence should start.
     * @throws IndexOutOfBoundsException if the last sequence in the range
     * needs bytes beyond the end of the range, or the range exceeds the
     * array.
     */
    public static int getCharLength(byte[] bytes, int offset, int length)
        throws IllegalArgumentException, IndexOutOfBoundsException {

        int charCount = 0;
        final int end = offset + length;
        while (offset < end) {
            final int lead = bytes[offset] & 0xff;
            // The high nibble of the lead byte gives the sequence length.
            switch (lead >> 4) {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                offset++;
                break;
            case 12: case 13:
                offset += 2;
                break;
            case 14:
                offset += 3;
                break;
            default:
                throw illegalByte(lead, offset);
            }
            charCount++;
        }
        // Only the final sequence can overshoot; without this check a
        // truncated sequence would be counted as a complete char.
        if (offset > end) {
            throw incompleteSequence(end);
        }
        return charCount;
    }

    /**
     * Decodes bytes into chars.
     *
     * @param bytes the array holding the encoded data.
     * @param byteOffset the index of the first byte to decode.
     * @param chars the array receiving the decoded chars.
     * @param charOffset the index at which the first char is stored.
     * @param len the number of bytes to decode if {@code isByteLen} is true,
     * otherwise the number of chars to produce.
     * @param isByteLen whether {@code len} counts bytes (true) or chars
     * (false).
     * @return the index of the byte following the last byte consumed.
     * @throws IllegalArgumentException if an illegal lead or continuation
     * byte is encountered.
     * @throws IndexOutOfBoundsException if either array is too small, or if
     * {@code isByteLen} is true and a sequence needs bytes beyond the
     * requested byte range.
     */
    public static int bytesToChars(byte[] bytes, int byteOffset,
                                   char[] chars, int charOffset,
                                   int len, boolean isByteLen)
        throws IllegalArgumentException, IndexOutOfBoundsException {

        int char1, char2, char3;
        final int end = len + (isByteLen ? byteOffset : charOffset);
        // Only a byte-count request limits how far into bytes we may read;
        // for a char-count request the array bounds are the only limit.
        final int byteEnd = isByteLen ? end : Integer.MAX_VALUE;
        while ((isByteLen ? byteOffset : charOffset) < end) {
            char1 = bytes[byteOffset++] & 0xff;
            switch (char1 >> 4) {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                chars[charOffset++] = (char) char1;
                break;
            case 12: case 13:
                if (byteEnd - byteOffset < 1) {
                    throw incompleteSequence(byteEnd);
                }
                char2 = bytes[byteOffset++];
                if ((char2 & 0xC0) != 0x80) {
                    throw illegalByte(char2 & 0xff, byteOffset - 1);
                }
                chars[charOffset++] = (char) (((char1 & 0x1F) << 6) |
                                              (char2 & 0x3F));
                break;
            case 14:
                if (byteEnd - byteOffset < 2) {
                    throw incompleteSequence(byteEnd);
                }
                char2 = bytes[byteOffset++];
                char3 = bytes[byteOffset++];
                if ((char2 & 0xC0) != 0x80) {
                    throw illegalByte(char2 & 0xff, byteOffset - 2);
                }
                if ((char3 & 0xC0) != 0x80) {
                    throw illegalByte(char3 & 0xff, byteOffset - 1);
                }
                chars[charOffset++] = (char) (((char1 & 0x0F) << 12) |
                                              ((char2 & 0x3F) << 6)  |
                                              (char3 & 0x3F));
                break;
            default:
                throw illegalByte(char1, byteOffset - 1);
            }
        }
        return byteOffset;
    }

    /**
     * Encodes chars into bytes.
     *
     * @param chars the array holding the chars to encode.
     * @param charOffset the index of the first char to encode.
     * @param bytes the array receiving the encoded bytes; it must have room
     * for the number of bytes reported by
     * {@link #getByteLength(char[], int, int)} for the same range.
     * @param byteOffset the index at which the first byte is stored.
     * @param charLength the number of chars to encode.
     */
    public static void charsToBytes(char[] chars, int charOffset,
                                    byte[] bytes, int byteOffset,
                                    int charLength) {
        charLength += charOffset;
        for (int i = charOffset; i < charLength; i++) {
            int c = chars[i];
            if ((c >= 0x0001) && (c <= 0x007F)) {
                bytes[byteOffset++] = (byte) c;
            } else if (c > 0x07FF) {
                bytes[byteOffset++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
                bytes[byteOffset++] = (byte) (0x80 | ((c >>  6) & 0x3F));
                bytes[byteOffset++] = (byte) (0x80 | (c & 0x3F));
            } else {
                // Includes 0x0000, so the output never contains a zero byte.
                bytes[byteOffset++] = (byte) (0xC0 | ((c >>  6) & 0x1F));
                bytes[byteOffset++] = (byte) (0x80 | (c & 0x3F));
            }
        }
    }

    /**
     * Decodes a range of bytes into a String.
     *
     * @param bytes the array holding the encoded data.
     * @param offset the index of the first byte to decode.
     * @param length the number of bytes to decode.
     * @return the decoded string; the empty string if {@code length} is
     * zero.
     * @throws IllegalArgumentException if an illegal lead or continuation
     * byte is encountered.
     * @throws IndexOutOfBoundsException if the last sequence in the range is
     * incomplete, or the range exceeds the array.
     */
    public static String bytesToString(byte[] bytes, int offset, int length)
        throws IllegalArgumentException, IndexOutOfBoundsException {

        if (length == 0) {
            return EMPTY_STRING;
        }
        // getCharLength rejects a truncated trailing sequence, so the
        // decode below never reads outside [offset, offset + length).
        int charLen = UtfOps.getCharLength(bytes, offset, length);
        char[] chars = new char[charLen];
        UtfOps.bytesToChars(bytes, offset, chars, 0, length, true);
        return new String(chars, 0, charLen);
    }

    /**
     * Encodes a String into a newly allocated byte array.
     *
     * @param string the string to encode.
     * @return the encoded bytes; a shared zero-length array if the string is
     * empty.
     */
    public static byte[] stringToBytes(String string) {

        if (string.length() == 0) {
            return EMPTY_BYTES;
        }
        char[] chars = string.toCharArray();
        byte[] bytes = new byte[UtfOps.getByteLength(chars)];
        UtfOps.charsToBytes(chars, 0, bytes, 0, chars.length);
        return bytes;
    }

    /** Builds the exception thrown for a byte that is illegal where found. */
    private static IllegalArgumentException illegalByte(int value,
                                                        int offset) {
        return new IllegalArgumentException(
            "Illegal UTF byte 0x" + Integer.toHexString(value) +
            " at offset " + offset);
    }

    /** Builds the exception thrown when a sequence overruns the range. */
    private static IndexOutOfBoundsException incompleteSequence(int end) {
        return new IndexOutOfBoundsException(
            "Incomplete UTF sequence: byte range ends at offset " + end);
    }
}

Generated on Sun Dec 25 12:14:33 2005 for Berkeley DB 4.4.16 by  doxygen 1.4.2