Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
unicode.c
Go to the documentation of this file.
1 /*
2  * linux/fs/hfsplus/unicode.c
3  *
4  * Copyright (C) 2001
5  * Brad Boyer ([email protected])
6  * (C) 2003 Ardis Technologies <[email protected]>
7  *
8  * Handler routines for unicode strings
9  */
10 
11 #include <linux/types.h>
12 #include <linux/nls.h>
13 #include "hfsplus_fs.h"
14 #include "hfsplus_raw.h"
15 
16 /* Fold the case of a unicode char, given the 16 bit value */
17 /* Returns folded char, or 0 if ignorable */
18 static inline u16 case_fold(u16 c)
19 {
20  u16 tmp;
21 
22  tmp = hfsplus_case_fold_table[c >> 8];
23  if (tmp)
24  tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
25  else
26  tmp = c;
27  return tmp;
28 }
29 
30 /* Compare unicode strings, return values like normal strcmp */
32  const struct hfsplus_unistr *s2)
33 {
34  u16 len1, len2, c1, c2;
35  const hfsplus_unichr *p1, *p2;
36 
37  len1 = be16_to_cpu(s1->length);
38  len2 = be16_to_cpu(s2->length);
39  p1 = s1->unicode;
40  p2 = s2->unicode;
41 
42  while (1) {
43  c1 = c2 = 0;
44 
45  while (len1 && !c1) {
46  c1 = case_fold(be16_to_cpu(*p1));
47  p1++;
48  len1--;
49  }
50  while (len2 && !c2) {
51  c2 = case_fold(be16_to_cpu(*p2));
52  p2++;
53  len2--;
54  }
55 
56  if (c1 != c2)
57  return (c1 < c2) ? -1 : 1;
58  if (!c1 && !c2)
59  return 0;
60  }
61 }
62 
63 /* Compare names as a sequence of 16-bit unsigned integers */
64 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
65  const struct hfsplus_unistr *s2)
66 {
67  u16 len1, len2, c1, c2;
68  const hfsplus_unichr *p1, *p2;
69  int len;
70 
71  len1 = be16_to_cpu(s1->length);
72  len2 = be16_to_cpu(s2->length);
73  p1 = s1->unicode;
74  p2 = s2->unicode;
75 
76  for (len = min(len1, len2); len > 0; len--) {
77  c1 = be16_to_cpu(*p1);
78  c2 = be16_to_cpu(*p2);
79  if (c1 != c2)
80  return c1 < c2 ? -1 : 1;
81  p1++;
82  p2++;
83  }
84 
85  return len1 < len2 ? -1 :
86  len1 > len2 ? 1 : 0;
87 }
88 
89 
/*
 * Constants for arithmetic composition of Hangul syllables from their
 * leading (L), vowel (V) and trailing (T) parts.
 */

/* First code point of each range. */
#define Hangul_SBase	0xac00
#define Hangul_LBase	0x1100
#define Hangul_VBase	0x1161
#define Hangul_TBase	0x11a7

/* Number of code points in each range. */
#define Hangul_LCount	19
#define Hangul_VCount	21
#define Hangul_TCount	28
#define Hangul_SCount	11172

/* Number of syllables sharing one leading part. */
#define Hangul_NCount	(Hangul_VCount * Hangul_TCount)
99 
100 
101 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
102 {
103  int i, s, e;
104 
105  s = 1;
106  e = p[1];
107  if (!e || cc < p[s * 2] || cc > p[e * 2])
108  return NULL;
109  do {
110  i = (s + e) / 2;
111  if (cc > p[i * 2])
112  s = i + 1;
113  else if (cc < p[i * 2])
114  e = i - 1;
115  else
116  return hfsplus_compose_table + p[i * 2 + 1];
117  } while (s <= e);
118  return NULL;
119 }
120 
122  const struct hfsplus_unistr *ustr,
123  char *astr, int *len_p)
124 {
125  const hfsplus_unichr *ip;
126  struct nls_table *nls = HFSPLUS_SB(sb)->nls;
127  u8 *op;
128  u16 cc, c0, c1;
129  u16 *ce1, *ce2;
130  int i, len, ustrlen, res, compose;
131 
132  op = astr;
133  ip = ustr->unicode;
134  ustrlen = be16_to_cpu(ustr->length);
135  len = *len_p;
136  ce1 = NULL;
137  compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
138 
139  while (ustrlen > 0) {
140  c0 = be16_to_cpu(*ip++);
141  ustrlen--;
142  /* search for single decomposed char */
143  if (likely(compose))
144  ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
145  if (ce1)
146  cc = ce1[0];
147  else
148  cc = 0;
149  if (cc) {
150  /* start of a possibly decomposed Hangul char */
151  if (cc != 0xffff)
152  goto done;
153  if (!ustrlen)
154  goto same;
155  c1 = be16_to_cpu(*ip) - Hangul_VBase;
156  if (c1 < Hangul_VCount) {
157  /* compose the Hangul char */
158  cc = (c0 - Hangul_LBase) * Hangul_VCount;
159  cc = (cc + c1) * Hangul_TCount;
160  cc += Hangul_SBase;
161  ip++;
162  ustrlen--;
163  if (!ustrlen)
164  goto done;
165  c1 = be16_to_cpu(*ip) - Hangul_TBase;
166  if (c1 > 0 && c1 < Hangul_TCount) {
167  cc += c1;
168  ip++;
169  ustrlen--;
170  }
171  goto done;
172  }
173  }
174  while (1) {
175  /* main loop for common case of not composed chars */
176  if (!ustrlen)
177  goto same;
178  c1 = be16_to_cpu(*ip);
179  if (likely(compose))
180  ce1 = hfsplus_compose_lookup(
182  if (ce1)
183  break;
184  switch (c0) {
185  case 0:
186  c0 = 0x2400;
187  break;
188  case '/':
189  c0 = ':';
190  break;
191  }
192  res = nls->uni2char(c0, op, len);
193  if (res < 0) {
194  if (res == -ENAMETOOLONG)
195  goto out;
196  *op = '?';
197  res = 1;
198  }
199  op += res;
200  len -= res;
201  c0 = c1;
202  ip++;
203  ustrlen--;
204  }
205  ce2 = hfsplus_compose_lookup(ce1, c0);
206  if (ce2) {
207  i = 1;
208  while (i < ustrlen) {
209  ce1 = hfsplus_compose_lookup(ce2,
210  be16_to_cpu(ip[i]));
211  if (!ce1)
212  break;
213  i++;
214  ce2 = ce1;
215  }
216  cc = ce2[0];
217  if (cc) {
218  ip += i;
219  ustrlen -= i;
220  goto done;
221  }
222  }
223 same:
224  switch (c0) {
225  case 0:
226  cc = 0x2400;
227  break;
228  case '/':
229  cc = ':';
230  break;
231  default:
232  cc = c0;
233  }
234 done:
235  res = nls->uni2char(cc, op, len);
236  if (res < 0) {
237  if (res == -ENAMETOOLONG)
238  goto out;
239  *op = '?';
240  res = 1;
241  }
242  op += res;
243  len -= res;
244  }
245  res = 0;
246 out:
247  *len_p = (char *)op - astr;
248  return res;
249 }
250 
251 /*
252  * Convert one or more ASCII characters into a single unicode character.
253  * Returns the number of ASCII characters corresponding to the unicode char.
254  */
255 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
256  wchar_t *uc)
257 {
258  int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
259  if (size <= 0) {
260  *uc = '?';
261  size = 1;
262  }
263  switch (*uc) {
264  case 0x2400:
265  *uc = 0;
266  break;
267  case ':':
268  *uc = '/';
269  break;
270  }
271  return size;
272 }
273 
274 /* Decomposes a single unicode character. */
275 static inline u16 *decompose_unichar(wchar_t uc, int *size)
276 {
277  int off;
278 
279  off = hfsplus_decompose_table[(uc >> 12) & 0xf];
280  if (off == 0 || off == 0xffff)
281  return NULL;
282 
283  off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
284  if (!off)
285  return NULL;
286 
287  off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
288  if (!off)
289  return NULL;
290 
291  off = hfsplus_decompose_table[off + (uc & 0xf)];
292  *size = off & 3;
293  if (*size == 0)
294  return NULL;
295  return hfsplus_decompose_table + (off / 4);
296 }
297 
299  const char *astr, int len)
300 {
301  int size, dsize, decompose;
302  u16 *dstr, outlen = 0;
303  wchar_t c;
304 
305  decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
306  while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
307  size = asc2unichar(sb, astr, len, &c);
308 
309  if (decompose)
310  dstr = decompose_unichar(c, &dsize);
311  else
312  dstr = NULL;
313  if (dstr) {
314  if (outlen + dsize > HFSPLUS_MAX_STRLEN)
315  break;
316  do {
317  ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
318  } while (--dsize > 0);
319  } else
320  ustr->unicode[outlen++] = cpu_to_be16(c);
321 
322  astr += size;
323  len -= size;
324  }
325  ustr->length = cpu_to_be16(outlen);
326  if (len > 0)
327  return -ENAMETOOLONG;
328  return 0;
329 }
330 
331 /*
332  * Hash a string to an integer as appropriate for the HFS+ filesystem.
333  * Composed unicode characters are decomposed and case-folding is performed
334  * if the appropriate bits are (un)set on the superblock.
335  */
336 int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
337  struct qstr *str)
338 {
339  struct super_block *sb = dentry->d_sb;
340  const char *astr;
341  const u16 *dstr;
342  int casefold, decompose, size, len;
343  unsigned long hash;
344  wchar_t c;
345  u16 c2;
346 
347  casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
348  decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
349  hash = init_name_hash();
350  astr = str->name;
351  len = str->len;
352  while (len > 0) {
353  int uninitialized_var(dsize);
354  size = asc2unichar(sb, astr, len, &c);
355  astr += size;
356  len -= size;
357 
358  if (decompose)
359  dstr = decompose_unichar(c, &dsize);
360  else
361  dstr = NULL;
362  if (dstr) {
363  do {
364  c2 = *dstr++;
365  if (casefold)
366  c2 = case_fold(c2);
367  if (!casefold || c2)
368  hash = partial_name_hash(c2, hash);
369  } while (--dsize > 0);
370  } else {
371  c2 = c;
372  if (casefold)
373  c2 = case_fold(c2);
374  if (!casefold || c2)
375  hash = partial_name_hash(c2, hash);
376  }
377  }
378  str->hash = end_name_hash(hash);
379 
380  return 0;
381 }
382 
383 /*
384  * Compare strings with HFS+ filename ordering.
385  * Composed unicode characters are decomposed and case-folding is performed
386  * if the appropriate bits are (un)set on the superblock.
387  */
388 int hfsplus_compare_dentry(const struct dentry *parent,
389  const struct inode *pinode,
390  const struct dentry *dentry, const struct inode *inode,
391  unsigned int len, const char *str, const struct qstr *name)
392 {
393  struct super_block *sb = parent->d_sb;
394  int casefold, decompose, size;
395  int dsize1, dsize2, len1, len2;
396  const u16 *dstr1, *dstr2;
397  const char *astr1, *astr2;
398  u16 c1, c2;
399  wchar_t c;
400 
401  casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
402  decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
403  astr1 = str;
404  len1 = len;
405  astr2 = name->name;
406  len2 = name->len;
407  dsize1 = dsize2 = 0;
408  dstr1 = dstr2 = NULL;
409 
410  while (len1 > 0 && len2 > 0) {
411  if (!dsize1) {
412  size = asc2unichar(sb, astr1, len1, &c);
413  astr1 += size;
414  len1 -= size;
415 
416  if (decompose)
417  dstr1 = decompose_unichar(c, &dsize1);
418  if (!decompose || !dstr1) {
419  c1 = c;
420  dstr1 = &c1;
421  dsize1 = 1;
422  }
423  }
424 
425  if (!dsize2) {
426  size = asc2unichar(sb, astr2, len2, &c);
427  astr2 += size;
428  len2 -= size;
429 
430  if (decompose)
431  dstr2 = decompose_unichar(c, &dsize2);
432  if (!decompose || !dstr2) {
433  c2 = c;
434  dstr2 = &c2;
435  dsize2 = 1;
436  }
437  }
438 
439  c1 = *dstr1;
440  c2 = *dstr2;
441  if (casefold) {
442  c1 = case_fold(c1);
443  if (!c1) {
444  dstr1++;
445  dsize1--;
446  continue;
447  }
448  c2 = case_fold(c2);
449  if (!c2) {
450  dstr2++;
451  dsize2--;
452  continue;
453  }
454  }
455  if (c1 < c2)
456  return -1;
457  else if (c1 > c2)
458  return 1;
459 
460  dstr1++;
461  dsize1--;
462  dstr2++;
463  dsize2--;
464  }
465 
466  if (len1 < len2)
467  return -1;
468  if (len1 > len2)
469  return 1;
470  return 0;
471 }