Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
btree.c
Go to the documentation of this file.
1 /*
2  * linux/fs/befs/btree.c
3  *
4  * Copyright (C) 2001-2002 Will Dyson <[email protected]>
5  *
6  * Licensed under the GNU GPL. See the file COPYING for details.
7  *
8  * 2002-02-05: Sergey S. Kostyliov added binary search within
9  * btree nodes.
10  *
11  * Many thanks to:
12  *
13  * Dominic Giampaolo, author of "Practical File System
14  * Design with the Be File System", for such a helpful book.
15  *
16  * Marcus J. Ranum, author of the b+tree package in
17  * comp.sources.misc volume 10. This code is not copied from that
18  * work, but it is partially based on it.
19  *
20  * Makoto Kato, author of the original BeFS for linux filesystem
21  * driver.
22  */
23 
24 #include <linux/kernel.h>
25 #include <linux/string.h>
26 #include <linux/slab.h>
27 #include <linux/mm.h>
28 #include <linux/buffer_head.h>
29 
30 #include "befs.h"
31 #include "btree.h"
32 #include "datastream.h"
33 
34 /*
35  * The btree functions in this file are built on top of the
36  * datastream.c interface, which is in turn built on top of the
37  * io.c interface.
38  */
39 
40 /* Befs B+tree structure:
41  *
42  * The first thing in the tree is the tree superblock. It tells you
43  * all kinds of useful things about the tree, like where the rootnode
44  * is located, and the size of the nodes (always 1024 with current version
45  * of BeOS).
46  *
47  * The rest of the tree consists of a series of nodes. Nodes contain a header
48  * (struct befs_btree_nodehead), the packed key data, an array of shorts
49  * containing the ending offsets for each of the keys, and an array of
50  * befs_off_t values. In interior nodes, the keys are the ending keys for
51  * the childnode they point to, and the values are offsets into the
52  * datastream containing the tree.
53  */
54 
55 /* Note:
56  *
57  * The book states 2 confusing things about befs b+trees. First,
58  * it states that the overflow field of node headers is used by internal nodes
59  * to point to another node that "effectively continues this one". Here is what
60  * I believe that means. Each key in internal nodes points to another node that
61  * contains key values less than itself. Inspection reveals that the last key
62  * in the internal node is not the last key in the index. Keys that are
63  * greater than the last key in the internal node go into the overflow node.
64  * I imagine there is a performance reason for this.
65  *
66  * Second, it states that the header of a btree node is sufficient to
67  * distinguish internal nodes from leaf nodes. Without saying exactly how.
68  * After figuring out the first, it becomes obvious that internal nodes have
69  * overflow nodes and leafnodes do not.
70  */
71 
72 /*
73  * Currently, this code is only good for directory B+trees.
74  * In order to be used for other BFS indexes, it needs to be extended to handle
75  * duplicate keys and non-string keytypes (int32, int64, float, double).
76  */
77 
78 /*
79  * In memory structure of each btree node
80  */
81 typedef struct {
82  befs_host_btree_nodehead head; /* head of node converted to cpu byteorder */
83  struct buffer_head *bh;
84  befs_btree_nodehead *od_node; /* on disk node */
86 
87 /* local constants */
88 static const befs_off_t befs_bt_inval = 0xffffffffffffffffULL;
89 
90 /* local functions */
91 static int befs_btree_seekleaf(struct super_block *sb, befs_data_stream * ds,
92  befs_btree_super * bt_super,
93  befs_btree_node * this_node,
94  befs_off_t * node_off);
95 
96 static int befs_bt_read_super(struct super_block *sb, befs_data_stream * ds,
97  befs_btree_super * sup);
98 
99 static int befs_bt_read_node(struct super_block *sb, befs_data_stream * ds,
100  befs_btree_node * node, befs_off_t node_off);
101 
102 static int befs_leafnode(befs_btree_node * node);
103 
104 static fs16 *befs_bt_keylen_index(befs_btree_node * node);
105 
106 static fs64 *befs_bt_valarray(befs_btree_node * node);
107 
108 static char *befs_bt_keydata(befs_btree_node * node);
109 
110 static int befs_find_key(struct super_block *sb, befs_btree_node * node,
111  const char *findkey, befs_off_t * value);
112 
113 static char *befs_bt_get_key(struct super_block *sb, befs_btree_node * node,
114  int index, u16 * keylen);
115 
116 static int befs_compare_strings(const void *key1, int keylen1,
117  const void *key2, int keylen2);
118 
133 static int
134 befs_bt_read_super(struct super_block *sb, befs_data_stream * ds,
135  befs_btree_super * sup)
136 {
137  struct buffer_head *bh = NULL;
138  befs_disk_btree_super *od_sup = NULL;
139 
140  befs_debug(sb, "---> befs_btree_read_super()");
141 
142  bh = befs_read_datastream(sb, ds, 0, NULL);
143 
144  if (!bh) {
145  befs_error(sb, "Couldn't read index header.");
146  goto error;
147  }
148  od_sup = (befs_disk_btree_super *) bh->b_data;
149  befs_dump_index_entry(sb, od_sup);
150 
151  sup->magic = fs32_to_cpu(sb, od_sup->magic);
152  sup->node_size = fs32_to_cpu(sb, od_sup->node_size);
153  sup->max_depth = fs32_to_cpu(sb, od_sup->max_depth);
154  sup->data_type = fs32_to_cpu(sb, od_sup->data_type);
155  sup->root_node_ptr = fs64_to_cpu(sb, od_sup->root_node_ptr);
156  sup->free_node_ptr = fs64_to_cpu(sb, od_sup->free_node_ptr);
157  sup->max_size = fs64_to_cpu(sb, od_sup->max_size);
158 
159  brelse(bh);
160  if (sup->magic != BEFS_BTREE_MAGIC) {
161  befs_error(sb, "Index header has bad magic.");
162  goto error;
163  }
164 
165  befs_debug(sb, "<--- befs_btree_read_super()");
166  return BEFS_OK;
167 
168  error:
169  befs_debug(sb, "<--- befs_btree_read_super() ERROR");
170  return BEFS_ERR;
171 }
172 
192 static int
193 befs_bt_read_node(struct super_block *sb, befs_data_stream * ds,
194  befs_btree_node * node, befs_off_t node_off)
195 {
196  uint off = 0;
197 
198  befs_debug(sb, "---> befs_bt_read_node()");
199 
200  if (node->bh)
201  brelse(node->bh);
202 
203  node->bh = befs_read_datastream(sb, ds, node_off, &off);
204  if (!node->bh) {
205  befs_error(sb, "befs_bt_read_node() failed to read "
206  "node at %Lu", node_off);
207  befs_debug(sb, "<--- befs_bt_read_node() ERROR");
208 
209  return BEFS_ERR;
210  }
211  node->od_node =
212  (befs_btree_nodehead *) ((void *) node->bh->b_data + off);
213 
214  befs_dump_index_node(sb, node->od_node);
215 
216  node->head.left = fs64_to_cpu(sb, node->od_node->left);
217  node->head.right = fs64_to_cpu(sb, node->od_node->right);
218  node->head.overflow = fs64_to_cpu(sb, node->od_node->overflow);
219  node->head.all_key_count =
220  fs16_to_cpu(sb, node->od_node->all_key_count);
221  node->head.all_key_length =
222  fs16_to_cpu(sb, node->od_node->all_key_length);
223 
224  befs_debug(sb, "<--- befs_btree_read_node()");
225  return BEFS_OK;
226 }
227 
246 int
247 befs_btree_find(struct super_block *sb, befs_data_stream * ds,
248  const char *key, befs_off_t * value)
249 {
250  befs_btree_node *this_node = NULL;
251  befs_btree_super bt_super;
252  befs_off_t node_off;
253  int res;
254 
255  befs_debug(sb, "---> befs_btree_find() Key: %s", key);
256 
257  if (befs_bt_read_super(sb, ds, &bt_super) != BEFS_OK) {
258  befs_error(sb,
259  "befs_btree_find() failed to read index superblock");
260  goto error;
261  }
262 
263  this_node = kmalloc(sizeof (befs_btree_node),
264  GFP_NOFS);
265  if (!this_node) {
266  befs_error(sb, "befs_btree_find() failed to allocate %u "
267  "bytes of memory", sizeof (befs_btree_node));
268  goto error;
269  }
270 
271  this_node->bh = NULL;
272 
273  /* read in root node */
274  node_off = bt_super.root_node_ptr;
275  if (befs_bt_read_node(sb, ds, this_node, node_off) != BEFS_OK) {
276  befs_error(sb, "befs_btree_find() failed to read "
277  "node at %Lu", node_off);
278  goto error_alloc;
279  }
280 
281  while (!befs_leafnode(this_node)) {
282  res = befs_find_key(sb, this_node, key, &node_off);
283  if (res == BEFS_BT_NOT_FOUND)
284  node_off = this_node->head.overflow;
285  /* if no match, go to overflow node */
286  if (befs_bt_read_node(sb, ds, this_node, node_off) != BEFS_OK) {
287  befs_error(sb, "befs_btree_find() failed to read "
288  "node at %Lu", node_off);
289  goto error_alloc;
290  }
291  }
292 
293  /* at the correct leaf node now */
294 
295  res = befs_find_key(sb, this_node, key, value);
296 
297  brelse(this_node->bh);
298  kfree(this_node);
299 
300  if (res != BEFS_BT_MATCH) {
301  befs_debug(sb, "<--- befs_btree_find() Key %s not found", key);
302  *value = 0;
303  return BEFS_BT_NOT_FOUND;
304  }
305  befs_debug(sb, "<--- befs_btree_find() Found key %s, value %Lu",
306  key, *value);
307  return BEFS_OK;
308 
309  error_alloc:
310  kfree(this_node);
311  error:
312  *value = 0;
313  befs_debug(sb, "<--- befs_btree_find() ERROR");
314  return BEFS_ERR;
315 }
316 
335 static int
336 befs_find_key(struct super_block *sb, befs_btree_node * node,
337  const char *findkey, befs_off_t * value)
338 {
339  int first, last, mid;
340  int eq;
341  u16 keylen;
342  int findkey_len;
343  char *thiskey;
344  fs64 *valarray;
345 
346  befs_debug(sb, "---> befs_find_key() %s", findkey);
347 
348  *value = 0;
349 
350  findkey_len = strlen(findkey);
351 
352  /* if node can not contain key, just skeep this node */
353  last = node->head.all_key_count - 1;
354  thiskey = befs_bt_get_key(sb, node, last, &keylen);
355 
356  eq = befs_compare_strings(thiskey, keylen, findkey, findkey_len);
357  if (eq < 0) {
358  befs_debug(sb, "<--- befs_find_key() %s not found", findkey);
359  return BEFS_BT_NOT_FOUND;
360  }
361 
362  valarray = befs_bt_valarray(node);
363 
364  /* simple binary search */
365  first = 0;
366  mid = 0;
367  while (last >= first) {
368  mid = (last + first) / 2;
369  befs_debug(sb, "first: %d, last: %d, mid: %d", first, last,
370  mid);
371  thiskey = befs_bt_get_key(sb, node, mid, &keylen);
372  eq = befs_compare_strings(thiskey, keylen, findkey,
373  findkey_len);
374 
375  if (eq == 0) {
376  befs_debug(sb, "<--- befs_find_key() found %s at %d",
377  thiskey, mid);
378 
379  *value = fs64_to_cpu(sb, valarray[mid]);
380  return BEFS_BT_MATCH;
381  }
382  if (eq > 0)
383  last = mid - 1;
384  else
385  first = mid + 1;
386  }
387  if (eq < 0)
388  *value = fs64_to_cpu(sb, valarray[mid + 1]);
389  else
390  *value = fs64_to_cpu(sb, valarray[mid]);
391  befs_debug(sb, "<--- befs_find_key() found %s at %d", thiskey, mid);
392  return BEFS_BT_PARMATCH;
393 }
394 
415 int
416 befs_btree_read(struct super_block *sb, befs_data_stream * ds,
417  loff_t key_no, size_t bufsize, char *keybuf, size_t * keysize,
418  befs_off_t * value)
419 {
420  befs_btree_node *this_node;
421  befs_btree_super bt_super;
422  befs_off_t node_off = 0;
423  int cur_key;
424  fs64 *valarray;
425  char *keystart;
426  u16 keylen;
427  int res;
428 
429  uint key_sum = 0;
430 
431  befs_debug(sb, "---> befs_btree_read()");
432 
433  if (befs_bt_read_super(sb, ds, &bt_super) != BEFS_OK) {
434  befs_error(sb,
435  "befs_btree_read() failed to read index superblock");
436  goto error;
437  }
438 
439  if ((this_node = (befs_btree_node *)
440  kmalloc(sizeof (befs_btree_node), GFP_NOFS)) == NULL) {
441  befs_error(sb, "befs_btree_read() failed to allocate %u "
442  "bytes of memory", sizeof (befs_btree_node));
443  goto error;
444  }
445 
446  node_off = bt_super.root_node_ptr;
447  this_node->bh = NULL;
448 
449  /* seeks down to first leafnode, reads it into this_node */
450  res = befs_btree_seekleaf(sb, ds, &bt_super, this_node, &node_off);
451  if (res == BEFS_BT_EMPTY) {
452  brelse(this_node->bh);
453  kfree(this_node);
454  *value = 0;
455  *keysize = 0;
456  befs_debug(sb, "<--- befs_btree_read() Tree is EMPTY");
457  return BEFS_BT_EMPTY;
458  } else if (res == BEFS_ERR) {
459  goto error_alloc;
460  }
461 
462  /* find the leaf node containing the key_no key */
463 
464  while (key_sum + this_node->head.all_key_count <= key_no) {
465 
466  /* no more nodes to look in: key_no is too large */
467  if (this_node->head.right == befs_bt_inval) {
468  *keysize = 0;
469  *value = 0;
470  befs_debug(sb,
471  "<--- befs_btree_read() END of keys at %Lu",
472  key_sum + this_node->head.all_key_count);
473  brelse(this_node->bh);
474  kfree(this_node);
475  return BEFS_BT_END;
476  }
477 
478  key_sum += this_node->head.all_key_count;
479  node_off = this_node->head.right;
480 
481  if (befs_bt_read_node(sb, ds, this_node, node_off) != BEFS_OK) {
482  befs_error(sb, "befs_btree_read() failed to read "
483  "node at %Lu", node_off);
484  goto error_alloc;
485  }
486  }
487 
488  /* how many keys into this_node is key_no */
489  cur_key = key_no - key_sum;
490 
491  /* get pointers to datastructures within the node body */
492  valarray = befs_bt_valarray(this_node);
493 
494  keystart = befs_bt_get_key(sb, this_node, cur_key, &keylen);
495 
496  befs_debug(sb, "Read [%Lu,%d]: keysize %d", node_off, cur_key, keylen);
497 
498  if (bufsize < keylen + 1) {
499  befs_error(sb, "befs_btree_read() keybuf too small (%u) "
500  "for key of size %d", bufsize, keylen);
501  brelse(this_node->bh);
502  goto error_alloc;
503  };
504 
505  strncpy(keybuf, keystart, keylen);
506  *value = fs64_to_cpu(sb, valarray[cur_key]);
507  *keysize = keylen;
508  keybuf[keylen] = '\0';
509 
510  befs_debug(sb, "Read [%Lu,%d]: Key \"%.*s\", Value %Lu", node_off,
511  cur_key, keylen, keybuf, *value);
512 
513  brelse(this_node->bh);
514  kfree(this_node);
515 
516  befs_debug(sb, "<--- befs_btree_read()");
517 
518  return BEFS_OK;
519 
520  error_alloc:
521  kfree(this_node);
522 
523  error:
524  *keysize = 0;
525  *value = 0;
526  befs_debug(sb, "<--- befs_btree_read() ERROR");
527  return BEFS_ERR;
528 }
529 
545 static int
546 befs_btree_seekleaf(struct super_block *sb, befs_data_stream * ds,
547  befs_btree_super * bt_super, befs_btree_node * this_node,
548  befs_off_t * node_off)
549 {
550 
551  befs_debug(sb, "---> befs_btree_seekleaf()");
552 
553  if (befs_bt_read_node(sb, ds, this_node, *node_off) != BEFS_OK) {
554  befs_error(sb, "befs_btree_seekleaf() failed to read "
555  "node at %Lu", *node_off);
556  goto error;
557  }
558  befs_debug(sb, "Seekleaf to root node %Lu", *node_off);
559 
560  if (this_node->head.all_key_count == 0 && befs_leafnode(this_node)) {
561  befs_debug(sb, "<--- befs_btree_seekleaf() Tree is EMPTY");
562  return BEFS_BT_EMPTY;
563  }
564 
565  while (!befs_leafnode(this_node)) {
566 
567  if (this_node->head.all_key_count == 0) {
568  befs_debug(sb, "befs_btree_seekleaf() encountered "
569  "an empty interior node: %Lu. Using Overflow "
570  "node: %Lu", *node_off,
571  this_node->head.overflow);
572  *node_off = this_node->head.overflow;
573  } else {
574  fs64 *valarray = befs_bt_valarray(this_node);
575  *node_off = fs64_to_cpu(sb, valarray[0]);
576  }
577  if (befs_bt_read_node(sb, ds, this_node, *node_off) != BEFS_OK) {
578  befs_error(sb, "befs_btree_seekleaf() failed to read "
579  "node at %Lu", *node_off);
580  goto error;
581  }
582 
583  befs_debug(sb, "Seekleaf to child node %Lu", *node_off);
584  }
585  befs_debug(sb, "Node %Lu is a leaf node", *node_off);
586 
587  return BEFS_OK;
588 
589  error:
590  befs_debug(sb, "<--- befs_btree_seekleaf() ERROR");
591  return BEFS_ERR;
592 }
593 
601 static int
602 befs_leafnode(befs_btree_node * node)
603 {
604  /* all interior nodes (and only interior nodes) have an overflow node */
605  if (node->head.overflow == befs_bt_inval)
606  return 1;
607  else
608  return 0;
609 }
610 
624 static fs16 *
625 befs_bt_keylen_index(befs_btree_node * node)
626 {
627  const int keylen_align = 8;
628  unsigned long int off =
629  (sizeof (befs_btree_nodehead) + node->head.all_key_length);
630  ulong tmp = off % keylen_align;
631 
632  if (tmp)
633  off += keylen_align - tmp;
634 
635  return (fs16 *) ((void *) node->od_node + off);
636 }
637 
645 static fs64 *
646 befs_bt_valarray(befs_btree_node * node)
647 {
648  void *keylen_index_start = (void *) befs_bt_keylen_index(node);
649  size_t keylen_index_size = node->head.all_key_count * sizeof (fs16);
650 
651  return (fs64 *) (keylen_index_start + keylen_index_size);
652 }
653 
661 static char *
662 befs_bt_keydata(befs_btree_node * node)
663 {
664  return (char *) ((void *) node->od_node + sizeof (befs_btree_nodehead));
665 }
666 
677 static char *
678 befs_bt_get_key(struct super_block *sb, befs_btree_node * node,
679  int index, u16 * keylen)
680 {
681  int prev_key_end;
682  char *keystart;
683  fs16 *keylen_index;
684 
685  if (index < 0 || index > node->head.all_key_count) {
686  *keylen = 0;
687  return NULL;
688  }
689 
690  keystart = befs_bt_keydata(node);
691  keylen_index = befs_bt_keylen_index(node);
692 
693  if (index == 0)
694  prev_key_end = 0;
695  else
696  prev_key_end = fs16_to_cpu(sb, keylen_index[index - 1]);
697 
698  *keylen = fs16_to_cpu(sb, keylen_index[index]) - prev_key_end;
699 
700  return keystart + prev_key_end;
701 }
702 
714 static int
715 befs_compare_strings(const void *key1, int keylen1,
716  const void *key2, int keylen2)
717 {
718  int len = min_t(int, keylen1, keylen2);
719  int result = strncmp(key1, key2, len);
720  if (result == 0)
721  result = keylen1 - keylen2;
722  return result;
723 }
724 
725 /* These will be used for non-string keyed btrees */
726 #if 0
727 static int
728 btree_compare_int32(cont void *key1, int keylen1, const void *key2, int keylen2)
729 {
730  return *(int32_t *) key1 - *(int32_t *) key2;
731 }
732 
733 static int
734 btree_compare_uint32(cont void *key1, int keylen1,
735  const void *key2, int keylen2)
736 {
737  if (*(u_int32_t *) key1 == *(u_int32_t *) key2)
738  return 0;
739  else if (*(u_int32_t *) key1 > *(u_int32_t *) key2)
740  return 1;
741 
742  return -1;
743 }
744 static int
745 btree_compare_int64(cont void *key1, int keylen1, const void *key2, int keylen2)
746 {
747  if (*(int64_t *) key1 == *(int64_t *) key2)
748  return 0;
749  else if (*(int64_t *) key1 > *(int64_t *) key2)
750  return 1;
751 
752  return -1;
753 }
754 
755 static int
756 btree_compare_uint64(cont void *key1, int keylen1,
757  const void *key2, int keylen2)
758 {
759  if (*(u_int64_t *) key1 == *(u_int64_t *) key2)
760  return 0;
761  else if (*(u_int64_t *) key1 > *(u_int64_t *) key2)
762  return 1;
763 
764  return -1;
765 }
766 
767 static int
768 btree_compare_float(cont void *key1, int keylen1, const void *key2, int keylen2)
769 {
770  float result = *(float *) key1 - *(float *) key2;
771  if (result == 0.0f)
772  return 0;
773 
774  return (result < 0.0f) ? -1 : 1;
775 }
776 
777 static int
778 btree_compare_double(cont void *key1, int keylen1,
779  const void *key2, int keylen2)
780 {
781  double result = *(double *) key1 - *(double *) key2;
782  if (result == 0.0)
783  return 0;
784 
785  return (result < 0.0) ? -1 : 1;
786 }
787 #endif //0