radix-tree.c (Linux Kernel 3.7.1)
/*
 * Copyright (C) 2001 Momchil Velikov
 * Portions Copyright (C) 2001 Christoph Hellwig
 * Copyright (C) 2005 SGI, Christoph Lameter
 * Copyright (C) 2006 Nick Piggin
 * Copyright (C) 2012 Konstantin Khlebnikov
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/radix-tree.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/string.h>
#include <linux/bitops.h>
#include <linux/rcupdate.h>


#ifdef __KERNEL__
#define RADIX_TREE_MAP_SHIFT	(CONFIG_BASE_SMALL ? 4 : 6)
#else
#define RADIX_TREE_MAP_SHIFT	3	/* For more stressful testing */
#endif

#define RADIX_TREE_MAP_SIZE	(1UL << RADIX_TREE_MAP_SHIFT)
#define RADIX_TREE_MAP_MASK	(RADIX_TREE_MAP_SIZE-1)

#define RADIX_TREE_TAG_LONGS	\
	((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
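
/*
 * Illustrative sketch (not part of the original file): with the default
 * RADIX_TREE_MAP_SHIFT of 6, each node has 64 slots and a key is consumed
 * six bits per level, most-significant chunk first.  For a hypothetical
 * tree of height 3 and index 0x2a53:
 *
 *	offset = (0x2a53 >> 12) & RADIX_TREE_MAP_MASK;	== 0x02, root level
 *	offset = (0x2a53 >> 6)  & RADIX_TREE_MAP_MASK;	== 0x29, middle level
 *	offset = (0x2a53 >> 0)  & RADIX_TREE_MAP_MASK;	== 0x13, leaf level
 *
 * This is exactly the "(index >> shift) & RADIX_TREE_MAP_MASK" step used
 * by every walker below.
 */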

struct radix_tree_node {
	unsigned int	height;		/* Height from the bottom */
	unsigned int	count;
	union {
		struct radix_tree_node *parent;	/* Used when ascending tree */
		struct rcu_head	rcu_head;	/* Used when freeing node */
	};
	void __rcu	*slots[RADIX_TREE_MAP_SIZE];
	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
};

#define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
					  RADIX_TREE_MAP_SHIFT))

/*
 * The height_to_maxindex array needs to be one deeper than the maximum
 * path as height 0 holds only 1 entry.
 */
static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;

/*
 * Radix tree node cache.
 */
static struct kmem_cache *radix_tree_node_cachep;

/*
 * The radix tree is variable-height, so an insert operation not only has
 * to build the branch to its corresponding item, it also has to build the
 * branch to existing items if the size has to be increased (by
 * radix_tree_extend).
 *
 * The worst case is a zero height tree with just a single item at index 0,
 * and then inserting an item at index ULONG_MAX. This requires 2 new branches
 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
 * Hence:
 */
#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAP_PATH * 2 - 1)
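
/*
 * Worked example of the bound above (a sketch, not in the original file):
 * on a 64-bit machine with RADIX_TREE_MAP_SHIFT == 6,
 * RADIX_TREE_MAX_PATH = DIV_ROUND_UP(64, 6) = 11, so
 * RADIX_TREE_PRELOAD_SIZE = 11 * 2 - 1 = 21 nodes per CPU.
 */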

/*
 * Per-cpu pool of preloaded nodes
 */
struct radix_tree_preload {
	int nr;
	struct radix_tree_node *nodes[RADIX_TREE_PRELOAD_SIZE];
};
static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };

static inline void *ptr_to_indirect(void *ptr)
{
	return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
}

static inline void *indirect_to_ptr(void *ptr)
{
	return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
}
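
/*
 * Sketch of the encoding above (not in the original file):
 * RADIX_TREE_INDIRECT_PTR is bit 0, which is always clear in the address
 * of a slab-allocated node, so it distinguishes "root points at an
 * interior node" from "root points directly at a single data item":
 *
 *	void *entry = ptr_to_indirect(node);	address | 1
 *	radix_tree_is_indirect_ptr(entry);	nonzero: interior node
 *	indirect_to_ptr(entry) == node;		address & ~1
 */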

static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
{
	return root->gfp_mask & __GFP_BITS_MASK;
}

static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__set_bit(offset, node->tags[tag]);
}

static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__clear_bit(offset, node->tags[tag]);
}

static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	return test_bit(offset, node->tags[tag]);
}

static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
{
	root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
}

static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
{
	root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
}

static inline void root_tag_clear_all(struct radix_tree_root *root)
{
	root->gfp_mask &= __GFP_BITS_MASK;
}

static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
{
	return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
}
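
/*
 * Layout sketch for the root-tag helpers above (an illustration, not in
 * the original file): the root has no struct radix_tree_node, so its
 * per-tree tag bits are packed into root->gfp_mask above the real
 * allocation flags:
 *
 *	root->gfp_mask:	[ ... | tag1 | tag0 | gfp allocation flags ]
 *	                 bit (__GFP_BITS_SHIFT + tag)  bits 0..__GFP_BITS_SHIFT-1
 */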

/*
 * Returns 1 if any slot in the node has this tag set.
 * Otherwise returns 0.
 */
static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
{
	int idx;
	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
		if (node->tags[tag][idx])
			return 1;
	}
	return 0;
}

/**
 * radix_tree_find_next_bit - find the next set bit in a memory region
 *
 * @addr: The address to base the search on
 * @size: The bitmap size in bits
 * @offset: The bitnumber to start searching at
 *
 * Unrollable variant of find_next_bit() for constant size arrays.
 * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero.
 * Returns next bit offset, or size if nothing found.
 */
static __always_inline unsigned long
radix_tree_find_next_bit(const unsigned long *addr,
			 unsigned long size, unsigned long offset)
{
	if (!__builtin_constant_p(size))
		return find_next_bit(addr, size, offset);

	if (offset < size) {
		unsigned long tmp;

		addr += offset / BITS_PER_LONG;
		tmp = *addr >> (offset % BITS_PER_LONG);
		if (tmp)
			return __ffs(tmp) + offset;
		offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
		while (offset < size) {
			tmp = *++addr;
			if (tmp)
				return __ffs(tmp) + offset;
			offset += BITS_PER_LONG;
		}
	}
	return size;
}

/*
 * This assumes that the caller has performed appropriate preallocation, and
 * that the caller has pinned this thread of control to the current CPU.
 */
static struct radix_tree_node *
radix_tree_node_alloc(struct radix_tree_root *root)
{
	struct radix_tree_node *ret = NULL;
	gfp_t gfp_mask = root_gfp_mask(root);

	if (!(gfp_mask & __GFP_WAIT)) {
		struct radix_tree_preload *rtp;

		/*
		 * Provided the caller has preloaded here, we will always
		 * succeed in getting a node here (and never reach
		 * kmem_cache_alloc)
		 */
		rtp = &__get_cpu_var(radix_tree_preloads);
		if (rtp->nr) {
			ret = rtp->nodes[rtp->nr - 1];
			rtp->nodes[rtp->nr - 1] = NULL;
			rtp->nr--;
		}
	}
	if (ret == NULL)
		ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);

	BUG_ON(radix_tree_is_indirect_ptr(ret));
	return ret;
}

static void radix_tree_node_rcu_free(struct rcu_head *head)
{
	struct radix_tree_node *node =
			container_of(head, struct radix_tree_node, rcu_head);
	int i;

	/*
	 * must only free zeroed nodes into the slab. radix_tree_shrink
	 * can leave us with a non-NULL entry in the first slot, so clear
	 * that here to make sure.
	 */
	for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
		tag_clear(node, i, 0);

	node->slots[0] = NULL;
	node->count = 0;

	kmem_cache_free(radix_tree_node_cachep, node);
}

static inline void
radix_tree_node_free(struct radix_tree_node *node)
{
	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail. On
 * success, return zero, with preemption disabled. On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_WAIT being passed to INIT_RADIX_TREE().
 */
int radix_tree_preload(gfp_t gfp_mask)
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;
	int ret = -ENOMEM;

	preempt_disable();
	rtp = &__get_cpu_var(radix_tree_preloads);
	while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
		preempt_enable();
		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
		if (node == NULL)
			goto out;
		preempt_disable();
		rtp = &__get_cpu_var(radix_tree_preloads);
		if (rtp->nr < ARRAY_SIZE(rtp->nodes))
			rtp->nodes[rtp->nr++] = node;
		else
			kmem_cache_free(radix_tree_node_cachep, node);
	}
	ret = 0;
out:
	return ret;
}
EXPORT_SYMBOL(radix_tree_preload);

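/*
 * Usage sketch (illustrative, not part of the original file; "my_lock"
 * and "my_tree" are hypothetical names).  The tree must have been
 * initialised without __GFP_WAIT for the preload pool to be consumed:
 *
 *	if (radix_tree_preload(GFP_KERNEL))	may sleep; -ENOMEM on failure
 *		return -ENOMEM;
 *	spin_lock(&my_lock);			insertion can no longer fail
 *	err = radix_tree_insert(&my_tree, index, item);
 *	spin_unlock(&my_lock);
 *	radix_tree_preload_end();		re-enables preemption
 */
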
/*
 * Return the maximum key which can be stored into a
 * radix tree with height HEIGHT.
 */
static inline unsigned long radix_tree_maxindex(unsigned int height)
{
	return height_to_maxindex[height];
}
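
/*
 * Example values (a sketch, not in the original file), for the default
 * RADIX_TREE_MAP_SHIFT of 6 on a 64-bit machine:
 *
 *	height 0 -> maxindex 0			(root->rnode holds the item)
 *	height 1 -> maxindex 2^6  - 1 = 63
 *	height 2 -> maxindex 2^12 - 1 = 4095
 *	height 3 -> maxindex 2^18 - 1 = 262143
 *	...
 *	height 11 -> maxindex ~0UL		(clamped by __maxindex())
 */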

/*
 * Extend a radix tree so it can store key @index.
 */
static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
{
	struct radix_tree_node *node;
	struct radix_tree_node *slot;
	unsigned int height;
	int tag;

	/* Figure out what the height should be. */
	height = root->height + 1;
	while (index > radix_tree_maxindex(height))
		height++;

	if (root->rnode == NULL) {
		root->height = height;
		goto out;
	}

	do {
		unsigned int newheight;
		if (!(node = radix_tree_node_alloc(root)))
			return -ENOMEM;

		/* Propagate the aggregated tag info into the new root */
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
			if (root_tag_get(root, tag))
				tag_set(node, tag, 0);
		}

		/* Increase the height. */
		newheight = root->height+1;
		node->height = newheight;
		node->count = 1;
		node->parent = NULL;
		slot = root->rnode;
		if (newheight > 1) {
			slot = indirect_to_ptr(slot);
			slot->parent = node;
		}
		node->slots[0] = slot;
		node = ptr_to_indirect(node);
		rcu_assign_pointer(root->rnode, node);
		root->height = newheight;
	} while (height > root->height);
out:
	return 0;
}

/**
 *	radix_tree_insert    -    insert into a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *	@item:		item to insert
 *
 *	Insert an item into the radix tree at position @index.
 */
int radix_tree_insert(struct radix_tree_root *root,
			unsigned long index, void *item)
{
	struct radix_tree_node *node = NULL, *slot;
	unsigned int height, shift;
	int offset;
	int error;

	BUG_ON(radix_tree_is_indirect_ptr(item));

	/* Make sure the tree is high enough. */
	if (index > radix_tree_maxindex(root->height)) {
		error = radix_tree_extend(root, index);
		if (error)
			return error;
	}

	slot = indirect_to_ptr(root->rnode);

	height = root->height;
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	offset = 0;			/* uninitialised var warning */
	while (height > 0) {
		if (slot == NULL) {
			/* Have to add a child node. */
			if (!(slot = radix_tree_node_alloc(root)))
				return -ENOMEM;
			slot->height = height;
			slot->parent = node;
			if (node) {
				rcu_assign_pointer(node->slots[offset], slot);
				node->count++;
			} else
				rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
		}

		/* Go a level down */
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		node = slot;
		slot = node->slots[offset];
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}

	if (slot != NULL)
		return -EEXIST;

	if (node) {
		node->count++;
		rcu_assign_pointer(node->slots[offset], item);
		BUG_ON(tag_get(node, 0, offset));
		BUG_ON(tag_get(node, 1, offset));
	} else {
		rcu_assign_pointer(root->rnode, item);
		BUG_ON(root_tag_get(root, 0));
		BUG_ON(root_tag_get(root, 1));
	}

	return 0;
}
EXPORT_SYMBOL(radix_tree_insert);

/*
 * is_slot == 1 : search for the slot.
 * is_slot == 0 : search for the node.
 */
static void *radix_tree_lookup_element(struct radix_tree_root *root,
				unsigned long index, int is_slot)
{
	unsigned int height, shift;
	struct radix_tree_node *node, **slot;

	node = rcu_dereference_raw(root->rnode);
	if (node == NULL)
		return NULL;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (index > 0)
			return NULL;
		return is_slot ? (void *)&root->rnode : node;
	}
	node = indirect_to_ptr(node);

	height = node->height;
	if (index > radix_tree_maxindex(height))
		return NULL;

	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	do {
		slot = (struct radix_tree_node **)
			(node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
		node = rcu_dereference_raw(*slot);
		if (node == NULL)
			return NULL;

		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	} while (height > 0);

	return is_slot ? (void *)slot : indirect_to_ptr(node);
}

/**
 *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Returns:  the slot corresponding to the position @index in the
 *	radix tree @root. This is useful for update-if-exists operations.
 *
 *	This function can be called under rcu_read_lock iff the slot is not
 *	modified by radix_tree_replace_slot, otherwise it must be called
 *	exclusive from other writers. Any dereference of the slot must be done
 *	using radix_tree_deref_slot.
 */
void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
{
	return (void **)radix_tree_lookup_element(root, index, 1);
}
EXPORT_SYMBOL(radix_tree_lookup_slot);

/**
 *	radix_tree_lookup    -    perform lookup operation on a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Lookup the item at the position @index in the radix tree @root.
 *
 *	This function can be called under rcu_read_lock, however the caller
 *	must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 *	them safely). No RCU barriers are required to access or modify the
 *	returned item, however.
 */
void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
{
	return radix_tree_lookup_element(root, index, 0);
}
EXPORT_SYMBOL(radix_tree_lookup);

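/*
 * Minimal lookup sketch (illustrative, not part of the original file;
 * "my_tree" is a hypothetical name).  A lookup only needs
 * rcu_read_lock(); the caller must keep the item itself alive, e.g. by
 * freeing items through RCU or taking a reference before unlocking:
 *
 *	rcu_read_lock();
 *	item = radix_tree_lookup(&my_tree, index);
 *	if (item)
 *		...use item or take a reference...
 *	rcu_read_unlock();
 */
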
/**
 *	radix_tree_tag_set - set a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag:		tag index
 *
 *	Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *	corresponding to @index in the radix tree.  From
 *	the root all the way down to the leaf node.
 *
 *	Returns the address of the tagged item.  Setting a tag on a
 *	not-present item is a bug.
 */
void *radix_tree_tag_set(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	unsigned int height, shift;
	struct radix_tree_node *slot;

	height = root->height;
	BUG_ON(index > radix_tree_maxindex(height));

	slot = indirect_to_ptr(root->rnode);
	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	while (height > 0) {
		int offset;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		if (!tag_get(slot, tag, offset))
			tag_set(slot, tag, offset);
		slot = slot->slots[offset];
		BUG_ON(slot == NULL);
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}

	/* set the root's tag bit */
	if (slot && !root_tag_get(root, tag))
		root_tag_set(root, tag);

	return slot;
}
EXPORT_SYMBOL(radix_tree_tag_set);

/**
 *	radix_tree_tag_clear - clear a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag:		tag index
 *
 *	Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *	corresponding to @index in the radix tree.  If this causes
 *	the leaf node to have no tags set then clear the tag in the
 *	next-to-leaf node, etc.
 *
 *	Returns the address of the tagged item on success, else NULL.  ie:
 *	has the same return value and semantics as radix_tree_lookup().
 */
void *radix_tree_tag_clear(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node = NULL;
	struct radix_tree_node *slot = NULL;
	unsigned int height, shift;
	int uninitialized_var(offset);

	height = root->height;
	if (index > radix_tree_maxindex(height))
		goto out;

	shift = height * RADIX_TREE_MAP_SHIFT;
	slot = indirect_to_ptr(root->rnode);

	while (shift) {
		if (slot == NULL)
			goto out;

		shift -= RADIX_TREE_MAP_SHIFT;
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		node = slot;
		slot = slot->slots[offset];
	}

	if (slot == NULL)
		goto out;

	while (node) {
		if (!tag_get(node, tag, offset))
			goto out;
		tag_clear(node, tag, offset);
		if (any_tag_set(node, tag))
			goto out;

		index >>= RADIX_TREE_MAP_SHIFT;
		offset = index & RADIX_TREE_MAP_MASK;
		node = node->parent;
	}

	/* clear the root's tag bit */
	if (root_tag_get(root, tag))
		root_tag_clear(root, tag);

out:
	return slot;
}
EXPORT_SYMBOL(radix_tree_tag_clear);

/**
 * radix_tree_tag_get - get a tag on a radix tree node
 * @root:		radix tree root
 * @index:		index key
 * @tag:		tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Return values:
 *
 *  0: tag not present or not set
 *  1: tag set
 *
 * Note that the return value of this function may not be relied on, even if
 * the RCU lock is held, unless tag modification and node deletion are excluded
 * from concurrency.
 */
int radix_tree_tag_get(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	unsigned int height, shift;
	struct radix_tree_node *node;

	/* check the root's tag bit */
	if (!root_tag_get(root, tag))
		return 0;

	node = rcu_dereference_raw(root->rnode);
	if (node == NULL)
		return 0;

	if (!radix_tree_is_indirect_ptr(node))
		return (index == 0);
	node = indirect_to_ptr(node);

	height = node->height;
	if (index > radix_tree_maxindex(height))
		return 0;

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	for ( ; ; ) {
		int offset;

		if (node == NULL)
			return 0;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		if (!tag_get(node, tag, offset))
			return 0;
		if (height == 1)
			return 1;
		node = rcu_dereference_raw(node->slots[offset]);
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}
}
EXPORT_SYMBOL(radix_tree_tag_get);

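/*
 * Tag usage sketch (illustrative, not part of the original file;
 * "my_lock", "my_tree" and MY_TAG, a tag index < RADIX_TREE_MAX_TAGS,
 * are assumed names).  Tag updates need the same write-side exclusion
 * as other tree modifications:
 *
 *	spin_lock(&my_lock);
 *	radix_tree_tag_set(&my_tree, index, MY_TAG);
 *	spin_unlock(&my_lock);
 *	...
 *	if (radix_tree_tag_get(&my_tree, index, MY_TAG)) {
 *		spin_lock(&my_lock);
 *		radix_tree_tag_clear(&my_tree, index, MY_TAG);
 *		spin_unlock(&my_lock);
 *	}
 */
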
/**
 * radix_tree_next_chunk - find next chunk of slots for iteration
 *
 * @root:	radix tree root
 * @iter:	iterator state
 * @flags:	RADIX_TREE_ITER_* flags and tag index
 * Returns:	pointer to chunk first slot, or NULL if iteration is over
 */
void **radix_tree_next_chunk(struct radix_tree_root *root,
			     struct radix_tree_iter *iter, unsigned flags)
{
	unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK;
	struct radix_tree_node *rnode, *node;
	unsigned long index, offset;

	if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
		return NULL;

	/*
	 * Catch next_index overflow after ~0UL. iter->index never overflows
	 * during iterating; it can be zero only at the beginning.
	 * And we cannot overflow iter->next_index in a single step,
	 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
	 *
	 * This condition is also used by radix_tree_next_slot() to stop
	 * contiguous iterating, and to forbid switching to the next chunk.
	 */
	index = iter->next_index;
	if (!index && iter->index)
		return NULL;

	rnode = rcu_dereference_raw(root->rnode);
	if (radix_tree_is_indirect_ptr(rnode)) {
		rnode = indirect_to_ptr(rnode);
	} else if (rnode && !index) {
		/* Single-slot tree */
		iter->index = 0;
		iter->next_index = 1;
		iter->tags = 1;
		return (void **)&root->rnode;
	} else
		return NULL;

restart:
	shift = (rnode->height - 1) * RADIX_TREE_MAP_SHIFT;
	offset = index >> shift;

	/* Index outside of the tree */
	if (offset >= RADIX_TREE_MAP_SIZE)
		return NULL;

	node = rnode;
	while (1) {
		if ((flags & RADIX_TREE_ITER_TAGGED) ?
				!test_bit(offset, node->tags[tag]) :
				!node->slots[offset]) {
			/* Hole detected */
			if (flags & RADIX_TREE_ITER_CONTIG)
				return NULL;

			if (flags & RADIX_TREE_ITER_TAGGED)
				offset = radix_tree_find_next_bit(
						node->tags[tag],
						RADIX_TREE_MAP_SIZE,
						offset + 1);
			else
				while (++offset < RADIX_TREE_MAP_SIZE) {
					if (node->slots[offset])
						break;
				}
			index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1);
			index += offset << shift;
			/* Overflow after ~0UL */
			if (!index)
				return NULL;
			if (offset == RADIX_TREE_MAP_SIZE)
				goto restart;
		}

		/* This is leaf-node */
		if (!shift)
			break;

		node = rcu_dereference_raw(node->slots[offset]);
		if (node == NULL)
			goto restart;
		shift -= RADIX_TREE_MAP_SHIFT;
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
	}

	/* Update the iterator state */
	iter->index = index;
	iter->next_index = (index | RADIX_TREE_MAP_MASK) + 1;

	/* Construct iter->tags bit-mask from node->tags[tag] array */
	if (flags & RADIX_TREE_ITER_TAGGED) {
		unsigned tag_long, tag_bit;

		tag_long = offset / BITS_PER_LONG;
		tag_bit  = offset % BITS_PER_LONG;
		iter->tags = node->tags[tag][tag_long] >> tag_bit;
		/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
		if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
			/* Pick tags from next element */
			if (tag_bit)
				iter->tags |= node->tags[tag][tag_long + 1] <<
						(BITS_PER_LONG - tag_bit);
			/* Clip chunk size, here only BITS_PER_LONG tags */
			iter->next_index = index + BITS_PER_LONG;
		}
	}

	return node->slots + offset;
}
EXPORT_SYMBOL(radix_tree_next_chunk);

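/*
 * Iteration sketch (illustrative, not part of the original file;
 * "my_tree" and "start" are hypothetical names): callers normally use
 * the wrapper macros from <linux/radix-tree.h> rather than calling
 * radix_tree_next_chunk() directly:
 *
 *	struct radix_tree_iter iter;
 *	void **slot;
 *
 *	rcu_read_lock();
 *	radix_tree_for_each_slot(slot, &my_tree, &iter, start) {
 *		void *item = radix_tree_deref_slot(slot);
 *		...use item...
 *	}
 *	rcu_read_unlock();
 */
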
/**
 * radix_tree_range_tag_if_tagged - for each item in given range set given
 *				    tag if item has another tag set
 * @root:		radix tree root
 * @first_indexp:	pointer to a starting index of a range to scan
 * @last_index:		last index of a range to scan
 * @nr_to_tag:		maximum number items to tag
 * @iftag:		tag index to test
 * @settag:		tag index to set if tested tag is set
 *
 * This function scans the range of the radix tree from *first_indexp to
 * last_index (inclusive).  For each item in the range that has @iftag
 * set, the function also sets @settag.  It stops either after tagging
 * nr_to_tag items or after reaching last_index.
 *
 * The function returns the number of leaves where the tag was set and
 * sets *first_indexp to the first unscanned index.
 * WARNING! *first_indexp can wrap if last_index is > 0.
 */
unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
		unsigned long *first_indexp, unsigned long last_index,
		unsigned long nr_to_tag,
		unsigned int iftag, unsigned int settag)
{
	unsigned int height = root->height;
	struct radix_tree_node *node = NULL;
	struct radix_tree_node *slot;
	unsigned int shift;
	unsigned long tagged = 0;
	unsigned long index = *first_indexp;

	last_index = min(last_index, radix_tree_maxindex(height));
	if (index > last_index)
		return 0;
	if (!nr_to_tag)
		return 0;
	if (!root_tag_get(root, iftag)) {
		*first_indexp = last_index + 1;
		return 0;
	}
	if (height == 0) {
		*first_indexp = last_index + 1;
		root_tag_set(root, settag);
		return 1;
	}

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
	slot = indirect_to_ptr(root->rnode);

	for (;;) {
		unsigned long upindex;
		int offset;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		if (!slot->slots[offset])
			goto next;
		if (!tag_get(slot, iftag, offset))
			goto next;
		if (shift) {
			/* Go down one level */
			shift -= RADIX_TREE_MAP_SHIFT;
			node = slot;
			slot = slot->slots[offset];
			continue;
		}

		/* tag the leaf */
		tagged++;
		tag_set(slot, settag, offset);

		/* walk back up the path tagging interior nodes */
		upindex = index;
		while (node) {
			upindex >>= RADIX_TREE_MAP_SHIFT;
			offset = upindex & RADIX_TREE_MAP_MASK;

			/* stop if we find a node with the tag already set */
			if (tag_get(node, settag, offset))
				break;
			tag_set(node, settag, offset);
			node = node->parent;
		}

		/*
		 * Small optimization: now clear that node pointer.
		 * Since all of this slot's ancestors now have the tag set
		 * from setting it above, we have no further need to walk
		 * back up the tree setting tags, until we update slot to
		 * point to another radix_tree_node.
		 */
		node = NULL;

next:
		/* Go to next item at level determined by 'shift' */
		index = ((index >> shift) + 1) << shift;
		/* Overflow can happen when last_index is ~0UL... */
		if (index > last_index || !index)
			break;
		if (tagged >= nr_to_tag)
			break;
		while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
			/*
			 * We've fully scanned this node. Go up. Because
			 * last_index is guaranteed to be in the tree, what
			 * we do below cannot wander astray.
			 */
			slot = slot->parent;
			shift += RADIX_TREE_MAP_SHIFT;
		}
	}
	/*
	 * Only set the root tag if some entry in the range from
	 * *first_indexp to last_index was actually tagged with settag.
	 */
	if (tagged > 0)
		root_tag_set(root, settag);
	*first_indexp = index;

	return tagged;
}
EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);

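/*
 * Usage sketch (illustrative, not part of the original file), modelled
 * on tag_pages_for_writeback() in mm/page-writeback.c, which converts
 * a batch of dirty entries to "towrite", restarting from wherever the
 * previous call stopped; "start", "last" and the batch size are
 * assumptions:
 *
 *	unsigned long start = first;
 *
 *	spin_lock_irq(&mapping->tree_lock);
 *	radix_tree_range_tag_if_tagged(&mapping->page_tree, &start,
 *			last, 1024,
 *			PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
 *	spin_unlock_irq(&mapping->tree_lock);
 */
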
/**
 *	radix_tree_next_hole    -    find the next hole (not-present entry)
 *	@root:		tree root
 *	@index:		index key
 *	@max_scan:	maximum range to search
 *
 *	Search the range [index, min(index+max_scan-1, ULONG_MAX)] for the
 *	lowest indexed hole.
 *
 *	Returns: the index of the hole if found, otherwise an index outside
 *	the range specified.  In rare cases of index wrap-around, 0 will be
 *	returned.  May be called under rcu_read_lock, but does not provide
 *	an atomic snapshot of the tree.
 */
unsigned long radix_tree_next_hole(struct radix_tree_root *root,
				unsigned long index, unsigned long max_scan)
{
	unsigned long i;

	for (i = 0; i < max_scan; i++) {
		if (!radix_tree_lookup(root, index))
			break;
		index++;
		if (index == 0)
			break;
	}

	return index;
}
EXPORT_SYMBOL(radix_tree_next_hole);

/**
 *	radix_tree_prev_hole    -    find the previous hole (not-present entry)
 *	@root:		tree root
 *	@index:		index key
 *	@max_scan:	maximum range to search
 *
 *	Search backwards in the range [max(index-max_scan+1, 0), index] for
 *	the first hole.
 *
 *	Returns: the index of the hole if found, otherwise an index outside
 *	the range specified.  In rare cases of wrap-around, ULONG_MAX will
 *	be returned.  May be called under rcu_read_lock, but does not
 *	provide an atomic snapshot of the tree.
 */
unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
				   unsigned long index, unsigned long max_scan)
{
	unsigned long i;

	for (i = 0; i < max_scan; i++) {
		if (!radix_tree_lookup(root, index))
			break;
		index--;
		if (index == ULONG_MAX)
			break;
	}

	return index;
}
EXPORT_SYMBOL(radix_tree_prev_hole);

/**
 *	radix_tree_gang_lookup - perform multiple lookup on a radix tree
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *
 *	Performs an index-ascending scan of the tree for present items.
 *	Places them at *@results and returns the number of items which were
 *	placed at *@results.
 *
 *	Like radix_tree_lookup, radix_tree_gang_lookup may be called under
 *	rcu_read_lock.  In this case the results are not an atomic snapshot
 *	of the tree at a single point in time; the semantics are as though
 *	multiple radix_tree_lookups had been issued individually.
 */
unsigned int
radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
			unsigned long first_index, unsigned int max_items)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_slot(slot, root, &iter, first_index) {
		results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
		if (!results[ret])
			continue;
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup);

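/*
 * Gang-lookup sketch (illustrative, not part of the original file;
 * "my_tree" and the batch size are arbitrary assumptions):
 *
 *	void *batch[16];
 *	unsigned int i, n;
 *
 *	rcu_read_lock();
 *	n = radix_tree_gang_lookup(&my_tree, batch, first_index,
 *				   ARRAY_SIZE(batch));
 *	for (i = 0; i < n; i++)
 *		...process batch[i]...
 *	rcu_read_unlock();
 */
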
/**
 *	radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@indices:	where their indices should be placed (but usually NULL)
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *
 *	Performs an index-ascending scan of the tree for present items.
 *	Places their slots at *@results and returns the number of items
 *	which were placed at *@results.
 *
 *	Like radix_tree_gang_lookup as far as RCU and locking goes.  Slots
 *	must be dereferenced with radix_tree_deref_slot, and if using only
 *	RCU protection, radix_tree_deref_slot may fail, requiring a retry.
 */
unsigned int
radix_tree_gang_lookup_slot(struct radix_tree_root *root,
			void ***results, unsigned long *indices,
			unsigned long first_index, unsigned int max_items)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_slot(slot, root, &iter, first_index) {
		results[ret] = slot;
		if (indices)
			indices[ret] = iter.index;
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_slot);

/**
 *	radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
 *	                             based on a tag
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
 *
 *	Performs an index-ascending scan of the tree for present items which
 *	have the tag indexed by @tag set.  Places the items at *@results and
 *	returns the number of items which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
		unsigned long first_index, unsigned int max_items,
		unsigned int tag)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
		results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
		if (!results[ret])
			continue;
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag);

/**
 *	radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
 *					  radix tree based on a tag
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
 *
 *	Performs an index-ascending scan of the tree for present items which
 *	have the tag indexed by @tag set.  Places the slots at *@results and
 *	returns the number of slots which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
		unsigned long first_index, unsigned int max_items,
		unsigned int tag)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
		results[ret] = slot;
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);

#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP)
#include <linux/sched.h> /* for cond_resched() */

/*
 * This linear search is at present only useful to shmem_unuse_inode().
 */
static unsigned long __locate(struct radix_tree_node *slot, void *item,
			      unsigned long index, unsigned long *found_index)
{
	unsigned int shift, height;
	unsigned long i;

	height = slot->height;
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	for ( ; height > 1; height--) {
		i = (index >> shift) & RADIX_TREE_MAP_MASK;
		for (;;) {
			if (slot->slots[i] != NULL)
				break;
			index &= ~((1UL << shift) - 1);
			index += 1UL << shift;
			if (index == 0)
				goto out;	/* 32-bit wraparound */
			i++;
			if (i == RADIX_TREE_MAP_SIZE)
				goto out;
		}

		shift -= RADIX_TREE_MAP_SHIFT;
		slot = rcu_dereference_raw(slot->slots[i]);
		if (slot == NULL)
			goto out;
	}

	/* Bottom level: check items */
	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
		if (slot->slots[i] == item) {
			*found_index = index + i;
			index = 0;
			goto out;
		}
	}
	index += RADIX_TREE_MAP_SIZE;
out:
	return index;
}

/**
 *	radix_tree_locate_item - search through radix tree for item
 *	@root:		radix tree root
 *	@item:		item to be found
 *
 *	Returns index where item was found, or -1 if not found.
 *	Caller must hold no lock (since this time-consuming function needs
 *	to be preemptible), and must check afterwards if item is still there.
 */
unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
{
	struct radix_tree_node *node;
	unsigned long max_index;
	unsigned long cur_index = 0;
	unsigned long found_index = -1;

	do {
		rcu_read_lock();
		node = rcu_dereference_raw(root->rnode);
		if (!radix_tree_is_indirect_ptr(node)) {
			rcu_read_unlock();
			if (node == item)
				found_index = 0;
			break;
		}

		node = indirect_to_ptr(node);
		max_index = radix_tree_maxindex(node->height);
		if (cur_index > max_index)
			break;

		cur_index = __locate(node, item, cur_index, &found_index);
		rcu_read_unlock();
		cond_resched();
	} while (cur_index != 0 && cur_index <= max_index);

	return found_index;
}
#else
unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
{
	return -1;
}
#endif /* CONFIG_SHMEM && CONFIG_SWAP */

/**
 *	radix_tree_shrink    -    shrink height of a radix tree to minimal
 *	@root		radix tree root
 */
static inline void radix_tree_shrink(struct radix_tree_root *root)
{
	/* try to shrink tree height */
	while (root->height > 0) {
		struct radix_tree_node *to_free = root->rnode;
		struct radix_tree_node *slot;

		BUG_ON(!radix_tree_is_indirect_ptr(to_free));
		to_free = indirect_to_ptr(to_free);

		/*
		 * We cannot shrink if the candidate node has more than one
		 * child, or if its child is not at the leftmost slot.
		 */
		if (to_free->count != 1)
			break;
		if (!to_free->slots[0])
			break;

		/*
		 * We don't need rcu_assign_pointer(), since we are simply
		 * moving the node from one part of the tree to another: if it
		 * was safe to dereference the old pointer to it
		 * (to_free->slots[0]), it will be safe to dereference the new
		 * one (root->rnode) as far as dependent read barriers go.
		 */
		slot = to_free->slots[0];
		if (root->height > 1) {
			slot->parent = NULL;
			slot = ptr_to_indirect(slot);
		}
		root->rnode = slot;
		root->height--;

		/*
		 * We have a dilemma here. The node's slot[0] must not be
		 * NULLed in case there are concurrent lookups expecting to
		 * find the item. However if this was a bottom-level node,
		 * then it may be subject to the slot pointer being visible
		 * to callers dereferencing it. If item corresponding to
		 * slot[0] is subsequently deleted, these callers would expect
		 * their slot to become empty sooner or later.
		 *
		 * For example, lockless pagecache will look up a slot, deref
		 * the page pointer, and if the page has a zero refcount it
		 * means it was concurrently deleted from pagecache, so try
		 * the deref again. Fortunately there is already a requirement
		 * for logic to retry the entire slot lookup -- the indirect
		 * pointer problem (replacing direct root node with an
		 * indirect pointer also results in a stale slot). So tag the
		 * slot as indirect to force callers to retry.
		 */
		if (root->height == 0)
			*((unsigned long *)&to_free->slots[0]) |=
						RADIX_TREE_INDIRECT_PTR;

		radix_tree_node_free(to_free);
	}
}

/**
 *	radix_tree_delete    -    delete an item from a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Remove the item at @index from the radix tree rooted at @root.
 *
 *	Returns the address of the deleted item, or NULL if it was not present.
 */
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
	struct radix_tree_node *node = NULL;
	struct radix_tree_node *slot = NULL;
	struct radix_tree_node *to_free;
	unsigned int height, shift;
	int tag;
	int uninitialized_var(offset);

	height = root->height;
	if (index > radix_tree_maxindex(height))
		goto out;

	slot = root->rnode;
	if (height == 0) {
		root_tag_clear_all(root);
		root->rnode = NULL;
		goto out;
	}
	slot = indirect_to_ptr(slot);
	shift = height * RADIX_TREE_MAP_SHIFT;

	do {
		if (slot == NULL)
			goto out;

		shift -= RADIX_TREE_MAP_SHIFT;
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		node = slot;
		slot = slot->slots[offset];
	} while (shift);

	if (slot == NULL)
		goto out;

	/*
	 * Clear all tags associated with the item to be deleted.
	 * This way of doing it would be inefficient, but seldom is any set.
	 */
	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
		if (tag_get(node, tag, offset))
			radix_tree_tag_clear(root, index, tag);
	}

	to_free = NULL;
	/* Now free the nodes we do not need anymore */
	while (node) {
		node->slots[offset] = NULL;
		node->count--;
		/*
		 * Queue the node for deferred freeing after the
		 * last reference to it disappears (set NULL, above).
		 */
		if (to_free)
			radix_tree_node_free(to_free);

		if (node->count) {
			if (node == indirect_to_ptr(root->rnode))
				radix_tree_shrink(root);
			goto out;
		}

		/* Node with zero slots in use so free it */
		to_free = node;

		index >>= RADIX_TREE_MAP_SHIFT;
		offset = index & RADIX_TREE_MAP_MASK;
		node = node->parent;
	}

	root_tag_clear_all(root);
	root->height = 0;
	root->rnode = NULL;
	if (to_free)
		radix_tree_node_free(to_free);

out:
	return slot;
}
EXPORT_SYMBOL(radix_tree_delete);

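/*
 * Deletion sketch (illustrative, not part of the original file;
 * "my_lock", "my_tree" and "struct my_item" with a struct rcu_head
 * member named "rcu" are assumed names).  Deletion needs the caller's
 * write-side exclusion; the returned pointer is the old item, which may
 * then be freed, via RCU if lookups run locklessly:
 *
 *	struct my_item *item;
 *
 *	spin_lock(&my_lock);
 *	item = radix_tree_delete(&my_tree, index);
 *	spin_unlock(&my_lock);
 *	if (item)
 *		kfree_rcu(item, rcu);
 */
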
/**
 *	radix_tree_tagged - test whether any items in the tree are tagged
 *	@root:		radix tree root
 *	@tag:		tag to test
 */
int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
{
	return root_tag_get(root, tag);
}
EXPORT_SYMBOL(radix_tree_tagged);

static void
radix_tree_node_ctor(void *node)
{
	memset(node, 0, sizeof(struct radix_tree_node));
}

static __init unsigned long __maxindex(unsigned int height)
{
	unsigned int width = height * RADIX_TREE_MAP_SHIFT;
	int shift = RADIX_TREE_INDEX_BITS - width;

	if (shift < 0)
		return ~0UL;
	if (shift >= BITS_PER_LONG)
		return 0UL;
	return ~0UL >> shift;
}

static __init void radix_tree_init_maxindex(void)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
		height_to_maxindex[i] = __maxindex(i);
}

static int radix_tree_callback(struct notifier_block *nfb,
			       unsigned long action,
			       void *hcpu)
{
	int cpu = (long)hcpu;
	struct radix_tree_preload *rtp;

	/* Free per-cpu pool of preloaded nodes */
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
		rtp = &per_cpu(radix_tree_preloads, cpu);
		while (rtp->nr) {
			kmem_cache_free(radix_tree_node_cachep,
					rtp->nodes[rtp->nr-1]);
			rtp->nodes[rtp->nr-1] = NULL;
			rtp->nr--;
		}
	}
	return NOTIFY_OK;
}

void __init radix_tree_init(void)
{
	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
			sizeof(struct radix_tree_node), 0,
			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
			radix_tree_node_ctor);
	radix_tree_init_maxindex();
	hotcpu_notifier(radix_tree_callback, 0);
}