Linux Kernel 3.7.1
tmem.c
1 /*
2  * In-kernel transcendent memory (generic implementation)
3  *
4  * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
5  *
6  * The primary purpose of Transcendent Memory ("tmem") is to map object-oriented
7  * "handles" (triples containing a pool id, and object id, and an index), to
8  * pages in a page-accessible memory (PAM). Tmem references the PAM pages via
9  * an abstract "pampd" (PAM page-descriptor), which can be operated on by a
10  * set of functions (pamops). Each pampd contains some representation of
11  * PAGE_SIZE bytes worth of data. Tmem must support potentially millions of
12  * pages and must be able to insert, find, and delete these pages at a
13  * potential frequency of thousands per second concurrently across many CPUs,
14  * (and, if used with KVM, across many vcpus across many guests).
15  * Tmem is tracked with a hierarchy of data structures, organized by
16  * the elements in a handle-tuple: pool_id, object_id, and page index.
17  * One or more "clients" (e.g. guests) each provide one or more tmem_pools.
18  * Each pool contains a hash table of rb_trees of tmem_objs. Each
19  * tmem_obj contains a radix-tree-like tree of pointers, with intermediate
20  * nodes called tmem_objnodes. Each leaf pointer in this tree points to
21  * a pampd, which is accessible only through a small set of callbacks
22  * registered by the PAM implementation (see tmem_register_pamops). Tmem
23  * does all memory allocation via a set of callbacks registered by the tmem
24  * host implementation (e.g. see tmem_register_hostops).
25  */
26 
27 #include <linux/list.h>
28 #include <linux/spinlock.h>
29 #include <linux/atomic.h>
30 
31 #include "tmem.h"
32 
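/*
 * [Editorial example, not part of the original file.]  A handle-tuple in
 * practice: the client names one PAGE_SIZE datum with (pool_id, oid,
 * index), and tmem resolves it via pool -> hashbucket -> rb_tree ->
 * tmem_obj -> objnode tree -> pampd.  The layout of struct tmem_oid as
 * an array of three uint64_t's is an assumption taken from tmem.h:
 *
 *	struct tmem_oid oid = { .oid = { 17, 0, 0 } };	// object id
 *	uint32_t index = 4;	// the fifth page of object 17
 *	// together with a pool, (oid, index) now identifies exactly
 *	// one page's worth of data held in PAM space.
 */
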
33 /* data structure sentinels used for debugging... see tmem.h */
34 #define POOL_SENTINEL 0x87658765
35 #define OBJ_SENTINEL 0x12345678
36 #define OBJNODE_SENTINEL 0xfedcba09
37 
38 /*
39  * A tmem host implementation must use this function to register callbacks
40  * for memory allocation.
41  */
42 static struct tmem_hostops tmem_hostops;
43 
44 static void tmem_objnode_tree_init(void);
45 
46 void tmem_register_hostops(struct tmem_hostops *m)
47 {
48  tmem_objnode_tree_init();
49  tmem_hostops = *m;
50 }
51 
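/*
 * [Editorial sketch, not part of the original file.]  A minimal host
 * backend registering its allocators.  The example_* names are
 * hypothetical; the callback names and argument lists are inferred from
 * the call sites in this file:
 *
 *	static struct tmem_hostops example_hostops = {
 *		.obj_alloc	= example_obj_alloc,	 // (pool) -> obj
 *		.obj_free	= example_obj_free,	 // (obj, pool)
 *		.objnode_alloc	= example_objnode_alloc, // (pool) -> objnode
 *		.objnode_free	= example_objnode_free,	 // (objnode, pool)
 *	};
 *
 *	tmem_register_hostops(&example_hostops);
 */
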
52 /*
53  * A tmem host implementation must use this function to register
54  * callbacks for a page-accessible memory (PAM) implementation.
55  */
56 static struct tmem_pamops tmem_pamops;
57 
58 void tmem_register_pamops(struct tmem_pamops *m)
59 {
60  tmem_pamops = *m;
61 }
62 
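/*
 * [Editorial sketch, not part of the original file.]  The PAM callbacks
 * a backend would supply, named exactly as they are invoked below;
 * argument lists are inferred from the call sites, see tmem.h for the
 * authoritative declarations:
 *
 *	static struct tmem_pamops example_pamops = {
 *		.create		   = ..., // (data, size, raw, ephemeral,
 *					  //  pool, oidp, index) -> pampd
 *		.get_data	   = ..., // copy pampd data back to kernel
 *		.get_data_and_free = ..., // ditto, then free the pampd
 *		.free		   = ..., // release one pampd
 *		.free_obj	   = ..., // per-object teardown hook
 *		.new_obj	   = ..., // per-object init hook
 *		.replace_in_obj	   = ..., // install a replacement pampd
 *		.is_remote	   = ..., // pampd stored on another node?
 *	};
 *	tmem_register_pamops(&example_pamops);
 */
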
63 /*
64  * Oids are potentially very sparse and tmem_objs may have an indeterminately
65  * short life, being added and deleted at a relatively high frequency.
66  * So an rb_tree is an ideal data structure to manage tmem_objs. But because
67  * of the potentially huge number of tmem_objs, each pool manages a hashtable
68  * of rb_trees to reduce search, insert, delete, and rebalancing time.
69  * Each hashbucket also has a lock to manage concurrent access.
70  *
71  * The following routines manage tmem_objs. When any tmem_obj is accessed,
72  * the hashbucket lock must be held.
73  */
74 
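/*
 * [Editorial note.]  The canonical access pattern, repeated by every
 * core operation later in this file: hash the oid to pick a bucket,
 * lock it, then search that bucket's rb_tree.
 *
 *	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
 *	spin_lock(&hb->lock);
 *	obj = tmem_obj_find(hb, oidp);
 *	...			// obj only valid while hb->lock is held
 *	spin_unlock(&hb->lock);
 */
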
75 static struct tmem_obj
76 *__tmem_obj_find(struct tmem_hashbucket *hb, struct tmem_oid *oidp,
77  struct rb_node **parent, struct rb_node ***link)
78 {
79  struct rb_node *_parent = NULL, **rbnode;
80  struct tmem_obj *obj = NULL;
81 
82  rbnode = &hb->obj_rb_root.rb_node;
83  while (*rbnode) {
84  BUG_ON(RB_EMPTY_NODE(*rbnode));
85  _parent = *rbnode;
86  obj = rb_entry(*rbnode, struct tmem_obj,
87  rb_tree_node);
88  switch (tmem_oid_compare(oidp, &obj->oid)) {
89  case 0: /* equal */
90  goto out;
91  case -1:
92  rbnode = &(*rbnode)->rb_left;
93  break;
94  case 1:
95  rbnode = &(*rbnode)->rb_right;
96  break;
97  }
98  }
99 
100  if (parent)
101  *parent = _parent;
102  if (link)
103  *link = rbnode;
104 
105  obj = NULL;
106 out:
107  return obj;
108 }
109 
110 
111 /* searches for object==oid in pool; caller must hold the hashbucket lock */
112 static struct tmem_obj *tmem_obj_find(struct tmem_hashbucket *hb,
113  struct tmem_oid *oidp)
114 {
115  return __tmem_obj_find(hb, oidp, NULL, NULL);
116 }
117 
118 static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *);
119 
120 /* free an object that has no more pampds in it */
121 static void tmem_obj_free(struct tmem_obj *obj, struct tmem_hashbucket *hb)
122 {
123  struct tmem_pool *pool;
124 
125  BUG_ON(obj == NULL);
126  ASSERT_SENTINEL(obj, OBJ);
127  BUG_ON(obj->pampd_count > 0);
128  pool = obj->pool;
129  BUG_ON(pool == NULL);
130  if (obj->objnode_tree_root != NULL) /* may be "stump" with no leaves */
131  tmem_pampd_destroy_all_in_obj(obj);
132  BUG_ON(obj->objnode_tree_root != NULL);
133  BUG_ON((long)obj->objnode_count != 0);
134  atomic_dec(&pool->obj_count);
135  BUG_ON(atomic_read(&pool->obj_count) < 0);
136  INVERT_SENTINEL(obj, OBJ);
137  obj->pool = NULL;
138  tmem_oid_set_invalid(&obj->oid);
139  rb_erase(&obj->rb_tree_node, &hb->obj_rb_root);
140 }
141 
142 /*
143  * initialize and insert a tmem_object_root (called only if find failed)
144  */
145 static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb,
146  struct tmem_pool *pool,
147  struct tmem_oid *oidp)
148 {
149  struct rb_root *root = &hb->obj_rb_root;
150  struct rb_node **new = NULL, *parent = NULL;
151 
152  BUG_ON(pool == NULL);
153  atomic_inc(&pool->obj_count);
154  obj->objnode_tree_height = 0;
155  obj->objnode_tree_root = NULL;
156  obj->pool = pool;
157  obj->oid = *oidp;
158  obj->objnode_count = 0;
159  obj->pampd_count = 0;
160  (*tmem_pamops.new_obj)(obj);
161  SET_SENTINEL(obj, OBJ);
162 
163  if (__tmem_obj_find(hb, oidp, &parent, &new))
164  BUG();
165 
166  rb_link_node(&obj->rb_tree_node, parent, new);
167  rb_insert_color(&obj->rb_tree_node, root);
168 }
169 
170 /*
171  * Tmem is managed as a set of tmem_pools with certain attributes, such as
172  * "ephemeral" vs "persistent". These attributes apply to all tmem_objs
173  * and all pampds that belong to a tmem_pool. A tmem_pool is created
174  * or deleted relatively rarely (for example, when a filesystem is
175  * mounted or unmounted).
176  */
177 
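/*
 * [Editorial sketch, not part of the original file.]  The host allocates
 * a struct tmem_pool itself and hands it to tmem_new_pool() (bottom of
 * this file) with the attribute flags used there:
 *
 *	tmem_new_pool(pool, TMEM_POOL_PERSIST);	// persistent pool
 *	tmem_new_pool(pool, 0);			// ephemeral, non-shared
 */
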
178 /* flush all data from a pool and, optionally, free it */
179 static void tmem_pool_flush(struct tmem_pool *pool, bool destroy)
180 {
181  struct rb_node *rbnode;
182  struct tmem_obj *obj;
183  struct tmem_hashbucket *hb = &pool->hashbucket[0];
184  int i;
185 
186  BUG_ON(pool == NULL);
187  for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
188  spin_lock(&hb->lock);
189  rbnode = rb_first(&hb->obj_rb_root);
190  while (rbnode != NULL) {
191  obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
192  rbnode = rb_next(rbnode);
193  tmem_pampd_destroy_all_in_obj(obj);
194  tmem_obj_free(obj, hb);
195  (*tmem_hostops.obj_free)(obj, pool);
196  }
197  spin_unlock(&hb->lock);
198  }
199  if (destroy)
200  list_del(&pool->pool_list);
201 }
202 
203 /*
204  * A tmem_obj contains a radix-tree-like tree in which the intermediate
205  * nodes are called tmem_objnodes. (The kernel lib/radix-tree.c implementation
206  * is very specialized and tuned for specific uses and is not particularly
207  * suited for use from this code, though some code from the core algorithms has
208  * been reused, thus the copyright notices below). Each tmem_objnode contains
209  * a set of pointers which point to either a set of intermediate tmem_objnodes
210  * or a set of pampds.
211  *
212  * Portions Copyright (C) 2001 Momchil Velikov
213  * Portions Copyright (C) 2001 Christoph Hellwig
214  * Portions Copyright (C) 2005 SGI, Christoph Lameter <[email protected]>
215  */
216 
217 struct tmem_objnode_tree_path {
218  struct tmem_objnode *objnode;
219  int offset;
220 };
221 
222 /* objnode height_to_maxindex translation */
223 static unsigned long tmem_objnode_tree_h2max[OBJNODE_TREE_MAX_PATH + 1];
224 
225 static void tmem_objnode_tree_init(void)
226 {
227  unsigned int ht, tmp;
228 
229  for (ht = 0; ht < ARRAY_SIZE(tmem_objnode_tree_h2max); ht++) {
230  tmp = ht * OBJNODE_TREE_MAP_SHIFT;
231  if (tmp >= OBJNODE_TREE_INDEX_BITS)
232  tmem_objnode_tree_h2max[ht] = ~0UL;
233  else
234  tmem_objnode_tree_h2max[ht] =
235  (~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1;
236  }
237 }
238 
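/*
 * [Editorial worked example.]  Assuming OBJNODE_TREE_MAP_SHIFT == 6 and
 * 64-bit longs (per tmem.h), the loop above yields
 * h2max[ht] = 2^(6*ht) - 1, saturating at ~0UL:
 *
 *	h2max[0] = 0		// a height-0 tree holds only index 0
 *	h2max[1] = 63		// one 64-slot objnode
 *	h2max[2] = 4095		// 64 * 64 slots
 *	h2max[3] = 262143	// and so on...
 *
 * so e.g. index 100 needs a tree of height 2.
 */
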
239 static struct tmem_objnode *tmem_objnode_alloc(struct tmem_obj *obj)
240 {
241  struct tmem_objnode *objnode;
242 
243  ASSERT_SENTINEL(obj, OBJ);
244  BUG_ON(obj->pool == NULL);
245  ASSERT_SENTINEL(obj->pool, POOL);
246  objnode = (*tmem_hostops.objnode_alloc)(obj->pool);
247  if (unlikely(objnode == NULL))
248  goto out;
249  objnode->obj = obj;
250  SET_SENTINEL(objnode, OBJNODE);
251  memset(&objnode->slots, 0, sizeof(objnode->slots));
252  objnode->slots_in_use = 0;
253  obj->objnode_count++;
254 out:
255  return objnode;
256 }
257 
258 static void tmem_objnode_free(struct tmem_objnode *objnode)
259 {
260  struct tmem_pool *pool;
261  int i;
262 
263  BUG_ON(objnode == NULL);
264  for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++)
265  BUG_ON(objnode->slots[i] != NULL);
266  ASSERT_SENTINEL(objnode, OBJNODE);
267  INVERT_SENTINEL(objnode, OBJNODE);
268  BUG_ON(objnode->obj == NULL);
269  ASSERT_SENTINEL(objnode->obj, OBJ);
270  pool = objnode->obj->pool;
271  BUG_ON(pool == NULL);
272  ASSERT_SENTINEL(pool, POOL);
273  objnode->obj->objnode_count--;
274  objnode->obj = NULL;
275  (*tmem_hostops.objnode_free)(objnode, pool);
276 }
277 
278 /*
279  * lookup index in object and return associated pampd (or NULL if not found)
280  */
281 static void **__tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
282 {
283  unsigned int height, shift;
284  struct tmem_objnode **slot = NULL;
285 
286  BUG_ON(obj == NULL);
287  ASSERT_SENTINEL(obj, OBJ);
288  BUG_ON(obj->pool == NULL);
289  ASSERT_SENTINEL(obj->pool, POOL);
290 
291  height = obj->objnode_tree_height;
292  if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height])
293  goto out;
294  if (height == 0 && obj->objnode_tree_root) {
295  slot = &obj->objnode_tree_root;
296  goto out;
297  }
298  shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
299  slot = &obj->objnode_tree_root;
300  while (height > 0) {
301  if (*slot == NULL)
302  goto out;
303  slot = (struct tmem_objnode **)
304  ((*slot)->slots +
305  ((index >> shift) & OBJNODE_TREE_MAP_MASK));
306  shift -= OBJNODE_TREE_MAP_SHIFT;
307  height--;
308  }
309 out:
310  return slot != NULL ? (void **)slot : NULL;
311 }
312 
313 static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
314 {
315  struct tmem_objnode **slot;
316 
317  slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
318  return slot != NULL ? *slot : NULL;
319 }
320 
321 static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index,
322  void *new_pampd)
323 {
324  struct tmem_objnode **slot;
325  void *ret = NULL;
326 
327  slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
328  if ((slot != NULL) && (*slot != NULL)) {
329  void *old_pampd = *(void **)slot;
330  *(void **)slot = new_pampd;
331  (*tmem_pamops.free)(old_pampd, obj->pool, NULL, 0);
332  ret = new_pampd;
333  }
334  return ret;
335 }
336 
337 static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index,
338  void *pampd)
339 {
340  int ret = 0;
341  struct tmem_objnode *objnode = NULL, *newnode, *slot;
342  unsigned int height, shift;
343  int offset = 0;
344 
345  /* if necessary, extend the tree to be higher */
346  if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) {
347  height = obj->objnode_tree_height + 1;
348  if (index > tmem_objnode_tree_h2max[height])
349  while (index > tmem_objnode_tree_h2max[height])
350  height++;
351  if (obj->objnode_tree_root == NULL) {
352  obj->objnode_tree_height = height;
353  goto insert;
354  }
355  do {
356  newnode = tmem_objnode_alloc(obj);
357  if (!newnode) {
358  ret = -ENOMEM;
359  goto out;
360  }
361  newnode->slots[0] = obj->objnode_tree_root;
362  newnode->slots_in_use = 1;
363  obj->objnode_tree_root = newnode;
364  obj->objnode_tree_height++;
365  } while (height > obj->objnode_tree_height);
366  }
367 insert:
368  slot = obj->objnode_tree_root;
369  height = obj->objnode_tree_height;
370  shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
371  while (height > 0) {
372  if (slot == NULL) {
373  /* add a child objnode. */
374  slot = tmem_objnode_alloc(obj);
375  if (!slot) {
376  ret = -ENOMEM;
377  goto out;
378  }
379  if (objnode) {
380 
381  objnode->slots[offset] = slot;
382  objnode->slots_in_use++;
383  } else
384  obj->objnode_tree_root = slot;
385  }
386  /* go down a level */
387  offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
388  objnode = slot;
389  slot = objnode->slots[offset];
390  shift -= OBJNODE_TREE_MAP_SHIFT;
391  height--;
392  }
393  BUG_ON(slot != NULL);
394  if (objnode) {
395  objnode->slots_in_use++;
396  objnode->slots[offset] = pampd;
397  } else
398  obj->objnode_tree_root = pampd;
399  obj->pampd_count++;
400 out:
401  return ret;
402 }
403 
404 static void *tmem_pampd_delete_from_obj(struct tmem_obj *obj, uint32_t index)
405 {
406  struct tmem_objnode_tree_path path[OBJNODE_TREE_MAX_PATH + 1];
407  struct tmem_objnode_tree_path *pathp = path;
408  struct tmem_objnode *slot = NULL;
409  unsigned int height, shift;
410  int offset;
411 
412  BUG_ON(obj == NULL);
413  ASSERT_SENTINEL(obj, OBJ);
414  BUG_ON(obj->pool == NULL);
415  ASSERT_SENTINEL(obj->pool, POOL);
416  height = obj->objnode_tree_height;
417  if (index > tmem_objnode_tree_h2max[height])
418  goto out;
419  slot = obj->objnode_tree_root;
420  if (height == 0 && obj->objnode_tree_root) {
421  obj->objnode_tree_root = NULL;
422  goto out;
423  }
424  shift = (height - 1) * OBJNODE_TREE_MAP_SHIFT;
425  pathp->objnode = NULL;
426  do {
427  if (slot == NULL)
428  goto out;
429  pathp++;
430  offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
431  pathp->offset = offset;
432  pathp->objnode = slot;
433  slot = slot->slots[offset];
434  shift -= OBJNODE_TREE_MAP_SHIFT;
435  height--;
436  } while (height > 0);
437  if (slot == NULL)
438  goto out;
439  while (pathp->objnode) {
440  pathp->objnode->slots[pathp->offset] = NULL;
441  pathp->objnode->slots_in_use--;
442  if (pathp->objnode->slots_in_use) {
443  if (pathp->objnode == obj->objnode_tree_root) {
444  while (obj->objnode_tree_height > 0 &&
445  obj->objnode_tree_root->slots_in_use == 1 &&
446  obj->objnode_tree_root->slots[0]) {
447  struct tmem_objnode *to_free =
448  obj->objnode_tree_root;
449 
450  obj->objnode_tree_root =
451  to_free->slots[0];
452  obj->objnode_tree_height--;
453  to_free->slots[0] = NULL;
454  to_free->slots_in_use = 0;
455  tmem_objnode_free(to_free);
456  }
457  }
458  goto out;
459  }
460  tmem_objnode_free(pathp->objnode); /* 0 slots used, free it */
461  pathp--;
462  }
463  obj->objnode_tree_height = 0;
464  obj->objnode_tree_root = NULL;
465 
466 out:
467  if (slot != NULL)
468  obj->pampd_count--;
469  BUG_ON(obj->pampd_count < 0);
470  return slot;
471 }
472 
473 /* recursively walk the objnode_tree destroying pampds and objnodes */
474 static void tmem_objnode_node_destroy(struct tmem_obj *obj,
475  struct tmem_objnode *objnode,
476  unsigned int ht)
477 {
478  int i;
479 
480  if (ht == 0)
481  return;
482  for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) {
483  if (objnode->slots[i]) {
484  if (ht == 1) {
485  obj->pampd_count--;
486  (*tmem_pamops.free)(objnode->slots[i],
487  obj->pool, NULL, 0);
488  objnode->slots[i] = NULL;
489  continue;
490  }
491  tmem_objnode_node_destroy(obj, objnode->slots[i], ht-1);
492  tmem_objnode_free(objnode->slots[i]);
493  objnode->slots[i] = NULL;
494  }
495  }
496 }
497 
498 static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
499 {
500  if (obj->objnode_tree_root == NULL)
501  return;
502  if (obj->objnode_tree_height == 0) {
503  obj->pampd_count--;
504  (*tmem_pamops.free)(obj->objnode_tree_root, obj->pool, NULL, 0);
505  } else {
506  tmem_objnode_node_destroy(obj, obj->objnode_tree_root,
507  obj->objnode_tree_height);
508  tmem_objnode_free(obj->objnode_tree_root);
509  obj->objnode_tree_height = 0;
510  }
511  obj->objnode_tree_root = NULL;
512  (*tmem_pamops.free_obj)(obj->pool, obj);
513 }
514 
515 /*
516  * Tmem is operated on by a set of well-defined actions:
517  * "put", "get", "flush", "flush_object", "new pool" and "destroy pool".
518  * (The tmem ABI allows for subpages and exchanges but these operations
519  * are not included in this implementation.)
520  *
521  * These "tmem core" operations are implemented in the following functions.
522  */
523 
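/*
 * [Editorial round trip, not part of the original file.]  Exercising the
 * core entry points defined below with a made-up handle; an ephemeral,
 * non-raw pool is assumed:
 *
 *	struct tmem_oid oid = { .oid = { 42, 0, 0 } };
 *	char page[PAGE_SIZE];
 *	size_t size = PAGE_SIZE;
 *
 *	tmem_put(pool, &oid, 0, page, PAGE_SIZE, false, true);
 *	tmem_get(pool, &oid, 0, page, &size, false, -1); // get, retain
 *	tmem_flush_page(pool, &oid, 0);	// subsequent gets now miss
 *	tmem_flush_object(pool, &oid);	// drop every index under oid
 */
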
524 /*
525  * "Put" a page, e.g. copy a page from the kernel into newly allocated
526  * PAM space (if such space is available). Tmem_put is complicated by
527  * a corner case: What if a page with matching handle already exists in
528  * tmem? To guarantee coherency, one of two actions is necessary: Either
529  * the data for the page must be overwritten, or the page must be
530  * "flushed" so that the data is not accessible to a subsequent "get".
531  * Since these "duplicate puts" are relatively rare, this implementation
532  * always flushes for simplicity.
533  */
534 int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
535  char *data, size_t size, bool raw, bool ephemeral)
536 {
537  struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL;
538  void *pampd = NULL, *pampd_del = NULL;
539  int ret = -ENOMEM;
540  struct tmem_hashbucket *hb;
541 
542  hb = &pool->hashbucket[tmem_oid_hash(oidp)];
543  spin_lock(&hb->lock);
544  obj = objfound = tmem_obj_find(hb, oidp);
545  if (obj != NULL) {
546  pampd = tmem_pampd_lookup_in_obj(objfound, index);
547  if (pampd != NULL) {
548  /* if found, is a dup put, flush the old one */
549  pampd_del = tmem_pampd_delete_from_obj(obj, index);
550  BUG_ON(pampd_del != pampd);
551  (*tmem_pamops.free)(pampd, pool, oidp, index);
552  if (obj->pampd_count == 0) {
553  objnew = obj;
554  objfound = NULL;
555  }
556  pampd = NULL;
557  }
558  } else {
559  obj = objnew = (*tmem_hostops.obj_alloc)(pool);
560  if (unlikely(obj == NULL)) {
561  ret = -ENOMEM;
562  goto out;
563  }
564  tmem_obj_init(obj, hb, pool, oidp);
565  }
566  BUG_ON(obj == NULL);
567  BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound));
568  pampd = (*tmem_pamops.create)(data, size, raw, ephemeral,
569  obj->pool, &obj->oid, index);
570  if (unlikely(pampd == NULL))
571  goto free;
572  ret = tmem_pampd_add_to_obj(obj, index, pampd);
573  if (unlikely(ret == -ENOMEM))
574  /* may have partially built objnode tree ("stump") */
575  goto delete_and_free;
576  goto out;
577 
578 delete_and_free:
579  (void)tmem_pampd_delete_from_obj(obj, index);
580 free:
581  if (pampd)
582  (*tmem_pamops.free)(pampd, pool, NULL, 0);
583  if (objnew) {
584  tmem_obj_free(objnew, hb);
585  (*tmem_hostops.obj_free)(objnew, pool);
586  }
587 out:
588  spin_unlock(&hb->lock);
589  return ret;
590 }
591 
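/*
 * [Editorial example of the duplicate-put rule described above.]
 *
 *	tmem_put(pool, &oid, 0, dataA, PAGE_SIZE, false, true);
 *	tmem_put(pool, &oid, 0, dataB, PAGE_SIZE, false, true);
 *
 * The second put first flushes the pampd holding dataA; if creating the
 * replacement pampd then fails, the handle simply misses on the next
 * "get", which is still coherent (stale dataA can never be returned).
 */
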
592 /*
593  * "Get" a page, e.g. if one can be found, copy the tmem page with the
594  * matching handle from PAM space to the kernel. By tmem definition,
595  * when a "get" is successful on an ephemeral page, the page is "flushed",
596  * and when a "get" is successful on a persistent page, the page is retained
597  * in tmem. Note that to preserve
598  * coherency, "get" can never be skipped if tmem contains the data.
599  * That is, if a get is done with a certain handle and fails, any
600  * subsequent "get" must also fail (unless of course there is a
601  * "put" done with the same handle).
602  *
603  */
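/*
 * [Editorial note.]  The get_and_free argument, per the logic below:
 * 1 forces get-and-free regardless of pool type, 0 follows the pool's
 * nature (free on ephemeral, retain on persistent), and any other
 * value (e.g. -1) always retains the page.
 */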
604 int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
605  char *data, size_t *size, bool raw, int get_and_free)
606 {
607  struct tmem_obj *obj;
608  void *pampd;
609  bool ephemeral = is_ephemeral(pool);
610  int ret = -1;
611  struct tmem_hashbucket *hb;
612  bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral);
613  bool lock_held = false;
614 
615  hb = &pool->hashbucket[tmem_oid_hash(oidp)];
616  spin_lock(&hb->lock);
617  lock_held = true;
618  obj = tmem_obj_find(hb, oidp);
619  if (obj == NULL)
620  goto out;
621  if (free)
622  pampd = tmem_pampd_delete_from_obj(obj, index);
623  else
624  pampd = tmem_pampd_lookup_in_obj(obj, index);
625  if (pampd == NULL)
626  goto out;
627  if (free) {
628  if (obj->pampd_count == 0) {
629  tmem_obj_free(obj, hb);
630  (*tmem_hostops.obj_free)(obj, pool);
631  obj = NULL;
632  }
633  }
634  if (tmem_pamops.is_remote(pampd)) {
635  lock_held = false;
636  spin_unlock(&hb->lock);
637  }
638  if (free)
639  ret = (*tmem_pamops.get_data_and_free)(
640  data, size, raw, pampd, pool, oidp, index);
641  else
642  ret = (*tmem_pamops.get_data)(
643  data, size, raw, pampd, pool, oidp, index);
644  if (ret < 0)
645  goto out;
646  ret = 0;
647 out:
648  if (lock_held)
649  spin_unlock(&hb->lock);
650  return ret;
651 }
652 
653 /*
654  * If a page in tmem matches the handle, "flush" this page from tmem such
655  * that any subsequent "get" does not succeed (unless, of course, there
656  * was another "put" with the same handle).
657  */
658 int tmem_flush_page(struct tmem_pool *pool,
659  struct tmem_oid *oidp, uint32_t index)
660 {
661  struct tmem_obj *obj;
662  void *pampd;
663  int ret = -1;
664  struct tmem_hashbucket *hb;
665 
666  hb = &pool->hashbucket[tmem_oid_hash(oidp)];
667  spin_lock(&hb->lock);
668  obj = tmem_obj_find(hb, oidp);
669  if (obj == NULL)
670  goto out;
671  pampd = tmem_pampd_delete_from_obj(obj, index);
672  if (pampd == NULL)
673  goto out;
674  (*tmem_pamops.free)(pampd, pool, oidp, index);
675  if (obj->pampd_count == 0) {
676  tmem_obj_free(obj, hb);
677  (*tmem_hostops.obj_free)(obj, pool);
678  }
679  ret = 0;
680 
681 out:
682  spin_unlock(&hb->lock);
683  return ret;
684 }
685 
686 /*
687  * If a page in tmem matches the handle, replace the page so that any
688  * subsequent "get" gets the new page. Returns 0 if
689  * there was a page to replace, else returns -1.
690  */
691 int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp,
692  uint32_t index, void *new_pampd)
693 {
694  struct tmem_obj *obj;
695  int ret = -1;
696  struct tmem_hashbucket *hb;
697 
698  hb = &pool->hashbucket[tmem_oid_hash(oidp)];
699  spin_lock(&hb->lock);
700  obj = tmem_obj_find(hb, oidp);
701  if (obj == NULL)
702  goto out;
703  new_pampd = tmem_pampd_replace_in_obj(obj, index, new_pampd);
704  ret = (*tmem_pamops.replace_in_obj)(new_pampd, obj);
705 out:
706  spin_unlock(&hb->lock);
707  return ret;
708 }
709 
710 /*
711  * "Flush" all pages in tmem matching this oid.
712  */
713 int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp)
714 {
715  struct tmem_obj *obj;
716  struct tmem_hashbucket *hb;
717  int ret = -1;
718 
719  hb = &pool->hashbucket[tmem_oid_hash(oidp)];
720  spin_lock(&hb->lock);
721  obj = tmem_obj_find(hb, oidp);
722  if (obj == NULL)
723  goto out;
724  tmem_pampd_destroy_all_in_obj(obj);
725  tmem_obj_free(obj, hb);
726  (*tmem_hostops.obj_free)(obj, pool);
727  ret = 0;
728 
729 out:
730  spin_unlock(&hb->lock);
731  return ret;
732 }
733 
734 /*
735  * "Flush" all pages (and tmem_objs) from this tmem_pool and disable
736  * all subsequent access to this tmem_pool.
737  */
738 int tmem_destroy_pool(struct tmem_pool *pool)
739 {
740  int ret = -1;
741 
742  if (pool == NULL)
743  goto out;
744  tmem_pool_flush(pool, 1);
745  ret = 0;
746 out:
747  return ret;
748 }
749 
750 static LIST_HEAD(tmem_global_pool_list);
751 
752 /*
753  * Initialize a new tmem_pool with the provided flags; the pool id is
754  * assigned and tracked by the tmem host implementation.
755  */
756 void tmem_new_pool(struct tmem_pool *pool, uint32_t flags)
757 {
758  int persistent = flags & TMEM_POOL_PERSIST;
759  int shared = flags & TMEM_POOL_SHARED;
760  struct tmem_hashbucket *hb = &pool->hashbucket[0];
761  int i;
762 
763  for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
764  hb->obj_rb_root = RB_ROOT;
765  spin_lock_init(&hb->lock);
766  }
767  INIT_LIST_HEAD(&pool->pool_list);
768  atomic_set(&pool->obj_count, 0);
769  SET_SENTINEL(pool, POOL);
770  list_add_tail(&pool->pool_list, &tmem_global_pool_list);
771  pool->persistent = persistent;
772  pool->shared = shared;
773 }