Merge branch 'rc-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuil...
[pandora-kernel.git] / drivers / staging / zcache / tmem.c
1 /*
2  * In-kernel transcendent memory (generic implementation)
3  *
4  * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
5  *
6  * The primary purpose of Transcendent Memory ("tmem") is to map object-oriented
7  * "handles" (triples containing a pool id, and object id, and an index), to
8  * pages in a page-accessible memory (PAM).  Tmem references the PAM pages via
9  * an abstract "pampd" (PAM page-descriptor), which can be operated on by a
10  * set of functions (pamops).  Each pampd contains some representation of
11  * PAGE_SIZE bytes worth of data. Tmem must support potentially millions of
12  * pages and must be able to insert, find, and delete these pages at a
13  * potential frequency of thousands per second concurrently across many CPUs,
14  * (and, if used with KVM, across many vcpus across many guests).
15  * Tmem is tracked with a hierarchy of data structures, organized by
16  * the elements in a handle-tuple: pool_id, object_id, and page index.
17  * One or more "clients" (e.g. guests) each provide one or more tmem_pools.
18  * Each pool contains a hash table of rb_trees of tmem_objs.  Each
19  * tmem_obj contains a radix-tree-like tree of pointers, with intermediate
20  * nodes called tmem_objnodes.  Each leaf pointer in this tree points to
21  * a pampd, which is accessible only through a small set of callbacks
22  * registered by the PAM implementation (see tmem_register_pamops). Tmem
23  * does all memory allocation via a set of callbacks registered by the tmem
24  * host implementation (e.g. see tmem_register_hostops).
25  */
26
27 #include <linux/list.h>
28 #include <linux/spinlock.h>
29 #include <linux/atomic.h>
30
31 #include "tmem.h"
32
33 /* data structure sentinels used for debugging... see tmem.h */
34 #define POOL_SENTINEL 0x87658765
35 #define OBJ_SENTINEL 0x12345678
36 #define OBJNODE_SENTINEL 0xfedcba09
37
38 /*
39  * A tmem host implementation must use this function to register callbacks
40  * for memory allocation.
41  */
42 static struct tmem_hostops tmem_hostops;
43
44 static void tmem_objnode_tree_init(void);
45
46 void tmem_register_hostops(struct tmem_hostops *m)
47 {
48         tmem_objnode_tree_init();
49         tmem_hostops = *m;
50 }
51
52 /*
53  * A tmem host implementation must use this function to register
54  * callbacks for a page-accessible memory (PAM) implementation
55  */
56 static struct tmem_pamops tmem_pamops;
57
58 void tmem_register_pamops(struct tmem_pamops *m)
59 {
60         tmem_pamops = *m;
61 }
62
63 /*
64  * Oid's are potentially very sparse and tmem_objs may have an indeterminately
65  * short life, being added and deleted at a relatively high frequency.
66  * So an rb_tree is an ideal data structure to manage tmem_objs.  But because
67  * of the potentially huge number of tmem_objs, each pool manages a hashtable
68  * of rb_trees to reduce search, insert, delete, and rebalancing time.
69  * Each hashbucket also has a lock to manage concurrent access.
70  *
71  * The following routines manage tmem_objs.  When any tmem_obj is accessed,
72  * the hashbucket lock must be held.
73  */
74
75 /* searches for object==oid in pool, returns locked object if found */
76 static struct tmem_obj *tmem_obj_find(struct tmem_hashbucket *hb,
77                                         struct tmem_oid *oidp)
78 {
79         struct rb_node *rbnode;
80         struct tmem_obj *obj;
81
82         rbnode = hb->obj_rb_root.rb_node;
83         while (rbnode) {
84                 BUG_ON(RB_EMPTY_NODE(rbnode));
85                 obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
86                 switch (tmem_oid_compare(oidp, &obj->oid)) {
87                 case 0: /* equal */
88                         goto out;
89                 case -1:
90                         rbnode = rbnode->rb_left;
91                         break;
92                 case 1:
93                         rbnode = rbnode->rb_right;
94                         break;
95                 }
96         }
97         obj = NULL;
98 out:
99         return obj;
100 }
101
102 static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *);
103
104 /* free an object that has no more pampds in it */
105 static void tmem_obj_free(struct tmem_obj *obj, struct tmem_hashbucket *hb)
106 {
107         struct tmem_pool *pool;
108
109         BUG_ON(obj == NULL);
110         ASSERT_SENTINEL(obj, OBJ);
111         BUG_ON(obj->pampd_count > 0);
112         pool = obj->pool;
113         BUG_ON(pool == NULL);
114         if (obj->objnode_tree_root != NULL) /* may be "stump" with no leaves */
115                 tmem_pampd_destroy_all_in_obj(obj);
116         BUG_ON(obj->objnode_tree_root != NULL);
117         BUG_ON((long)obj->objnode_count != 0);
118         atomic_dec(&pool->obj_count);
119         BUG_ON(atomic_read(&pool->obj_count) < 0);
120         INVERT_SENTINEL(obj, OBJ);
121         obj->pool = NULL;
122         tmem_oid_set_invalid(&obj->oid);
123         rb_erase(&obj->rb_tree_node, &hb->obj_rb_root);
124 }
125
126 /*
127  * initialize, and insert an tmem_object_root (called only if find failed)
128  */
129 static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb,
130                                         struct tmem_pool *pool,
131                                         struct tmem_oid *oidp)
132 {
133         struct rb_root *root = &hb->obj_rb_root;
134         struct rb_node **new = &(root->rb_node), *parent = NULL;
135         struct tmem_obj *this;
136
137         BUG_ON(pool == NULL);
138         atomic_inc(&pool->obj_count);
139         obj->objnode_tree_height = 0;
140         obj->objnode_tree_root = NULL;
141         obj->pool = pool;
142         obj->oid = *oidp;
143         obj->objnode_count = 0;
144         obj->pampd_count = 0;
145         SET_SENTINEL(obj, OBJ);
146         while (*new) {
147                 BUG_ON(RB_EMPTY_NODE(*new));
148                 this = rb_entry(*new, struct tmem_obj, rb_tree_node);
149                 parent = *new;
150                 switch (tmem_oid_compare(oidp, &this->oid)) {
151                 case 0:
152                         BUG(); /* already present; should never happen! */
153                         break;
154                 case -1:
155                         new = &(*new)->rb_left;
156                         break;
157                 case 1:
158                         new = &(*new)->rb_right;
159                         break;
160                 }
161         }
162         rb_link_node(&obj->rb_tree_node, parent, new);
163         rb_insert_color(&obj->rb_tree_node, root);
164 }
165
166 /*
167  * Tmem is managed as a set of tmem_pools with certain attributes, such as
168  * "ephemeral" vs "persistent".  These attributes apply to all tmem_objs
169  * and all pampds that belong to a tmem_pool.  A tmem_pool is created
170  * or deleted relatively rarely (for example, when a filesystem is
171  * mounted or unmounted).
172  */
173
174 /* flush all data from a pool and, optionally, free it */
175 static void tmem_pool_flush(struct tmem_pool *pool, bool destroy)
176 {
177         struct rb_node *rbnode;
178         struct tmem_obj *obj;
179         struct tmem_hashbucket *hb = &pool->hashbucket[0];
180         int i;
181
182         BUG_ON(pool == NULL);
183         for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
184                 spin_lock(&hb->lock);
185                 rbnode = rb_first(&hb->obj_rb_root);
186                 while (rbnode != NULL) {
187                         obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
188                         rbnode = rb_next(rbnode);
189                         tmem_pampd_destroy_all_in_obj(obj);
190                         tmem_obj_free(obj, hb);
191                         (*tmem_hostops.obj_free)(obj, pool);
192                 }
193                 spin_unlock(&hb->lock);
194         }
195         if (destroy)
196                 list_del(&pool->pool_list);
197 }
198
199 /*
200  * A tmem_obj contains a radix-tree-like tree in which the intermediate
201  * nodes are called tmem_objnodes.  (The kernel lib/radix-tree.c implementation
202  * is very specialized and tuned for specific uses and is not particularly
203  * suited for use from this code, though some code from the core algorithms has
204  * been reused, thus the copyright notices below).  Each tmem_objnode contains
205  * a set of pointers which point to either a set of intermediate tmem_objnodes
206  * or a set of pampds.
207  *
208  * Portions Copyright (C) 2001 Momchil Velikov
209  * Portions Copyright (C) 2001 Christoph Hellwig
210  * Portions Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
211  */
212
/*
 * One recorded step of a walk down an object's objnode tree: the node
 * that was visited and the slot that was followed out of it.  Used by
 * tmem_pampd_delete_from_obj() to retrace its path back towards the root.
 */
struct tmem_objnode_tree_path {
        struct tmem_objnode *objnode;   /* node visited at this level */
        int offset;                     /* slot index taken within objnode */
};
217
218 /* objnode height_to_maxindex translation */
219 static unsigned long tmem_objnode_tree_h2max[OBJNODE_TREE_MAX_PATH + 1];
220
221 static void tmem_objnode_tree_init(void)
222 {
223         unsigned int ht, tmp;
224
225         for (ht = 0; ht < ARRAY_SIZE(tmem_objnode_tree_h2max); ht++) {
226                 tmp = ht * OBJNODE_TREE_MAP_SHIFT;
227                 if (tmp >= OBJNODE_TREE_INDEX_BITS)
228                         tmem_objnode_tree_h2max[ht] = ~0UL;
229                 else
230                         tmem_objnode_tree_h2max[ht] =
231                             (~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1;
232         }
233 }
234
235 static struct tmem_objnode *tmem_objnode_alloc(struct tmem_obj *obj)
236 {
237         struct tmem_objnode *objnode;
238
239         ASSERT_SENTINEL(obj, OBJ);
240         BUG_ON(obj->pool == NULL);
241         ASSERT_SENTINEL(obj->pool, POOL);
242         objnode = (*tmem_hostops.objnode_alloc)(obj->pool);
243         if (unlikely(objnode == NULL))
244                 goto out;
245         objnode->obj = obj;
246         SET_SENTINEL(objnode, OBJNODE);
247         memset(&objnode->slots, 0, sizeof(objnode->slots));
248         objnode->slots_in_use = 0;
249         obj->objnode_count++;
250 out:
251         return objnode;
252 }
253
254 static void tmem_objnode_free(struct tmem_objnode *objnode)
255 {
256         struct tmem_pool *pool;
257         int i;
258
259         BUG_ON(objnode == NULL);
260         for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++)
261                 BUG_ON(objnode->slots[i] != NULL);
262         ASSERT_SENTINEL(objnode, OBJNODE);
263         INVERT_SENTINEL(objnode, OBJNODE);
264         BUG_ON(objnode->obj == NULL);
265         ASSERT_SENTINEL(objnode->obj, OBJ);
266         pool = objnode->obj->pool;
267         BUG_ON(pool == NULL);
268         ASSERT_SENTINEL(pool, POOL);
269         objnode->obj->objnode_count--;
270         objnode->obj = NULL;
271         (*tmem_hostops.objnode_free)(objnode, pool);
272 }
273
274 /*
275  * lookup index in object and return associated pampd (or NULL if not found)
276  */
277 static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
278 {
279         unsigned int height, shift;
280         struct tmem_objnode **slot = NULL;
281
282         BUG_ON(obj == NULL);
283         ASSERT_SENTINEL(obj, OBJ);
284         BUG_ON(obj->pool == NULL);
285         ASSERT_SENTINEL(obj->pool, POOL);
286
287         height = obj->objnode_tree_height;
288         if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height])
289                 goto out;
290         if (height == 0 && obj->objnode_tree_root) {
291                 slot = &obj->objnode_tree_root;
292                 goto out;
293         }
294         shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
295         slot = &obj->objnode_tree_root;
296         while (height > 0) {
297                 if (*slot == NULL)
298                         goto out;
299                 slot = (struct tmem_objnode **)
300                         ((*slot)->slots +
301                          ((index >> shift) & OBJNODE_TREE_MAP_MASK));
302                 shift -= OBJNODE_TREE_MAP_SHIFT;
303                 height--;
304         }
305 out:
306         return slot != NULL ? *slot : NULL;
307 }
308
309 static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index,
310                                         void *pampd)
311 {
312         int ret = 0;
313         struct tmem_objnode *objnode = NULL, *newnode, *slot;
314         unsigned int height, shift;
315         int offset = 0;
316
317         /* if necessary, extend the tree to be higher  */
318         if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) {
319                 height = obj->objnode_tree_height + 1;
320                 if (index > tmem_objnode_tree_h2max[height])
321                         while (index > tmem_objnode_tree_h2max[height])
322                                 height++;
323                 if (obj->objnode_tree_root == NULL) {
324                         obj->objnode_tree_height = height;
325                         goto insert;
326                 }
327                 do {
328                         newnode = tmem_objnode_alloc(obj);
329                         if (!newnode) {
330                                 ret = -ENOMEM;
331                                 goto out;
332                         }
333                         newnode->slots[0] = obj->objnode_tree_root;
334                         newnode->slots_in_use = 1;
335                         obj->objnode_tree_root = newnode;
336                         obj->objnode_tree_height++;
337                 } while (height > obj->objnode_tree_height);
338         }
339 insert:
340         slot = obj->objnode_tree_root;
341         height = obj->objnode_tree_height;
342         shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
343         while (height > 0) {
344                 if (slot == NULL) {
345                         /* add a child objnode.  */
346                         slot = tmem_objnode_alloc(obj);
347                         if (!slot) {
348                                 ret = -ENOMEM;
349                                 goto out;
350                         }
351                         if (objnode) {
352
353                                 objnode->slots[offset] = slot;
354                                 objnode->slots_in_use++;
355                         } else
356                                 obj->objnode_tree_root = slot;
357                 }
358                 /* go down a level */
359                 offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
360                 objnode = slot;
361                 slot = objnode->slots[offset];
362                 shift -= OBJNODE_TREE_MAP_SHIFT;
363                 height--;
364         }
365         BUG_ON(slot != NULL);
366         if (objnode) {
367                 objnode->slots_in_use++;
368                 objnode->slots[offset] = pampd;
369         } else
370                 obj->objnode_tree_root = pampd;
371         obj->pampd_count++;
372 out:
373         return ret;
374 }
375
376 static void *tmem_pampd_delete_from_obj(struct tmem_obj *obj, uint32_t index)
377 {
378         struct tmem_objnode_tree_path path[OBJNODE_TREE_MAX_PATH + 1];
379         struct tmem_objnode_tree_path *pathp = path;
380         struct tmem_objnode *slot = NULL;
381         unsigned int height, shift;
382         int offset;
383
384         BUG_ON(obj == NULL);
385         ASSERT_SENTINEL(obj, OBJ);
386         BUG_ON(obj->pool == NULL);
387         ASSERT_SENTINEL(obj->pool, POOL);
388         height = obj->objnode_tree_height;
389         if (index > tmem_objnode_tree_h2max[height])
390                 goto out;
391         slot = obj->objnode_tree_root;
392         if (height == 0 && obj->objnode_tree_root) {
393                 obj->objnode_tree_root = NULL;
394                 goto out;
395         }
396         shift = (height - 1) * OBJNODE_TREE_MAP_SHIFT;
397         pathp->objnode = NULL;
398         do {
399                 if (slot == NULL)
400                         goto out;
401                 pathp++;
402                 offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
403                 pathp->offset = offset;
404                 pathp->objnode = slot;
405                 slot = slot->slots[offset];
406                 shift -= OBJNODE_TREE_MAP_SHIFT;
407                 height--;
408         } while (height > 0);
409         if (slot == NULL)
410                 goto out;
411         while (pathp->objnode) {
412                 pathp->objnode->slots[pathp->offset] = NULL;
413                 pathp->objnode->slots_in_use--;
414                 if (pathp->objnode->slots_in_use) {
415                         if (pathp->objnode == obj->objnode_tree_root) {
416                                 while (obj->objnode_tree_height > 0 &&
417                                   obj->objnode_tree_root->slots_in_use == 1 &&
418                                   obj->objnode_tree_root->slots[0]) {
419                                         struct tmem_objnode *to_free =
420                                                 obj->objnode_tree_root;
421
422                                         obj->objnode_tree_root =
423                                                         to_free->slots[0];
424                                         obj->objnode_tree_height--;
425                                         to_free->slots[0] = NULL;
426                                         to_free->slots_in_use = 0;
427                                         tmem_objnode_free(to_free);
428                                 }
429                         }
430                         goto out;
431                 }
432                 tmem_objnode_free(pathp->objnode); /* 0 slots used, free it */
433                 pathp--;
434         }
435         obj->objnode_tree_height = 0;
436         obj->objnode_tree_root = NULL;
437
438 out:
439         if (slot != NULL)
440                 obj->pampd_count--;
441         BUG_ON(obj->pampd_count < 0);
442         return slot;
443 }
444
445 /* recursively walk the objnode_tree destroying pampds and objnodes */
446 static void tmem_objnode_node_destroy(struct tmem_obj *obj,
447                                         struct tmem_objnode *objnode,
448                                         unsigned int ht)
449 {
450         int i;
451
452         if (ht == 0)
453                 return;
454         for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) {
455                 if (objnode->slots[i]) {
456                         if (ht == 1) {
457                                 obj->pampd_count--;
458                                 (*tmem_pamops.free)(objnode->slots[i],
459                                                                 obj->pool);
460                                 objnode->slots[i] = NULL;
461                                 continue;
462                         }
463                         tmem_objnode_node_destroy(obj, objnode->slots[i], ht-1);
464                         tmem_objnode_free(objnode->slots[i]);
465                         objnode->slots[i] = NULL;
466                 }
467         }
468 }
469
470 static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
471 {
472         if (obj->objnode_tree_root == NULL)
473                 return;
474         if (obj->objnode_tree_height == 0) {
475                 obj->pampd_count--;
476                 (*tmem_pamops.free)(obj->objnode_tree_root, obj->pool);
477         } else {
478                 tmem_objnode_node_destroy(obj, obj->objnode_tree_root,
479                                         obj->objnode_tree_height);
480                 tmem_objnode_free(obj->objnode_tree_root);
481                 obj->objnode_tree_height = 0;
482         }
483         obj->objnode_tree_root = NULL;
484 }
485
486 /*
487  * Tmem is operated on by a set of well-defined actions:
488  * "put", "get", "flush", "flush_object", "new pool" and "destroy pool".
489  * (The tmem ABI allows for subpages and exchanges but these operations
490  * are not included in this implementation.)
491  *
492  * These "tmem core" operations are implemented in the following functions.
493  */
494
495 /*
496  * "Put" a page, e.g. copy a page from the kernel into newly allocated
497  * PAM space (if such space is available).  Tmem_put is complicated by
498  * a corner case: What if a page with matching handle already exists in
499  * tmem?  To guarantee coherency, one of two actions is necessary: Either
500  * the data for the page must be overwritten, or the page must be
501  * "flushed" so that the data is not accessible to a subsequent "get".
502  * Since these "duplicate puts" are relatively rare, this implementation
503  * always flushes for simplicity.
504  */
505 int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
506                 struct page *page)
507 {
508         struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL;
509         void *pampd = NULL, *pampd_del = NULL;
510         int ret = -ENOMEM;
511         bool ephemeral;
512         struct tmem_hashbucket *hb;
513
514         ephemeral = is_ephemeral(pool);
515         hb = &pool->hashbucket[tmem_oid_hash(oidp)];
516         spin_lock(&hb->lock);
517         obj = objfound = tmem_obj_find(hb, oidp);
518         if (obj != NULL) {
519                 pampd = tmem_pampd_lookup_in_obj(objfound, index);
520                 if (pampd != NULL) {
521                         /* if found, is a dup put, flush the old one */
522                         pampd_del = tmem_pampd_delete_from_obj(obj, index);
523                         BUG_ON(pampd_del != pampd);
524                         (*tmem_pamops.free)(pampd, pool);
525                         if (obj->pampd_count == 0) {
526                                 objnew = obj;
527                                 objfound = NULL;
528                         }
529                         pampd = NULL;
530                 }
531         } else {
532                 obj = objnew = (*tmem_hostops.obj_alloc)(pool);
533                 if (unlikely(obj == NULL)) {
534                         ret = -ENOMEM;
535                         goto out;
536                 }
537                 tmem_obj_init(obj, hb, pool, oidp);
538         }
539         BUG_ON(obj == NULL);
540         BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound));
541         pampd = (*tmem_pamops.create)(obj->pool, &obj->oid, index, page);
542         if (unlikely(pampd == NULL))
543                 goto free;
544         ret = tmem_pampd_add_to_obj(obj, index, pampd);
545         if (unlikely(ret == -ENOMEM))
546                 /* may have partially built objnode tree ("stump") */
547                 goto delete_and_free;
548         goto out;
549
550 delete_and_free:
551         (void)tmem_pampd_delete_from_obj(obj, index);
552 free:
553         if (pampd)
554                 (*tmem_pamops.free)(pampd, pool);
555         if (objnew) {
556                 tmem_obj_free(objnew, hb);
557                 (*tmem_hostops.obj_free)(objnew, pool);
558         }
559 out:
560         spin_unlock(&hb->lock);
561         return ret;
562 }
563
564 /*
565  * "Get" a page, e.g. if one can be found, copy the tmem page with the
566  * matching handle from PAM space to the kernel.  By tmem definition,
567  * when a "get" is successful on an ephemeral page, the page is "flushed",
568  * and when a "get" is successful on a persistent page, the page is retained
569  * in tmem.  Note that to preserve
570  * coherency, "get" can never be skipped if tmem contains the data.
571  * That is, if a get is done with a certain handle and fails, any
572  * subsequent "get" must also fail (unless of course there is a
573  * "put" done with the same handle).
574
575  */
576 int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp,
577                                 uint32_t index, struct page *page)
578 {
579         struct tmem_obj *obj;
580         void *pampd;
581         bool ephemeral = is_ephemeral(pool);
582         uint32_t ret = -1;
583         struct tmem_hashbucket *hb;
584
585         hb = &pool->hashbucket[tmem_oid_hash(oidp)];
586         spin_lock(&hb->lock);
587         obj = tmem_obj_find(hb, oidp);
588         if (obj == NULL)
589                 goto out;
590         ephemeral = is_ephemeral(pool);
591         if (ephemeral)
592                 pampd = tmem_pampd_delete_from_obj(obj, index);
593         else
594                 pampd = tmem_pampd_lookup_in_obj(obj, index);
595         if (pampd == NULL)
596                 goto out;
597         ret = (*tmem_pamops.get_data)(page, pampd, pool);
598         if (ret < 0)
599                 goto out;
600         if (ephemeral) {
601                 (*tmem_pamops.free)(pampd, pool);
602                 if (obj->pampd_count == 0) {
603                         tmem_obj_free(obj, hb);
604                         (*tmem_hostops.obj_free)(obj, pool);
605                         obj = NULL;
606                 }
607         }
608         ret = 0;
609 out:
610         spin_unlock(&hb->lock);
611         return ret;
612 }
613
614 /*
615  * If a page in tmem matches the handle, "flush" this page from tmem such
616  * that any subsequent "get" does not succeed (unless, of course, there
617  * was another "put" with the same handle).
618  */
619 int tmem_flush_page(struct tmem_pool *pool,
620                                 struct tmem_oid *oidp, uint32_t index)
621 {
622         struct tmem_obj *obj;
623         void *pampd;
624         int ret = -1;
625         struct tmem_hashbucket *hb;
626
627         hb = &pool->hashbucket[tmem_oid_hash(oidp)];
628         spin_lock(&hb->lock);
629         obj = tmem_obj_find(hb, oidp);
630         if (obj == NULL)
631                 goto out;
632         pampd = tmem_pampd_delete_from_obj(obj, index);
633         if (pampd == NULL)
634                 goto out;
635         (*tmem_pamops.free)(pampd, pool);
636         if (obj->pampd_count == 0) {
637                 tmem_obj_free(obj, hb);
638                 (*tmem_hostops.obj_free)(obj, pool);
639         }
640         ret = 0;
641
642 out:
643         spin_unlock(&hb->lock);
644         return ret;
645 }
646
647 /*
648  * "Flush" all pages in tmem matching this oid.
649  */
650 int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp)
651 {
652         struct tmem_obj *obj;
653         struct tmem_hashbucket *hb;
654         int ret = -1;
655
656         hb = &pool->hashbucket[tmem_oid_hash(oidp)];
657         spin_lock(&hb->lock);
658         obj = tmem_obj_find(hb, oidp);
659         if (obj == NULL)
660                 goto out;
661         tmem_pampd_destroy_all_in_obj(obj);
662         tmem_obj_free(obj, hb);
663         (*tmem_hostops.obj_free)(obj, pool);
664         ret = 0;
665
666 out:
667         spin_unlock(&hb->lock);
668         return ret;
669 }
670
671 /*
672  * "Flush" all pages (and tmem_objs) from this tmem_pool and disable
673  * all subsequent access to this tmem_pool.
674  */
675 int tmem_destroy_pool(struct tmem_pool *pool)
676 {
677         int ret = -1;
678
679         if (pool == NULL)
680                 goto out;
681         tmem_pool_flush(pool, 1);
682         ret = 0;
683 out:
684         return ret;
685 }
686
687 static LIST_HEAD(tmem_global_pool_list);
688
689 /*
690  * Create a new tmem_pool with the provided flag and return
691  * a pool id provided by the tmem host implementation.
692  */
693 void tmem_new_pool(struct tmem_pool *pool, uint32_t flags)
694 {
695         int persistent = flags & TMEM_POOL_PERSIST;
696         int shared = flags & TMEM_POOL_SHARED;
697         struct tmem_hashbucket *hb = &pool->hashbucket[0];
698         int i;
699
700         for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
701                 hb->obj_rb_root = RB_ROOT;
702                 spin_lock_init(&hb->lock);
703         }
704         INIT_LIST_HEAD(&pool->pool_list);
705         atomic_set(&pool->obj_count, 0);
706         SET_SENTINEL(pool, POOL);
707         list_add_tail(&pool->pool_list, &tmem_global_pool_list);
708         pool->persistent = persistent;
709         pool->shared = shared;
710 }