nilfs2: remove pointless NULL check of bpop_commit_alloc_ptr function
[pandora-kernel.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32
33 /**
34  * struct nilfs_btree_path - A path on which B-tree operations are executed
35  * @bp_bh: buffer head of node block
36  * @bp_sib_bh: buffer head of sibling node block
37  * @bp_index: index of child node
38  * @bp_oldreq: ptr end request for old ptr
39  * @bp_newreq: ptr alloc request for new ptr
40  * @bp_op: rebalance operation
41  */
42 struct nilfs_btree_path {
43         struct buffer_head *bp_bh;
44         struct buffer_head *bp_sib_bh;
45         int bp_index;
46         union nilfs_bmap_ptr_req bp_oldreq;
47         union nilfs_bmap_ptr_req bp_newreq;
48         struct nilfs_btnode_chkey_ctxt bp_ctxt;
49         void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
50                       int, __u64 *, __u64 *);
51 };
52
53 /*
54  * B-tree path operations
55  */
56
57 static struct kmem_cache *nilfs_btree_path_cache;
58
59 int __init nilfs_btree_path_cache_init(void)
60 {
61         nilfs_btree_path_cache =
62                 kmem_cache_create("nilfs2_btree_path_cache",
63                                   sizeof(struct nilfs_btree_path) *
64                                   NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
65         return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
66 }
67
68 void nilfs_btree_path_cache_destroy(void)
69 {
70         kmem_cache_destroy(nilfs_btree_path_cache);
71 }
72
73 static inline struct nilfs_btree_path *
74 nilfs_btree_alloc_path(const struct nilfs_btree *btree)
75 {
76         return (struct nilfs_btree_path *)
77                 kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
78 }
79
80 static inline void nilfs_btree_free_path(const struct nilfs_btree *btree,
81                                          struct nilfs_btree_path *path)
82 {
83         kmem_cache_free(nilfs_btree_path_cache, path);
84 }
85
86 static void nilfs_btree_init_path(const struct nilfs_btree *btree,
87                                   struct nilfs_btree_path *path)
88 {
89         int level;
90
91         for (level = NILFS_BTREE_LEVEL_DATA;
92              level < NILFS_BTREE_LEVEL_MAX;
93              level++) {
94                 path[level].bp_bh = NULL;
95                 path[level].bp_sib_bh = NULL;
96                 path[level].bp_index = 0;
97                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
98                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
99                 path[level].bp_op = NULL;
100         }
101 }
102
103 static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
104                                    struct nilfs_btree_path *path)
105 {
106         int level;
107
108         for (level = NILFS_BTREE_LEVEL_DATA;
109              level < NILFS_BTREE_LEVEL_MAX;
110              level++) {
111                 if (path[level].bp_bh != NULL) {
112                         brelse(path[level].bp_bh);
113                         path[level].bp_bh = NULL;
114                 }
115                 /* sib_bh is released or deleted by prepare or commit
116                  * operations. */
117                 path[level].bp_sib_bh = NULL;
118                 path[level].bp_index = 0;
119                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
120                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
121                 path[level].bp_op = NULL;
122         }
123 }
124
125 /*
126  * B-tree node operations
127  */
128 static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
129                                  struct buffer_head **bhp)
130 {
131         struct address_space *btnc =
132                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
133         return nilfs_btnode_get(btnc, ptr, 0, bhp, 0);
134 }
135
136 static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
137                                      __u64 ptr, struct buffer_head **bhp)
138 {
139         struct address_space *btnc =
140                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
141         int ret;
142
143         ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1);
144         if (!ret)
145                 set_buffer_nilfs_volatile(*bhp);
146         return ret;
147 }
148
149 static inline int
150 nilfs_btree_node_get_flags(const struct nilfs_btree *btree,
151                            const struct nilfs_btree_node *node)
152 {
153         return node->bn_flags;
154 }
155
156 static inline void
157 nilfs_btree_node_set_flags(struct nilfs_btree *btree,
158                            struct nilfs_btree_node *node,
159                            int flags)
160 {
161         node->bn_flags = flags;
162 }
163
164 static inline int nilfs_btree_node_root(const struct nilfs_btree *btree,
165                                         const struct nilfs_btree_node *node)
166 {
167         return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT;
168 }
169
170 static inline int
171 nilfs_btree_node_get_level(const struct nilfs_btree *btree,
172                            const struct nilfs_btree_node *node)
173 {
174         return node->bn_level;
175 }
176
177 static inline void
178 nilfs_btree_node_set_level(struct nilfs_btree *btree,
179                            struct nilfs_btree_node *node,
180                            int level)
181 {
182         node->bn_level = level;
183 }
184
185 static inline int
186 nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree,
187                                const struct nilfs_btree_node *node)
188 {
189         return le16_to_cpu(node->bn_nchildren);
190 }
191
192 static inline void
193 nilfs_btree_node_set_nchildren(struct nilfs_btree *btree,
194                                struct nilfs_btree_node *node,
195                                int nchildren)
196 {
197         node->bn_nchildren = cpu_to_le16(nchildren);
198 }
199
200 static inline int
201 nilfs_btree_node_size(const struct nilfs_btree *btree)
202 {
203         return 1 << btree->bt_bmap.b_inode->i_blkbits;
204 }
205
206 static inline int
207 nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree,
208                                const struct nilfs_btree_node *node)
209 {
210         return nilfs_btree_node_root(btree, node) ?
211                 NILFS_BTREE_ROOT_NCHILDREN_MIN :
212                 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
213 }
214
215 static inline int
216 nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree,
217                                const struct nilfs_btree_node *node)
218 {
219         return nilfs_btree_node_root(btree, node) ?
220                 NILFS_BTREE_ROOT_NCHILDREN_MAX :
221                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
222 }
223
224 static inline __le64 *
225 nilfs_btree_node_dkeys(const struct nilfs_btree *btree,
226                        const struct nilfs_btree_node *node)
227 {
228         return (__le64 *)((char *)(node + 1) +
229                           (nilfs_btree_node_root(btree, node) ?
230                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
231 }
232
233 static inline __le64 *
234 nilfs_btree_node_dptrs(const struct nilfs_btree *btree,
235                        const struct nilfs_btree_node *node)
236 {
237         return (__le64 *)(nilfs_btree_node_dkeys(btree, node) +
238                           nilfs_btree_node_nchildren_max(btree, node));
239 }
240
241 static inline __u64
242 nilfs_btree_node_get_key(const struct nilfs_btree *btree,
243                          const struct nilfs_btree_node *node, int index)
244 {
245         return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) +
246                                         index));
247 }
248
249 static inline void
250 nilfs_btree_node_set_key(struct nilfs_btree *btree,
251                          struct nilfs_btree_node *node, int index, __u64 key)
252 {
253         *(nilfs_btree_node_dkeys(btree, node) + index) =
254                 nilfs_bmap_key_to_dkey(key);
255 }
256
257 static inline __u64
258 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
259                          const struct nilfs_btree_node *node,
260                          int index)
261 {
262         return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) +
263                                         index));
264 }
265
266 static inline void
267 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
268                          struct nilfs_btree_node *node,
269                          int index,
270                          __u64 ptr)
271 {
272         *(nilfs_btree_node_dptrs(btree, node) + index) =
273                 nilfs_bmap_ptr_to_dptr(ptr);
274 }
275
276 static void nilfs_btree_node_init(struct nilfs_btree *btree,
277                                   struct nilfs_btree_node *node,
278                                   int flags, int level, int nchildren,
279                                   const __u64 *keys, const __u64 *ptrs)
280 {
281         __le64 *dkeys;
282         __le64 *dptrs;
283         int i;
284
285         nilfs_btree_node_set_flags(btree, node, flags);
286         nilfs_btree_node_set_level(btree, node, level);
287         nilfs_btree_node_set_nchildren(btree, node, nchildren);
288
289         dkeys = nilfs_btree_node_dkeys(btree, node);
290         dptrs = nilfs_btree_node_dptrs(btree, node);
291         for (i = 0; i < nchildren; i++) {
292                 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
293                 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
294         }
295 }
296
297 /* Assume the buffer heads corresponding to left and right are locked. */
298 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
299                                        struct nilfs_btree_node *left,
300                                        struct nilfs_btree_node *right,
301                                        int n)
302 {
303         __le64 *ldkeys, *rdkeys;
304         __le64 *ldptrs, *rdptrs;
305         int lnchildren, rnchildren;
306
307         ldkeys = nilfs_btree_node_dkeys(btree, left);
308         ldptrs = nilfs_btree_node_dptrs(btree, left);
309         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
310
311         rdkeys = nilfs_btree_node_dkeys(btree, right);
312         rdptrs = nilfs_btree_node_dptrs(btree, right);
313         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
314
315         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
316         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
317         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
318         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
319
320         lnchildren += n;
321         rnchildren -= n;
322         nilfs_btree_node_set_nchildren(btree, left, lnchildren);
323         nilfs_btree_node_set_nchildren(btree, right, rnchildren);
324 }
325
326 /* Assume that the buffer heads corresponding to left and right are locked. */
327 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
328                                         struct nilfs_btree_node *left,
329                                         struct nilfs_btree_node *right,
330                                         int n)
331 {
332         __le64 *ldkeys, *rdkeys;
333         __le64 *ldptrs, *rdptrs;
334         int lnchildren, rnchildren;
335
336         ldkeys = nilfs_btree_node_dkeys(btree, left);
337         ldptrs = nilfs_btree_node_dptrs(btree, left);
338         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
339
340         rdkeys = nilfs_btree_node_dkeys(btree, right);
341         rdptrs = nilfs_btree_node_dptrs(btree, right);
342         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
343
344         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
345         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
346         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
347         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
348
349         lnchildren -= n;
350         rnchildren += n;
351         nilfs_btree_node_set_nchildren(btree, left, lnchildren);
352         nilfs_btree_node_set_nchildren(btree, right, rnchildren);
353 }
354
355 /* Assume that the buffer head corresponding to node is locked. */
356 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
357                                     struct nilfs_btree_node *node,
358                                     __u64 key, __u64 ptr, int index)
359 {
360         __le64 *dkeys;
361         __le64 *dptrs;
362         int nchildren;
363
364         dkeys = nilfs_btree_node_dkeys(btree, node);
365         dptrs = nilfs_btree_node_dptrs(btree, node);
366         nchildren = nilfs_btree_node_get_nchildren(btree, node);
367         if (index < nchildren) {
368                 memmove(dkeys + index + 1, dkeys + index,
369                         (nchildren - index) * sizeof(*dkeys));
370                 memmove(dptrs + index + 1, dptrs + index,
371                         (nchildren - index) * sizeof(*dptrs));
372         }
373         dkeys[index] = nilfs_bmap_key_to_dkey(key);
374         dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
375         nchildren++;
376         nilfs_btree_node_set_nchildren(btree, node, nchildren);
377 }
378
379 /* Assume that the buffer head corresponding to node is locked. */
380 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
381                                     struct nilfs_btree_node *node,
382                                     __u64 *keyp, __u64 *ptrp, int index)
383 {
384         __u64 key;
385         __u64 ptr;
386         __le64 *dkeys;
387         __le64 *dptrs;
388         int nchildren;
389
390         dkeys = nilfs_btree_node_dkeys(btree, node);
391         dptrs = nilfs_btree_node_dptrs(btree, node);
392         key = nilfs_bmap_dkey_to_key(dkeys[index]);
393         ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
394         nchildren = nilfs_btree_node_get_nchildren(btree, node);
395         if (keyp != NULL)
396                 *keyp = key;
397         if (ptrp != NULL)
398                 *ptrp = ptr;
399
400         if (index < nchildren - 1) {
401                 memmove(dkeys + index, dkeys + index + 1,
402                         (nchildren - index - 1) * sizeof(*dkeys));
403                 memmove(dptrs + index, dptrs + index + 1,
404                         (nchildren - index - 1) * sizeof(*dptrs));
405         }
406         nchildren--;
407         nilfs_btree_node_set_nchildren(btree, node, nchildren);
408 }
409
410 static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
411                                    const struct nilfs_btree_node *node,
412                                    __u64 key, int *indexp)
413 {
414         __u64 nkey;
415         int index, low, high, s;
416
417         /* binary search */
418         low = 0;
419         high = nilfs_btree_node_get_nchildren(btree, node) - 1;
420         index = 0;
421         s = 0;
422         while (low <= high) {
423                 index = (low + high) / 2;
424                 nkey = nilfs_btree_node_get_key(btree, node, index);
425                 if (nkey == key) {
426                         s = 0;
427                         goto out;
428                 } else if (nkey < key) {
429                         low = index + 1;
430                         s = -1;
431                 } else {
432                         high = index - 1;
433                         s = 1;
434                 }
435         }
436
437         /* adjust index */
438         if (nilfs_btree_node_get_level(btree, node) >
439             NILFS_BTREE_LEVEL_NODE_MIN) {
440                 if ((s > 0) && (index > 0))
441                         index--;
442         } else if (s < 0)
443                 index++;
444
445  out:
446         *indexp = index;
447
448         return s == 0;
449 }
450
451 static inline struct nilfs_btree_node *
452 nilfs_btree_get_root(const struct nilfs_btree *btree)
453 {
454         return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
455 }
456
457 static inline struct nilfs_btree_node *
458 nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree,
459                              const struct nilfs_btree_path *path,
460                              int level)
461 {
462         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
463 }
464
465 static inline struct nilfs_btree_node *
466 nilfs_btree_get_sib_node(const struct nilfs_btree *btree,
467                          const struct nilfs_btree_path *path,
468                          int level)
469 {
470         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
471 }
472
473 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
474 {
475         return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree))
476                 + 1;
477 }
478
479 static inline struct nilfs_btree_node *
480 nilfs_btree_get_node(const struct nilfs_btree *btree,
481                      const struct nilfs_btree_path *path,
482                      int level)
483 {
484         return (level == nilfs_btree_height(btree) - 1) ?
485                 nilfs_btree_get_root(btree) :
486                 nilfs_btree_get_nonroot_node(btree, path, level);
487 }
488
489 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
490                                  struct nilfs_btree_path *path,
491                                  __u64 key, __u64 *ptrp, int minlevel)
492 {
493         struct nilfs_btree_node *node;
494         __u64 ptr;
495         int level, index, found, ret;
496
497         node = nilfs_btree_get_root(btree);
498         level = nilfs_btree_node_get_level(btree, node);
499         if ((level < minlevel) ||
500             (nilfs_btree_node_get_nchildren(btree, node) <= 0))
501                 return -ENOENT;
502
503         found = nilfs_btree_node_lookup(btree, node, key, &index);
504         ptr = nilfs_btree_node_get_ptr(btree, node, index);
505         path[level].bp_bh = NULL;
506         path[level].bp_index = index;
507
508         for (level--; level >= minlevel; level--) {
509                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
510                 if (ret < 0)
511                         return ret;
512                 node = nilfs_btree_get_nonroot_node(btree, path, level);
513                 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
514                 if (!found)
515                         found = nilfs_btree_node_lookup(btree, node, key,
516                                                         &index);
517                 else
518                         index = 0;
519                 if (index < nilfs_btree_node_nchildren_max(btree, node))
520                         ptr = nilfs_btree_node_get_ptr(btree, node, index);
521                 else {
522                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
523                         /* insert */
524                         ptr = NILFS_BMAP_INVALID_PTR;
525                 }
526                 path[level].bp_index = index;
527         }
528         if (!found)
529                 return -ENOENT;
530
531         if (ptrp != NULL)
532                 *ptrp = ptr;
533
534         return 0;
535 }
536
537 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
538                                       struct nilfs_btree_path *path,
539                                       __u64 *keyp, __u64 *ptrp)
540 {
541         struct nilfs_btree_node *node;
542         __u64 ptr;
543         int index, level, ret;
544
545         node = nilfs_btree_get_root(btree);
546         index = nilfs_btree_node_get_nchildren(btree, node) - 1;
547         if (index < 0)
548                 return -ENOENT;
549         level = nilfs_btree_node_get_level(btree, node);
550         ptr = nilfs_btree_node_get_ptr(btree, node, index);
551         path[level].bp_bh = NULL;
552         path[level].bp_index = index;
553
554         for (level--; level > 0; level--) {
555                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
556                 if (ret < 0)
557                         return ret;
558                 node = nilfs_btree_get_nonroot_node(btree, path, level);
559                 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
560                 index = nilfs_btree_node_get_nchildren(btree, node) - 1;
561                 ptr = nilfs_btree_node_get_ptr(btree, node, index);
562                 path[level].bp_index = index;
563         }
564
565         if (keyp != NULL)
566                 *keyp = nilfs_btree_node_get_key(btree, node, index);
567         if (ptrp != NULL)
568                 *ptrp = ptr;
569
570         return 0;
571 }
572
573 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
574                               __u64 key, int level, __u64 *ptrp)
575 {
576         struct nilfs_btree *btree;
577         struct nilfs_btree_path *path;
578         __u64 ptr;
579         int ret;
580
581         btree = (struct nilfs_btree *)bmap;
582         path = nilfs_btree_alloc_path(btree);
583         if (path == NULL)
584                 return -ENOMEM;
585         nilfs_btree_init_path(btree, path);
586
587         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
588
589         if (ptrp != NULL)
590                 *ptrp = ptr;
591
592         nilfs_btree_clear_path(btree, path);
593         nilfs_btree_free_path(btree, path);
594
595         return ret;
596 }
597
598 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
599                                     struct nilfs_btree_path *path,
600                                     int level, __u64 key)
601 {
602         if (level < nilfs_btree_height(btree) - 1) {
603                 do {
604                         lock_buffer(path[level].bp_bh);
605                         nilfs_btree_node_set_key(
606                                 btree,
607                                 nilfs_btree_get_nonroot_node(
608                                         btree, path, level),
609                                 path[level].bp_index, key);
610                         if (!buffer_dirty(path[level].bp_bh))
611                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
612                         unlock_buffer(path[level].bp_bh);
613                 } while ((path[level].bp_index == 0) &&
614                          (++level < nilfs_btree_height(btree) - 1));
615         }
616
617         /* root */
618         if (level == nilfs_btree_height(btree) - 1) {
619                 nilfs_btree_node_set_key(btree,
620                                          nilfs_btree_get_root(btree),
621                                          path[level].bp_index, key);
622         }
623 }
624
625 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
626                                   struct nilfs_btree_path *path,
627                                   int level, __u64 *keyp, __u64 *ptrp)
628 {
629         struct nilfs_btree_node *node;
630
631         if (level < nilfs_btree_height(btree) - 1) {
632                 lock_buffer(path[level].bp_bh);
633                 node = nilfs_btree_get_nonroot_node(btree, path, level);
634                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
635                                         path[level].bp_index);
636                 if (!buffer_dirty(path[level].bp_bh))
637                         nilfs_btnode_mark_dirty(path[level].bp_bh);
638                 unlock_buffer(path[level].bp_bh);
639
640                 if (path[level].bp_index == 0)
641                         nilfs_btree_promote_key(btree, path, level + 1,
642                                                 nilfs_btree_node_get_key(
643                                                         btree, node, 0));
644         } else {
645                 node = nilfs_btree_get_root(btree);
646                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
647                                         path[level].bp_index);
648         }
649 }
650
651 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
652                                    struct nilfs_btree_path *path,
653                                    int level, __u64 *keyp, __u64 *ptrp)
654 {
655         struct nilfs_btree_node *node, *left;
656         int nchildren, lnchildren, n, move;
657
658         lock_buffer(path[level].bp_bh);
659         lock_buffer(path[level].bp_sib_bh);
660
661         node = nilfs_btree_get_nonroot_node(btree, path, level);
662         left = nilfs_btree_get_sib_node(btree, path, level);
663         nchildren = nilfs_btree_node_get_nchildren(btree, node);
664         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
665         move = 0;
666
667         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
668         if (n > path[level].bp_index) {
669                 /* move insert point */
670                 n--;
671                 move = 1;
672         }
673
674         nilfs_btree_node_move_left(btree, left, node, n);
675
676         if (!buffer_dirty(path[level].bp_bh))
677                 nilfs_btnode_mark_dirty(path[level].bp_bh);
678         if (!buffer_dirty(path[level].bp_sib_bh))
679                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
680
681         unlock_buffer(path[level].bp_bh);
682         unlock_buffer(path[level].bp_sib_bh);
683
684         nilfs_btree_promote_key(btree, path, level + 1,
685                                 nilfs_btree_node_get_key(btree, node, 0));
686
687         if (move) {
688                 brelse(path[level].bp_bh);
689                 path[level].bp_bh = path[level].bp_sib_bh;
690                 path[level].bp_sib_bh = NULL;
691                 path[level].bp_index += lnchildren;
692                 path[level + 1].bp_index--;
693         } else {
694                 brelse(path[level].bp_sib_bh);
695                 path[level].bp_sib_bh = NULL;
696                 path[level].bp_index -= n;
697         }
698
699         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
700 }
701
702 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
703                                     struct nilfs_btree_path *path,
704                                     int level, __u64 *keyp, __u64 *ptrp)
705 {
706         struct nilfs_btree_node *node, *right;
707         int nchildren, rnchildren, n, move;
708
709         lock_buffer(path[level].bp_bh);
710         lock_buffer(path[level].bp_sib_bh);
711
712         node = nilfs_btree_get_nonroot_node(btree, path, level);
713         right = nilfs_btree_get_sib_node(btree, path, level);
714         nchildren = nilfs_btree_node_get_nchildren(btree, node);
715         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
716         move = 0;
717
718         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
719         if (n > nchildren - path[level].bp_index) {
720                 /* move insert point */
721                 n--;
722                 move = 1;
723         }
724
725         nilfs_btree_node_move_right(btree, node, right, n);
726
727         if (!buffer_dirty(path[level].bp_bh))
728                 nilfs_btnode_mark_dirty(path[level].bp_bh);
729         if (!buffer_dirty(path[level].bp_sib_bh))
730                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
731
732         unlock_buffer(path[level].bp_bh);
733         unlock_buffer(path[level].bp_sib_bh);
734
735         path[level + 1].bp_index++;
736         nilfs_btree_promote_key(btree, path, level + 1,
737                                 nilfs_btree_node_get_key(btree, right, 0));
738         path[level + 1].bp_index--;
739
740         if (move) {
741                 brelse(path[level].bp_bh);
742                 path[level].bp_bh = path[level].bp_sib_bh;
743                 path[level].bp_sib_bh = NULL;
744                 path[level].bp_index -=
745                         nilfs_btree_node_get_nchildren(btree, node);
746                 path[level + 1].bp_index++;
747         } else {
748                 brelse(path[level].bp_sib_bh);
749                 path[level].bp_sib_bh = NULL;
750         }
751
752         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
753 }
754
755 static void nilfs_btree_split(struct nilfs_btree *btree,
756                               struct nilfs_btree_path *path,
757                               int level, __u64 *keyp, __u64 *ptrp)
758 {
759         struct nilfs_btree_node *node, *right;
760         __u64 newkey;
761         __u64 newptr;
762         int nchildren, n, move;
763
764         lock_buffer(path[level].bp_bh);
765         lock_buffer(path[level].bp_sib_bh);
766
767         node = nilfs_btree_get_nonroot_node(btree, path, level);
768         right = nilfs_btree_get_sib_node(btree, path, level);
769         nchildren = nilfs_btree_node_get_nchildren(btree, node);
770         move = 0;
771
772         n = (nchildren + 1) / 2;
773         if (n > nchildren - path[level].bp_index) {
774                 n--;
775                 move = 1;
776         }
777
778         nilfs_btree_node_move_right(btree, node, right, n);
779
780         if (!buffer_dirty(path[level].bp_bh))
781                 nilfs_btnode_mark_dirty(path[level].bp_bh);
782         if (!buffer_dirty(path[level].bp_sib_bh))
783                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
784
785         unlock_buffer(path[level].bp_bh);
786         unlock_buffer(path[level].bp_sib_bh);
787
788         newkey = nilfs_btree_node_get_key(btree, right, 0);
789         newptr = path[level].bp_newreq.bpr_ptr;
790
791         if (move) {
792                 path[level].bp_index -=
793                         nilfs_btree_node_get_nchildren(btree, node);
794                 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
795                                         path[level].bp_index);
796
797                 *keyp = nilfs_btree_node_get_key(btree, right, 0);
798                 *ptrp = path[level].bp_newreq.bpr_ptr;
799
800                 brelse(path[level].bp_bh);
801                 path[level].bp_bh = path[level].bp_sib_bh;
802                 path[level].bp_sib_bh = NULL;
803         } else {
804                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
805
806                 *keyp = nilfs_btree_node_get_key(btree, right, 0);
807                 *ptrp = path[level].bp_newreq.bpr_ptr;
808
809                 brelse(path[level].bp_sib_bh);
810                 path[level].bp_sib_bh = NULL;
811         }
812
813         path[level + 1].bp_index++;
814 }
815
816 static void nilfs_btree_grow(struct nilfs_btree *btree,
817                              struct nilfs_btree_path *path,
818                              int level, __u64 *keyp, __u64 *ptrp)
819 {
820         struct nilfs_btree_node *root, *child;
821         int n;
822
823         lock_buffer(path[level].bp_sib_bh);
824
825         root = nilfs_btree_get_root(btree);
826         child = nilfs_btree_get_sib_node(btree, path, level);
827
828         n = nilfs_btree_node_get_nchildren(btree, root);
829
830         nilfs_btree_node_move_right(btree, root, child, n);
831         nilfs_btree_node_set_level(btree, root, level + 1);
832
833         if (!buffer_dirty(path[level].bp_sib_bh))
834                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
835
836         unlock_buffer(path[level].bp_sib_bh);
837
838         path[level].bp_bh = path[level].bp_sib_bh;
839         path[level].bp_sib_bh = NULL;
840
841         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
842
843         *keyp = nilfs_btree_node_get_key(btree, child, 0);
844         *ptrp = path[level].bp_newreq.bpr_ptr;
845 }
846
847 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
848                                    const struct nilfs_btree_path *path)
849 {
850         struct nilfs_btree_node *node;
851         int level;
852
853         if (path == NULL)
854                 return NILFS_BMAP_INVALID_PTR;
855
856         /* left sibling */
857         level = NILFS_BTREE_LEVEL_NODE_MIN;
858         if (path[level].bp_index > 0) {
859                 node = nilfs_btree_get_node(btree, path, level);
860                 return nilfs_btree_node_get_ptr(btree, node,
861                                                 path[level].bp_index - 1);
862         }
863
864         /* parent */
865         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
866         if (level <= nilfs_btree_height(btree) - 1) {
867                 node = nilfs_btree_get_node(btree, path, level);
868                 return nilfs_btree_node_get_ptr(btree, node,
869                                                 path[level].bp_index);
870         }
871
872         return NILFS_BMAP_INVALID_PTR;
873 }
874
875 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
876                                        const struct nilfs_btree_path *path,
877                                        __u64 key)
878 {
879         __u64 ptr;
880
881         ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
882         if (ptr != NILFS_BMAP_INVALID_PTR)
883                 /* sequential access */
884                 return ptr;
885         else {
886                 ptr = nilfs_btree_find_near(btree, path);
887                 if (ptr != NILFS_BMAP_INVALID_PTR)
888                         /* near */
889                         return ptr;
890         }
891         /* block group */
892         return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
893 }
894
895 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
896                                      __u64 ptr)
897 {
898         btree->bt_bmap.b_last_allocated_key = key;
899         btree->bt_bmap.b_last_allocated_ptr = ptr;
900 }
901
902 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
903                                       struct nilfs_btree_path *path,
904                                       int *levelp, __u64 key, __u64 ptr,
905                                       struct nilfs_bmap_stats *stats)
906 {
907         struct buffer_head *bh;
908         struct nilfs_btree_node *node, *parent, *sib;
909         __u64 sibptr;
910         int pindex, level, ret;
911
912         stats->bs_nblocks = 0;
913         level = NILFS_BTREE_LEVEL_DATA;
914
915         /* allocate a new ptr for data block */
916         if (btree->bt_ops->btop_find_target != NULL)
917                 path[level].bp_newreq.bpr_ptr =
918                         btree->bt_ops->btop_find_target(btree, path, key);
919
920         ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
921                 &btree->bt_bmap, &path[level].bp_newreq);
922         if (ret < 0)
923                 goto err_out_data;
924
925         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
926              level < nilfs_btree_height(btree) - 1;
927              level++) {
928                 node = nilfs_btree_get_nonroot_node(btree, path, level);
929                 if (nilfs_btree_node_get_nchildren(btree, node) <
930                     nilfs_btree_node_nchildren_max(btree, node)) {
931                         path[level].bp_op = nilfs_btree_do_insert;
932                         stats->bs_nblocks++;
933                         goto out;
934                 }
935
936                 parent = nilfs_btree_get_node(btree, path, level + 1);
937                 pindex = path[level + 1].bp_index;
938
939                 /* left sibling */
940                 if (pindex > 0) {
941                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
942                                                           pindex - 1);
943                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
944                         if (ret < 0)
945                                 goto err_out_child_node;
946                         sib = (struct nilfs_btree_node *)bh->b_data;
947                         if (nilfs_btree_node_get_nchildren(btree, sib) <
948                             nilfs_btree_node_nchildren_max(btree, sib)) {
949                                 path[level].bp_sib_bh = bh;
950                                 path[level].bp_op = nilfs_btree_carry_left;
951                                 stats->bs_nblocks++;
952                                 goto out;
953                         } else
954                                 brelse(bh);
955                 }
956
957                 /* right sibling */
958                 if (pindex <
959                     nilfs_btree_node_get_nchildren(btree, parent) - 1) {
960                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
961                                                           pindex + 1);
962                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
963                         if (ret < 0)
964                                 goto err_out_child_node;
965                         sib = (struct nilfs_btree_node *)bh->b_data;
966                         if (nilfs_btree_node_get_nchildren(btree, sib) <
967                             nilfs_btree_node_nchildren_max(btree, sib)) {
968                                 path[level].bp_sib_bh = bh;
969                                 path[level].bp_op = nilfs_btree_carry_right;
970                                 stats->bs_nblocks++;
971                                 goto out;
972                         } else
973                                 brelse(bh);
974                 }
975
976                 /* split */
977                 path[level].bp_newreq.bpr_ptr =
978                         path[level - 1].bp_newreq.bpr_ptr + 1;
979                 ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
980                         &btree->bt_bmap, &path[level].bp_newreq);
981                 if (ret < 0)
982                         goto err_out_child_node;
983                 ret = nilfs_btree_get_new_block(btree,
984                                                 path[level].bp_newreq.bpr_ptr,
985                                                 &bh);
986                 if (ret < 0)
987                         goto err_out_curr_node;
988
989                 stats->bs_nblocks++;
990
991                 lock_buffer(bh);
992                 nilfs_btree_node_init(btree,
993                                       (struct nilfs_btree_node *)bh->b_data,
994                                       0, level, 0, NULL, NULL);
995                 unlock_buffer(bh);
996                 path[level].bp_sib_bh = bh;
997                 path[level].bp_op = nilfs_btree_split;
998         }
999
1000         /* root */
1001         node = nilfs_btree_get_root(btree);
1002         if (nilfs_btree_node_get_nchildren(btree, node) <
1003             nilfs_btree_node_nchildren_max(btree, node)) {
1004                 path[level].bp_op = nilfs_btree_do_insert;
1005                 stats->bs_nblocks++;
1006                 goto out;
1007         }
1008
1009         /* grow */
1010         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1011         ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
1012                 &btree->bt_bmap, &path[level].bp_newreq);
1013         if (ret < 0)
1014                 goto err_out_child_node;
1015         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1016                                         &bh);
1017         if (ret < 0)
1018                 goto err_out_curr_node;
1019
1020         lock_buffer(bh);
1021         nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1022                               0, level, 0, NULL, NULL);
1023         unlock_buffer(bh);
1024         path[level].bp_sib_bh = bh;
1025         path[level].bp_op = nilfs_btree_grow;
1026
1027         level++;
1028         path[level].bp_op = nilfs_btree_do_insert;
1029
1030         /* a newly-created node block and a data block are added */
1031         stats->bs_nblocks += 2;
1032
1033         /* success */
1034  out:
1035         *levelp = level;
1036         return ret;
1037
1038         /* error */
1039  err_out_curr_node:
1040         btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
1041                                                     &path[level].bp_newreq);
1042  err_out_child_node:
1043         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1044                 nilfs_btnode_delete(path[level].bp_sib_bh);
1045                 btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(
1046                         &btree->bt_bmap, &path[level].bp_newreq);
1047
1048         }
1049
1050         btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
1051                                                        &path[level].bp_newreq);
1052  err_out_data:
1053         *levelp = level;
1054         stats->bs_nblocks = 0;
1055         return ret;
1056 }
1057
1058 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1059                                       struct nilfs_btree_path *path,
1060                                       int maxlevel, __u64 key, __u64 ptr)
1061 {
1062         int level;
1063
1064         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1065         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1066         if (btree->bt_ops->btop_set_target != NULL)
1067                 btree->bt_ops->btop_set_target(btree, key, ptr);
1068
1069         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1070                 btree->bt_bmap.b_pops->bpop_commit_alloc_ptr(
1071                         &btree->bt_bmap, &path[level - 1].bp_newreq);
1072                 path[level].bp_op(btree, path, level, &key, &ptr);
1073         }
1074
1075         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1076                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1077 }
1078
1079 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1080 {
1081         struct nilfs_btree *btree;
1082         struct nilfs_btree_path *path;
1083         struct nilfs_bmap_stats stats;
1084         int level, ret;
1085
1086         btree = (struct nilfs_btree *)bmap;
1087         path = nilfs_btree_alloc_path(btree);
1088         if (path == NULL)
1089                 return -ENOMEM;
1090         nilfs_btree_init_path(btree, path);
1091
1092         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1093                                     NILFS_BTREE_LEVEL_NODE_MIN);
1094         if (ret != -ENOENT) {
1095                 if (ret == 0)
1096                         ret = -EEXIST;
1097                 goto out;
1098         }
1099
1100         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1101         if (ret < 0)
1102                 goto out;
1103         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1104         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1105
1106  out:
1107         nilfs_btree_clear_path(btree, path);
1108         nilfs_btree_free_path(btree, path);
1109         return ret;
1110 }
1111
1112 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1113                                   struct nilfs_btree_path *path,
1114                                   int level, __u64 *keyp, __u64 *ptrp)
1115 {
1116         struct nilfs_btree_node *node;
1117
1118         if (level < nilfs_btree_height(btree) - 1) {
1119                 lock_buffer(path[level].bp_bh);
1120                 node = nilfs_btree_get_nonroot_node(btree, path, level);
1121                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1122                                         path[level].bp_index);
1123                 if (!buffer_dirty(path[level].bp_bh))
1124                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1125                 unlock_buffer(path[level].bp_bh);
1126                 if (path[level].bp_index == 0)
1127                         nilfs_btree_promote_key(btree, path, level + 1,
1128                                 nilfs_btree_node_get_key(btree, node, 0));
1129         } else {
1130                 node = nilfs_btree_get_root(btree);
1131                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1132                                         path[level].bp_index);
1133         }
1134 }
1135
1136 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1137                                     struct nilfs_btree_path *path,
1138                                     int level, __u64 *keyp, __u64 *ptrp)
1139 {
1140         struct nilfs_btree_node *node, *left;
1141         int nchildren, lnchildren, n;
1142
1143         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1144
1145         lock_buffer(path[level].bp_bh);
1146         lock_buffer(path[level].bp_sib_bh);
1147
1148         node = nilfs_btree_get_nonroot_node(btree, path, level);
1149         left = nilfs_btree_get_sib_node(btree, path, level);
1150         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1151         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
1152
1153         n = (nchildren + lnchildren) / 2 - nchildren;
1154
1155         nilfs_btree_node_move_right(btree, left, node, n);
1156
1157         if (!buffer_dirty(path[level].bp_bh))
1158                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1159         if (!buffer_dirty(path[level].bp_sib_bh))
1160                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1161
1162         unlock_buffer(path[level].bp_bh);
1163         unlock_buffer(path[level].bp_sib_bh);
1164
1165         nilfs_btree_promote_key(btree, path, level + 1,
1166                                 nilfs_btree_node_get_key(btree, node, 0));
1167
1168         brelse(path[level].bp_sib_bh);
1169         path[level].bp_sib_bh = NULL;
1170         path[level].bp_index += n;
1171 }
1172
1173 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1174                                      struct nilfs_btree_path *path,
1175                                      int level, __u64 *keyp, __u64 *ptrp)
1176 {
1177         struct nilfs_btree_node *node, *right;
1178         int nchildren, rnchildren, n;
1179
1180         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1181
1182         lock_buffer(path[level].bp_bh);
1183         lock_buffer(path[level].bp_sib_bh);
1184
1185         node = nilfs_btree_get_nonroot_node(btree, path, level);
1186         right = nilfs_btree_get_sib_node(btree, path, level);
1187         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1188         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
1189
1190         n = (nchildren + rnchildren) / 2 - nchildren;
1191
1192         nilfs_btree_node_move_left(btree, node, right, n);
1193
1194         if (!buffer_dirty(path[level].bp_bh))
1195                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1196         if (!buffer_dirty(path[level].bp_sib_bh))
1197                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1198
1199         unlock_buffer(path[level].bp_bh);
1200         unlock_buffer(path[level].bp_sib_bh);
1201
1202         path[level + 1].bp_index++;
1203         nilfs_btree_promote_key(btree, path, level + 1,
1204                                 nilfs_btree_node_get_key(btree, right, 0));
1205         path[level + 1].bp_index--;
1206
1207         brelse(path[level].bp_sib_bh);
1208         path[level].bp_sib_bh = NULL;
1209 }
1210
1211 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1212                                     struct nilfs_btree_path *path,
1213                                     int level, __u64 *keyp, __u64 *ptrp)
1214 {
1215         struct nilfs_btree_node *node, *left;
1216         int n;
1217
1218         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1219
1220         lock_buffer(path[level].bp_bh);
1221         lock_buffer(path[level].bp_sib_bh);
1222
1223         node = nilfs_btree_get_nonroot_node(btree, path, level);
1224         left = nilfs_btree_get_sib_node(btree, path, level);
1225
1226         n = nilfs_btree_node_get_nchildren(btree, node);
1227
1228         nilfs_btree_node_move_left(btree, left, node, n);
1229
1230         if (!buffer_dirty(path[level].bp_sib_bh))
1231                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1232
1233         unlock_buffer(path[level].bp_bh);
1234         unlock_buffer(path[level].bp_sib_bh);
1235
1236         nilfs_btnode_delete(path[level].bp_bh);
1237         path[level].bp_bh = path[level].bp_sib_bh;
1238         path[level].bp_sib_bh = NULL;
1239         path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left);
1240 }
1241
1242 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1243                                      struct nilfs_btree_path *path,
1244                                      int level, __u64 *keyp, __u64 *ptrp)
1245 {
1246         struct nilfs_btree_node *node, *right;
1247         int n;
1248
1249         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1250
1251         lock_buffer(path[level].bp_bh);
1252         lock_buffer(path[level].bp_sib_bh);
1253
1254         node = nilfs_btree_get_nonroot_node(btree, path, level);
1255         right = nilfs_btree_get_sib_node(btree, path, level);
1256
1257         n = nilfs_btree_node_get_nchildren(btree, right);
1258
1259         nilfs_btree_node_move_left(btree, node, right, n);
1260
1261         if (!buffer_dirty(path[level].bp_bh))
1262                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1263
1264         unlock_buffer(path[level].bp_bh);
1265         unlock_buffer(path[level].bp_sib_bh);
1266
1267         nilfs_btnode_delete(path[level].bp_sib_bh);
1268         path[level].bp_sib_bh = NULL;
1269         path[level + 1].bp_index++;
1270 }
1271
1272 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1273                                struct nilfs_btree_path *path,
1274                                int level, __u64 *keyp, __u64 *ptrp)
1275 {
1276         struct nilfs_btree_node *root, *child;
1277         int n;
1278
1279         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1280
1281         lock_buffer(path[level].bp_bh);
1282         root = nilfs_btree_get_root(btree);
1283         child = nilfs_btree_get_nonroot_node(btree, path, level);
1284
1285         nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1286         nilfs_btree_node_set_level(btree, root, level);
1287         n = nilfs_btree_node_get_nchildren(btree, child);
1288         nilfs_btree_node_move_left(btree, root, child, n);
1289         unlock_buffer(path[level].bp_bh);
1290
1291         nilfs_btnode_delete(path[level].bp_bh);
1292         path[level].bp_bh = NULL;
1293 }
1294
1295
1296 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1297                                       struct nilfs_btree_path *path,
1298                                       int *levelp,
1299                                       struct nilfs_bmap_stats *stats)
1300 {
1301         struct buffer_head *bh;
1302         struct nilfs_btree_node *node, *parent, *sib;
1303         __u64 sibptr;
1304         int pindex, level, ret;
1305
1306         ret = 0;
1307         stats->bs_nblocks = 0;
1308         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1309              level < nilfs_btree_height(btree) - 1;
1310              level++) {
1311                 node = nilfs_btree_get_nonroot_node(btree, path, level);
1312                 path[level].bp_oldreq.bpr_ptr =
1313                         nilfs_btree_node_get_ptr(btree, node,
1314                                                  path[level].bp_index);
1315                 if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
1316                         ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
1317                                 &btree->bt_bmap, &path[level].bp_oldreq);
1318                         if (ret < 0)
1319                                 goto err_out_child_node;
1320                 }
1321
1322                 if (nilfs_btree_node_get_nchildren(btree, node) >
1323                     nilfs_btree_node_nchildren_min(btree, node)) {
1324                         path[level].bp_op = nilfs_btree_do_delete;
1325                         stats->bs_nblocks++;
1326                         goto out;
1327                 }
1328
1329                 parent = nilfs_btree_get_node(btree, path, level + 1);
1330                 pindex = path[level + 1].bp_index;
1331
1332                 if (pindex > 0) {
1333                         /* left sibling */
1334                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1335                                                           pindex - 1);
1336                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1337                         if (ret < 0)
1338                                 goto err_out_curr_node;
1339                         sib = (struct nilfs_btree_node *)bh->b_data;
1340                         if (nilfs_btree_node_get_nchildren(btree, sib) >
1341                             nilfs_btree_node_nchildren_min(btree, sib)) {
1342                                 path[level].bp_sib_bh = bh;
1343                                 path[level].bp_op = nilfs_btree_borrow_left;
1344                                 stats->bs_nblocks++;
1345                                 goto out;
1346                         } else {
1347                                 path[level].bp_sib_bh = bh;
1348                                 path[level].bp_op = nilfs_btree_concat_left;
1349                                 stats->bs_nblocks++;
1350                                 /* continue; */
1351                         }
1352                 } else if (pindex <
1353                            nilfs_btree_node_get_nchildren(btree, parent) - 1) {
1354                         /* right sibling */
1355                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1356                                                           pindex + 1);
1357                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1358                         if (ret < 0)
1359                                 goto err_out_curr_node;
1360                         sib = (struct nilfs_btree_node *)bh->b_data;
1361                         if (nilfs_btree_node_get_nchildren(btree, sib) >
1362                             nilfs_btree_node_nchildren_min(btree, sib)) {
1363                                 path[level].bp_sib_bh = bh;
1364                                 path[level].bp_op = nilfs_btree_borrow_right;
1365                                 stats->bs_nblocks++;
1366                                 goto out;
1367                         } else {
1368                                 path[level].bp_sib_bh = bh;
1369                                 path[level].bp_op = nilfs_btree_concat_right;
1370                                 stats->bs_nblocks++;
1371                                 /* continue; */
1372                         }
1373                 } else {
1374                         /* no siblings */
1375                         /* the only child of the root node */
1376                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1377                         if (nilfs_btree_node_get_nchildren(btree, node) - 1 <=
1378                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1379                                 path[level].bp_op = nilfs_btree_shrink;
1380                                 stats->bs_nblocks += 2;
1381                         } else {
1382                                 path[level].bp_op = nilfs_btree_do_delete;
1383                                 stats->bs_nblocks++;
1384                         }
1385
1386                         goto out;
1387
1388                 }
1389         }
1390
1391         node = nilfs_btree_get_root(btree);
1392         path[level].bp_oldreq.bpr_ptr =
1393                 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1394         if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
1395                 ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
1396                         &btree->bt_bmap, &path[level].bp_oldreq);
1397                 if (ret < 0)
1398                         goto err_out_child_node;
1399         }
1400         /* child of the root node is deleted */
1401         path[level].bp_op = nilfs_btree_do_delete;
1402         stats->bs_nblocks++;
1403
1404         /* success */
1405  out:
1406         *levelp = level;
1407         return ret;
1408
1409         /* error */
1410  err_out_curr_node:
1411         if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
1412                 btree->bt_bmap.b_pops->bpop_abort_end_ptr(
1413                         &btree->bt_bmap, &path[level].bp_oldreq);
1414  err_out_child_node:
1415         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1416                 brelse(path[level].bp_sib_bh);
1417                 if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
1418                         btree->bt_bmap.b_pops->bpop_abort_end_ptr(
1419                                 &btree->bt_bmap, &path[level].bp_oldreq);
1420         }
1421         *levelp = level;
1422         stats->bs_nblocks = 0;
1423         return ret;
1424 }
1425
1426 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1427                                       struct nilfs_btree_path *path,
1428                                       int maxlevel)
1429 {
1430         int level;
1431
1432         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1433                 if (btree->bt_bmap.b_pops->bpop_commit_end_ptr != NULL)
1434                         btree->bt_bmap.b_pops->bpop_commit_end_ptr(
1435                                 &btree->bt_bmap, &path[level].bp_oldreq);
1436                 path[level].bp_op(btree, path, level, NULL, NULL);
1437         }
1438
1439         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1440                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1441 }
1442
1443 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1444
1445 {
1446         struct nilfs_btree *btree;
1447         struct nilfs_btree_path *path;
1448         struct nilfs_bmap_stats stats;
1449         int level, ret;
1450
1451         btree = (struct nilfs_btree *)bmap;
1452         path = nilfs_btree_alloc_path(btree);
1453         if (path == NULL)
1454                 return -ENOMEM;
1455         nilfs_btree_init_path(btree, path);
1456         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1457                                     NILFS_BTREE_LEVEL_NODE_MIN);
1458         if (ret < 0)
1459                 goto out;
1460
1461         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats);
1462         if (ret < 0)
1463                 goto out;
1464         nilfs_btree_commit_delete(btree, path, level);
1465         nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1466
1467 out:
1468         nilfs_btree_clear_path(btree, path);
1469         nilfs_btree_free_path(btree, path);
1470         return ret;
1471 }
1472
1473 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1474 {
1475         struct nilfs_btree *btree;
1476         struct nilfs_btree_path *path;
1477         int ret;
1478
1479         btree = (struct nilfs_btree *)bmap;
1480         path = nilfs_btree_alloc_path(btree);
1481         if (path == NULL)
1482                 return -ENOMEM;
1483         nilfs_btree_init_path(btree, path);
1484
1485         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1486
1487         nilfs_btree_clear_path(btree, path);
1488         nilfs_btree_free_path(btree, path);
1489
1490         return ret;
1491 }
1492
1493 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1494 {
1495         struct buffer_head *bh;
1496         struct nilfs_btree *btree;
1497         struct nilfs_btree_node *root, *node;
1498         __u64 maxkey, nextmaxkey;
1499         __u64 ptr;
1500         int nchildren, ret;
1501
1502         btree = (struct nilfs_btree *)bmap;
1503         root = nilfs_btree_get_root(btree);
1504         switch (nilfs_btree_height(btree)) {
1505         case 2:
1506                 bh = NULL;
1507                 node = root;
1508                 break;
1509         case 3:
1510                 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1511                 if (nchildren > 1)
1512                         return 0;
1513                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1514                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1515                 if (ret < 0)
1516                         return ret;
1517                 node = (struct nilfs_btree_node *)bh->b_data;
1518                 break;
1519         default:
1520                 return 0;
1521         }
1522
1523         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1524         maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1);
1525         nextmaxkey = (nchildren > 1) ?
1526                 nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0;
1527         if (bh != NULL)
1528                 brelse(bh);
1529
1530         return (maxkey == key) && (nextmaxkey < bmap->b_low);
1531 }
1532
1533 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1534                                    __u64 *keys, __u64 *ptrs, int nitems)
1535 {
1536         struct buffer_head *bh;
1537         struct nilfs_btree *btree;
1538         struct nilfs_btree_node *node, *root;
1539         __le64 *dkeys;
1540         __le64 *dptrs;
1541         __u64 ptr;
1542         int nchildren, i, ret;
1543
1544         btree = (struct nilfs_btree *)bmap;
1545         root = nilfs_btree_get_root(btree);
1546         switch (nilfs_btree_height(btree)) {
1547         case 2:
1548                 bh = NULL;
1549                 node = root;
1550                 break;
1551         case 3:
1552                 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1553                 WARN_ON(nchildren > 1);
1554                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1555                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1556                 if (ret < 0)
1557                         return ret;
1558                 node = (struct nilfs_btree_node *)bh->b_data;
1559                 break;
1560         default:
1561                 node = NULL;
1562                 return -EINVAL;
1563         }
1564
1565         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1566         if (nchildren < nitems)
1567                 nitems = nchildren;
1568         dkeys = nilfs_btree_node_dkeys(btree, node);
1569         dptrs = nilfs_btree_node_dptrs(btree, node);
1570         for (i = 0; i < nitems; i++) {
1571                 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1572                 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1573         }
1574
1575         if (bh != NULL)
1576                 brelse(bh);
1577
1578         return nitems;
1579 }
1580
1581 static int
1582 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1583                                        union nilfs_bmap_ptr_req *dreq,
1584                                        union nilfs_bmap_ptr_req *nreq,
1585                                        struct buffer_head **bhp,
1586                                        struct nilfs_bmap_stats *stats)
1587 {
1588         struct buffer_head *bh;
1589         struct nilfs_btree *btree;
1590         int ret;
1591
1592         btree = (struct nilfs_btree *)bmap;
1593         stats->bs_nblocks = 0;
1594
1595         /* for data */
1596         /* cannot find near ptr */
1597         if (btree->bt_ops->btop_find_target != NULL)
1598                 dreq->bpr_ptr
1599                         = btree->bt_ops->btop_find_target(btree, NULL, key);
1600         ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, dreq);
1601         if (ret < 0)
1602                 return ret;
1603
1604         *bhp = NULL;
1605         stats->bs_nblocks++;
1606         if (nreq != NULL) {
1607                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1608                 ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, nreq);
1609                 if (ret < 0)
1610                         goto err_out_dreq;
1611
1612                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1613                 if (ret < 0)
1614                         goto err_out_nreq;
1615
1616                 *bhp = bh;
1617                 stats->bs_nblocks++;
1618         }
1619
1620         /* success */
1621         return 0;
1622
1623         /* error */
1624  err_out_nreq:
1625         bmap->b_pops->bpop_abort_alloc_ptr(bmap, nreq);
1626  err_out_dreq:
1627         bmap->b_pops->bpop_abort_alloc_ptr(bmap, dreq);
1628         stats->bs_nblocks = 0;
1629         return ret;
1630
1631 }
1632
1633 static void
1634 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1635                                       __u64 key, __u64 ptr,
1636                                       const __u64 *keys, const __u64 *ptrs,
1637                                       int n, __u64 low, __u64 high,
1638                                       union nilfs_bmap_ptr_req *dreq,
1639                                       union nilfs_bmap_ptr_req *nreq,
1640                                       struct buffer_head *bh)
1641 {
1642         struct nilfs_btree *btree;
1643         struct nilfs_btree_node *node;
1644         __u64 tmpptr;
1645
1646         /* free resources */
1647         if (bmap->b_ops->bop_clear != NULL)
1648                 bmap->b_ops->bop_clear(bmap);
1649
1650         /* ptr must be a pointer to a buffer head. */
1651         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1652
1653         /* convert and insert */
1654         btree = (struct nilfs_btree *)bmap;
1655         nilfs_btree_init(bmap, low, high);
1656         if (nreq != NULL) {
1657                 bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
1658                 bmap->b_pops->bpop_commit_alloc_ptr(bmap, nreq);
1659
1660                 /* create child node at level 1 */
1661                 lock_buffer(bh);
1662                 node = (struct nilfs_btree_node *)bh->b_data;
1663                 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1664                 nilfs_btree_node_insert(btree, node,
1665                                         key, dreq->bpr_ptr, n);
1666                 if (!buffer_dirty(bh))
1667                         nilfs_btnode_mark_dirty(bh);
1668                 if (!nilfs_bmap_dirty(bmap))
1669                         nilfs_bmap_set_dirty(bmap);
1670
1671                 unlock_buffer(bh);
1672                 brelse(bh);
1673
1674                 /* create root node at level 2 */
1675                 node = nilfs_btree_get_root(btree);
1676                 tmpptr = nreq->bpr_ptr;
1677                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1678                                       2, 1, &keys[0], &tmpptr);
1679         } else {
1680                 bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
1681
1682                 /* create root node at level 1 */
1683                 node = nilfs_btree_get_root(btree);
1684                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1685                                       1, n, keys, ptrs);
1686                 nilfs_btree_node_insert(btree, node,
1687                                         key, dreq->bpr_ptr, n);
1688                 if (!nilfs_bmap_dirty(bmap))
1689                         nilfs_bmap_set_dirty(bmap);
1690         }
1691
1692         if (btree->bt_ops->btop_set_target != NULL)
1693                 btree->bt_ops->btop_set_target(btree, key, dreq->bpr_ptr);
1694 }
1695
1696 /**
1697  * nilfs_btree_convert_and_insert -
1698  * @bmap:
1699  * @key:
1700  * @ptr:
1701  * @keys:
1702  * @ptrs:
1703  * @n:
1704  * @low:
1705  * @high:
1706  */
1707 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1708                                    __u64 key, __u64 ptr,
1709                                    const __u64 *keys, const __u64 *ptrs,
1710                                    int n, __u64 low, __u64 high)
1711 {
1712         struct buffer_head *bh;
1713         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1714         struct nilfs_bmap_stats stats;
1715         int ret;
1716
1717         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1718                 di = &dreq;
1719                 ni = NULL;
1720         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1721                            1 << bmap->b_inode->i_blkbits)) {
1722                 di = &dreq;
1723                 ni = &nreq;
1724         } else {
1725                 di = NULL;
1726                 ni = NULL;
1727                 BUG();
1728         }
1729
1730         ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1731                                                      &stats);
1732         if (ret < 0)
1733                 return ret;
1734         nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1735                                               low, high, di, ni, bh);
1736         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1737         return 0;
1738 }
1739
1740 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1741                                    struct nilfs_btree_path *path,
1742                                    int level,
1743                                    struct buffer_head *bh)
1744 {
1745         while ((++level < nilfs_btree_height(btree) - 1) &&
1746                !buffer_dirty(path[level].bp_bh))
1747                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1748
1749         return 0;
1750 }
1751
1752 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1753                                         struct nilfs_btree_path *path,
1754                                         int level)
1755 {
1756         struct nilfs_btree_node *parent;
1757         int ret;
1758
1759         parent = nilfs_btree_get_node(btree, path, level + 1);
1760         path[level].bp_oldreq.bpr_ptr =
1761                 nilfs_btree_node_get_ptr(btree, parent,
1762                                          path[level + 1].bp_index);
1763         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1764         ret = nilfs_bmap_prepare_update(&btree->bt_bmap,
1765                                         &path[level].bp_oldreq,
1766                                         &path[level].bp_newreq);
1767         if (ret < 0)
1768                 return ret;
1769
1770         if (buffer_nilfs_node(path[level].bp_bh)) {
1771                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1772                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1773                 path[level].bp_ctxt.bh = path[level].bp_bh;
1774                 ret = nilfs_btnode_prepare_change_key(
1775                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1776                         &path[level].bp_ctxt);
1777                 if (ret < 0) {
1778                         nilfs_bmap_abort_update(&btree->bt_bmap,
1779                                                 &path[level].bp_oldreq,
1780                                                 &path[level].bp_newreq);
1781                         return ret;
1782                 }
1783         }
1784
1785         return 0;
1786 }
1787
1788 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1789                                         struct nilfs_btree_path *path,
1790                                         int level)
1791 {
1792         struct nilfs_btree_node *parent;
1793
1794         nilfs_bmap_commit_update(&btree->bt_bmap,
1795                                  &path[level].bp_oldreq,
1796                                  &path[level].bp_newreq);
1797
1798         if (buffer_nilfs_node(path[level].bp_bh)) {
1799                 nilfs_btnode_commit_change_key(
1800                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1801                         &path[level].bp_ctxt);
1802                 path[level].bp_bh = path[level].bp_ctxt.bh;
1803         }
1804         set_buffer_nilfs_volatile(path[level].bp_bh);
1805
1806         parent = nilfs_btree_get_node(btree, path, level + 1);
1807         nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1808                                  path[level].bp_newreq.bpr_ptr);
1809 }
1810
1811 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1812                                        struct nilfs_btree_path *path,
1813                                        int level)
1814 {
1815         nilfs_bmap_abort_update(&btree->bt_bmap,
1816                                 &path[level].bp_oldreq,
1817                                 &path[level].bp_newreq);
1818         if (buffer_nilfs_node(path[level].bp_bh))
1819                 nilfs_btnode_abort_change_key(
1820                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1821                         &path[level].bp_ctxt);
1822 }
1823
1824 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1825                                            struct nilfs_btree_path *path,
1826                                            int minlevel,
1827                                            int *maxlevelp)
1828 {
1829         int level, ret;
1830
1831         level = minlevel;
1832         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1833                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1834                 if (ret < 0)
1835                         return ret;
1836         }
1837         while ((++level < nilfs_btree_height(btree) - 1) &&
1838                !buffer_dirty(path[level].bp_bh)) {
1839
1840                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1841                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1842                 if (ret < 0)
1843                         goto out;
1844         }
1845
1846         /* success */
1847         *maxlevelp = level - 1;
1848         return 0;
1849
1850         /* error */
1851  out:
1852         while (--level > minlevel)
1853                 nilfs_btree_abort_update_v(btree, path, level);
1854         if (!buffer_nilfs_volatile(path[level].bp_bh))
1855                 nilfs_btree_abort_update_v(btree, path, level);
1856         return ret;
1857 }
1858
1859 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1860                                            struct nilfs_btree_path *path,
1861                                            int minlevel,
1862                                            int maxlevel,
1863                                            struct buffer_head *bh)
1864 {
1865         int level;
1866
1867         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1868                 nilfs_btree_commit_update_v(btree, path, minlevel);
1869
1870         for (level = minlevel + 1; level <= maxlevel; level++)
1871                 nilfs_btree_commit_update_v(btree, path, level);
1872 }
1873
1874 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1875                                    struct nilfs_btree_path *path,
1876                                    int level,
1877                                    struct buffer_head *bh)
1878 {
1879         int maxlevel, ret;
1880         struct nilfs_btree_node *parent;
1881         __u64 ptr;
1882
1883         get_bh(bh);
1884         path[level].bp_bh = bh;
1885         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel);
1886         if (ret < 0)
1887                 goto out;
1888
1889         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1890                 parent = nilfs_btree_get_node(btree, path, level + 1);
1891                 ptr = nilfs_btree_node_get_ptr(btree, parent,
1892                                                path[level + 1].bp_index);
1893                 ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr);
1894                 if (ret < 0)
1895                         goto out;
1896         }
1897
1898         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh);
1899
1900  out:
1901         brelse(path[level].bp_bh);
1902         path[level].bp_bh = NULL;
1903         return ret;
1904 }
1905
1906 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1907                                  struct buffer_head *bh)
1908 {
1909         struct nilfs_btree *btree;
1910         struct nilfs_btree_path *path;
1911         struct nilfs_btree_node *node;
1912         __u64 key;
1913         int level, ret;
1914
1915         WARN_ON(!buffer_dirty(bh));
1916
1917         btree = (struct nilfs_btree *)bmap;
1918         path = nilfs_btree_alloc_path(btree);
1919         if (path == NULL)
1920                 return -ENOMEM;
1921         nilfs_btree_init_path(btree, path);
1922
1923         if (buffer_nilfs_node(bh)) {
1924                 node = (struct nilfs_btree_node *)bh->b_data;
1925                 key = nilfs_btree_node_get_key(btree, node, 0);
1926                 level = nilfs_btree_node_get_level(btree, node);
1927         } else {
1928                 key = nilfs_bmap_data_get_key(bmap, bh);
1929                 level = NILFS_BTREE_LEVEL_DATA;
1930         }
1931
1932         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1933         if (ret < 0) {
1934                 if (unlikely(ret == -ENOENT))
1935                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1936                                __func__, (unsigned long long)key, level);
1937                 goto out;
1938         }
1939
1940         ret = btree->bt_ops->btop_propagate(btree, path, level, bh);
1941
1942  out:
1943         nilfs_btree_clear_path(btree, path);
1944         nilfs_btree_free_path(btree, path);
1945
1946         return ret;
1947 }
1948
1949 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1950                                     struct buffer_head *bh)
1951 {
1952         return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr);
1953 }
1954
1955 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1956                                          struct list_head *lists,
1957                                          struct buffer_head *bh)
1958 {
1959         struct list_head *head;
1960         struct buffer_head *cbh;
1961         struct nilfs_btree_node *node, *cnode;
1962         __u64 key, ckey;
1963         int level;
1964
1965         get_bh(bh);
1966         node = (struct nilfs_btree_node *)bh->b_data;
1967         key = nilfs_btree_node_get_key(btree, node, 0);
1968         level = nilfs_btree_node_get_level(btree, node);
1969         list_for_each(head, &lists[level]) {
1970                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1971                 cnode = (struct nilfs_btree_node *)cbh->b_data;
1972                 ckey = nilfs_btree_node_get_key(btree, cnode, 0);
1973                 if (key < ckey)
1974                         break;
1975         }
1976         list_add_tail(&bh->b_assoc_buffers, head);
1977 }
1978
1979 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
1980                                              struct list_head *listp)
1981 {
1982         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1983         struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
1984         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1985         struct pagevec pvec;
1986         struct buffer_head *bh, *head;
1987         pgoff_t index = 0;
1988         int level, i;
1989
1990         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1991              level < NILFS_BTREE_LEVEL_MAX;
1992              level++)
1993                 INIT_LIST_HEAD(&lists[level]);
1994
1995         pagevec_init(&pvec, 0);
1996
1997         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
1998                                   PAGEVEC_SIZE)) {
1999                 for (i = 0; i < pagevec_count(&pvec); i++) {
2000                         bh = head = page_buffers(pvec.pages[i]);
2001                         do {
2002                                 if (buffer_dirty(bh))
2003                                         nilfs_btree_add_dirty_buffer(btree,
2004                                                                      lists, bh);
2005                         } while ((bh = bh->b_this_page) != head);
2006                 }
2007                 pagevec_release(&pvec);
2008                 cond_resched();
2009         }
2010
2011         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2012              level < NILFS_BTREE_LEVEL_MAX;
2013              level++)
2014                 list_splice(&lists[level], listp->prev);
2015 }
2016
2017 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2018                                 struct nilfs_btree_path *path,
2019                                 int level,
2020                                 struct buffer_head **bh,
2021                                 sector_t blocknr,
2022                                 union nilfs_binfo *binfo)
2023 {
2024         struct nilfs_btree_node *parent;
2025         __u64 key;
2026         __u64 ptr;
2027         int ret;
2028
2029         parent = nilfs_btree_get_node(btree, path, level + 1);
2030         ptr = nilfs_btree_node_get_ptr(btree, parent,
2031                                        path[level + 1].bp_index);
2032         if (buffer_nilfs_node(*bh)) {
2033                 path[level].bp_ctxt.oldkey = ptr;
2034                 path[level].bp_ctxt.newkey = blocknr;
2035                 path[level].bp_ctxt.bh = *bh;
2036                 ret = nilfs_btnode_prepare_change_key(
2037                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2038                         &path[level].bp_ctxt);
2039                 if (ret < 0)
2040                         return ret;
2041                 nilfs_btnode_commit_change_key(
2042                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2043                         &path[level].bp_ctxt);
2044                 *bh = path[level].bp_ctxt.bh;
2045         }
2046
2047         nilfs_btree_node_set_ptr(btree, parent,
2048                                  path[level + 1].bp_index, blocknr);
2049
2050         key = nilfs_btree_node_get_key(btree, parent,
2051                                        path[level + 1].bp_index);
2052         /* on-disk format */
2053         binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2054         binfo->bi_dat.bi_level = level;
2055
2056         return 0;
2057 }
2058
2059 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2060                                 struct nilfs_btree_path *path,
2061                                 int level,
2062                                 struct buffer_head **bh,
2063                                 sector_t blocknr,
2064                                 union nilfs_binfo *binfo)
2065 {
2066         struct nilfs_btree_node *parent;
2067         __u64 key;
2068         __u64 ptr;
2069         union nilfs_bmap_ptr_req req;
2070         int ret;
2071
2072         parent = nilfs_btree_get_node(btree, path, level + 1);
2073         ptr = nilfs_btree_node_get_ptr(btree, parent,
2074                                        path[level + 1].bp_index);
2075         req.bpr_ptr = ptr;
2076         ret = nilfs_bmap_start_v(&btree->bt_bmap, &req, blocknr);
2077         if (unlikely(ret < 0))
2078                 return ret;
2079
2080         key = nilfs_btree_node_get_key(btree, parent,
2081                                        path[level + 1].bp_index);
2082         /* on-disk format */
2083         binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2084         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2085
2086         return 0;
2087 }
2088
2089 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2090                               struct buffer_head **bh,
2091                               sector_t blocknr,
2092                               union nilfs_binfo *binfo)
2093 {
2094         struct nilfs_btree *btree;
2095         struct nilfs_btree_path *path;
2096         struct nilfs_btree_node *node;
2097         __u64 key;
2098         int level, ret;
2099
2100         btree = (struct nilfs_btree *)bmap;
2101         path = nilfs_btree_alloc_path(btree);
2102         if (path == NULL)
2103                 return -ENOMEM;
2104         nilfs_btree_init_path(btree, path);
2105
2106         if (buffer_nilfs_node(*bh)) {
2107                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2108                 key = nilfs_btree_node_get_key(btree, node, 0);
2109                 level = nilfs_btree_node_get_level(btree, node);
2110         } else {
2111                 key = nilfs_bmap_data_get_key(bmap, *bh);
2112                 level = NILFS_BTREE_LEVEL_DATA;
2113         }
2114
2115         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2116         if (ret < 0) {
2117                 WARN_ON(ret == -ENOENT);
2118                 goto out;
2119         }
2120
2121         ret = btree->bt_ops->btop_assign(btree, path, level, bh,
2122                                             blocknr, binfo);
2123
2124  out:
2125         nilfs_btree_clear_path(btree, path);
2126         nilfs_btree_free_path(btree, path);
2127
2128         return ret;
2129 }
2130
2131 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2132                                  struct buffer_head **bh,
2133                                  sector_t blocknr,
2134                                  union nilfs_binfo *binfo)
2135 {
2136         struct nilfs_btree *btree;
2137         struct nilfs_btree_node *node;
2138         __u64 key;
2139         int ret;
2140
2141         btree = (struct nilfs_btree *)bmap;
2142         ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr);
2143         if (ret < 0)
2144                 return ret;
2145
2146         if (buffer_nilfs_node(*bh)) {
2147                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2148                 key = nilfs_btree_node_get_key(btree, node, 0);
2149         } else
2150                 key = nilfs_bmap_data_get_key(bmap, *bh);
2151
2152         /* on-disk format */
2153         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2154         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2155
2156         return 0;
2157 }
2158
2159 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2160 {
2161         struct buffer_head *bh;
2162         struct nilfs_btree *btree;
2163         struct nilfs_btree_path *path;
2164         __u64 ptr;
2165         int ret;
2166
2167         btree = (struct nilfs_btree *)bmap;
2168         path = nilfs_btree_alloc_path(btree);
2169         if (path == NULL)
2170                 return -ENOMEM;
2171         nilfs_btree_init_path(btree, path);
2172
2173         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2174         if (ret < 0) {
2175                 WARN_ON(ret == -ENOENT);
2176                 goto out;
2177         }
2178         ret = nilfs_btree_get_block(btree, ptr, &bh);
2179         if (ret < 0) {
2180                 WARN_ON(ret == -ENOENT);
2181                 goto out;
2182         }
2183
2184         if (!buffer_dirty(bh))
2185                 nilfs_btnode_mark_dirty(bh);
2186         brelse(bh);
2187         if (!nilfs_bmap_dirty(&btree->bt_bmap))
2188                 nilfs_bmap_set_dirty(&btree->bt_bmap);
2189
2190  out:
2191         nilfs_btree_clear_path(btree, path);
2192         nilfs_btree_free_path(btree, path);
2193         return ret;
2194 }
2195
2196 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2197         .bop_lookup             =       nilfs_btree_lookup,
2198         .bop_insert             =       nilfs_btree_insert,
2199         .bop_delete             =       nilfs_btree_delete,
2200         .bop_clear              =       NULL,
2201
2202         .bop_propagate          =       nilfs_btree_propagate,
2203
2204         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2205
2206         .bop_assign             =       nilfs_btree_assign,
2207         .bop_mark               =       nilfs_btree_mark,
2208
2209         .bop_last_key           =       nilfs_btree_last_key,
2210         .bop_check_insert       =       NULL,
2211         .bop_check_delete       =       nilfs_btree_check_delete,
2212         .bop_gather_data        =       nilfs_btree_gather_data,
2213 };
2214
2215 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2216         .bop_lookup             =       NULL,
2217         .bop_insert             =       NULL,
2218         .bop_delete             =       NULL,
2219         .bop_clear              =       NULL,
2220
2221         .bop_propagate          =       nilfs_btree_propagate_gc,
2222
2223         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2224
2225         .bop_assign             =       nilfs_btree_assign_gc,
2226         .bop_mark               =       NULL,
2227
2228         .bop_last_key           =       NULL,
2229         .bop_check_insert       =       NULL,
2230         .bop_check_delete       =       NULL,
2231         .bop_gather_data        =       NULL,
2232 };
2233
2234 static const struct nilfs_btree_operations nilfs_btree_ops_v = {
2235         .btop_find_target       =       nilfs_btree_find_target_v,
2236         .btop_set_target        =       nilfs_btree_set_target_v,
2237         .btop_propagate         =       nilfs_btree_propagate_v,
2238         .btop_assign            =       nilfs_btree_assign_v,
2239 };
2240
2241 static const struct nilfs_btree_operations nilfs_btree_ops_p = {
2242         .btop_find_target       =       NULL,
2243         .btop_set_target        =       NULL,
2244         .btop_propagate         =       nilfs_btree_propagate_p,
2245         .btop_assign            =       nilfs_btree_assign_p,
2246 };
2247
2248 int nilfs_btree_init(struct nilfs_bmap *bmap, __u64 low, __u64 high)
2249 {
2250         struct nilfs_btree *btree;
2251
2252         btree = (struct nilfs_btree *)bmap;
2253         bmap->b_ops = &nilfs_btree_ops;
2254         bmap->b_low = low;
2255         bmap->b_high = high;
2256         switch (bmap->b_inode->i_ino) {
2257         case NILFS_DAT_INO:
2258                 btree->bt_ops = &nilfs_btree_ops_p;
2259                 break;
2260         default:
2261                 btree->bt_ops = &nilfs_btree_ops_v;
2262                 break;
2263         }
2264
2265         return 0;
2266 }
2267
2268 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2269 {
2270         bmap->b_low = NILFS_BMAP_LARGE_LOW;
2271         bmap->b_high = NILFS_BMAP_LARGE_HIGH;
2272         bmap->b_ops = &nilfs_btree_ops_gc;
2273 }