Merge commit 'origin/master' into next
[pandora-kernel.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33
34 /**
35  * struct nilfs_btree_path - A path on which B-tree operations are executed
36  * @bp_bh: buffer head of node block
37  * @bp_sib_bh: buffer head of sibling node block
38  * @bp_index: index of child node
39  * @bp_oldreq: ptr end request for old ptr
40  * @bp_newreq: ptr alloc request for new ptr
41  * @bp_op: rebalance operation
42  */
43 struct nilfs_btree_path {
44         struct buffer_head *bp_bh;
45         struct buffer_head *bp_sib_bh;
46         int bp_index;
47         union nilfs_bmap_ptr_req bp_oldreq;
48         union nilfs_bmap_ptr_req bp_newreq;
49         struct nilfs_btnode_chkey_ctxt bp_ctxt;
50         void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
51                       int, __u64 *, __u64 *);
52 };
53
54 /*
55  * B-tree path operations
56  */
57
58 static struct kmem_cache *nilfs_btree_path_cache;
59
60 int __init nilfs_btree_path_cache_init(void)
61 {
62         nilfs_btree_path_cache =
63                 kmem_cache_create("nilfs2_btree_path_cache",
64                                   sizeof(struct nilfs_btree_path) *
65                                   NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
66         return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
67 }
68
69 void nilfs_btree_path_cache_destroy(void)
70 {
71         kmem_cache_destroy(nilfs_btree_path_cache);
72 }
73
74 static inline struct nilfs_btree_path *
75 nilfs_btree_alloc_path(const struct nilfs_btree *btree)
76 {
77         return (struct nilfs_btree_path *)
78                 kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
79 }
80
81 static inline void nilfs_btree_free_path(const struct nilfs_btree *btree,
82                                          struct nilfs_btree_path *path)
83 {
84         kmem_cache_free(nilfs_btree_path_cache, path);
85 }
86
87 static void nilfs_btree_init_path(const struct nilfs_btree *btree,
88                                   struct nilfs_btree_path *path)
89 {
90         int level;
91
92         for (level = NILFS_BTREE_LEVEL_DATA;
93              level < NILFS_BTREE_LEVEL_MAX;
94              level++) {
95                 path[level].bp_bh = NULL;
96                 path[level].bp_sib_bh = NULL;
97                 path[level].bp_index = 0;
98                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
99                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
100                 path[level].bp_op = NULL;
101         }
102 }
103
104 static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
105                                    struct nilfs_btree_path *path)
106 {
107         int level;
108
109         for (level = NILFS_BTREE_LEVEL_DATA;
110              level < NILFS_BTREE_LEVEL_MAX;
111              level++) {
112                 if (path[level].bp_bh != NULL) {
113                         brelse(path[level].bp_bh);
114                         path[level].bp_bh = NULL;
115                 }
116                 /* sib_bh is released or deleted by prepare or commit
117                  * operations. */
118                 path[level].bp_sib_bh = NULL;
119                 path[level].bp_index = 0;
120                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
121                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
122                 path[level].bp_op = NULL;
123         }
124 }
125
126 /*
127  * B-tree node operations
128  */
129 static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
130                                  struct buffer_head **bhp)
131 {
132         struct address_space *btnc =
133                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
134         return nilfs_btnode_get(btnc, ptr, 0, bhp, 0);
135 }
136
137 static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
138                                      __u64 ptr, struct buffer_head **bhp)
139 {
140         struct address_space *btnc =
141                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
142         int ret;
143
144         ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1);
145         if (!ret)
146                 set_buffer_nilfs_volatile(*bhp);
147         return ret;
148 }
149
150 static inline int
151 nilfs_btree_node_get_flags(const struct nilfs_btree *btree,
152                            const struct nilfs_btree_node *node)
153 {
154         return node->bn_flags;
155 }
156
157 static inline void
158 nilfs_btree_node_set_flags(struct nilfs_btree *btree,
159                            struct nilfs_btree_node *node,
160                            int flags)
161 {
162         node->bn_flags = flags;
163 }
164
165 static inline int nilfs_btree_node_root(const struct nilfs_btree *btree,
166                                         const struct nilfs_btree_node *node)
167 {
168         return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT;
169 }
170
171 static inline int
172 nilfs_btree_node_get_level(const struct nilfs_btree *btree,
173                            const struct nilfs_btree_node *node)
174 {
175         return node->bn_level;
176 }
177
178 static inline void
179 nilfs_btree_node_set_level(struct nilfs_btree *btree,
180                            struct nilfs_btree_node *node,
181                            int level)
182 {
183         node->bn_level = level;
184 }
185
186 static inline int
187 nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree,
188                                const struct nilfs_btree_node *node)
189 {
190         return le16_to_cpu(node->bn_nchildren);
191 }
192
193 static inline void
194 nilfs_btree_node_set_nchildren(struct nilfs_btree *btree,
195                                struct nilfs_btree_node *node,
196                                int nchildren)
197 {
198         node->bn_nchildren = cpu_to_le16(nchildren);
199 }
200
201 static inline int
202 nilfs_btree_node_size(const struct nilfs_btree *btree)
203 {
204         return 1 << btree->bt_bmap.b_inode->i_blkbits;
205 }
206
207 static inline int
208 nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree,
209                                const struct nilfs_btree_node *node)
210 {
211         return nilfs_btree_node_root(btree, node) ?
212                 NILFS_BTREE_ROOT_NCHILDREN_MIN :
213                 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
214 }
215
216 static inline int
217 nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree,
218                                const struct nilfs_btree_node *node)
219 {
220         return nilfs_btree_node_root(btree, node) ?
221                 NILFS_BTREE_ROOT_NCHILDREN_MAX :
222                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
223 }
224
225 static inline __le64 *
226 nilfs_btree_node_dkeys(const struct nilfs_btree *btree,
227                        const struct nilfs_btree_node *node)
228 {
229         return (__le64 *)((char *)(node + 1) +
230                           (nilfs_btree_node_root(btree, node) ?
231                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
232 }
233
234 static inline __le64 *
235 nilfs_btree_node_dptrs(const struct nilfs_btree *btree,
236                        const struct nilfs_btree_node *node)
237 {
238         return (__le64 *)(nilfs_btree_node_dkeys(btree, node) +
239                           nilfs_btree_node_nchildren_max(btree, node));
240 }
241
242 static inline __u64
243 nilfs_btree_node_get_key(const struct nilfs_btree *btree,
244                          const struct nilfs_btree_node *node, int index)
245 {
246         return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) +
247                                         index));
248 }
249
250 static inline void
251 nilfs_btree_node_set_key(struct nilfs_btree *btree,
252                          struct nilfs_btree_node *node, int index, __u64 key)
253 {
254         *(nilfs_btree_node_dkeys(btree, node) + index) =
255                 nilfs_bmap_key_to_dkey(key);
256 }
257
258 static inline __u64
259 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
260                          const struct nilfs_btree_node *node,
261                          int index)
262 {
263         return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) +
264                                         index));
265 }
266
267 static inline void
268 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
269                          struct nilfs_btree_node *node,
270                          int index,
271                          __u64 ptr)
272 {
273         *(nilfs_btree_node_dptrs(btree, node) + index) =
274                 nilfs_bmap_ptr_to_dptr(ptr);
275 }
276
277 static void nilfs_btree_node_init(struct nilfs_btree *btree,
278                                   struct nilfs_btree_node *node,
279                                   int flags, int level, int nchildren,
280                                   const __u64 *keys, const __u64 *ptrs)
281 {
282         __le64 *dkeys;
283         __le64 *dptrs;
284         int i;
285
286         nilfs_btree_node_set_flags(btree, node, flags);
287         nilfs_btree_node_set_level(btree, node, level);
288         nilfs_btree_node_set_nchildren(btree, node, nchildren);
289
290         dkeys = nilfs_btree_node_dkeys(btree, node);
291         dptrs = nilfs_btree_node_dptrs(btree, node);
292         for (i = 0; i < nchildren; i++) {
293                 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
294                 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
295         }
296 }
297
298 /* Assume the buffer heads corresponding to left and right are locked. */
299 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
300                                        struct nilfs_btree_node *left,
301                                        struct nilfs_btree_node *right,
302                                        int n)
303 {
304         __le64 *ldkeys, *rdkeys;
305         __le64 *ldptrs, *rdptrs;
306         int lnchildren, rnchildren;
307
308         ldkeys = nilfs_btree_node_dkeys(btree, left);
309         ldptrs = nilfs_btree_node_dptrs(btree, left);
310         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
311
312         rdkeys = nilfs_btree_node_dkeys(btree, right);
313         rdptrs = nilfs_btree_node_dptrs(btree, right);
314         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
315
316         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
317         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
318         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
319         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
320
321         lnchildren += n;
322         rnchildren -= n;
323         nilfs_btree_node_set_nchildren(btree, left, lnchildren);
324         nilfs_btree_node_set_nchildren(btree, right, rnchildren);
325 }
326
327 /* Assume that the buffer heads corresponding to left and right are locked. */
328 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
329                                         struct nilfs_btree_node *left,
330                                         struct nilfs_btree_node *right,
331                                         int n)
332 {
333         __le64 *ldkeys, *rdkeys;
334         __le64 *ldptrs, *rdptrs;
335         int lnchildren, rnchildren;
336
337         ldkeys = nilfs_btree_node_dkeys(btree, left);
338         ldptrs = nilfs_btree_node_dptrs(btree, left);
339         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
340
341         rdkeys = nilfs_btree_node_dkeys(btree, right);
342         rdptrs = nilfs_btree_node_dptrs(btree, right);
343         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
344
345         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
346         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
347         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
348         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
349
350         lnchildren -= n;
351         rnchildren += n;
352         nilfs_btree_node_set_nchildren(btree, left, lnchildren);
353         nilfs_btree_node_set_nchildren(btree, right, rnchildren);
354 }
355
356 /* Assume that the buffer head corresponding to node is locked. */
357 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
358                                     struct nilfs_btree_node *node,
359                                     __u64 key, __u64 ptr, int index)
360 {
361         __le64 *dkeys;
362         __le64 *dptrs;
363         int nchildren;
364
365         dkeys = nilfs_btree_node_dkeys(btree, node);
366         dptrs = nilfs_btree_node_dptrs(btree, node);
367         nchildren = nilfs_btree_node_get_nchildren(btree, node);
368         if (index < nchildren) {
369                 memmove(dkeys + index + 1, dkeys + index,
370                         (nchildren - index) * sizeof(*dkeys));
371                 memmove(dptrs + index + 1, dptrs + index,
372                         (nchildren - index) * sizeof(*dptrs));
373         }
374         dkeys[index] = nilfs_bmap_key_to_dkey(key);
375         dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
376         nchildren++;
377         nilfs_btree_node_set_nchildren(btree, node, nchildren);
378 }
379
380 /* Assume that the buffer head corresponding to node is locked. */
381 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
382                                     struct nilfs_btree_node *node,
383                                     __u64 *keyp, __u64 *ptrp, int index)
384 {
385         __u64 key;
386         __u64 ptr;
387         __le64 *dkeys;
388         __le64 *dptrs;
389         int nchildren;
390
391         dkeys = nilfs_btree_node_dkeys(btree, node);
392         dptrs = nilfs_btree_node_dptrs(btree, node);
393         key = nilfs_bmap_dkey_to_key(dkeys[index]);
394         ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
395         nchildren = nilfs_btree_node_get_nchildren(btree, node);
396         if (keyp != NULL)
397                 *keyp = key;
398         if (ptrp != NULL)
399                 *ptrp = ptr;
400
401         if (index < nchildren - 1) {
402                 memmove(dkeys + index, dkeys + index + 1,
403                         (nchildren - index - 1) * sizeof(*dkeys));
404                 memmove(dptrs + index, dptrs + index + 1,
405                         (nchildren - index - 1) * sizeof(*dptrs));
406         }
407         nchildren--;
408         nilfs_btree_node_set_nchildren(btree, node, nchildren);
409 }
410
411 static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
412                                    const struct nilfs_btree_node *node,
413                                    __u64 key, int *indexp)
414 {
415         __u64 nkey;
416         int index, low, high, s;
417
418         /* binary search */
419         low = 0;
420         high = nilfs_btree_node_get_nchildren(btree, node) - 1;
421         index = 0;
422         s = 0;
423         while (low <= high) {
424                 index = (low + high) / 2;
425                 nkey = nilfs_btree_node_get_key(btree, node, index);
426                 if (nkey == key) {
427                         s = 0;
428                         goto out;
429                 } else if (nkey < key) {
430                         low = index + 1;
431                         s = -1;
432                 } else {
433                         high = index - 1;
434                         s = 1;
435                 }
436         }
437
438         /* adjust index */
439         if (nilfs_btree_node_get_level(btree, node) >
440             NILFS_BTREE_LEVEL_NODE_MIN) {
441                 if ((s > 0) && (index > 0))
442                         index--;
443         } else if (s < 0)
444                 index++;
445
446  out:
447         *indexp = index;
448
449         return s == 0;
450 }
451
452 static inline struct nilfs_btree_node *
453 nilfs_btree_get_root(const struct nilfs_btree *btree)
454 {
455         return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
456 }
457
458 static inline struct nilfs_btree_node *
459 nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree,
460                              const struct nilfs_btree_path *path,
461                              int level)
462 {
463         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
464 }
465
466 static inline struct nilfs_btree_node *
467 nilfs_btree_get_sib_node(const struct nilfs_btree *btree,
468                          const struct nilfs_btree_path *path,
469                          int level)
470 {
471         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
472 }
473
474 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
475 {
476         return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree))
477                 + 1;
478 }
479
480 static inline struct nilfs_btree_node *
481 nilfs_btree_get_node(const struct nilfs_btree *btree,
482                      const struct nilfs_btree_path *path,
483                      int level)
484 {
485         return (level == nilfs_btree_height(btree) - 1) ?
486                 nilfs_btree_get_root(btree) :
487                 nilfs_btree_get_nonroot_node(btree, path, level);
488 }
489
490 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
491                                  struct nilfs_btree_path *path,
492                                  __u64 key, __u64 *ptrp, int minlevel)
493 {
494         struct nilfs_btree_node *node;
495         __u64 ptr;
496         int level, index, found, ret;
497
498         node = nilfs_btree_get_root(btree);
499         level = nilfs_btree_node_get_level(btree, node);
500         if ((level < minlevel) ||
501             (nilfs_btree_node_get_nchildren(btree, node) <= 0))
502                 return -ENOENT;
503
504         found = nilfs_btree_node_lookup(btree, node, key, &index);
505         ptr = nilfs_btree_node_get_ptr(btree, node, index);
506         path[level].bp_bh = NULL;
507         path[level].bp_index = index;
508
509         for (level--; level >= minlevel; level--) {
510                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
511                 if (ret < 0)
512                         return ret;
513                 node = nilfs_btree_get_nonroot_node(btree, path, level);
514                 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
515                 if (!found)
516                         found = nilfs_btree_node_lookup(btree, node, key,
517                                                         &index);
518                 else
519                         index = 0;
520                 if (index < nilfs_btree_node_nchildren_max(btree, node))
521                         ptr = nilfs_btree_node_get_ptr(btree, node, index);
522                 else {
523                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
524                         /* insert */
525                         ptr = NILFS_BMAP_INVALID_PTR;
526                 }
527                 path[level].bp_index = index;
528         }
529         if (!found)
530                 return -ENOENT;
531
532         if (ptrp != NULL)
533                 *ptrp = ptr;
534
535         return 0;
536 }
537
538 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
539                                       struct nilfs_btree_path *path,
540                                       __u64 *keyp, __u64 *ptrp)
541 {
542         struct nilfs_btree_node *node;
543         __u64 ptr;
544         int index, level, ret;
545
546         node = nilfs_btree_get_root(btree);
547         index = nilfs_btree_node_get_nchildren(btree, node) - 1;
548         if (index < 0)
549                 return -ENOENT;
550         level = nilfs_btree_node_get_level(btree, node);
551         ptr = nilfs_btree_node_get_ptr(btree, node, index);
552         path[level].bp_bh = NULL;
553         path[level].bp_index = index;
554
555         for (level--; level > 0; level--) {
556                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
557                 if (ret < 0)
558                         return ret;
559                 node = nilfs_btree_get_nonroot_node(btree, path, level);
560                 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
561                 index = nilfs_btree_node_get_nchildren(btree, node) - 1;
562                 ptr = nilfs_btree_node_get_ptr(btree, node, index);
563                 path[level].bp_index = index;
564         }
565
566         if (keyp != NULL)
567                 *keyp = nilfs_btree_node_get_key(btree, node, index);
568         if (ptrp != NULL)
569                 *ptrp = ptr;
570
571         return 0;
572 }
573
574 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
575                               __u64 key, int level, __u64 *ptrp)
576 {
577         struct nilfs_btree *btree;
578         struct nilfs_btree_path *path;
579         __u64 ptr;
580         int ret;
581
582         btree = (struct nilfs_btree *)bmap;
583         path = nilfs_btree_alloc_path(btree);
584         if (path == NULL)
585                 return -ENOMEM;
586         nilfs_btree_init_path(btree, path);
587
588         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
589
590         if (ptrp != NULL)
591                 *ptrp = ptr;
592
593         nilfs_btree_clear_path(btree, path);
594         nilfs_btree_free_path(btree, path);
595
596         return ret;
597 }
598
599 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
600                                      __u64 key, __u64 *ptrp, unsigned maxblocks)
601 {
602         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
603         struct nilfs_btree_path *path;
604         struct nilfs_btree_node *node;
605         struct inode *dat = NULL;
606         __u64 ptr, ptr2;
607         sector_t blocknr;
608         int level = NILFS_BTREE_LEVEL_NODE_MIN;
609         int ret, cnt, index, maxlevel;
610
611         path = nilfs_btree_alloc_path(btree);
612         if (path == NULL)
613                 return -ENOMEM;
614         nilfs_btree_init_path(btree, path);
615         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
616         if (ret < 0)
617                 goto out;
618
619         if (NILFS_BMAP_USE_VBN(bmap)) {
620                 dat = nilfs_bmap_get_dat(bmap);
621                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
622                 if (ret < 0)
623                         goto out;
624                 ptr = blocknr;
625         }
626         cnt = 1;
627         if (cnt == maxblocks)
628                 goto end;
629
630         maxlevel = nilfs_btree_height(btree) - 1;
631         node = nilfs_btree_get_node(btree, path, level);
632         index = path[level].bp_index + 1;
633         for (;;) {
634                 while (index < nilfs_btree_node_get_nchildren(btree, node)) {
635                         if (nilfs_btree_node_get_key(btree, node, index) !=
636                             key + cnt)
637                                 goto end;
638                         ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
639                         if (dat) {
640                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
641                                 if (ret < 0)
642                                         goto out;
643                                 ptr2 = blocknr;
644                         }
645                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
646                                 goto end;
647                         index++;
648                         continue;
649                 }
650                 if (level == maxlevel)
651                         break;
652
653                 /* look-up right sibling node */
654                 node = nilfs_btree_get_node(btree, path, level + 1);
655                 index = path[level + 1].bp_index + 1;
656                 if (index >= nilfs_btree_node_get_nchildren(btree, node) ||
657                     nilfs_btree_node_get_key(btree, node, index) != key + cnt)
658                         break;
659                 ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
660                 path[level + 1].bp_index = index;
661
662                 brelse(path[level].bp_bh);
663                 path[level].bp_bh = NULL;
664                 ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
665                 if (ret < 0)
666                         goto out;
667                 node = nilfs_btree_get_nonroot_node(btree, path, level);
668                 index = 0;
669                 path[level].bp_index = index;
670         }
671  end:
672         *ptrp = ptr;
673         ret = cnt;
674  out:
675         nilfs_btree_clear_path(btree, path);
676         nilfs_btree_free_path(btree, path);
677         return ret;
678 }
679
680 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
681                                     struct nilfs_btree_path *path,
682                                     int level, __u64 key)
683 {
684         if (level < nilfs_btree_height(btree) - 1) {
685                 do {
686                         lock_buffer(path[level].bp_bh);
687                         nilfs_btree_node_set_key(
688                                 btree,
689                                 nilfs_btree_get_nonroot_node(
690                                         btree, path, level),
691                                 path[level].bp_index, key);
692                         if (!buffer_dirty(path[level].bp_bh))
693                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
694                         unlock_buffer(path[level].bp_bh);
695                 } while ((path[level].bp_index == 0) &&
696                          (++level < nilfs_btree_height(btree) - 1));
697         }
698
699         /* root */
700         if (level == nilfs_btree_height(btree) - 1) {
701                 nilfs_btree_node_set_key(btree,
702                                          nilfs_btree_get_root(btree),
703                                          path[level].bp_index, key);
704         }
705 }
706
707 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
708                                   struct nilfs_btree_path *path,
709                                   int level, __u64 *keyp, __u64 *ptrp)
710 {
711         struct nilfs_btree_node *node;
712
713         if (level < nilfs_btree_height(btree) - 1) {
714                 lock_buffer(path[level].bp_bh);
715                 node = nilfs_btree_get_nonroot_node(btree, path, level);
716                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
717                                         path[level].bp_index);
718                 if (!buffer_dirty(path[level].bp_bh))
719                         nilfs_btnode_mark_dirty(path[level].bp_bh);
720                 unlock_buffer(path[level].bp_bh);
721
722                 if (path[level].bp_index == 0)
723                         nilfs_btree_promote_key(btree, path, level + 1,
724                                                 nilfs_btree_node_get_key(
725                                                         btree, node, 0));
726         } else {
727                 node = nilfs_btree_get_root(btree);
728                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
729                                         path[level].bp_index);
730         }
731 }
732
733 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
734                                    struct nilfs_btree_path *path,
735                                    int level, __u64 *keyp, __u64 *ptrp)
736 {
737         struct nilfs_btree_node *node, *left;
738         int nchildren, lnchildren, n, move;
739
740         lock_buffer(path[level].bp_bh);
741         lock_buffer(path[level].bp_sib_bh);
742
743         node = nilfs_btree_get_nonroot_node(btree, path, level);
744         left = nilfs_btree_get_sib_node(btree, path, level);
745         nchildren = nilfs_btree_node_get_nchildren(btree, node);
746         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
747         move = 0;
748
749         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
750         if (n > path[level].bp_index) {
751                 /* move insert point */
752                 n--;
753                 move = 1;
754         }
755
756         nilfs_btree_node_move_left(btree, left, node, n);
757
758         if (!buffer_dirty(path[level].bp_bh))
759                 nilfs_btnode_mark_dirty(path[level].bp_bh);
760         if (!buffer_dirty(path[level].bp_sib_bh))
761                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
762
763         unlock_buffer(path[level].bp_bh);
764         unlock_buffer(path[level].bp_sib_bh);
765
766         nilfs_btree_promote_key(btree, path, level + 1,
767                                 nilfs_btree_node_get_key(btree, node, 0));
768
769         if (move) {
770                 brelse(path[level].bp_bh);
771                 path[level].bp_bh = path[level].bp_sib_bh;
772                 path[level].bp_sib_bh = NULL;
773                 path[level].bp_index += lnchildren;
774                 path[level + 1].bp_index--;
775         } else {
776                 brelse(path[level].bp_sib_bh);
777                 path[level].bp_sib_bh = NULL;
778                 path[level].bp_index -= n;
779         }
780
781         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
782 }
783
784 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
785                                     struct nilfs_btree_path *path,
786                                     int level, __u64 *keyp, __u64 *ptrp)
787 {
788         struct nilfs_btree_node *node, *right;
789         int nchildren, rnchildren, n, move;
790
791         lock_buffer(path[level].bp_bh);
792         lock_buffer(path[level].bp_sib_bh);
793
794         node = nilfs_btree_get_nonroot_node(btree, path, level);
795         right = nilfs_btree_get_sib_node(btree, path, level);
796         nchildren = nilfs_btree_node_get_nchildren(btree, node);
797         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
798         move = 0;
799
800         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
801         if (n > nchildren - path[level].bp_index) {
802                 /* move insert point */
803                 n--;
804                 move = 1;
805         }
806
807         nilfs_btree_node_move_right(btree, node, right, n);
808
809         if (!buffer_dirty(path[level].bp_bh))
810                 nilfs_btnode_mark_dirty(path[level].bp_bh);
811         if (!buffer_dirty(path[level].bp_sib_bh))
812                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
813
814         unlock_buffer(path[level].bp_bh);
815         unlock_buffer(path[level].bp_sib_bh);
816
817         path[level + 1].bp_index++;
818         nilfs_btree_promote_key(btree, path, level + 1,
819                                 nilfs_btree_node_get_key(btree, right, 0));
820         path[level + 1].bp_index--;
821
822         if (move) {
823                 brelse(path[level].bp_bh);
824                 path[level].bp_bh = path[level].bp_sib_bh;
825                 path[level].bp_sib_bh = NULL;
826                 path[level].bp_index -=
827                         nilfs_btree_node_get_nchildren(btree, node);
828                 path[level + 1].bp_index++;
829         } else {
830                 brelse(path[level].bp_sib_bh);
831                 path[level].bp_sib_bh = NULL;
832         }
833
834         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
835 }
836
837 static void nilfs_btree_split(struct nilfs_btree *btree,
838                               struct nilfs_btree_path *path,
839                               int level, __u64 *keyp, __u64 *ptrp)
840 {
841         struct nilfs_btree_node *node, *right;
842         __u64 newkey;
843         __u64 newptr;
844         int nchildren, n, move;
845
846         lock_buffer(path[level].bp_bh);
847         lock_buffer(path[level].bp_sib_bh);
848
849         node = nilfs_btree_get_nonroot_node(btree, path, level);
850         right = nilfs_btree_get_sib_node(btree, path, level);
851         nchildren = nilfs_btree_node_get_nchildren(btree, node);
852         move = 0;
853
854         n = (nchildren + 1) / 2;
855         if (n > nchildren - path[level].bp_index) {
856                 n--;
857                 move = 1;
858         }
859
860         nilfs_btree_node_move_right(btree, node, right, n);
861
862         if (!buffer_dirty(path[level].bp_bh))
863                 nilfs_btnode_mark_dirty(path[level].bp_bh);
864         if (!buffer_dirty(path[level].bp_sib_bh))
865                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
866
867         unlock_buffer(path[level].bp_bh);
868         unlock_buffer(path[level].bp_sib_bh);
869
870         newkey = nilfs_btree_node_get_key(btree, right, 0);
871         newptr = path[level].bp_newreq.bpr_ptr;
872
873         if (move) {
874                 path[level].bp_index -=
875                         nilfs_btree_node_get_nchildren(btree, node);
876                 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
877                                         path[level].bp_index);
878
879                 *keyp = nilfs_btree_node_get_key(btree, right, 0);
880                 *ptrp = path[level].bp_newreq.bpr_ptr;
881
882                 brelse(path[level].bp_bh);
883                 path[level].bp_bh = path[level].bp_sib_bh;
884                 path[level].bp_sib_bh = NULL;
885         } else {
886                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
887
888                 *keyp = nilfs_btree_node_get_key(btree, right, 0);
889                 *ptrp = path[level].bp_newreq.bpr_ptr;
890
891                 brelse(path[level].bp_sib_bh);
892                 path[level].bp_sib_bh = NULL;
893         }
894
895         path[level + 1].bp_index++;
896 }
897
898 static void nilfs_btree_grow(struct nilfs_btree *btree,
899                              struct nilfs_btree_path *path,
900                              int level, __u64 *keyp, __u64 *ptrp)
901 {
902         struct nilfs_btree_node *root, *child;
903         int n;
904
905         lock_buffer(path[level].bp_sib_bh);
906
907         root = nilfs_btree_get_root(btree);
908         child = nilfs_btree_get_sib_node(btree, path, level);
909
910         n = nilfs_btree_node_get_nchildren(btree, root);
911
912         nilfs_btree_node_move_right(btree, root, child, n);
913         nilfs_btree_node_set_level(btree, root, level + 1);
914
915         if (!buffer_dirty(path[level].bp_sib_bh))
916                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
917
918         unlock_buffer(path[level].bp_sib_bh);
919
920         path[level].bp_bh = path[level].bp_sib_bh;
921         path[level].bp_sib_bh = NULL;
922
923         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
924
925         *keyp = nilfs_btree_node_get_key(btree, child, 0);
926         *ptrp = path[level].bp_newreq.bpr_ptr;
927 }
928
929 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
930                                    const struct nilfs_btree_path *path)
931 {
932         struct nilfs_btree_node *node;
933         int level;
934
935         if (path == NULL)
936                 return NILFS_BMAP_INVALID_PTR;
937
938         /* left sibling */
939         level = NILFS_BTREE_LEVEL_NODE_MIN;
940         if (path[level].bp_index > 0) {
941                 node = nilfs_btree_get_node(btree, path, level);
942                 return nilfs_btree_node_get_ptr(btree, node,
943                                                 path[level].bp_index - 1);
944         }
945
946         /* parent */
947         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
948         if (level <= nilfs_btree_height(btree) - 1) {
949                 node = nilfs_btree_get_node(btree, path, level);
950                 return nilfs_btree_node_get_ptr(btree, node,
951                                                 path[level].bp_index);
952         }
953
954         return NILFS_BMAP_INVALID_PTR;
955 }
956
957 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
958                                        const struct nilfs_btree_path *path,
959                                        __u64 key)
960 {
961         __u64 ptr;
962
963         ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
964         if (ptr != NILFS_BMAP_INVALID_PTR)
965                 /* sequential access */
966                 return ptr;
967         else {
968                 ptr = nilfs_btree_find_near(btree, path);
969                 if (ptr != NILFS_BMAP_INVALID_PTR)
970                         /* near */
971                         return ptr;
972         }
973         /* block group */
974         return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
975 }
976
977 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
978                                      __u64 ptr)
979 {
980         btree->bt_bmap.b_last_allocated_key = key;
981         btree->bt_bmap.b_last_allocated_ptr = ptr;
982 }
983
984 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
985                                       struct nilfs_btree_path *path,
986                                       int *levelp, __u64 key, __u64 ptr,
987                                       struct nilfs_bmap_stats *stats)
988 {
989         struct buffer_head *bh;
990         struct nilfs_btree_node *node, *parent, *sib;
991         __u64 sibptr;
992         int pindex, level, ret;
993
994         stats->bs_nblocks = 0;
995         level = NILFS_BTREE_LEVEL_DATA;
996
997         /* allocate a new ptr for data block */
998         if (NILFS_BMAP_USE_VBN(&btree->bt_bmap))
999                 path[level].bp_newreq.bpr_ptr =
1000                         nilfs_btree_find_target_v(btree, path, key);
1001
1002         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1003                                            &path[level].bp_newreq);
1004         if (ret < 0)
1005                 goto err_out_data;
1006
1007         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1008              level < nilfs_btree_height(btree) - 1;
1009              level++) {
1010                 node = nilfs_btree_get_nonroot_node(btree, path, level);
1011                 if (nilfs_btree_node_get_nchildren(btree, node) <
1012                     nilfs_btree_node_nchildren_max(btree, node)) {
1013                         path[level].bp_op = nilfs_btree_do_insert;
1014                         stats->bs_nblocks++;
1015                         goto out;
1016                 }
1017
1018                 parent = nilfs_btree_get_node(btree, path, level + 1);
1019                 pindex = path[level + 1].bp_index;
1020
1021                 /* left sibling */
1022                 if (pindex > 0) {
1023                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1024                                                           pindex - 1);
1025                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1026                         if (ret < 0)
1027                                 goto err_out_child_node;
1028                         sib = (struct nilfs_btree_node *)bh->b_data;
1029                         if (nilfs_btree_node_get_nchildren(btree, sib) <
1030                             nilfs_btree_node_nchildren_max(btree, sib)) {
1031                                 path[level].bp_sib_bh = bh;
1032                                 path[level].bp_op = nilfs_btree_carry_left;
1033                                 stats->bs_nblocks++;
1034                                 goto out;
1035                         } else
1036                                 brelse(bh);
1037                 }
1038
1039                 /* right sibling */
1040                 if (pindex <
1041                     nilfs_btree_node_get_nchildren(btree, parent) - 1) {
1042                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1043                                                           pindex + 1);
1044                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1045                         if (ret < 0)
1046                                 goto err_out_child_node;
1047                         sib = (struct nilfs_btree_node *)bh->b_data;
1048                         if (nilfs_btree_node_get_nchildren(btree, sib) <
1049                             nilfs_btree_node_nchildren_max(btree, sib)) {
1050                                 path[level].bp_sib_bh = bh;
1051                                 path[level].bp_op = nilfs_btree_carry_right;
1052                                 stats->bs_nblocks++;
1053                                 goto out;
1054                         } else
1055                                 brelse(bh);
1056                 }
1057
1058                 /* split */
1059                 path[level].bp_newreq.bpr_ptr =
1060                         path[level - 1].bp_newreq.bpr_ptr + 1;
1061                 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1062                                                    &path[level].bp_newreq);
1063                 if (ret < 0)
1064                         goto err_out_child_node;
1065                 ret = nilfs_btree_get_new_block(btree,
1066                                                 path[level].bp_newreq.bpr_ptr,
1067                                                 &bh);
1068                 if (ret < 0)
1069                         goto err_out_curr_node;
1070
1071                 stats->bs_nblocks++;
1072
1073                 lock_buffer(bh);
1074                 nilfs_btree_node_init(btree,
1075                                       (struct nilfs_btree_node *)bh->b_data,
1076                                       0, level, 0, NULL, NULL);
1077                 unlock_buffer(bh);
1078                 path[level].bp_sib_bh = bh;
1079                 path[level].bp_op = nilfs_btree_split;
1080         }
1081
1082         /* root */
1083         node = nilfs_btree_get_root(btree);
1084         if (nilfs_btree_node_get_nchildren(btree, node) <
1085             nilfs_btree_node_nchildren_max(btree, node)) {
1086                 path[level].bp_op = nilfs_btree_do_insert;
1087                 stats->bs_nblocks++;
1088                 goto out;
1089         }
1090
1091         /* grow */
1092         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1093         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1094                                            &path[level].bp_newreq);
1095         if (ret < 0)
1096                 goto err_out_child_node;
1097         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1098                                         &bh);
1099         if (ret < 0)
1100                 goto err_out_curr_node;
1101
1102         lock_buffer(bh);
1103         nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1104                               0, level, 0, NULL, NULL);
1105         unlock_buffer(bh);
1106         path[level].bp_sib_bh = bh;
1107         path[level].bp_op = nilfs_btree_grow;
1108
1109         level++;
1110         path[level].bp_op = nilfs_btree_do_insert;
1111
1112         /* a newly-created node block and a data block are added */
1113         stats->bs_nblocks += 2;
1114
1115         /* success */
1116  out:
1117         *levelp = level;
1118         return ret;
1119
1120         /* error */
1121  err_out_curr_node:
1122         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq);
1123  err_out_child_node:
1124         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1125                 nilfs_btnode_delete(path[level].bp_sib_bh);
1126                 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
1127                                            &path[level].bp_newreq);
1128
1129         }
1130
1131         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq);
1132  err_out_data:
1133         *levelp = level;
1134         stats->bs_nblocks = 0;
1135         return ret;
1136 }
1137
1138 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1139                                       struct nilfs_btree_path *path,
1140                                       int maxlevel, __u64 key, __u64 ptr)
1141 {
1142         int level;
1143
1144         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1145         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1146         if (NILFS_BMAP_USE_VBN(&btree->bt_bmap))
1147                 nilfs_btree_set_target_v(btree, key, ptr);
1148
1149         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1150                 nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
1151                                             &path[level - 1].bp_newreq);
1152                 path[level].bp_op(btree, path, level, &key, &ptr);
1153         }
1154
1155         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1156                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1157 }
1158
1159 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1160 {
1161         struct nilfs_btree *btree;
1162         struct nilfs_btree_path *path;
1163         struct nilfs_bmap_stats stats;
1164         int level, ret;
1165
1166         btree = (struct nilfs_btree *)bmap;
1167         path = nilfs_btree_alloc_path(btree);
1168         if (path == NULL)
1169                 return -ENOMEM;
1170         nilfs_btree_init_path(btree, path);
1171
1172         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1173                                     NILFS_BTREE_LEVEL_NODE_MIN);
1174         if (ret != -ENOENT) {
1175                 if (ret == 0)
1176                         ret = -EEXIST;
1177                 goto out;
1178         }
1179
1180         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1181         if (ret < 0)
1182                 goto out;
1183         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1184         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1185
1186  out:
1187         nilfs_btree_clear_path(btree, path);
1188         nilfs_btree_free_path(btree, path);
1189         return ret;
1190 }
1191
1192 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1193                                   struct nilfs_btree_path *path,
1194                                   int level, __u64 *keyp, __u64 *ptrp)
1195 {
1196         struct nilfs_btree_node *node;
1197
1198         if (level < nilfs_btree_height(btree) - 1) {
1199                 lock_buffer(path[level].bp_bh);
1200                 node = nilfs_btree_get_nonroot_node(btree, path, level);
1201                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1202                                         path[level].bp_index);
1203                 if (!buffer_dirty(path[level].bp_bh))
1204                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1205                 unlock_buffer(path[level].bp_bh);
1206                 if (path[level].bp_index == 0)
1207                         nilfs_btree_promote_key(btree, path, level + 1,
1208                                 nilfs_btree_node_get_key(btree, node, 0));
1209         } else {
1210                 node = nilfs_btree_get_root(btree);
1211                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1212                                         path[level].bp_index);
1213         }
1214 }
1215
1216 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1217                                     struct nilfs_btree_path *path,
1218                                     int level, __u64 *keyp, __u64 *ptrp)
1219 {
1220         struct nilfs_btree_node *node, *left;
1221         int nchildren, lnchildren, n;
1222
1223         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1224
1225         lock_buffer(path[level].bp_bh);
1226         lock_buffer(path[level].bp_sib_bh);
1227
1228         node = nilfs_btree_get_nonroot_node(btree, path, level);
1229         left = nilfs_btree_get_sib_node(btree, path, level);
1230         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1231         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
1232
1233         n = (nchildren + lnchildren) / 2 - nchildren;
1234
1235         nilfs_btree_node_move_right(btree, left, node, n);
1236
1237         if (!buffer_dirty(path[level].bp_bh))
1238                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1239         if (!buffer_dirty(path[level].bp_sib_bh))
1240                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1241
1242         unlock_buffer(path[level].bp_bh);
1243         unlock_buffer(path[level].bp_sib_bh);
1244
1245         nilfs_btree_promote_key(btree, path, level + 1,
1246                                 nilfs_btree_node_get_key(btree, node, 0));
1247
1248         brelse(path[level].bp_sib_bh);
1249         path[level].bp_sib_bh = NULL;
1250         path[level].bp_index += n;
1251 }
1252
1253 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1254                                      struct nilfs_btree_path *path,
1255                                      int level, __u64 *keyp, __u64 *ptrp)
1256 {
1257         struct nilfs_btree_node *node, *right;
1258         int nchildren, rnchildren, n;
1259
1260         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1261
1262         lock_buffer(path[level].bp_bh);
1263         lock_buffer(path[level].bp_sib_bh);
1264
1265         node = nilfs_btree_get_nonroot_node(btree, path, level);
1266         right = nilfs_btree_get_sib_node(btree, path, level);
1267         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1268         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
1269
1270         n = (nchildren + rnchildren) / 2 - nchildren;
1271
1272         nilfs_btree_node_move_left(btree, node, right, n);
1273
1274         if (!buffer_dirty(path[level].bp_bh))
1275                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1276         if (!buffer_dirty(path[level].bp_sib_bh))
1277                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1278
1279         unlock_buffer(path[level].bp_bh);
1280         unlock_buffer(path[level].bp_sib_bh);
1281
1282         path[level + 1].bp_index++;
1283         nilfs_btree_promote_key(btree, path, level + 1,
1284                                 nilfs_btree_node_get_key(btree, right, 0));
1285         path[level + 1].bp_index--;
1286
1287         brelse(path[level].bp_sib_bh);
1288         path[level].bp_sib_bh = NULL;
1289 }
1290
1291 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1292                                     struct nilfs_btree_path *path,
1293                                     int level, __u64 *keyp, __u64 *ptrp)
1294 {
1295         struct nilfs_btree_node *node, *left;
1296         int n;
1297
1298         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1299
1300         lock_buffer(path[level].bp_bh);
1301         lock_buffer(path[level].bp_sib_bh);
1302
1303         node = nilfs_btree_get_nonroot_node(btree, path, level);
1304         left = nilfs_btree_get_sib_node(btree, path, level);
1305
1306         n = nilfs_btree_node_get_nchildren(btree, node);
1307
1308         nilfs_btree_node_move_left(btree, left, node, n);
1309
1310         if (!buffer_dirty(path[level].bp_sib_bh))
1311                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1312
1313         unlock_buffer(path[level].bp_bh);
1314         unlock_buffer(path[level].bp_sib_bh);
1315
1316         nilfs_btnode_delete(path[level].bp_bh);
1317         path[level].bp_bh = path[level].bp_sib_bh;
1318         path[level].bp_sib_bh = NULL;
1319         path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left);
1320 }
1321
1322 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1323                                      struct nilfs_btree_path *path,
1324                                      int level, __u64 *keyp, __u64 *ptrp)
1325 {
1326         struct nilfs_btree_node *node, *right;
1327         int n;
1328
1329         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1330
1331         lock_buffer(path[level].bp_bh);
1332         lock_buffer(path[level].bp_sib_bh);
1333
1334         node = nilfs_btree_get_nonroot_node(btree, path, level);
1335         right = nilfs_btree_get_sib_node(btree, path, level);
1336
1337         n = nilfs_btree_node_get_nchildren(btree, right);
1338
1339         nilfs_btree_node_move_left(btree, node, right, n);
1340
1341         if (!buffer_dirty(path[level].bp_bh))
1342                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1343
1344         unlock_buffer(path[level].bp_bh);
1345         unlock_buffer(path[level].bp_sib_bh);
1346
1347         nilfs_btnode_delete(path[level].bp_sib_bh);
1348         path[level].bp_sib_bh = NULL;
1349         path[level + 1].bp_index++;
1350 }
1351
1352 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1353                                struct nilfs_btree_path *path,
1354                                int level, __u64 *keyp, __u64 *ptrp)
1355 {
1356         struct nilfs_btree_node *root, *child;
1357         int n;
1358
1359         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1360
1361         lock_buffer(path[level].bp_bh);
1362         root = nilfs_btree_get_root(btree);
1363         child = nilfs_btree_get_nonroot_node(btree, path, level);
1364
1365         nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1366         nilfs_btree_node_set_level(btree, root, level);
1367         n = nilfs_btree_node_get_nchildren(btree, child);
1368         nilfs_btree_node_move_left(btree, root, child, n);
1369         unlock_buffer(path[level].bp_bh);
1370
1371         nilfs_btnode_delete(path[level].bp_bh);
1372         path[level].bp_bh = NULL;
1373 }
1374
1375
1376 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1377                                       struct nilfs_btree_path *path,
1378                                       int *levelp,
1379                                       struct nilfs_bmap_stats *stats)
1380 {
1381         struct buffer_head *bh;
1382         struct nilfs_btree_node *node, *parent, *sib;
1383         __u64 sibptr;
1384         int pindex, level, ret;
1385
1386         ret = 0;
1387         stats->bs_nblocks = 0;
1388         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1389              level < nilfs_btree_height(btree) - 1;
1390              level++) {
1391                 node = nilfs_btree_get_nonroot_node(btree, path, level);
1392                 path[level].bp_oldreq.bpr_ptr =
1393                         nilfs_btree_node_get_ptr(btree, node,
1394                                                  path[level].bp_index);
1395                 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1396                                                  &path[level].bp_oldreq);
1397                 if (ret < 0)
1398                         goto err_out_child_node;
1399
1400                 if (nilfs_btree_node_get_nchildren(btree, node) >
1401                     nilfs_btree_node_nchildren_min(btree, node)) {
1402                         path[level].bp_op = nilfs_btree_do_delete;
1403                         stats->bs_nblocks++;
1404                         goto out;
1405                 }
1406
1407                 parent = nilfs_btree_get_node(btree, path, level + 1);
1408                 pindex = path[level + 1].bp_index;
1409
1410                 if (pindex > 0) {
1411                         /* left sibling */
1412                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1413                                                           pindex - 1);
1414                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1415                         if (ret < 0)
1416                                 goto err_out_curr_node;
1417                         sib = (struct nilfs_btree_node *)bh->b_data;
1418                         if (nilfs_btree_node_get_nchildren(btree, sib) >
1419                             nilfs_btree_node_nchildren_min(btree, sib)) {
1420                                 path[level].bp_sib_bh = bh;
1421                                 path[level].bp_op = nilfs_btree_borrow_left;
1422                                 stats->bs_nblocks++;
1423                                 goto out;
1424                         } else {
1425                                 path[level].bp_sib_bh = bh;
1426                                 path[level].bp_op = nilfs_btree_concat_left;
1427                                 stats->bs_nblocks++;
1428                                 /* continue; */
1429                         }
1430                 } else if (pindex <
1431                            nilfs_btree_node_get_nchildren(btree, parent) - 1) {
1432                         /* right sibling */
1433                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1434                                                           pindex + 1);
1435                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1436                         if (ret < 0)
1437                                 goto err_out_curr_node;
1438                         sib = (struct nilfs_btree_node *)bh->b_data;
1439                         if (nilfs_btree_node_get_nchildren(btree, sib) >
1440                             nilfs_btree_node_nchildren_min(btree, sib)) {
1441                                 path[level].bp_sib_bh = bh;
1442                                 path[level].bp_op = nilfs_btree_borrow_right;
1443                                 stats->bs_nblocks++;
1444                                 goto out;
1445                         } else {
1446                                 path[level].bp_sib_bh = bh;
1447                                 path[level].bp_op = nilfs_btree_concat_right;
1448                                 stats->bs_nblocks++;
1449                                 /* continue; */
1450                         }
1451                 } else {
1452                         /* no siblings */
1453                         /* the only child of the root node */
1454                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1455                         if (nilfs_btree_node_get_nchildren(btree, node) - 1 <=
1456                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1457                                 path[level].bp_op = nilfs_btree_shrink;
1458                                 stats->bs_nblocks += 2;
1459                         } else {
1460                                 path[level].bp_op = nilfs_btree_do_delete;
1461                                 stats->bs_nblocks++;
1462                         }
1463
1464                         goto out;
1465
1466                 }
1467         }
1468
1469         node = nilfs_btree_get_root(btree);
1470         path[level].bp_oldreq.bpr_ptr =
1471                 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1472
1473         ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1474                                          &path[level].bp_oldreq);
1475         if (ret < 0)
1476                 goto err_out_child_node;
1477
1478         /* child of the root node is deleted */
1479         path[level].bp_op = nilfs_btree_do_delete;
1480         stats->bs_nblocks++;
1481
1482         /* success */
1483  out:
1484         *levelp = level;
1485         return ret;
1486
1487         /* error */
1488  err_out_curr_node:
1489         nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq);
1490  err_out_child_node:
1491         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1492                 brelse(path[level].bp_sib_bh);
1493                 nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1494                                          &path[level].bp_oldreq);
1495         }
1496         *levelp = level;
1497         stats->bs_nblocks = 0;
1498         return ret;
1499 }
1500
1501 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1502                                       struct nilfs_btree_path *path,
1503                                       int maxlevel)
1504 {
1505         int level;
1506
1507         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1508                 nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1509                                           &path[level].bp_oldreq);
1510                 path[level].bp_op(btree, path, level, NULL, NULL);
1511         }
1512
1513         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1514                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1515 }
1516
1517 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1518
1519 {
1520         struct nilfs_btree *btree;
1521         struct nilfs_btree_path *path;
1522         struct nilfs_bmap_stats stats;
1523         int level, ret;
1524
1525         btree = (struct nilfs_btree *)bmap;
1526         path = nilfs_btree_alloc_path(btree);
1527         if (path == NULL)
1528                 return -ENOMEM;
1529         nilfs_btree_init_path(btree, path);
1530         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1531                                     NILFS_BTREE_LEVEL_NODE_MIN);
1532         if (ret < 0)
1533                 goto out;
1534
1535         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats);
1536         if (ret < 0)
1537                 goto out;
1538         nilfs_btree_commit_delete(btree, path, level);
1539         nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1540
1541 out:
1542         nilfs_btree_clear_path(btree, path);
1543         nilfs_btree_free_path(btree, path);
1544         return ret;
1545 }
1546
1547 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1548 {
1549         struct nilfs_btree *btree;
1550         struct nilfs_btree_path *path;
1551         int ret;
1552
1553         btree = (struct nilfs_btree *)bmap;
1554         path = nilfs_btree_alloc_path(btree);
1555         if (path == NULL)
1556                 return -ENOMEM;
1557         nilfs_btree_init_path(btree, path);
1558
1559         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1560
1561         nilfs_btree_clear_path(btree, path);
1562         nilfs_btree_free_path(btree, path);
1563
1564         return ret;
1565 }
1566
1567 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1568 {
1569         struct buffer_head *bh;
1570         struct nilfs_btree *btree;
1571         struct nilfs_btree_node *root, *node;
1572         __u64 maxkey, nextmaxkey;
1573         __u64 ptr;
1574         int nchildren, ret;
1575
1576         btree = (struct nilfs_btree *)bmap;
1577         root = nilfs_btree_get_root(btree);
1578         switch (nilfs_btree_height(btree)) {
1579         case 2:
1580                 bh = NULL;
1581                 node = root;
1582                 break;
1583         case 3:
1584                 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1585                 if (nchildren > 1)
1586                         return 0;
1587                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1588                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1589                 if (ret < 0)
1590                         return ret;
1591                 node = (struct nilfs_btree_node *)bh->b_data;
1592                 break;
1593         default:
1594                 return 0;
1595         }
1596
1597         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1598         maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1);
1599         nextmaxkey = (nchildren > 1) ?
1600                 nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0;
1601         if (bh != NULL)
1602                 brelse(bh);
1603
1604         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1605 }
1606
1607 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1608                                    __u64 *keys, __u64 *ptrs, int nitems)
1609 {
1610         struct buffer_head *bh;
1611         struct nilfs_btree *btree;
1612         struct nilfs_btree_node *node, *root;
1613         __le64 *dkeys;
1614         __le64 *dptrs;
1615         __u64 ptr;
1616         int nchildren, i, ret;
1617
1618         btree = (struct nilfs_btree *)bmap;
1619         root = nilfs_btree_get_root(btree);
1620         switch (nilfs_btree_height(btree)) {
1621         case 2:
1622                 bh = NULL;
1623                 node = root;
1624                 break;
1625         case 3:
1626                 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1627                 WARN_ON(nchildren > 1);
1628                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1629                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1630                 if (ret < 0)
1631                         return ret;
1632                 node = (struct nilfs_btree_node *)bh->b_data;
1633                 break;
1634         default:
1635                 node = NULL;
1636                 return -EINVAL;
1637         }
1638
1639         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1640         if (nchildren < nitems)
1641                 nitems = nchildren;
1642         dkeys = nilfs_btree_node_dkeys(btree, node);
1643         dptrs = nilfs_btree_node_dptrs(btree, node);
1644         for (i = 0; i < nitems; i++) {
1645                 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1646                 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1647         }
1648
1649         if (bh != NULL)
1650                 brelse(bh);
1651
1652         return nitems;
1653 }
1654
1655 static int
1656 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1657                                        union nilfs_bmap_ptr_req *dreq,
1658                                        union nilfs_bmap_ptr_req *nreq,
1659                                        struct buffer_head **bhp,
1660                                        struct nilfs_bmap_stats *stats)
1661 {
1662         struct buffer_head *bh;
1663         struct nilfs_btree *btree;
1664         int ret;
1665
1666         btree = (struct nilfs_btree *)bmap;
1667         stats->bs_nblocks = 0;
1668
1669         /* for data */
1670         /* cannot find near ptr */
1671         if (NILFS_BMAP_USE_VBN(bmap))
1672                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1673
1674         ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq);
1675         if (ret < 0)
1676                 return ret;
1677
1678         *bhp = NULL;
1679         stats->bs_nblocks++;
1680         if (nreq != NULL) {
1681                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1682                 ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq);
1683                 if (ret < 0)
1684                         goto err_out_dreq;
1685
1686                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1687                 if (ret < 0)
1688                         goto err_out_nreq;
1689
1690                 *bhp = bh;
1691                 stats->bs_nblocks++;
1692         }
1693
1694         /* success */
1695         return 0;
1696
1697         /* error */
1698  err_out_nreq:
1699         nilfs_bmap_abort_alloc_ptr(bmap, nreq);
1700  err_out_dreq:
1701         nilfs_bmap_abort_alloc_ptr(bmap, dreq);
1702         stats->bs_nblocks = 0;
1703         return ret;
1704
1705 }
1706
1707 static void
1708 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1709                                       __u64 key, __u64 ptr,
1710                                       const __u64 *keys, const __u64 *ptrs,
1711                                       int n,
1712                                       union nilfs_bmap_ptr_req *dreq,
1713                                       union nilfs_bmap_ptr_req *nreq,
1714                                       struct buffer_head *bh)
1715 {
1716         struct nilfs_btree *btree;
1717         struct nilfs_btree_node *node;
1718         __u64 tmpptr;
1719
1720         /* free resources */
1721         if (bmap->b_ops->bop_clear != NULL)
1722                 bmap->b_ops->bop_clear(bmap);
1723
1724         /* ptr must be a pointer to a buffer head. */
1725         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1726
1727         /* convert and insert */
1728         btree = (struct nilfs_btree *)bmap;
1729         nilfs_btree_init(bmap);
1730         if (nreq != NULL) {
1731                 nilfs_bmap_commit_alloc_ptr(bmap, dreq);
1732                 nilfs_bmap_commit_alloc_ptr(bmap, nreq);
1733
1734                 /* create child node at level 1 */
1735                 lock_buffer(bh);
1736                 node = (struct nilfs_btree_node *)bh->b_data;
1737                 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1738                 nilfs_btree_node_insert(btree, node,
1739                                         key, dreq->bpr_ptr, n);
1740                 if (!buffer_dirty(bh))
1741                         nilfs_btnode_mark_dirty(bh);
1742                 if (!nilfs_bmap_dirty(bmap))
1743                         nilfs_bmap_set_dirty(bmap);
1744
1745                 unlock_buffer(bh);
1746                 brelse(bh);
1747
1748                 /* create root node at level 2 */
1749                 node = nilfs_btree_get_root(btree);
1750                 tmpptr = nreq->bpr_ptr;
1751                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1752                                       2, 1, &keys[0], &tmpptr);
1753         } else {
1754                 nilfs_bmap_commit_alloc_ptr(bmap, dreq);
1755
1756                 /* create root node at level 1 */
1757                 node = nilfs_btree_get_root(btree);
1758                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1759                                       1, n, keys, ptrs);
1760                 nilfs_btree_node_insert(btree, node,
1761                                         key, dreq->bpr_ptr, n);
1762                 if (!nilfs_bmap_dirty(bmap))
1763                         nilfs_bmap_set_dirty(bmap);
1764         }
1765
1766         if (NILFS_BMAP_USE_VBN(bmap))
1767                 nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr);
1768 }
1769
1770 /**
1771  * nilfs_btree_convert_and_insert -
1772  * @bmap:
1773  * @key:
1774  * @ptr:
1775  * @keys:
1776  * @ptrs:
1777  * @n:
1778  */
1779 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1780                                    __u64 key, __u64 ptr,
1781                                    const __u64 *keys, const __u64 *ptrs, int n)
1782 {
1783         struct buffer_head *bh;
1784         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1785         struct nilfs_bmap_stats stats;
1786         int ret;
1787
1788         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1789                 di = &dreq;
1790                 ni = NULL;
1791         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1792                            1 << bmap->b_inode->i_blkbits)) {
1793                 di = &dreq;
1794                 ni = &nreq;
1795         } else {
1796                 di = NULL;
1797                 ni = NULL;
1798                 BUG();
1799         }
1800
1801         ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1802                                                      &stats);
1803         if (ret < 0)
1804                 return ret;
1805         nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1806                                               di, ni, bh);
1807         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1808         return 0;
1809 }
1810
1811 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1812                                    struct nilfs_btree_path *path,
1813                                    int level,
1814                                    struct buffer_head *bh)
1815 {
1816         while ((++level < nilfs_btree_height(btree) - 1) &&
1817                !buffer_dirty(path[level].bp_bh))
1818                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1819
1820         return 0;
1821 }
1822
1823 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1824                                         struct nilfs_btree_path *path,
1825                                         int level)
1826 {
1827         struct nilfs_btree_node *parent;
1828         int ret;
1829
1830         parent = nilfs_btree_get_node(btree, path, level + 1);
1831         path[level].bp_oldreq.bpr_ptr =
1832                 nilfs_btree_node_get_ptr(btree, parent,
1833                                          path[level + 1].bp_index);
1834         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1835         ret = nilfs_bmap_prepare_update_v(&btree->bt_bmap,
1836                                           &path[level].bp_oldreq,
1837                                           &path[level].bp_newreq);
1838         if (ret < 0)
1839                 return ret;
1840
1841         if (buffer_nilfs_node(path[level].bp_bh)) {
1842                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1843                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1844                 path[level].bp_ctxt.bh = path[level].bp_bh;
1845                 ret = nilfs_btnode_prepare_change_key(
1846                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1847                         &path[level].bp_ctxt);
1848                 if (ret < 0) {
1849                         nilfs_bmap_abort_update_v(&btree->bt_bmap,
1850                                                   &path[level].bp_oldreq,
1851                                                   &path[level].bp_newreq);
1852                         return ret;
1853                 }
1854         }
1855
1856         return 0;
1857 }
1858
1859 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1860                                         struct nilfs_btree_path *path,
1861                                         int level)
1862 {
1863         struct nilfs_btree_node *parent;
1864
1865         nilfs_bmap_commit_update_v(&btree->bt_bmap,
1866                                    &path[level].bp_oldreq,
1867                                    &path[level].bp_newreq);
1868
1869         if (buffer_nilfs_node(path[level].bp_bh)) {
1870                 nilfs_btnode_commit_change_key(
1871                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1872                         &path[level].bp_ctxt);
1873                 path[level].bp_bh = path[level].bp_ctxt.bh;
1874         }
1875         set_buffer_nilfs_volatile(path[level].bp_bh);
1876
1877         parent = nilfs_btree_get_node(btree, path, level + 1);
1878         nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1879                                  path[level].bp_newreq.bpr_ptr);
1880 }
1881
1882 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1883                                        struct nilfs_btree_path *path,
1884                                        int level)
1885 {
1886         nilfs_bmap_abort_update_v(&btree->bt_bmap,
1887                                   &path[level].bp_oldreq,
1888                                   &path[level].bp_newreq);
1889         if (buffer_nilfs_node(path[level].bp_bh))
1890                 nilfs_btnode_abort_change_key(
1891                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1892                         &path[level].bp_ctxt);
1893 }
1894
1895 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1896                                            struct nilfs_btree_path *path,
1897                                            int minlevel,
1898                                            int *maxlevelp)
1899 {
1900         int level, ret;
1901
1902         level = minlevel;
1903         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1904                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1905                 if (ret < 0)
1906                         return ret;
1907         }
1908         while ((++level < nilfs_btree_height(btree) - 1) &&
1909                !buffer_dirty(path[level].bp_bh)) {
1910
1911                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1912                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1913                 if (ret < 0)
1914                         goto out;
1915         }
1916
1917         /* success */
1918         *maxlevelp = level - 1;
1919         return 0;
1920
1921         /* error */
1922  out:
1923         while (--level > minlevel)
1924                 nilfs_btree_abort_update_v(btree, path, level);
1925         if (!buffer_nilfs_volatile(path[level].bp_bh))
1926                 nilfs_btree_abort_update_v(btree, path, level);
1927         return ret;
1928 }
1929
1930 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1931                                            struct nilfs_btree_path *path,
1932                                            int minlevel,
1933                                            int maxlevel,
1934                                            struct buffer_head *bh)
1935 {
1936         int level;
1937
1938         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1939                 nilfs_btree_commit_update_v(btree, path, minlevel);
1940
1941         for (level = minlevel + 1; level <= maxlevel; level++)
1942                 nilfs_btree_commit_update_v(btree, path, level);
1943 }
1944
1945 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1946                                    struct nilfs_btree_path *path,
1947                                    int level,
1948                                    struct buffer_head *bh)
1949 {
1950         int maxlevel, ret;
1951         struct nilfs_btree_node *parent;
1952         __u64 ptr;
1953
1954         get_bh(bh);
1955         path[level].bp_bh = bh;
1956         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel);
1957         if (ret < 0)
1958                 goto out;
1959
1960         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1961                 parent = nilfs_btree_get_node(btree, path, level + 1);
1962                 ptr = nilfs_btree_node_get_ptr(btree, parent,
1963                                                path[level + 1].bp_index);
1964                 ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr);
1965                 if (ret < 0)
1966                         goto out;
1967         }
1968
1969         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh);
1970
1971  out:
1972         brelse(path[level].bp_bh);
1973         path[level].bp_bh = NULL;
1974         return ret;
1975 }
1976
1977 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1978                                  struct buffer_head *bh)
1979 {
1980         struct nilfs_btree *btree;
1981         struct nilfs_btree_path *path;
1982         struct nilfs_btree_node *node;
1983         __u64 key;
1984         int level, ret;
1985
1986         WARN_ON(!buffer_dirty(bh));
1987
1988         btree = (struct nilfs_btree *)bmap;
1989         path = nilfs_btree_alloc_path(btree);
1990         if (path == NULL)
1991                 return -ENOMEM;
1992         nilfs_btree_init_path(btree, path);
1993
1994         if (buffer_nilfs_node(bh)) {
1995                 node = (struct nilfs_btree_node *)bh->b_data;
1996                 key = nilfs_btree_node_get_key(btree, node, 0);
1997                 level = nilfs_btree_node_get_level(btree, node);
1998         } else {
1999                 key = nilfs_bmap_data_get_key(bmap, bh);
2000                 level = NILFS_BTREE_LEVEL_DATA;
2001         }
2002
2003         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2004         if (ret < 0) {
2005                 if (unlikely(ret == -ENOENT))
2006                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
2007                                __func__, (unsigned long long)key, level);
2008                 goto out;
2009         }
2010
2011         ret = NILFS_BMAP_USE_VBN(bmap) ?
2012                 nilfs_btree_propagate_v(btree, path, level, bh) :
2013                 nilfs_btree_propagate_p(btree, path, level, bh);
2014
2015  out:
2016         nilfs_btree_clear_path(btree, path);
2017         nilfs_btree_free_path(btree, path);
2018
2019         return ret;
2020 }
2021
2022 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
2023                                     struct buffer_head *bh)
2024 {
2025         return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr);
2026 }
2027
2028 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
2029                                          struct list_head *lists,
2030                                          struct buffer_head *bh)
2031 {
2032         struct list_head *head;
2033         struct buffer_head *cbh;
2034         struct nilfs_btree_node *node, *cnode;
2035         __u64 key, ckey;
2036         int level;
2037
2038         get_bh(bh);
2039         node = (struct nilfs_btree_node *)bh->b_data;
2040         key = nilfs_btree_node_get_key(btree, node, 0);
2041         level = nilfs_btree_node_get_level(btree, node);
2042         list_for_each(head, &lists[level]) {
2043                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2044                 cnode = (struct nilfs_btree_node *)cbh->b_data;
2045                 ckey = nilfs_btree_node_get_key(btree, cnode, 0);
2046                 if (key < ckey)
2047                         break;
2048         }
2049         list_add_tail(&bh->b_assoc_buffers, head);
2050 }
2051
2052 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
2053                                              struct list_head *listp)
2054 {
2055         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
2056         struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
2057         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2058         struct pagevec pvec;
2059         struct buffer_head *bh, *head;
2060         pgoff_t index = 0;
2061         int level, i;
2062
2063         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2064              level < NILFS_BTREE_LEVEL_MAX;
2065              level++)
2066                 INIT_LIST_HEAD(&lists[level]);
2067
2068         pagevec_init(&pvec, 0);
2069
2070         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2071                                   PAGEVEC_SIZE)) {
2072                 for (i = 0; i < pagevec_count(&pvec); i++) {
2073                         bh = head = page_buffers(pvec.pages[i]);
2074                         do {
2075                                 if (buffer_dirty(bh))
2076                                         nilfs_btree_add_dirty_buffer(btree,
2077                                                                      lists, bh);
2078                         } while ((bh = bh->b_this_page) != head);
2079                 }
2080                 pagevec_release(&pvec);
2081                 cond_resched();
2082         }
2083
2084         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2085              level < NILFS_BTREE_LEVEL_MAX;
2086              level++)
2087                 list_splice(&lists[level], listp->prev);
2088 }
2089
2090 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2091                                 struct nilfs_btree_path *path,
2092                                 int level,
2093                                 struct buffer_head **bh,
2094                                 sector_t blocknr,
2095                                 union nilfs_binfo *binfo)
2096 {
2097         struct nilfs_btree_node *parent;
2098         __u64 key;
2099         __u64 ptr;
2100         int ret;
2101
2102         parent = nilfs_btree_get_node(btree, path, level + 1);
2103         ptr = nilfs_btree_node_get_ptr(btree, parent,
2104                                        path[level + 1].bp_index);
2105         if (buffer_nilfs_node(*bh)) {
2106                 path[level].bp_ctxt.oldkey = ptr;
2107                 path[level].bp_ctxt.newkey = blocknr;
2108                 path[level].bp_ctxt.bh = *bh;
2109                 ret = nilfs_btnode_prepare_change_key(
2110                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2111                         &path[level].bp_ctxt);
2112                 if (ret < 0)
2113                         return ret;
2114                 nilfs_btnode_commit_change_key(
2115                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2116                         &path[level].bp_ctxt);
2117                 *bh = path[level].bp_ctxt.bh;
2118         }
2119
2120         nilfs_btree_node_set_ptr(btree, parent,
2121                                  path[level + 1].bp_index, blocknr);
2122
2123         key = nilfs_btree_node_get_key(btree, parent,
2124                                        path[level + 1].bp_index);
2125         /* on-disk format */
2126         binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2127         binfo->bi_dat.bi_level = level;
2128
2129         return 0;
2130 }
2131
2132 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2133                                 struct nilfs_btree_path *path,
2134                                 int level,
2135                                 struct buffer_head **bh,
2136                                 sector_t blocknr,
2137                                 union nilfs_binfo *binfo)
2138 {
2139         struct nilfs_btree_node *parent;
2140         __u64 key;
2141         __u64 ptr;
2142         union nilfs_bmap_ptr_req req;
2143         int ret;
2144
2145         parent = nilfs_btree_get_node(btree, path, level + 1);
2146         ptr = nilfs_btree_node_get_ptr(btree, parent,
2147                                        path[level + 1].bp_index);
2148         req.bpr_ptr = ptr;
2149         ret = nilfs_bmap_start_v(&btree->bt_bmap, &req, blocknr);
2150         if (unlikely(ret < 0))
2151                 return ret;
2152
2153         key = nilfs_btree_node_get_key(btree, parent,
2154                                        path[level + 1].bp_index);
2155         /* on-disk format */
2156         binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2157         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2158
2159         return 0;
2160 }
2161
2162 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2163                               struct buffer_head **bh,
2164                               sector_t blocknr,
2165                               union nilfs_binfo *binfo)
2166 {
2167         struct nilfs_btree *btree;
2168         struct nilfs_btree_path *path;
2169         struct nilfs_btree_node *node;
2170         __u64 key;
2171         int level, ret;
2172
2173         btree = (struct nilfs_btree *)bmap;
2174         path = nilfs_btree_alloc_path(btree);
2175         if (path == NULL)
2176                 return -ENOMEM;
2177         nilfs_btree_init_path(btree, path);
2178
2179         if (buffer_nilfs_node(*bh)) {
2180                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2181                 key = nilfs_btree_node_get_key(btree, node, 0);
2182                 level = nilfs_btree_node_get_level(btree, node);
2183         } else {
2184                 key = nilfs_bmap_data_get_key(bmap, *bh);
2185                 level = NILFS_BTREE_LEVEL_DATA;
2186         }
2187
2188         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2189         if (ret < 0) {
2190                 WARN_ON(ret == -ENOENT);
2191                 goto out;
2192         }
2193
2194         ret = NILFS_BMAP_USE_VBN(bmap) ?
2195                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2196                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2197
2198  out:
2199         nilfs_btree_clear_path(btree, path);
2200         nilfs_btree_free_path(btree, path);
2201
2202         return ret;
2203 }
2204
2205 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2206                                  struct buffer_head **bh,
2207                                  sector_t blocknr,
2208                                  union nilfs_binfo *binfo)
2209 {
2210         struct nilfs_btree *btree;
2211         struct nilfs_btree_node *node;
2212         __u64 key;
2213         int ret;
2214
2215         btree = (struct nilfs_btree *)bmap;
2216         ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr);
2217         if (ret < 0)
2218                 return ret;
2219
2220         if (buffer_nilfs_node(*bh)) {
2221                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2222                 key = nilfs_btree_node_get_key(btree, node, 0);
2223         } else
2224                 key = nilfs_bmap_data_get_key(bmap, *bh);
2225
2226         /* on-disk format */
2227         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2228         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2229
2230         return 0;
2231 }
2232
2233 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2234 {
2235         struct buffer_head *bh;
2236         struct nilfs_btree *btree;
2237         struct nilfs_btree_path *path;
2238         __u64 ptr;
2239         int ret;
2240
2241         btree = (struct nilfs_btree *)bmap;
2242         path = nilfs_btree_alloc_path(btree);
2243         if (path == NULL)
2244                 return -ENOMEM;
2245         nilfs_btree_init_path(btree, path);
2246
2247         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2248         if (ret < 0) {
2249                 WARN_ON(ret == -ENOENT);
2250                 goto out;
2251         }
2252         ret = nilfs_btree_get_block(btree, ptr, &bh);
2253         if (ret < 0) {
2254                 WARN_ON(ret == -ENOENT);
2255                 goto out;
2256         }
2257
2258         if (!buffer_dirty(bh))
2259                 nilfs_btnode_mark_dirty(bh);
2260         brelse(bh);
2261         if (!nilfs_bmap_dirty(&btree->bt_bmap))
2262                 nilfs_bmap_set_dirty(&btree->bt_bmap);
2263
2264  out:
2265         nilfs_btree_clear_path(btree, path);
2266         nilfs_btree_free_path(btree, path);
2267         return ret;
2268 }
2269
2270 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2271         .bop_lookup             =       nilfs_btree_lookup,
2272         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2273         .bop_insert             =       nilfs_btree_insert,
2274         .bop_delete             =       nilfs_btree_delete,
2275         .bop_clear              =       NULL,
2276
2277         .bop_propagate          =       nilfs_btree_propagate,
2278
2279         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2280
2281         .bop_assign             =       nilfs_btree_assign,
2282         .bop_mark               =       nilfs_btree_mark,
2283
2284         .bop_last_key           =       nilfs_btree_last_key,
2285         .bop_check_insert       =       NULL,
2286         .bop_check_delete       =       nilfs_btree_check_delete,
2287         .bop_gather_data        =       nilfs_btree_gather_data,
2288 };
2289
2290 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2291         .bop_lookup             =       NULL,
2292         .bop_lookup_contig      =       NULL,
2293         .bop_insert             =       NULL,
2294         .bop_delete             =       NULL,
2295         .bop_clear              =       NULL,
2296
2297         .bop_propagate          =       nilfs_btree_propagate_gc,
2298
2299         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2300
2301         .bop_assign             =       nilfs_btree_assign_gc,
2302         .bop_mark               =       NULL,
2303
2304         .bop_last_key           =       NULL,
2305         .bop_check_insert       =       NULL,
2306         .bop_check_delete       =       NULL,
2307         .bop_gather_data        =       NULL,
2308 };
2309
2310 int nilfs_btree_init(struct nilfs_bmap *bmap)
2311 {
2312         bmap->b_ops = &nilfs_btree_ops;
2313         return 0;
2314 }
2315
2316 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2317 {
2318         bmap->b_ops = &nilfs_btree_ops_gc;
2319 }