nilfs2: move get block functions in bmap.c into btree codes
[pandora-kernel.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32
33 /**
34  * struct nilfs_btree_path - A path on which B-tree operations are executed
35  * @bp_bh: buffer head of node block
36  * @bp_sib_bh: buffer head of sibling node block
37  * @bp_index: index of child node
38  * @bp_oldreq: ptr end request for old ptr
39  * @bp_newreq: ptr alloc request for new ptr
40  * @bp_op: rebalance operation
41  */
42 struct nilfs_btree_path {
43         struct buffer_head *bp_bh;
44         struct buffer_head *bp_sib_bh;
45         int bp_index;
46         union nilfs_bmap_ptr_req bp_oldreq;
47         union nilfs_bmap_ptr_req bp_newreq;
48         struct nilfs_btnode_chkey_ctxt bp_ctxt;
49         void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
50                       int, __u64 *, __u64 *);
51 };
52
53 /*
54  * B-tree path operations
55  */
56
57 static struct kmem_cache *nilfs_btree_path_cache;
58
59 int __init nilfs_btree_path_cache_init(void)
60 {
61         nilfs_btree_path_cache =
62                 kmem_cache_create("nilfs2_btree_path_cache",
63                                   sizeof(struct nilfs_btree_path) *
64                                   NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
65         return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
66 }
67
68 void nilfs_btree_path_cache_destroy(void)
69 {
70         kmem_cache_destroy(nilfs_btree_path_cache);
71 }
72
73 static inline struct nilfs_btree_path *
74 nilfs_btree_alloc_path(const struct nilfs_btree *btree)
75 {
76         return (struct nilfs_btree_path *)
77                 kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
78 }
79
80 static inline void nilfs_btree_free_path(const struct nilfs_btree *btree,
81                                          struct nilfs_btree_path *path)
82 {
83         kmem_cache_free(nilfs_btree_path_cache, path);
84 }
85
86 static void nilfs_btree_init_path(const struct nilfs_btree *btree,
87                                   struct nilfs_btree_path *path)
88 {
89         int level;
90
91         for (level = NILFS_BTREE_LEVEL_DATA;
92              level < NILFS_BTREE_LEVEL_MAX;
93              level++) {
94                 path[level].bp_bh = NULL;
95                 path[level].bp_sib_bh = NULL;
96                 path[level].bp_index = 0;
97                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
98                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
99                 path[level].bp_op = NULL;
100         }
101 }
102
103 static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
104                                    struct nilfs_btree_path *path)
105 {
106         int level;
107
108         for (level = NILFS_BTREE_LEVEL_DATA;
109              level < NILFS_BTREE_LEVEL_MAX;
110              level++) {
111                 if (path[level].bp_bh != NULL) {
112                         brelse(path[level].bp_bh);
113                         path[level].bp_bh = NULL;
114                 }
115                 /* sib_bh is released or deleted by prepare or commit
116                  * operations. */
117                 path[level].bp_sib_bh = NULL;
118                 path[level].bp_index = 0;
119                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
120                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
121                 path[level].bp_op = NULL;
122         }
123 }
124
125 /*
126  * B-tree node operations
127  */
128 static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
129                                  struct buffer_head **bhp)
130 {
131         struct address_space *btnc =
132                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
133         return nilfs_btnode_get(btnc, ptr, 0, bhp, 0);
134 }
135
136 static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
137                                      __u64 ptr, struct buffer_head **bhp)
138 {
139         struct address_space *btnc =
140                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
141         int ret;
142
143         ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1);
144         if (!ret)
145                 set_buffer_nilfs_volatile(*bhp);
146         return ret;
147 }
148
149 static inline int
150 nilfs_btree_node_get_flags(const struct nilfs_btree *btree,
151                            const struct nilfs_btree_node *node)
152 {
153         return node->bn_flags;
154 }
155
156 static inline void
157 nilfs_btree_node_set_flags(struct nilfs_btree *btree,
158                            struct nilfs_btree_node *node,
159                            int flags)
160 {
161         node->bn_flags = flags;
162 }
163
164 static inline int nilfs_btree_node_root(const struct nilfs_btree *btree,
165                                         const struct nilfs_btree_node *node)
166 {
167         return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT;
168 }
169
170 static inline int
171 nilfs_btree_node_get_level(const struct nilfs_btree *btree,
172                            const struct nilfs_btree_node *node)
173 {
174         return node->bn_level;
175 }
176
177 static inline void
178 nilfs_btree_node_set_level(struct nilfs_btree *btree,
179                            struct nilfs_btree_node *node,
180                            int level)
181 {
182         node->bn_level = level;
183 }
184
185 static inline int
186 nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree,
187                                const struct nilfs_btree_node *node)
188 {
189         return le16_to_cpu(node->bn_nchildren);
190 }
191
192 static inline void
193 nilfs_btree_node_set_nchildren(struct nilfs_btree *btree,
194                                struct nilfs_btree_node *node,
195                                int nchildren)
196 {
197         node->bn_nchildren = cpu_to_le16(nchildren);
198 }
199
200 static inline int
201 nilfs_btree_node_size(const struct nilfs_btree *btree)
202 {
203         return 1 << btree->bt_bmap.b_inode->i_blkbits;
204 }
205
206 static inline int
207 nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree,
208                                const struct nilfs_btree_node *node)
209 {
210         return nilfs_btree_node_root(btree, node) ?
211                 NILFS_BTREE_ROOT_NCHILDREN_MIN :
212                 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
213 }
214
215 static inline int
216 nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree,
217                                const struct nilfs_btree_node *node)
218 {
219         return nilfs_btree_node_root(btree, node) ?
220                 NILFS_BTREE_ROOT_NCHILDREN_MAX :
221                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
222 }
223
224 static inline __le64 *
225 nilfs_btree_node_dkeys(const struct nilfs_btree *btree,
226                        const struct nilfs_btree_node *node)
227 {
228         return (__le64 *)((char *)(node + 1) +
229                           (nilfs_btree_node_root(btree, node) ?
230                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
231 }
232
233 static inline __le64 *
234 nilfs_btree_node_dptrs(const struct nilfs_btree *btree,
235                        const struct nilfs_btree_node *node)
236 {
237         return (__le64 *)(nilfs_btree_node_dkeys(btree, node) +
238                           nilfs_btree_node_nchildren_max(btree, node));
239 }
240
241 static inline __u64
242 nilfs_btree_node_get_key(const struct nilfs_btree *btree,
243                          const struct nilfs_btree_node *node, int index)
244 {
245         return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) +
246                                         index));
247 }
248
249 static inline void
250 nilfs_btree_node_set_key(struct nilfs_btree *btree,
251                          struct nilfs_btree_node *node, int index, __u64 key)
252 {
253         *(nilfs_btree_node_dkeys(btree, node) + index) =
254                 nilfs_bmap_key_to_dkey(key);
255 }
256
257 static inline __u64
258 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
259                          const struct nilfs_btree_node *node,
260                          int index)
261 {
262         return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) +
263                                         index));
264 }
265
266 static inline void
267 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
268                          struct nilfs_btree_node *node,
269                          int index,
270                          __u64 ptr)
271 {
272         *(nilfs_btree_node_dptrs(btree, node) + index) =
273                 nilfs_bmap_ptr_to_dptr(ptr);
274 }
275
276 static void nilfs_btree_node_init(struct nilfs_btree *btree,
277                                   struct nilfs_btree_node *node,
278                                   int flags, int level, int nchildren,
279                                   const __u64 *keys, const __u64 *ptrs)
280 {
281         __le64 *dkeys;
282         __le64 *dptrs;
283         int i;
284
285         nilfs_btree_node_set_flags(btree, node, flags);
286         nilfs_btree_node_set_level(btree, node, level);
287         nilfs_btree_node_set_nchildren(btree, node, nchildren);
288
289         dkeys = nilfs_btree_node_dkeys(btree, node);
290         dptrs = nilfs_btree_node_dptrs(btree, node);
291         for (i = 0; i < nchildren; i++) {
292                 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
293                 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
294         }
295 }
296
297 /* Assume the buffer heads corresponding to left and right are locked. */
298 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
299                                        struct nilfs_btree_node *left,
300                                        struct nilfs_btree_node *right,
301                                        int n)
302 {
303         __le64 *ldkeys, *rdkeys;
304         __le64 *ldptrs, *rdptrs;
305         int lnchildren, rnchildren;
306
307         ldkeys = nilfs_btree_node_dkeys(btree, left);
308         ldptrs = nilfs_btree_node_dptrs(btree, left);
309         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
310
311         rdkeys = nilfs_btree_node_dkeys(btree, right);
312         rdptrs = nilfs_btree_node_dptrs(btree, right);
313         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
314
315         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
316         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
317         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
318         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
319
320         lnchildren += n;
321         rnchildren -= n;
322         nilfs_btree_node_set_nchildren(btree, left, lnchildren);
323         nilfs_btree_node_set_nchildren(btree, right, rnchildren);
324 }
325
326 /* Assume that the buffer heads corresponding to left and right are locked. */
327 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
328                                         struct nilfs_btree_node *left,
329                                         struct nilfs_btree_node *right,
330                                         int n)
331 {
332         __le64 *ldkeys, *rdkeys;
333         __le64 *ldptrs, *rdptrs;
334         int lnchildren, rnchildren;
335
336         ldkeys = nilfs_btree_node_dkeys(btree, left);
337         ldptrs = nilfs_btree_node_dptrs(btree, left);
338         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
339
340         rdkeys = nilfs_btree_node_dkeys(btree, right);
341         rdptrs = nilfs_btree_node_dptrs(btree, right);
342         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
343
344         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
345         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
346         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
347         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
348
349         lnchildren -= n;
350         rnchildren += n;
351         nilfs_btree_node_set_nchildren(btree, left, lnchildren);
352         nilfs_btree_node_set_nchildren(btree, right, rnchildren);
353 }
354
355 /* Assume that the buffer head corresponding to node is locked. */
356 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
357                                     struct nilfs_btree_node *node,
358                                     __u64 key, __u64 ptr, int index)
359 {
360         __le64 *dkeys;
361         __le64 *dptrs;
362         int nchildren;
363
364         dkeys = nilfs_btree_node_dkeys(btree, node);
365         dptrs = nilfs_btree_node_dptrs(btree, node);
366         nchildren = nilfs_btree_node_get_nchildren(btree, node);
367         if (index < nchildren) {
368                 memmove(dkeys + index + 1, dkeys + index,
369                         (nchildren - index) * sizeof(*dkeys));
370                 memmove(dptrs + index + 1, dptrs + index,
371                         (nchildren - index) * sizeof(*dptrs));
372         }
373         dkeys[index] = nilfs_bmap_key_to_dkey(key);
374         dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
375         nchildren++;
376         nilfs_btree_node_set_nchildren(btree, node, nchildren);
377 }
378
379 /* Assume that the buffer head corresponding to node is locked. */
380 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
381                                     struct nilfs_btree_node *node,
382                                     __u64 *keyp, __u64 *ptrp, int index)
383 {
384         __u64 key;
385         __u64 ptr;
386         __le64 *dkeys;
387         __le64 *dptrs;
388         int nchildren;
389
390         dkeys = nilfs_btree_node_dkeys(btree, node);
391         dptrs = nilfs_btree_node_dptrs(btree, node);
392         key = nilfs_bmap_dkey_to_key(dkeys[index]);
393         ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
394         nchildren = nilfs_btree_node_get_nchildren(btree, node);
395         if (keyp != NULL)
396                 *keyp = key;
397         if (ptrp != NULL)
398                 *ptrp = ptr;
399
400         if (index < nchildren - 1) {
401                 memmove(dkeys + index, dkeys + index + 1,
402                         (nchildren - index - 1) * sizeof(*dkeys));
403                 memmove(dptrs + index, dptrs + index + 1,
404                         (nchildren - index - 1) * sizeof(*dptrs));
405         }
406         nchildren--;
407         nilfs_btree_node_set_nchildren(btree, node, nchildren);
408 }
409
410 static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
411                                    const struct nilfs_btree_node *node,
412                                    __u64 key, int *indexp)
413 {
414         __u64 nkey;
415         int index, low, high, s;
416
417         /* binary search */
418         low = 0;
419         high = nilfs_btree_node_get_nchildren(btree, node) - 1;
420         index = 0;
421         s = 0;
422         while (low <= high) {
423                 index = (low + high) / 2;
424                 nkey = nilfs_btree_node_get_key(btree, node, index);
425                 if (nkey == key) {
426                         s = 0;
427                         goto out;
428                 } else if (nkey < key) {
429                         low = index + 1;
430                         s = -1;
431                 } else {
432                         high = index - 1;
433                         s = 1;
434                 }
435         }
436
437         /* adjust index */
438         if (nilfs_btree_node_get_level(btree, node) >
439             NILFS_BTREE_LEVEL_NODE_MIN) {
440                 if ((s > 0) && (index > 0))
441                         index--;
442         } else if (s < 0)
443                 index++;
444
445  out:
446         *indexp = index;
447
448         return s == 0;
449 }
450
451 static inline struct nilfs_btree_node *
452 nilfs_btree_get_root(const struct nilfs_btree *btree)
453 {
454         return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
455 }
456
457 static inline struct nilfs_btree_node *
458 nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree,
459                              const struct nilfs_btree_path *path,
460                              int level)
461 {
462         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
463 }
464
465 static inline struct nilfs_btree_node *
466 nilfs_btree_get_sib_node(const struct nilfs_btree *btree,
467                          const struct nilfs_btree_path *path,
468                          int level)
469 {
470         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
471 }
472
473 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
474 {
475         return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree))
476                 + 1;
477 }
478
479 static inline struct nilfs_btree_node *
480 nilfs_btree_get_node(const struct nilfs_btree *btree,
481                      const struct nilfs_btree_path *path,
482                      int level)
483 {
484         return (level == nilfs_btree_height(btree) - 1) ?
485                 nilfs_btree_get_root(btree) :
486                 nilfs_btree_get_nonroot_node(btree, path, level);
487 }
488
489 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
490                                  struct nilfs_btree_path *path,
491                                  __u64 key, __u64 *ptrp, int minlevel)
492 {
493         struct nilfs_btree_node *node;
494         __u64 ptr;
495         int level, index, found, ret;
496
497         node = nilfs_btree_get_root(btree);
498         level = nilfs_btree_node_get_level(btree, node);
499         if ((level < minlevel) ||
500             (nilfs_btree_node_get_nchildren(btree, node) <= 0))
501                 return -ENOENT;
502
503         found = nilfs_btree_node_lookup(btree, node, key, &index);
504         ptr = nilfs_btree_node_get_ptr(btree, node, index);
505         path[level].bp_bh = NULL;
506         path[level].bp_index = index;
507
508         for (level--; level >= minlevel; level--) {
509                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
510                 if (ret < 0)
511                         return ret;
512                 node = nilfs_btree_get_nonroot_node(btree, path, level);
513                 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
514                 if (!found)
515                         found = nilfs_btree_node_lookup(btree, node, key,
516                                                         &index);
517                 else
518                         index = 0;
519                 if (index < nilfs_btree_node_nchildren_max(btree, node))
520                         ptr = nilfs_btree_node_get_ptr(btree, node, index);
521                 else {
522                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
523                         /* insert */
524                         ptr = NILFS_BMAP_INVALID_PTR;
525                 }
526                 path[level].bp_index = index;
527         }
528         if (!found)
529                 return -ENOENT;
530
531         if (ptrp != NULL)
532                 *ptrp = ptr;
533
534         return 0;
535 }
536
537 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
538                                       struct nilfs_btree_path *path,
539                                       __u64 *keyp, __u64 *ptrp)
540 {
541         struct nilfs_btree_node *node;
542         __u64 ptr;
543         int index, level, ret;
544
545         node = nilfs_btree_get_root(btree);
546         index = nilfs_btree_node_get_nchildren(btree, node) - 1;
547         if (index < 0)
548                 return -ENOENT;
549         level = nilfs_btree_node_get_level(btree, node);
550         ptr = nilfs_btree_node_get_ptr(btree, node, index);
551         path[level].bp_bh = NULL;
552         path[level].bp_index = index;
553
554         for (level--; level > 0; level--) {
555                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
556                 if (ret < 0)
557                         return ret;
558                 node = nilfs_btree_get_nonroot_node(btree, path, level);
559                 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
560                 index = nilfs_btree_node_get_nchildren(btree, node) - 1;
561                 ptr = nilfs_btree_node_get_ptr(btree, node, index);
562                 path[level].bp_index = index;
563         }
564
565         if (keyp != NULL)
566                 *keyp = nilfs_btree_node_get_key(btree, node, index);
567         if (ptrp != NULL)
568                 *ptrp = ptr;
569
570         return 0;
571 }
572
573 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
574                               __u64 key, int level, __u64 *ptrp)
575 {
576         struct nilfs_btree *btree;
577         struct nilfs_btree_path *path;
578         __u64 ptr;
579         int ret;
580
581         btree = (struct nilfs_btree *)bmap;
582         path = nilfs_btree_alloc_path(btree);
583         if (path == NULL)
584                 return -ENOMEM;
585         nilfs_btree_init_path(btree, path);
586
587         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
588
589         if (ptrp != NULL)
590                 *ptrp = ptr;
591
592         nilfs_btree_clear_path(btree, path);
593         nilfs_btree_free_path(btree, path);
594
595         return ret;
596 }
597
598 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
599                                     struct nilfs_btree_path *path,
600                                     int level, __u64 key)
601 {
602         if (level < nilfs_btree_height(btree) - 1) {
603                 do {
604                         lock_buffer(path[level].bp_bh);
605                         nilfs_btree_node_set_key(
606                                 btree,
607                                 nilfs_btree_get_nonroot_node(
608                                         btree, path, level),
609                                 path[level].bp_index, key);
610                         if (!buffer_dirty(path[level].bp_bh))
611                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
612                         unlock_buffer(path[level].bp_bh);
613                 } while ((path[level].bp_index == 0) &&
614                          (++level < nilfs_btree_height(btree) - 1));
615         }
616
617         /* root */
618         if (level == nilfs_btree_height(btree) - 1) {
619                 nilfs_btree_node_set_key(btree,
620                                          nilfs_btree_get_root(btree),
621                                          path[level].bp_index, key);
622         }
623 }
624
625 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
626                                   struct nilfs_btree_path *path,
627                                   int level, __u64 *keyp, __u64 *ptrp)
628 {
629         struct nilfs_btree_node *node;
630
631         if (level < nilfs_btree_height(btree) - 1) {
632                 lock_buffer(path[level].bp_bh);
633                 node = nilfs_btree_get_nonroot_node(btree, path, level);
634                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
635                                         path[level].bp_index);
636                 if (!buffer_dirty(path[level].bp_bh))
637                         nilfs_btnode_mark_dirty(path[level].bp_bh);
638                 unlock_buffer(path[level].bp_bh);
639
640                 if (path[level].bp_index == 0)
641                         nilfs_btree_promote_key(btree, path, level + 1,
642                                                 nilfs_btree_node_get_key(
643                                                         btree, node, 0));
644         } else {
645                 node = nilfs_btree_get_root(btree);
646                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
647                                         path[level].bp_index);
648         }
649 }
650
651 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
652                                    struct nilfs_btree_path *path,
653                                    int level, __u64 *keyp, __u64 *ptrp)
654 {
655         struct nilfs_btree_node *node, *left;
656         int nchildren, lnchildren, n, move;
657
658         lock_buffer(path[level].bp_bh);
659         lock_buffer(path[level].bp_sib_bh);
660
661         node = nilfs_btree_get_nonroot_node(btree, path, level);
662         left = nilfs_btree_get_sib_node(btree, path, level);
663         nchildren = nilfs_btree_node_get_nchildren(btree, node);
664         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
665         move = 0;
666
667         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
668         if (n > path[level].bp_index) {
669                 /* move insert point */
670                 n--;
671                 move = 1;
672         }
673
674         nilfs_btree_node_move_left(btree, left, node, n);
675
676         if (!buffer_dirty(path[level].bp_bh))
677                 nilfs_btnode_mark_dirty(path[level].bp_bh);
678         if (!buffer_dirty(path[level].bp_sib_bh))
679                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
680
681         unlock_buffer(path[level].bp_bh);
682         unlock_buffer(path[level].bp_sib_bh);
683
684         nilfs_btree_promote_key(btree, path, level + 1,
685                                 nilfs_btree_node_get_key(btree, node, 0));
686
687         if (move) {
688                 brelse(path[level].bp_bh);
689                 path[level].bp_bh = path[level].bp_sib_bh;
690                 path[level].bp_sib_bh = NULL;
691                 path[level].bp_index += lnchildren;
692                 path[level + 1].bp_index--;
693         } else {
694                 brelse(path[level].bp_sib_bh);
695                 path[level].bp_sib_bh = NULL;
696                 path[level].bp_index -= n;
697         }
698
699         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
700 }
701
702 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
703                                     struct nilfs_btree_path *path,
704                                     int level, __u64 *keyp, __u64 *ptrp)
705 {
706         struct nilfs_btree_node *node, *right;
707         int nchildren, rnchildren, n, move;
708
709         lock_buffer(path[level].bp_bh);
710         lock_buffer(path[level].bp_sib_bh);
711
712         node = nilfs_btree_get_nonroot_node(btree, path, level);
713         right = nilfs_btree_get_sib_node(btree, path, level);
714         nchildren = nilfs_btree_node_get_nchildren(btree, node);
715         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
716         move = 0;
717
718         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
719         if (n > nchildren - path[level].bp_index) {
720                 /* move insert point */
721                 n--;
722                 move = 1;
723         }
724
725         nilfs_btree_node_move_right(btree, node, right, n);
726
727         if (!buffer_dirty(path[level].bp_bh))
728                 nilfs_btnode_mark_dirty(path[level].bp_bh);
729         if (!buffer_dirty(path[level].bp_sib_bh))
730                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
731
732         unlock_buffer(path[level].bp_bh);
733         unlock_buffer(path[level].bp_sib_bh);
734
735         path[level + 1].bp_index++;
736         nilfs_btree_promote_key(btree, path, level + 1,
737                                 nilfs_btree_node_get_key(btree, right, 0));
738         path[level + 1].bp_index--;
739
740         if (move) {
741                 brelse(path[level].bp_bh);
742                 path[level].bp_bh = path[level].bp_sib_bh;
743                 path[level].bp_sib_bh = NULL;
744                 path[level].bp_index -=
745                         nilfs_btree_node_get_nchildren(btree, node);
746                 path[level + 1].bp_index++;
747         } else {
748                 brelse(path[level].bp_sib_bh);
749                 path[level].bp_sib_bh = NULL;
750         }
751
752         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
753 }
754
755 static void nilfs_btree_split(struct nilfs_btree *btree,
756                               struct nilfs_btree_path *path,
757                               int level, __u64 *keyp, __u64 *ptrp)
758 {
759         struct nilfs_btree_node *node, *right;
760         __u64 newkey;
761         __u64 newptr;
762         int nchildren, n, move;
763
764         lock_buffer(path[level].bp_bh);
765         lock_buffer(path[level].bp_sib_bh);
766
767         node = nilfs_btree_get_nonroot_node(btree, path, level);
768         right = nilfs_btree_get_sib_node(btree, path, level);
769         nchildren = nilfs_btree_node_get_nchildren(btree, node);
770         move = 0;
771
772         n = (nchildren + 1) / 2;
773         if (n > nchildren - path[level].bp_index) {
774                 n--;
775                 move = 1;
776         }
777
778         nilfs_btree_node_move_right(btree, node, right, n);
779
780         if (!buffer_dirty(path[level].bp_bh))
781                 nilfs_btnode_mark_dirty(path[level].bp_bh);
782         if (!buffer_dirty(path[level].bp_sib_bh))
783                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
784
785         unlock_buffer(path[level].bp_bh);
786         unlock_buffer(path[level].bp_sib_bh);
787
788         newkey = nilfs_btree_node_get_key(btree, right, 0);
789         newptr = path[level].bp_newreq.bpr_ptr;
790
791         if (move) {
792                 path[level].bp_index -=
793                         nilfs_btree_node_get_nchildren(btree, node);
794                 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
795                                         path[level].bp_index);
796
797                 *keyp = nilfs_btree_node_get_key(btree, right, 0);
798                 *ptrp = path[level].bp_newreq.bpr_ptr;
799
800                 brelse(path[level].bp_bh);
801                 path[level].bp_bh = path[level].bp_sib_bh;
802                 path[level].bp_sib_bh = NULL;
803         } else {
804                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
805
806                 *keyp = nilfs_btree_node_get_key(btree, right, 0);
807                 *ptrp = path[level].bp_newreq.bpr_ptr;
808
809                 brelse(path[level].bp_sib_bh);
810                 path[level].bp_sib_bh = NULL;
811         }
812
813         path[level + 1].bp_index++;
814 }
815
816 static void nilfs_btree_grow(struct nilfs_btree *btree,
817                              struct nilfs_btree_path *path,
818                              int level, __u64 *keyp, __u64 *ptrp)
819 {
820         struct nilfs_btree_node *root, *child;
821         int n;
822
823         lock_buffer(path[level].bp_sib_bh);
824
825         root = nilfs_btree_get_root(btree);
826         child = nilfs_btree_get_sib_node(btree, path, level);
827
828         n = nilfs_btree_node_get_nchildren(btree, root);
829
830         nilfs_btree_node_move_right(btree, root, child, n);
831         nilfs_btree_node_set_level(btree, root, level + 1);
832
833         if (!buffer_dirty(path[level].bp_sib_bh))
834                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
835
836         unlock_buffer(path[level].bp_sib_bh);
837
838         path[level].bp_bh = path[level].bp_sib_bh;
839         path[level].bp_sib_bh = NULL;
840
841         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
842
843         *keyp = nilfs_btree_node_get_key(btree, child, 0);
844         *ptrp = path[level].bp_newreq.bpr_ptr;
845 }
846
847 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
848                                    const struct nilfs_btree_path *path)
849 {
850         struct nilfs_btree_node *node;
851         int level;
852
853         if (path == NULL)
854                 return NILFS_BMAP_INVALID_PTR;
855
856         /* left sibling */
857         level = NILFS_BTREE_LEVEL_NODE_MIN;
858         if (path[level].bp_index > 0) {
859                 node = nilfs_btree_get_node(btree, path, level);
860                 return nilfs_btree_node_get_ptr(btree, node,
861                                                 path[level].bp_index - 1);
862         }
863
864         /* parent */
865         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
866         if (level <= nilfs_btree_height(btree) - 1) {
867                 node = nilfs_btree_get_node(btree, path, level);
868                 return nilfs_btree_node_get_ptr(btree, node,
869                                                 path[level].bp_index);
870         }
871
872         return NILFS_BMAP_INVALID_PTR;
873 }
874
875 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
876                                        const struct nilfs_btree_path *path,
877                                        __u64 key)
878 {
879         __u64 ptr;
880
881         ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
882         if (ptr != NILFS_BMAP_INVALID_PTR)
883                 /* sequential access */
884                 return ptr;
885         else {
886                 ptr = nilfs_btree_find_near(btree, path);
887                 if (ptr != NILFS_BMAP_INVALID_PTR)
888                         /* near */
889                         return ptr;
890         }
891         /* block group */
892         return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
893 }
894
895 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
896                                      __u64 ptr)
897 {
898         btree->bt_bmap.b_last_allocated_key = key;
899         btree->bt_bmap.b_last_allocated_ptr = ptr;
900 }
901
902 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
903                                       struct nilfs_btree_path *path,
904                                       int *levelp, __u64 key, __u64 ptr,
905                                       struct nilfs_bmap_stats *stats)
906 {
907         struct buffer_head *bh;
908         struct nilfs_btree_node *node, *parent, *sib;
909         __u64 sibptr;
910         int pindex, level, ret;
911
912         stats->bs_nblocks = 0;
913         level = NILFS_BTREE_LEVEL_DATA;
914
915         /* allocate a new ptr for data block */
916         if (btree->bt_ops->btop_find_target != NULL)
917                 path[level].bp_newreq.bpr_ptr =
918                         btree->bt_ops->btop_find_target(btree, path, key);
919
920         ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
921                 &btree->bt_bmap, &path[level].bp_newreq);
922         if (ret < 0)
923                 goto err_out_data;
924
925         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
926              level < nilfs_btree_height(btree) - 1;
927              level++) {
928                 node = nilfs_btree_get_nonroot_node(btree, path, level);
929                 if (nilfs_btree_node_get_nchildren(btree, node) <
930                     nilfs_btree_node_nchildren_max(btree, node)) {
931                         path[level].bp_op = nilfs_btree_do_insert;
932                         stats->bs_nblocks++;
933                         goto out;
934                 }
935
936                 parent = nilfs_btree_get_node(btree, path, level + 1);
937                 pindex = path[level + 1].bp_index;
938
939                 /* left sibling */
940                 if (pindex > 0) {
941                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
942                                                           pindex - 1);
943                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
944                         if (ret < 0)
945                                 goto err_out_child_node;
946                         sib = (struct nilfs_btree_node *)bh->b_data;
947                         if (nilfs_btree_node_get_nchildren(btree, sib) <
948                             nilfs_btree_node_nchildren_max(btree, sib)) {
949                                 path[level].bp_sib_bh = bh;
950                                 path[level].bp_op = nilfs_btree_carry_left;
951                                 stats->bs_nblocks++;
952                                 goto out;
953                         } else
954                                 brelse(bh);
955                 }
956
957                 /* right sibling */
958                 if (pindex <
959                     nilfs_btree_node_get_nchildren(btree, parent) - 1) {
960                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
961                                                           pindex + 1);
962                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
963                         if (ret < 0)
964                                 goto err_out_child_node;
965                         sib = (struct nilfs_btree_node *)bh->b_data;
966                         if (nilfs_btree_node_get_nchildren(btree, sib) <
967                             nilfs_btree_node_nchildren_max(btree, sib)) {
968                                 path[level].bp_sib_bh = bh;
969                                 path[level].bp_op = nilfs_btree_carry_right;
970                                 stats->bs_nblocks++;
971                                 goto out;
972                         } else
973                                 brelse(bh);
974                 }
975
976                 /* split */
977                 path[level].bp_newreq.bpr_ptr =
978                         path[level - 1].bp_newreq.bpr_ptr + 1;
979                 ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
980                         &btree->bt_bmap, &path[level].bp_newreq);
981                 if (ret < 0)
982                         goto err_out_child_node;
983                 ret = nilfs_btree_get_new_block(btree,
984                                                 path[level].bp_newreq.bpr_ptr,
985                                                 &bh);
986                 if (ret < 0)
987                         goto err_out_curr_node;
988
989                 stats->bs_nblocks++;
990
991                 lock_buffer(bh);
992                 nilfs_btree_node_init(btree,
993                                       (struct nilfs_btree_node *)bh->b_data,
994                                       0, level, 0, NULL, NULL);
995                 unlock_buffer(bh);
996                 path[level].bp_sib_bh = bh;
997                 path[level].bp_op = nilfs_btree_split;
998         }
999
1000         /* root */
1001         node = nilfs_btree_get_root(btree);
1002         if (nilfs_btree_node_get_nchildren(btree, node) <
1003             nilfs_btree_node_nchildren_max(btree, node)) {
1004                 path[level].bp_op = nilfs_btree_do_insert;
1005                 stats->bs_nblocks++;
1006                 goto out;
1007         }
1008
1009         /* grow */
1010         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1011         ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
1012                 &btree->bt_bmap, &path[level].bp_newreq);
1013         if (ret < 0)
1014                 goto err_out_child_node;
1015         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1016                                         &bh);
1017         if (ret < 0)
1018                 goto err_out_curr_node;
1019
1020         lock_buffer(bh);
1021         nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1022                               0, level, 0, NULL, NULL);
1023         unlock_buffer(bh);
1024         path[level].bp_sib_bh = bh;
1025         path[level].bp_op = nilfs_btree_grow;
1026
1027         level++;
1028         path[level].bp_op = nilfs_btree_do_insert;
1029
1030         /* a newly-created node block and a data block are added */
1031         stats->bs_nblocks += 2;
1032
1033         /* success */
1034  out:
1035         *levelp = level;
1036         return ret;
1037
1038         /* error */
1039  err_out_curr_node:
1040         btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
1041                                                     &path[level].bp_newreq);
1042  err_out_child_node:
1043         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1044                 nilfs_btnode_delete(path[level].bp_sib_bh);
1045                 btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(
1046                         &btree->bt_bmap, &path[level].bp_newreq);
1047
1048         }
1049
1050         btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
1051                                                        &path[level].bp_newreq);
1052  err_out_data:
1053         *levelp = level;
1054         stats->bs_nblocks = 0;
1055         return ret;
1056 }
1057
1058 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1059                                       struct nilfs_btree_path *path,
1060                                       int maxlevel, __u64 key, __u64 ptr)
1061 {
1062         int level;
1063
1064         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1065         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1066         if (btree->bt_ops->btop_set_target != NULL)
1067                 btree->bt_ops->btop_set_target(btree, key, ptr);
1068
1069         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1070                 if (btree->bt_bmap.b_pops->bpop_commit_alloc_ptr != NULL) {
1071                         btree->bt_bmap.b_pops->bpop_commit_alloc_ptr(
1072                                 &btree->bt_bmap, &path[level - 1].bp_newreq);
1073                 }
1074                 path[level].bp_op(btree, path, level, &key, &ptr);
1075         }
1076
1077         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1078                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1079 }
1080
1081 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1082 {
1083         struct nilfs_btree *btree;
1084         struct nilfs_btree_path *path;
1085         struct nilfs_bmap_stats stats;
1086         int level, ret;
1087
1088         btree = (struct nilfs_btree *)bmap;
1089         path = nilfs_btree_alloc_path(btree);
1090         if (path == NULL)
1091                 return -ENOMEM;
1092         nilfs_btree_init_path(btree, path);
1093
1094         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1095                                     NILFS_BTREE_LEVEL_NODE_MIN);
1096         if (ret != -ENOENT) {
1097                 if (ret == 0)
1098                         ret = -EEXIST;
1099                 goto out;
1100         }
1101
1102         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1103         if (ret < 0)
1104                 goto out;
1105         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1106         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1107
1108  out:
1109         nilfs_btree_clear_path(btree, path);
1110         nilfs_btree_free_path(btree, path);
1111         return ret;
1112 }
1113
1114 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1115                                   struct nilfs_btree_path *path,
1116                                   int level, __u64 *keyp, __u64 *ptrp)
1117 {
1118         struct nilfs_btree_node *node;
1119
1120         if (level < nilfs_btree_height(btree) - 1) {
1121                 lock_buffer(path[level].bp_bh);
1122                 node = nilfs_btree_get_nonroot_node(btree, path, level);
1123                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1124                                         path[level].bp_index);
1125                 if (!buffer_dirty(path[level].bp_bh))
1126                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1127                 unlock_buffer(path[level].bp_bh);
1128                 if (path[level].bp_index == 0)
1129                         nilfs_btree_promote_key(btree, path, level + 1,
1130                                 nilfs_btree_node_get_key(btree, node, 0));
1131         } else {
1132                 node = nilfs_btree_get_root(btree);
1133                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1134                                         path[level].bp_index);
1135         }
1136 }
1137
1138 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1139                                     struct nilfs_btree_path *path,
1140                                     int level, __u64 *keyp, __u64 *ptrp)
1141 {
1142         struct nilfs_btree_node *node, *left;
1143         int nchildren, lnchildren, n;
1144
1145         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1146
1147         lock_buffer(path[level].bp_bh);
1148         lock_buffer(path[level].bp_sib_bh);
1149
1150         node = nilfs_btree_get_nonroot_node(btree, path, level);
1151         left = nilfs_btree_get_sib_node(btree, path, level);
1152         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1153         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
1154
1155         n = (nchildren + lnchildren) / 2 - nchildren;
1156
1157         nilfs_btree_node_move_right(btree, left, node, n);
1158
1159         if (!buffer_dirty(path[level].bp_bh))
1160                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1161         if (!buffer_dirty(path[level].bp_sib_bh))
1162                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1163
1164         unlock_buffer(path[level].bp_bh);
1165         unlock_buffer(path[level].bp_sib_bh);
1166
1167         nilfs_btree_promote_key(btree, path, level + 1,
1168                                 nilfs_btree_node_get_key(btree, node, 0));
1169
1170         brelse(path[level].bp_sib_bh);
1171         path[level].bp_sib_bh = NULL;
1172         path[level].bp_index += n;
1173 }
1174
1175 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1176                                      struct nilfs_btree_path *path,
1177                                      int level, __u64 *keyp, __u64 *ptrp)
1178 {
1179         struct nilfs_btree_node *node, *right;
1180         int nchildren, rnchildren, n;
1181
1182         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1183
1184         lock_buffer(path[level].bp_bh);
1185         lock_buffer(path[level].bp_sib_bh);
1186
1187         node = nilfs_btree_get_nonroot_node(btree, path, level);
1188         right = nilfs_btree_get_sib_node(btree, path, level);
1189         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1190         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
1191
1192         n = (nchildren + rnchildren) / 2 - nchildren;
1193
1194         nilfs_btree_node_move_left(btree, node, right, n);
1195
1196         if (!buffer_dirty(path[level].bp_bh))
1197                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1198         if (!buffer_dirty(path[level].bp_sib_bh))
1199                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1200
1201         unlock_buffer(path[level].bp_bh);
1202         unlock_buffer(path[level].bp_sib_bh);
1203
1204         path[level + 1].bp_index++;
1205         nilfs_btree_promote_key(btree, path, level + 1,
1206                                 nilfs_btree_node_get_key(btree, right, 0));
1207         path[level + 1].bp_index--;
1208
1209         brelse(path[level].bp_sib_bh);
1210         path[level].bp_sib_bh = NULL;
1211 }
1212
1213 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1214                                     struct nilfs_btree_path *path,
1215                                     int level, __u64 *keyp, __u64 *ptrp)
1216 {
1217         struct nilfs_btree_node *node, *left;
1218         int n;
1219
1220         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1221
1222         lock_buffer(path[level].bp_bh);
1223         lock_buffer(path[level].bp_sib_bh);
1224
1225         node = nilfs_btree_get_nonroot_node(btree, path, level);
1226         left = nilfs_btree_get_sib_node(btree, path, level);
1227
1228         n = nilfs_btree_node_get_nchildren(btree, node);
1229
1230         nilfs_btree_node_move_left(btree, left, node, n);
1231
1232         if (!buffer_dirty(path[level].bp_sib_bh))
1233                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1234
1235         unlock_buffer(path[level].bp_bh);
1236         unlock_buffer(path[level].bp_sib_bh);
1237
1238         nilfs_btnode_delete(path[level].bp_bh);
1239         path[level].bp_bh = path[level].bp_sib_bh;
1240         path[level].bp_sib_bh = NULL;
1241         path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left);
1242 }
1243
1244 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1245                                      struct nilfs_btree_path *path,
1246                                      int level, __u64 *keyp, __u64 *ptrp)
1247 {
1248         struct nilfs_btree_node *node, *right;
1249         int n;
1250
1251         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1252
1253         lock_buffer(path[level].bp_bh);
1254         lock_buffer(path[level].bp_sib_bh);
1255
1256         node = nilfs_btree_get_nonroot_node(btree, path, level);
1257         right = nilfs_btree_get_sib_node(btree, path, level);
1258
1259         n = nilfs_btree_node_get_nchildren(btree, right);
1260
1261         nilfs_btree_node_move_left(btree, node, right, n);
1262
1263         if (!buffer_dirty(path[level].bp_bh))
1264                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1265
1266         unlock_buffer(path[level].bp_bh);
1267         unlock_buffer(path[level].bp_sib_bh);
1268
1269         nilfs_btnode_delete(path[level].bp_sib_bh);
1270         path[level].bp_sib_bh = NULL;
1271         path[level + 1].bp_index++;
1272 }
1273
1274 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1275                                struct nilfs_btree_path *path,
1276                                int level, __u64 *keyp, __u64 *ptrp)
1277 {
1278         struct nilfs_btree_node *root, *child;
1279         int n;
1280
1281         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1282
1283         lock_buffer(path[level].bp_bh);
1284         root = nilfs_btree_get_root(btree);
1285         child = nilfs_btree_get_nonroot_node(btree, path, level);
1286
1287         nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1288         nilfs_btree_node_set_level(btree, root, level);
1289         n = nilfs_btree_node_get_nchildren(btree, child);
1290         nilfs_btree_node_move_left(btree, root, child, n);
1291         unlock_buffer(path[level].bp_bh);
1292
1293         nilfs_btnode_delete(path[level].bp_bh);
1294         path[level].bp_bh = NULL;
1295 }
1296
1297
1298 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1299                                       struct nilfs_btree_path *path,
1300                                       int *levelp,
1301                                       struct nilfs_bmap_stats *stats)
1302 {
1303         struct buffer_head *bh;
1304         struct nilfs_btree_node *node, *parent, *sib;
1305         __u64 sibptr;
1306         int pindex, level, ret;
1307
1308         ret = 0;
1309         stats->bs_nblocks = 0;
1310         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1311              level < nilfs_btree_height(btree) - 1;
1312              level++) {
1313                 node = nilfs_btree_get_nonroot_node(btree, path, level);
1314                 path[level].bp_oldreq.bpr_ptr =
1315                         nilfs_btree_node_get_ptr(btree, node,
1316                                                  path[level].bp_index);
1317                 if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
1318                         ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
1319                                 &btree->bt_bmap, &path[level].bp_oldreq);
1320                         if (ret < 0)
1321                                 goto err_out_child_node;
1322                 }
1323
1324                 if (nilfs_btree_node_get_nchildren(btree, node) >
1325                     nilfs_btree_node_nchildren_min(btree, node)) {
1326                         path[level].bp_op = nilfs_btree_do_delete;
1327                         stats->bs_nblocks++;
1328                         goto out;
1329                 }
1330
1331                 parent = nilfs_btree_get_node(btree, path, level + 1);
1332                 pindex = path[level + 1].bp_index;
1333
1334                 if (pindex > 0) {
1335                         /* left sibling */
1336                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1337                                                           pindex - 1);
1338                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1339                         if (ret < 0)
1340                                 goto err_out_curr_node;
1341                         sib = (struct nilfs_btree_node *)bh->b_data;
1342                         if (nilfs_btree_node_get_nchildren(btree, sib) >
1343                             nilfs_btree_node_nchildren_min(btree, sib)) {
1344                                 path[level].bp_sib_bh = bh;
1345                                 path[level].bp_op = nilfs_btree_borrow_left;
1346                                 stats->bs_nblocks++;
1347                                 goto out;
1348                         } else {
1349                                 path[level].bp_sib_bh = bh;
1350                                 path[level].bp_op = nilfs_btree_concat_left;
1351                                 stats->bs_nblocks++;
1352                                 /* continue; */
1353                         }
1354                 } else if (pindex <
1355                            nilfs_btree_node_get_nchildren(btree, parent) - 1) {
1356                         /* right sibling */
1357                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1358                                                           pindex + 1);
1359                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1360                         if (ret < 0)
1361                                 goto err_out_curr_node;
1362                         sib = (struct nilfs_btree_node *)bh->b_data;
1363                         if (nilfs_btree_node_get_nchildren(btree, sib) >
1364                             nilfs_btree_node_nchildren_min(btree, sib)) {
1365                                 path[level].bp_sib_bh = bh;
1366                                 path[level].bp_op = nilfs_btree_borrow_right;
1367                                 stats->bs_nblocks++;
1368                                 goto out;
1369                         } else {
1370                                 path[level].bp_sib_bh = bh;
1371                                 path[level].bp_op = nilfs_btree_concat_right;
1372                                 stats->bs_nblocks++;
1373                                 /* continue; */
1374                         }
1375                 } else {
1376                         /* no siblings */
1377                         /* the only child of the root node */
1378                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1379                         if (nilfs_btree_node_get_nchildren(btree, node) - 1 <=
1380                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1381                                 path[level].bp_op = nilfs_btree_shrink;
1382                                 stats->bs_nblocks += 2;
1383                         } else {
1384                                 path[level].bp_op = nilfs_btree_do_delete;
1385                                 stats->bs_nblocks++;
1386                         }
1387
1388                         goto out;
1389
1390                 }
1391         }
1392
1393         node = nilfs_btree_get_root(btree);
1394         path[level].bp_oldreq.bpr_ptr =
1395                 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1396         if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
1397                 ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
1398                         &btree->bt_bmap, &path[level].bp_oldreq);
1399                 if (ret < 0)
1400                         goto err_out_child_node;
1401         }
1402         /* child of the root node is deleted */
1403         path[level].bp_op = nilfs_btree_do_delete;
1404         stats->bs_nblocks++;
1405
1406         /* success */
1407  out:
1408         *levelp = level;
1409         return ret;
1410
1411         /* error */
1412  err_out_curr_node:
1413         if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
1414                 btree->bt_bmap.b_pops->bpop_abort_end_ptr(
1415                         &btree->bt_bmap, &path[level].bp_oldreq);
1416  err_out_child_node:
1417         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1418                 brelse(path[level].bp_sib_bh);
1419                 if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
1420                         btree->bt_bmap.b_pops->bpop_abort_end_ptr(
1421                                 &btree->bt_bmap, &path[level].bp_oldreq);
1422         }
1423         *levelp = level;
1424         stats->bs_nblocks = 0;
1425         return ret;
1426 }
1427
1428 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1429                                       struct nilfs_btree_path *path,
1430                                       int maxlevel)
1431 {
1432         int level;
1433
1434         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1435                 if (btree->bt_bmap.b_pops->bpop_commit_end_ptr != NULL)
1436                         btree->bt_bmap.b_pops->bpop_commit_end_ptr(
1437                                 &btree->bt_bmap, &path[level].bp_oldreq);
1438                 path[level].bp_op(btree, path, level, NULL, NULL);
1439         }
1440
1441         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1442                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1443 }
1444
1445 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1446
1447 {
1448         struct nilfs_btree *btree;
1449         struct nilfs_btree_path *path;
1450         struct nilfs_bmap_stats stats;
1451         int level, ret;
1452
1453         btree = (struct nilfs_btree *)bmap;
1454         path = nilfs_btree_alloc_path(btree);
1455         if (path == NULL)
1456                 return -ENOMEM;
1457         nilfs_btree_init_path(btree, path);
1458         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1459                                     NILFS_BTREE_LEVEL_NODE_MIN);
1460         if (ret < 0)
1461                 goto out;
1462
1463         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats);
1464         if (ret < 0)
1465                 goto out;
1466         nilfs_btree_commit_delete(btree, path, level);
1467         nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1468
1469 out:
1470         nilfs_btree_clear_path(btree, path);
1471         nilfs_btree_free_path(btree, path);
1472         return ret;
1473 }
1474
1475 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1476 {
1477         struct nilfs_btree *btree;
1478         struct nilfs_btree_path *path;
1479         int ret;
1480
1481         btree = (struct nilfs_btree *)bmap;
1482         path = nilfs_btree_alloc_path(btree);
1483         if (path == NULL)
1484                 return -ENOMEM;
1485         nilfs_btree_init_path(btree, path);
1486
1487         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1488
1489         nilfs_btree_clear_path(btree, path);
1490         nilfs_btree_free_path(btree, path);
1491
1492         return ret;
1493 }
1494
1495 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1496 {
1497         struct buffer_head *bh;
1498         struct nilfs_btree *btree;
1499         struct nilfs_btree_node *root, *node;
1500         __u64 maxkey, nextmaxkey;
1501         __u64 ptr;
1502         int nchildren, ret;
1503
1504         btree = (struct nilfs_btree *)bmap;
1505         root = nilfs_btree_get_root(btree);
1506         switch (nilfs_btree_height(btree)) {
1507         case 2:
1508                 bh = NULL;
1509                 node = root;
1510                 break;
1511         case 3:
1512                 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1513                 if (nchildren > 1)
1514                         return 0;
1515                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1516                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1517                 if (ret < 0)
1518                         return ret;
1519                 node = (struct nilfs_btree_node *)bh->b_data;
1520                 break;
1521         default:
1522                 return 0;
1523         }
1524
1525         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1526         maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1);
1527         nextmaxkey = (nchildren > 1) ?
1528                 nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0;
1529         if (bh != NULL)
1530                 brelse(bh);
1531
1532         return (maxkey == key) && (nextmaxkey < bmap->b_low);
1533 }
1534
1535 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1536                                    __u64 *keys, __u64 *ptrs, int nitems)
1537 {
1538         struct buffer_head *bh;
1539         struct nilfs_btree *btree;
1540         struct nilfs_btree_node *node, *root;
1541         __le64 *dkeys;
1542         __le64 *dptrs;
1543         __u64 ptr;
1544         int nchildren, i, ret;
1545
1546         btree = (struct nilfs_btree *)bmap;
1547         root = nilfs_btree_get_root(btree);
1548         switch (nilfs_btree_height(btree)) {
1549         case 2:
1550                 bh = NULL;
1551                 node = root;
1552                 break;
1553         case 3:
1554                 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1555                 WARN_ON(nchildren > 1);
1556                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1557                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1558                 if (ret < 0)
1559                         return ret;
1560                 node = (struct nilfs_btree_node *)bh->b_data;
1561                 break;
1562         default:
1563                 node = NULL;
1564                 return -EINVAL;
1565         }
1566
1567         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1568         if (nchildren < nitems)
1569                 nitems = nchildren;
1570         dkeys = nilfs_btree_node_dkeys(btree, node);
1571         dptrs = nilfs_btree_node_dptrs(btree, node);
1572         for (i = 0; i < nitems; i++) {
1573                 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1574                 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1575         }
1576
1577         if (bh != NULL)
1578                 brelse(bh);
1579
1580         return nitems;
1581 }
1582
1583 static int
1584 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1585                                        union nilfs_bmap_ptr_req *dreq,
1586                                        union nilfs_bmap_ptr_req *nreq,
1587                                        struct buffer_head **bhp,
1588                                        struct nilfs_bmap_stats *stats)
1589 {
1590         struct buffer_head *bh;
1591         struct nilfs_btree *btree;
1592         int ret;
1593
1594         btree = (struct nilfs_btree *)bmap;
1595         stats->bs_nblocks = 0;
1596
1597         /* for data */
1598         /* cannot find near ptr */
1599         if (btree->bt_ops->btop_find_target != NULL)
1600                 dreq->bpr_ptr
1601                         = btree->bt_ops->btop_find_target(btree, NULL, key);
1602         ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, dreq);
1603         if (ret < 0)
1604                 return ret;
1605
1606         *bhp = NULL;
1607         stats->bs_nblocks++;
1608         if (nreq != NULL) {
1609                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1610                 ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, nreq);
1611                 if (ret < 0)
1612                         goto err_out_dreq;
1613
1614                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1615                 if (ret < 0)
1616                         goto err_out_nreq;
1617
1618                 *bhp = bh;
1619                 stats->bs_nblocks++;
1620         }
1621
1622         /* success */
1623         return 0;
1624
1625         /* error */
1626  err_out_nreq:
1627         bmap->b_pops->bpop_abort_alloc_ptr(bmap, nreq);
1628  err_out_dreq:
1629         bmap->b_pops->bpop_abort_alloc_ptr(bmap, dreq);
1630         stats->bs_nblocks = 0;
1631         return ret;
1632
1633 }
1634
1635 static void
1636 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1637                                       __u64 key, __u64 ptr,
1638                                       const __u64 *keys, const __u64 *ptrs,
1639                                       int n, __u64 low, __u64 high,
1640                                       union nilfs_bmap_ptr_req *dreq,
1641                                       union nilfs_bmap_ptr_req *nreq,
1642                                       struct buffer_head *bh)
1643 {
1644         struct nilfs_btree *btree;
1645         struct nilfs_btree_node *node;
1646         __u64 tmpptr;
1647
1648         /* free resources */
1649         if (bmap->b_ops->bop_clear != NULL)
1650                 bmap->b_ops->bop_clear(bmap);
1651
1652         /* ptr must be a pointer to a buffer head. */
1653         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1654
1655         /* convert and insert */
1656         btree = (struct nilfs_btree *)bmap;
1657         nilfs_btree_init(bmap, low, high);
1658         if (nreq != NULL) {
1659                 if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) {
1660                         bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
1661                         bmap->b_pops->bpop_commit_alloc_ptr(bmap, nreq);
1662                 }
1663
1664                 /* create child node at level 1 */
1665                 lock_buffer(bh);
1666                 node = (struct nilfs_btree_node *)bh->b_data;
1667                 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1668                 nilfs_btree_node_insert(btree, node,
1669                                         key, dreq->bpr_ptr, n);
1670                 if (!buffer_dirty(bh))
1671                         nilfs_btnode_mark_dirty(bh);
1672                 if (!nilfs_bmap_dirty(bmap))
1673                         nilfs_bmap_set_dirty(bmap);
1674
1675                 unlock_buffer(bh);
1676                 brelse(bh);
1677
1678                 /* create root node at level 2 */
1679                 node = nilfs_btree_get_root(btree);
1680                 tmpptr = nreq->bpr_ptr;
1681                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1682                                       2, 1, &keys[0], &tmpptr);
1683         } else {
1684                 if (bmap->b_pops->bpop_commit_alloc_ptr != NULL)
1685                         bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
1686
1687                 /* create root node at level 1 */
1688                 node = nilfs_btree_get_root(btree);
1689                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1690                                       1, n, keys, ptrs);
1691                 nilfs_btree_node_insert(btree, node,
1692                                         key, dreq->bpr_ptr, n);
1693                 if (!nilfs_bmap_dirty(bmap))
1694                         nilfs_bmap_set_dirty(bmap);
1695         }
1696
1697         if (btree->bt_ops->btop_set_target != NULL)
1698                 btree->bt_ops->btop_set_target(btree, key, dreq->bpr_ptr);
1699 }
1700
1701 /**
1702  * nilfs_btree_convert_and_insert -
1703  * @bmap:
1704  * @key:
1705  * @ptr:
1706  * @keys:
1707  * @ptrs:
1708  * @n:
1709  * @low:
1710  * @high:
1711  */
1712 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1713                                    __u64 key, __u64 ptr,
1714                                    const __u64 *keys, const __u64 *ptrs,
1715                                    int n, __u64 low, __u64 high)
1716 {
1717         struct buffer_head *bh;
1718         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1719         struct nilfs_bmap_stats stats;
1720         int ret;
1721
1722         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1723                 di = &dreq;
1724                 ni = NULL;
1725         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1726                            1 << bmap->b_inode->i_blkbits)) {
1727                 di = &dreq;
1728                 ni = &nreq;
1729         } else {
1730                 di = NULL;
1731                 ni = NULL;
1732                 BUG();
1733         }
1734
1735         ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1736                                                      &stats);
1737         if (ret < 0)
1738                 return ret;
1739         nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1740                                               low, high, di, ni, bh);
1741         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1742         return 0;
1743 }
1744
1745 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1746                                    struct nilfs_btree_path *path,
1747                                    int level,
1748                                    struct buffer_head *bh)
1749 {
1750         while ((++level < nilfs_btree_height(btree) - 1) &&
1751                !buffer_dirty(path[level].bp_bh))
1752                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1753
1754         return 0;
1755 }
1756
1757 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1758                                         struct nilfs_btree_path *path,
1759                                         int level)
1760 {
1761         struct nilfs_btree_node *parent;
1762         int ret;
1763
1764         parent = nilfs_btree_get_node(btree, path, level + 1);
1765         path[level].bp_oldreq.bpr_ptr =
1766                 nilfs_btree_node_get_ptr(btree, parent,
1767                                          path[level + 1].bp_index);
1768         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1769         ret = nilfs_bmap_prepare_update(&btree->bt_bmap,
1770                                         &path[level].bp_oldreq,
1771                                         &path[level].bp_newreq);
1772         if (ret < 0)
1773                 return ret;
1774
1775         if (buffer_nilfs_node(path[level].bp_bh)) {
1776                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1777                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1778                 path[level].bp_ctxt.bh = path[level].bp_bh;
1779                 ret = nilfs_btnode_prepare_change_key(
1780                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1781                         &path[level].bp_ctxt);
1782                 if (ret < 0) {
1783                         nilfs_bmap_abort_update(&btree->bt_bmap,
1784                                                 &path[level].bp_oldreq,
1785                                                 &path[level].bp_newreq);
1786                         return ret;
1787                 }
1788         }
1789
1790         return 0;
1791 }
1792
1793 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1794                                         struct nilfs_btree_path *path,
1795                                         int level)
1796 {
1797         struct nilfs_btree_node *parent;
1798
1799         nilfs_bmap_commit_update(&btree->bt_bmap,
1800                                  &path[level].bp_oldreq,
1801                                  &path[level].bp_newreq);
1802
1803         if (buffer_nilfs_node(path[level].bp_bh)) {
1804                 nilfs_btnode_commit_change_key(
1805                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1806                         &path[level].bp_ctxt);
1807                 path[level].bp_bh = path[level].bp_ctxt.bh;
1808         }
1809         set_buffer_nilfs_volatile(path[level].bp_bh);
1810
1811         parent = nilfs_btree_get_node(btree, path, level + 1);
1812         nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1813                                  path[level].bp_newreq.bpr_ptr);
1814 }
1815
1816 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1817                                        struct nilfs_btree_path *path,
1818                                        int level)
1819 {
1820         nilfs_bmap_abort_update(&btree->bt_bmap,
1821                                 &path[level].bp_oldreq,
1822                                 &path[level].bp_newreq);
1823         if (buffer_nilfs_node(path[level].bp_bh))
1824                 nilfs_btnode_abort_change_key(
1825                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1826                         &path[level].bp_ctxt);
1827 }
1828
1829 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1830                                            struct nilfs_btree_path *path,
1831                                            int minlevel,
1832                                            int *maxlevelp)
1833 {
1834         int level, ret;
1835
1836         level = minlevel;
1837         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1838                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1839                 if (ret < 0)
1840                         return ret;
1841         }
1842         while ((++level < nilfs_btree_height(btree) - 1) &&
1843                !buffer_dirty(path[level].bp_bh)) {
1844
1845                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1846                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1847                 if (ret < 0)
1848                         goto out;
1849         }
1850
1851         /* success */
1852         *maxlevelp = level - 1;
1853         return 0;
1854
1855         /* error */
1856  out:
1857         while (--level > minlevel)
1858                 nilfs_btree_abort_update_v(btree, path, level);
1859         if (!buffer_nilfs_volatile(path[level].bp_bh))
1860                 nilfs_btree_abort_update_v(btree, path, level);
1861         return ret;
1862 }
1863
1864 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1865                                            struct nilfs_btree_path *path,
1866                                            int minlevel,
1867                                            int maxlevel,
1868                                            struct buffer_head *bh)
1869 {
1870         int level;
1871
1872         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1873                 nilfs_btree_commit_update_v(btree, path, minlevel);
1874
1875         for (level = minlevel + 1; level <= maxlevel; level++)
1876                 nilfs_btree_commit_update_v(btree, path, level);
1877 }
1878
1879 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1880                                    struct nilfs_btree_path *path,
1881                                    int level,
1882                                    struct buffer_head *bh)
1883 {
1884         int maxlevel, ret;
1885         struct nilfs_btree_node *parent;
1886         __u64 ptr;
1887
1888         get_bh(bh);
1889         path[level].bp_bh = bh;
1890         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel);
1891         if (ret < 0)
1892                 goto out;
1893
1894         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1895                 parent = nilfs_btree_get_node(btree, path, level + 1);
1896                 ptr = nilfs_btree_node_get_ptr(btree, parent,
1897                                                path[level + 1].bp_index);
1898                 ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr);
1899                 if (ret < 0)
1900                         goto out;
1901         }
1902
1903         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh);
1904
1905  out:
1906         brelse(path[level].bp_bh);
1907         path[level].bp_bh = NULL;
1908         return ret;
1909 }
1910
1911 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1912                                  struct buffer_head *bh)
1913 {
1914         struct nilfs_btree *btree;
1915         struct nilfs_btree_path *path;
1916         struct nilfs_btree_node *node;
1917         __u64 key;
1918         int level, ret;
1919
1920         WARN_ON(!buffer_dirty(bh));
1921
1922         btree = (struct nilfs_btree *)bmap;
1923         path = nilfs_btree_alloc_path(btree);
1924         if (path == NULL)
1925                 return -ENOMEM;
1926         nilfs_btree_init_path(btree, path);
1927
1928         if (buffer_nilfs_node(bh)) {
1929                 node = (struct nilfs_btree_node *)bh->b_data;
1930                 key = nilfs_btree_node_get_key(btree, node, 0);
1931                 level = nilfs_btree_node_get_level(btree, node);
1932         } else {
1933                 key = nilfs_bmap_data_get_key(bmap, bh);
1934                 level = NILFS_BTREE_LEVEL_DATA;
1935         }
1936
1937         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1938         if (ret < 0) {
1939                 if (unlikely(ret == -ENOENT))
1940                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1941                                __func__, (unsigned long long)key, level);
1942                 goto out;
1943         }
1944
1945         ret = btree->bt_ops->btop_propagate(btree, path, level, bh);
1946
1947  out:
1948         nilfs_btree_clear_path(btree, path);
1949         nilfs_btree_free_path(btree, path);
1950
1951         return ret;
1952 }
1953
1954 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1955                                     struct buffer_head *bh)
1956 {
1957         return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr);
1958 }
1959
1960 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1961                                          struct list_head *lists,
1962                                          struct buffer_head *bh)
1963 {
1964         struct list_head *head;
1965         struct buffer_head *cbh;
1966         struct nilfs_btree_node *node, *cnode;
1967         __u64 key, ckey;
1968         int level;
1969
1970         get_bh(bh);
1971         node = (struct nilfs_btree_node *)bh->b_data;
1972         key = nilfs_btree_node_get_key(btree, node, 0);
1973         level = nilfs_btree_node_get_level(btree, node);
1974         list_for_each(head, &lists[level]) {
1975                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1976                 cnode = (struct nilfs_btree_node *)cbh->b_data;
1977                 ckey = nilfs_btree_node_get_key(btree, cnode, 0);
1978                 if (key < ckey)
1979                         break;
1980         }
1981         list_add_tail(&bh->b_assoc_buffers, head);
1982 }
1983
1984 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
1985                                              struct list_head *listp)
1986 {
1987         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1988         struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
1989         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1990         struct pagevec pvec;
1991         struct buffer_head *bh, *head;
1992         pgoff_t index = 0;
1993         int level, i;
1994
1995         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1996              level < NILFS_BTREE_LEVEL_MAX;
1997              level++)
1998                 INIT_LIST_HEAD(&lists[level]);
1999
2000         pagevec_init(&pvec, 0);
2001
2002         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2003                                   PAGEVEC_SIZE)) {
2004                 for (i = 0; i < pagevec_count(&pvec); i++) {
2005                         bh = head = page_buffers(pvec.pages[i]);
2006                         do {
2007                                 if (buffer_dirty(bh))
2008                                         nilfs_btree_add_dirty_buffer(btree,
2009                                                                      lists, bh);
2010                         } while ((bh = bh->b_this_page) != head);
2011                 }
2012                 pagevec_release(&pvec);
2013                 cond_resched();
2014         }
2015
2016         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2017              level < NILFS_BTREE_LEVEL_MAX;
2018              level++)
2019                 list_splice(&lists[level], listp->prev);
2020 }
2021
2022 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2023                                 struct nilfs_btree_path *path,
2024                                 int level,
2025                                 struct buffer_head **bh,
2026                                 sector_t blocknr,
2027                                 union nilfs_binfo *binfo)
2028 {
2029         struct nilfs_btree_node *parent;
2030         __u64 key;
2031         __u64 ptr;
2032         int ret;
2033
2034         parent = nilfs_btree_get_node(btree, path, level + 1);
2035         ptr = nilfs_btree_node_get_ptr(btree, parent,
2036                                        path[level + 1].bp_index);
2037         if (buffer_nilfs_node(*bh)) {
2038                 path[level].bp_ctxt.oldkey = ptr;
2039                 path[level].bp_ctxt.newkey = blocknr;
2040                 path[level].bp_ctxt.bh = *bh;
2041                 ret = nilfs_btnode_prepare_change_key(
2042                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2043                         &path[level].bp_ctxt);
2044                 if (ret < 0)
2045                         return ret;
2046                 nilfs_btnode_commit_change_key(
2047                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2048                         &path[level].bp_ctxt);
2049                 *bh = path[level].bp_ctxt.bh;
2050         }
2051
2052         nilfs_btree_node_set_ptr(btree, parent,
2053                                  path[level + 1].bp_index, blocknr);
2054
2055         key = nilfs_btree_node_get_key(btree, parent,
2056                                        path[level + 1].bp_index);
2057         /* on-disk format */
2058         binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2059         binfo->bi_dat.bi_level = level;
2060
2061         return 0;
2062 }
2063
2064 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2065                                 struct nilfs_btree_path *path,
2066                                 int level,
2067                                 struct buffer_head **bh,
2068                                 sector_t blocknr,
2069                                 union nilfs_binfo *binfo)
2070 {
2071         struct nilfs_btree_node *parent;
2072         __u64 key;
2073         __u64 ptr;
2074         union nilfs_bmap_ptr_req req;
2075         int ret;
2076
2077         parent = nilfs_btree_get_node(btree, path, level + 1);
2078         ptr = nilfs_btree_node_get_ptr(btree, parent,
2079                                        path[level + 1].bp_index);
2080         req.bpr_ptr = ptr;
2081         ret = nilfs_bmap_start_v(&btree->bt_bmap, &req, blocknr);
2082         if (unlikely(ret < 0))
2083                 return ret;
2084
2085         key = nilfs_btree_node_get_key(btree, parent,
2086                                        path[level + 1].bp_index);
2087         /* on-disk format */
2088         binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2089         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2090
2091         return 0;
2092 }
2093
2094 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2095                               struct buffer_head **bh,
2096                               sector_t blocknr,
2097                               union nilfs_binfo *binfo)
2098 {
2099         struct nilfs_btree *btree;
2100         struct nilfs_btree_path *path;
2101         struct nilfs_btree_node *node;
2102         __u64 key;
2103         int level, ret;
2104
2105         btree = (struct nilfs_btree *)bmap;
2106         path = nilfs_btree_alloc_path(btree);
2107         if (path == NULL)
2108                 return -ENOMEM;
2109         nilfs_btree_init_path(btree, path);
2110
2111         if (buffer_nilfs_node(*bh)) {
2112                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2113                 key = nilfs_btree_node_get_key(btree, node, 0);
2114                 level = nilfs_btree_node_get_level(btree, node);
2115         } else {
2116                 key = nilfs_bmap_data_get_key(bmap, *bh);
2117                 level = NILFS_BTREE_LEVEL_DATA;
2118         }
2119
2120         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2121         if (ret < 0) {
2122                 WARN_ON(ret == -ENOENT);
2123                 goto out;
2124         }
2125
2126         ret = btree->bt_ops->btop_assign(btree, path, level, bh,
2127                                             blocknr, binfo);
2128
2129  out:
2130         nilfs_btree_clear_path(btree, path);
2131         nilfs_btree_free_path(btree, path);
2132
2133         return ret;
2134 }
2135
2136 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2137                                  struct buffer_head **bh,
2138                                  sector_t blocknr,
2139                                  union nilfs_binfo *binfo)
2140 {
2141         struct nilfs_btree *btree;
2142         struct nilfs_btree_node *node;
2143         __u64 key;
2144         int ret;
2145
2146         btree = (struct nilfs_btree *)bmap;
2147         ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr);
2148         if (ret < 0)
2149                 return ret;
2150
2151         if (buffer_nilfs_node(*bh)) {
2152                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2153                 key = nilfs_btree_node_get_key(btree, node, 0);
2154         } else
2155                 key = nilfs_bmap_data_get_key(bmap, *bh);
2156
2157         /* on-disk format */
2158         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2159         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2160
2161         return 0;
2162 }
2163
2164 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2165 {
2166         struct buffer_head *bh;
2167         struct nilfs_btree *btree;
2168         struct nilfs_btree_path *path;
2169         __u64 ptr;
2170         int ret;
2171
2172         btree = (struct nilfs_btree *)bmap;
2173         path = nilfs_btree_alloc_path(btree);
2174         if (path == NULL)
2175                 return -ENOMEM;
2176         nilfs_btree_init_path(btree, path);
2177
2178         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2179         if (ret < 0) {
2180                 WARN_ON(ret == -ENOENT);
2181                 goto out;
2182         }
2183         ret = nilfs_btree_get_block(btree, ptr, &bh);
2184         if (ret < 0) {
2185                 WARN_ON(ret == -ENOENT);
2186                 goto out;
2187         }
2188
2189         if (!buffer_dirty(bh))
2190                 nilfs_btnode_mark_dirty(bh);
2191         brelse(bh);
2192         if (!nilfs_bmap_dirty(&btree->bt_bmap))
2193                 nilfs_bmap_set_dirty(&btree->bt_bmap);
2194
2195  out:
2196         nilfs_btree_clear_path(btree, path);
2197         nilfs_btree_free_path(btree, path);
2198         return ret;
2199 }
2200
2201 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2202         .bop_lookup             =       nilfs_btree_lookup,
2203         .bop_insert             =       nilfs_btree_insert,
2204         .bop_delete             =       nilfs_btree_delete,
2205         .bop_clear              =       NULL,
2206
2207         .bop_propagate          =       nilfs_btree_propagate,
2208
2209         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2210
2211         .bop_assign             =       nilfs_btree_assign,
2212         .bop_mark               =       nilfs_btree_mark,
2213
2214         .bop_last_key           =       nilfs_btree_last_key,
2215         .bop_check_insert       =       NULL,
2216         .bop_check_delete       =       nilfs_btree_check_delete,
2217         .bop_gather_data        =       nilfs_btree_gather_data,
2218 };
2219
2220 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2221         .bop_lookup             =       NULL,
2222         .bop_insert             =       NULL,
2223         .bop_delete             =       NULL,
2224         .bop_clear              =       NULL,
2225
2226         .bop_propagate          =       nilfs_btree_propagate_gc,
2227
2228         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2229
2230         .bop_assign             =       nilfs_btree_assign_gc,
2231         .bop_mark               =       NULL,
2232
2233         .bop_last_key           =       NULL,
2234         .bop_check_insert       =       NULL,
2235         .bop_check_delete       =       NULL,
2236         .bop_gather_data        =       NULL,
2237 };
2238
2239 static const struct nilfs_btree_operations nilfs_btree_ops_v = {
2240         .btop_find_target       =       nilfs_btree_find_target_v,
2241         .btop_set_target        =       nilfs_btree_set_target_v,
2242         .btop_propagate         =       nilfs_btree_propagate_v,
2243         .btop_assign            =       nilfs_btree_assign_v,
2244 };
2245
2246 static const struct nilfs_btree_operations nilfs_btree_ops_p = {
2247         .btop_find_target       =       NULL,
2248         .btop_set_target        =       NULL,
2249         .btop_propagate         =       nilfs_btree_propagate_p,
2250         .btop_assign            =       nilfs_btree_assign_p,
2251 };
2252
2253 int nilfs_btree_init(struct nilfs_bmap *bmap, __u64 low, __u64 high)
2254 {
2255         struct nilfs_btree *btree;
2256
2257         btree = (struct nilfs_btree *)bmap;
2258         bmap->b_ops = &nilfs_btree_ops;
2259         bmap->b_low = low;
2260         bmap->b_high = high;
2261         switch (bmap->b_inode->i_ino) {
2262         case NILFS_DAT_INO:
2263                 btree->bt_ops = &nilfs_btree_ops_p;
2264                 break;
2265         default:
2266                 btree->bt_ops = &nilfs_btree_ops_v;
2267                 break;
2268         }
2269
2270         return 0;
2271 }
2272
2273 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2274 {
2275         bmap->b_low = NILFS_BMAP_LARGE_LOW;
2276         bmap->b_high = NILFS_BMAP_LARGE_HIGH;
2277         bmap->b_ops = &nilfs_btree_ops_gc;
2278 }