Merge branch 'topic/pcm-estrpipe-in-pm' into for-linus
[pandora-kernel.git] / fs / xfs / xfs_btree.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_inode_item.h"
38 #include "xfs_btree.h"
39 #include "xfs_btree_trace.h"
40 #include "xfs_ialloc.h"
41 #include "xfs_error.h"
42
43 /*
44  * Cursor allocation zone.
45  */
46 kmem_zone_t     *xfs_btree_cur_zone;
47
48 /*
49  * Btree magic numbers.
50  */
51 const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
52         XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
53 };
54
55
56 STATIC int                              /* error (0 or EFSCORRUPTED) */
57 xfs_btree_check_lblock(
58         struct xfs_btree_cur    *cur,   /* btree cursor */
59         struct xfs_btree_block  *block, /* btree long form block pointer */
60         int                     level,  /* level of the btree block */
61         struct xfs_buf          *bp)    /* buffer for block, if any */
62 {
63         int                     lblock_ok; /* block passes checks */
64         struct xfs_mount        *mp;    /* file system mount point */
65
66         mp = cur->bc_mp;
67         lblock_ok =
68                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
69                 be16_to_cpu(block->bb_level) == level &&
70                 be16_to_cpu(block->bb_numrecs) <=
71                         cur->bc_ops->get_maxrecs(cur, level) &&
72                 block->bb_u.l.bb_leftsib &&
73                 (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO ||
74                  XFS_FSB_SANITY_CHECK(mp,
75                         be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
76                 block->bb_u.l.bb_rightsib &&
77                 (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO ||
78                  XFS_FSB_SANITY_CHECK(mp,
79                         be64_to_cpu(block->bb_u.l.bb_rightsib)));
80         if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
81                         XFS_ERRTAG_BTREE_CHECK_LBLOCK,
82                         XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
83                 if (bp)
84                         xfs_buftrace("LBTREE ERROR", bp);
85                 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
86                                  mp);
87                 return XFS_ERROR(EFSCORRUPTED);
88         }
89         return 0;
90 }
91
92 STATIC int                              /* error (0 or EFSCORRUPTED) */
93 xfs_btree_check_sblock(
94         struct xfs_btree_cur    *cur,   /* btree cursor */
95         struct xfs_btree_block  *block, /* btree short form block pointer */
96         int                     level,  /* level of the btree block */
97         struct xfs_buf          *bp)    /* buffer containing block */
98 {
99         struct xfs_buf          *agbp;  /* buffer for ag. freespace struct */
100         struct xfs_agf          *agf;   /* ag. freespace structure */
101         xfs_agblock_t           agflen; /* native ag. freespace length */
102         int                     sblock_ok; /* block passes checks */
103
104         agbp = cur->bc_private.a.agbp;
105         agf = XFS_BUF_TO_AGF(agbp);
106         agflen = be32_to_cpu(agf->agf_length);
107         sblock_ok =
108                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
109                 be16_to_cpu(block->bb_level) == level &&
110                 be16_to_cpu(block->bb_numrecs) <=
111                         cur->bc_ops->get_maxrecs(cur, level) &&
112                 (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK ||
113                  be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
114                 block->bb_u.s.bb_leftsib &&
115                 (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK ||
116                  be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
117                 block->bb_u.s.bb_rightsib;
118         if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
119                         XFS_ERRTAG_BTREE_CHECK_SBLOCK,
120                         XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
121                 if (bp)
122                         xfs_buftrace("SBTREE ERROR", bp);
123                 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
124                         XFS_ERRLEVEL_LOW, cur->bc_mp, block);
125                 return XFS_ERROR(EFSCORRUPTED);
126         }
127         return 0;
128 }
129
130 /*
131  * Debug routine: check that block header is ok.
132  */
133 int
134 xfs_btree_check_block(
135         struct xfs_btree_cur    *cur,   /* btree cursor */
136         struct xfs_btree_block  *block, /* generic btree block pointer */
137         int                     level,  /* level of the btree block */
138         struct xfs_buf          *bp)    /* buffer containing block, if any */
139 {
140         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
141                 return xfs_btree_check_lblock(cur, block, level, bp);
142         else
143                 return xfs_btree_check_sblock(cur, block, level, bp);
144 }
145
146 /*
147  * Check that (long) pointer is ok.
148  */
149 int                                     /* error (0 or EFSCORRUPTED) */
150 xfs_btree_check_lptr(
151         struct xfs_btree_cur    *cur,   /* btree cursor */
152         xfs_dfsbno_t            bno,    /* btree block disk address */
153         int                     level)  /* btree block level */
154 {
155         XFS_WANT_CORRUPTED_RETURN(
156                 level > 0 &&
157                 bno != NULLDFSBNO &&
158                 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
159         return 0;
160 }
161
162 #ifdef DEBUG
163 /*
164  * Check that (short) pointer is ok.
165  */
166 STATIC int                              /* error (0 or EFSCORRUPTED) */
167 xfs_btree_check_sptr(
168         struct xfs_btree_cur    *cur,   /* btree cursor */
169         xfs_agblock_t           bno,    /* btree block disk address */
170         int                     level)  /* btree block level */
171 {
172         xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
173
174         XFS_WANT_CORRUPTED_RETURN(
175                 level > 0 &&
176                 bno != NULLAGBLOCK &&
177                 bno != 0 &&
178                 bno < agblocks);
179         return 0;
180 }
181
182 /*
183  * Check that block ptr is ok.
184  */
185 STATIC int                              /* error (0 or EFSCORRUPTED) */
186 xfs_btree_check_ptr(
187         struct xfs_btree_cur    *cur,   /* btree cursor */
188         union xfs_btree_ptr     *ptr,   /* btree block disk address */
189         int                     index,  /* offset from ptr to check */
190         int                     level)  /* btree block level */
191 {
192         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
193                 return xfs_btree_check_lptr(cur,
194                                 be64_to_cpu((&ptr->l)[index]), level);
195         } else {
196                 return xfs_btree_check_sptr(cur,
197                                 be32_to_cpu((&ptr->s)[index]), level);
198         }
199 }
200 #endif
201
202 /*
203  * Delete the btree cursor.
204  */
205 void
206 xfs_btree_del_cursor(
207         xfs_btree_cur_t *cur,           /* btree cursor */
208         int             error)          /* del because of error */
209 {
210         int             i;              /* btree level */
211
212         /*
213          * Clear the buffer pointers, and release the buffers.
214          * If we're doing this in the face of an error, we
215          * need to make sure to inspect all of the entries
216          * in the bc_bufs array for buffers to be unlocked.
217          * This is because some of the btree code works from
218          * level n down to 0, and if we get an error along
219          * the way we won't have initialized all the entries
220          * down to 0.
221          */
222         for (i = 0; i < cur->bc_nlevels; i++) {
223                 if (cur->bc_bufs[i])
224                         xfs_btree_setbuf(cur, i, NULL);
225                 else if (!error)
226                         break;
227         }
228         /*
229          * Can't free a bmap cursor without having dealt with the
230          * allocated indirect blocks' accounting.
231          */
232         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
233                cur->bc_private.b.allocated == 0);
234         /*
235          * Free the cursor.
236          */
237         kmem_zone_free(xfs_btree_cur_zone, cur);
238 }
239
240 /*
241  * Duplicate the btree cursor.
242  * Allocate a new one, copy the record, re-get the buffers.
243  */
244 int                                     /* error */
245 xfs_btree_dup_cursor(
246         xfs_btree_cur_t *cur,           /* input cursor */
247         xfs_btree_cur_t **ncur)         /* output cursor */
248 {
249         xfs_buf_t       *bp;            /* btree block's buffer pointer */
250         int             error;          /* error return value */
251         int             i;              /* level number of btree block */
252         xfs_mount_t     *mp;            /* mount structure for filesystem */
253         xfs_btree_cur_t *new;           /* new cursor value */
254         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
255
256         tp = cur->bc_tp;
257         mp = cur->bc_mp;
258
259         /*
260          * Allocate a new cursor like the old one.
261          */
262         new = cur->bc_ops->dup_cursor(cur);
263
264         /*
265          * Copy the record currently in the cursor.
266          */
267         new->bc_rec = cur->bc_rec;
268
269         /*
270          * For each level current, re-get the buffer and copy the ptr value.
271          */
272         for (i = 0; i < new->bc_nlevels; i++) {
273                 new->bc_ptrs[i] = cur->bc_ptrs[i];
274                 new->bc_ra[i] = cur->bc_ra[i];
275                 if ((bp = cur->bc_bufs[i])) {
276                         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
277                                 XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
278                                 xfs_btree_del_cursor(new, error);
279                                 *ncur = NULL;
280                                 return error;
281                         }
282                         new->bc_bufs[i] = bp;
283                         ASSERT(bp);
284                         ASSERT(!XFS_BUF_GETERROR(bp));
285                 } else
286                         new->bc_bufs[i] = NULL;
287         }
288         *ncur = new;
289         return 0;
290 }
291
292 /*
293  * XFS btree block layout and addressing:
294  *
295  * There are two types of blocks in the btree: leaf and non-leaf blocks.
296  *
297  * The leaf record start with a header then followed by records containing
298  * the values.  A non-leaf block also starts with the same header, and
299  * then first contains lookup keys followed by an equal number of pointers
300  * to the btree blocks at the previous level.
301  *
302  *              +--------+-------+-------+-------+-------+-------+-------+
303  * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
304  *              +--------+-------+-------+-------+-------+-------+-------+
305  *
306  *              +--------+-------+-------+-------+-------+-------+-------+
307  * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
308  *              +--------+-------+-------+-------+-------+-------+-------+
309  *
310  * The header is called struct xfs_btree_block for reasons better left unknown
311  * and comes in different versions for short (32bit) and long (64bit) block
312  * pointers.  The record and key structures are defined by the btree instances
313  * and opaque to the btree core.  The block pointers are simple disk endian
314  * integers, available in a short (32bit) and long (64bit) variant.
315  *
316  * The helpers below calculate the offset of a given record, key or pointer
317  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
318  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
319  * inside the btree block is done using indices starting at one, not zero!
320  */
321
322 /*
323  * Return size of the btree block header for this btree instance.
324  */
325 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
326 {
327         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
328                 XFS_BTREE_LBLOCK_LEN :
329                 XFS_BTREE_SBLOCK_LEN;
330 }
331
332 /*
333  * Return size of btree block pointers for this btree instance.
334  */
335 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
336 {
337         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
338                 sizeof(__be64) : sizeof(__be32);
339 }
340
341 /*
342  * Calculate offset of the n-th record in a btree block.
343  */
344 STATIC size_t
345 xfs_btree_rec_offset(
346         struct xfs_btree_cur    *cur,
347         int                     n)
348 {
349         return xfs_btree_block_len(cur) +
350                 (n - 1) * cur->bc_ops->rec_len;
351 }
352
353 /*
354  * Calculate offset of the n-th key in a btree block.
355  */
356 STATIC size_t
357 xfs_btree_key_offset(
358         struct xfs_btree_cur    *cur,
359         int                     n)
360 {
361         return xfs_btree_block_len(cur) +
362                 (n - 1) * cur->bc_ops->key_len;
363 }
364
365 /*
366  * Calculate offset of the n-th block pointer in a btree block.
367  */
368 STATIC size_t
369 xfs_btree_ptr_offset(
370         struct xfs_btree_cur    *cur,
371         int                     n,
372         int                     level)
373 {
374         return xfs_btree_block_len(cur) +
375                 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
376                 (n - 1) * xfs_btree_ptr_len(cur);
377 }
378
379 /*
380  * Return a pointer to the n-th record in the btree block.
381  */
382 STATIC union xfs_btree_rec *
383 xfs_btree_rec_addr(
384         struct xfs_btree_cur    *cur,
385         int                     n,
386         struct xfs_btree_block  *block)
387 {
388         return (union xfs_btree_rec *)
389                 ((char *)block + xfs_btree_rec_offset(cur, n));
390 }
391
392 /*
393  * Return a pointer to the n-th key in the btree block.
394  */
395 STATIC union xfs_btree_key *
396 xfs_btree_key_addr(
397         struct xfs_btree_cur    *cur,
398         int                     n,
399         struct xfs_btree_block  *block)
400 {
401         return (union xfs_btree_key *)
402                 ((char *)block + xfs_btree_key_offset(cur, n));
403 }
404
405 /*
406  * Return a pointer to the n-th block pointer in the btree block.
407  */
408 STATIC union xfs_btree_ptr *
409 xfs_btree_ptr_addr(
410         struct xfs_btree_cur    *cur,
411         int                     n,
412         struct xfs_btree_block  *block)
413 {
414         int                     level = xfs_btree_get_level(block);
415
416         ASSERT(block->bb_level != 0);
417
418         return (union xfs_btree_ptr *)
419                 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
420 }
421
422 /*
423  * Get a the root block which is stored in the inode.
424  *
425  * For now this btree implementation assumes the btree root is always
426  * stored in the if_broot field of an inode fork.
427  */
428 STATIC struct xfs_btree_block *
429 xfs_btree_get_iroot(
430        struct xfs_btree_cur    *cur)
431 {
432        struct xfs_ifork        *ifp;
433
434        ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
435        return (struct xfs_btree_block *)ifp->if_broot;
436 }
437
438 /*
439  * Retrieve the block pointer from the cursor at the given level.
440  * This may be an inode btree root or from a buffer.
441  */
442 STATIC struct xfs_btree_block *         /* generic btree block pointer */
443 xfs_btree_get_block(
444         struct xfs_btree_cur    *cur,   /* btree cursor */
445         int                     level,  /* level in btree */
446         struct xfs_buf          **bpp)  /* buffer containing the block */
447 {
448         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
449             (level == cur->bc_nlevels - 1)) {
450                 *bpp = NULL;
451                 return xfs_btree_get_iroot(cur);
452         }
453
454         *bpp = cur->bc_bufs[level];
455         return XFS_BUF_TO_BLOCK(*bpp);
456 }
457
458 /*
459  * Get a buffer for the block, return it with no data read.
460  * Long-form addressing.
461  */
462 xfs_buf_t *                             /* buffer for fsbno */
463 xfs_btree_get_bufl(
464         xfs_mount_t     *mp,            /* file system mount point */
465         xfs_trans_t     *tp,            /* transaction pointer */
466         xfs_fsblock_t   fsbno,          /* file system block number */
467         uint            lock)           /* lock flags for get_buf */
468 {
469         xfs_buf_t       *bp;            /* buffer pointer (return value) */
470         xfs_daddr_t             d;              /* real disk block address */
471
472         ASSERT(fsbno != NULLFSBLOCK);
473         d = XFS_FSB_TO_DADDR(mp, fsbno);
474         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
475         ASSERT(bp);
476         ASSERT(!XFS_BUF_GETERROR(bp));
477         return bp;
478 }
479
480 /*
481  * Get a buffer for the block, return it with no data read.
482  * Short-form addressing.
483  */
484 xfs_buf_t *                             /* buffer for agno/agbno */
485 xfs_btree_get_bufs(
486         xfs_mount_t     *mp,            /* file system mount point */
487         xfs_trans_t     *tp,            /* transaction pointer */
488         xfs_agnumber_t  agno,           /* allocation group number */
489         xfs_agblock_t   agbno,          /* allocation group block number */
490         uint            lock)           /* lock flags for get_buf */
491 {
492         xfs_buf_t       *bp;            /* buffer pointer (return value) */
493         xfs_daddr_t             d;              /* real disk block address */
494
495         ASSERT(agno != NULLAGNUMBER);
496         ASSERT(agbno != NULLAGBLOCK);
497         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
498         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
499         ASSERT(bp);
500         ASSERT(!XFS_BUF_GETERROR(bp));
501         return bp;
502 }
503
504 /*
505  * Check for the cursor referring to the last block at the given level.
506  */
507 int                                     /* 1=is last block, 0=not last block */
508 xfs_btree_islastblock(
509         xfs_btree_cur_t         *cur,   /* btree cursor */
510         int                     level)  /* level to check */
511 {
512         struct xfs_btree_block  *block; /* generic btree block pointer */
513         xfs_buf_t               *bp;    /* buffer containing block */
514
515         block = xfs_btree_get_block(cur, level, &bp);
516         xfs_btree_check_block(cur, block, level, bp);
517         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
518                 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
519         else
520                 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
521 }
522
523 /*
524  * Change the cursor to point to the first record at the given level.
525  * Other levels are unaffected.
526  */
527 STATIC int                              /* success=1, failure=0 */
528 xfs_btree_firstrec(
529         xfs_btree_cur_t         *cur,   /* btree cursor */
530         int                     level)  /* level to change */
531 {
532         struct xfs_btree_block  *block; /* generic btree block pointer */
533         xfs_buf_t               *bp;    /* buffer containing block */
534
535         /*
536          * Get the block pointer for this level.
537          */
538         block = xfs_btree_get_block(cur, level, &bp);
539         xfs_btree_check_block(cur, block, level, bp);
540         /*
541          * It's empty, there is no such record.
542          */
543         if (!block->bb_numrecs)
544                 return 0;
545         /*
546          * Set the ptr value to 1, that's the first record/key.
547          */
548         cur->bc_ptrs[level] = 1;
549         return 1;
550 }
551
552 /*
553  * Change the cursor to point to the last record in the current block
554  * at the given level.  Other levels are unaffected.
555  */
556 STATIC int                              /* success=1, failure=0 */
557 xfs_btree_lastrec(
558         xfs_btree_cur_t         *cur,   /* btree cursor */
559         int                     level)  /* level to change */
560 {
561         struct xfs_btree_block  *block; /* generic btree block pointer */
562         xfs_buf_t               *bp;    /* buffer containing block */
563
564         /*
565          * Get the block pointer for this level.
566          */
567         block = xfs_btree_get_block(cur, level, &bp);
568         xfs_btree_check_block(cur, block, level, bp);
569         /*
570          * It's empty, there is no such record.
571          */
572         if (!block->bb_numrecs)
573                 return 0;
574         /*
575          * Set the ptr value to numrecs, that's the last record/key.
576          */
577         cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
578         return 1;
579 }
580
581 /*
582  * Compute first and last byte offsets for the fields given.
583  * Interprets the offsets table, which contains struct field offsets.
584  */
585 void
586 xfs_btree_offsets(
587         __int64_t       fields,         /* bitmask of fields */
588         const short     *offsets,       /* table of field offsets */
589         int             nbits,          /* number of bits to inspect */
590         int             *first,         /* output: first byte offset */
591         int             *last)          /* output: last byte offset */
592 {
593         int             i;              /* current bit number */
594         __int64_t       imask;          /* mask for current bit number */
595
596         ASSERT(fields != 0);
597         /*
598          * Find the lowest bit, so the first byte offset.
599          */
600         for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
601                 if (imask & fields) {
602                         *first = offsets[i];
603                         break;
604                 }
605         }
606         /*
607          * Find the highest bit, so the last byte offset.
608          */
609         for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
610                 if (imask & fields) {
611                         *last = offsets[i + 1] - 1;
612                         break;
613                 }
614         }
615 }
616
617 /*
618  * Get a buffer for the block, return it read in.
619  * Long-form addressing.
620  */
621 int                                     /* error */
622 xfs_btree_read_bufl(
623         xfs_mount_t     *mp,            /* file system mount point */
624         xfs_trans_t     *tp,            /* transaction pointer */
625         xfs_fsblock_t   fsbno,          /* file system block number */
626         uint            lock,           /* lock flags for read_buf */
627         xfs_buf_t       **bpp,          /* buffer for fsbno */
628         int             refval)         /* ref count value for buffer */
629 {
630         xfs_buf_t       *bp;            /* return value */
631         xfs_daddr_t             d;              /* real disk block address */
632         int             error;
633
634         ASSERT(fsbno != NULLFSBLOCK);
635         d = XFS_FSB_TO_DADDR(mp, fsbno);
636         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
637                         mp->m_bsize, lock, &bp))) {
638                 return error;
639         }
640         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
641         if (bp != NULL) {
642                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
643         }
644         *bpp = bp;
645         return 0;
646 }
647
648 /*
649  * Get a buffer for the block, return it read in.
650  * Short-form addressing.
651  */
652 int                                     /* error */
653 xfs_btree_read_bufs(
654         xfs_mount_t     *mp,            /* file system mount point */
655         xfs_trans_t     *tp,            /* transaction pointer */
656         xfs_agnumber_t  agno,           /* allocation group number */
657         xfs_agblock_t   agbno,          /* allocation group block number */
658         uint            lock,           /* lock flags for read_buf */
659         xfs_buf_t       **bpp,          /* buffer for agno/agbno */
660         int             refval)         /* ref count value for buffer */
661 {
662         xfs_buf_t       *bp;            /* return value */
663         xfs_daddr_t     d;              /* real disk block address */
664         int             error;
665
666         ASSERT(agno != NULLAGNUMBER);
667         ASSERT(agbno != NULLAGBLOCK);
668         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
669         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
670                                         mp->m_bsize, lock, &bp))) {
671                 return error;
672         }
673         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
674         if (bp != NULL) {
675                 switch (refval) {
676                 case XFS_ALLOC_BTREE_REF:
677                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
678                         break;
679                 case XFS_INO_BTREE_REF:
680                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
681                         break;
682                 }
683         }
684         *bpp = bp;
685         return 0;
686 }
687
688 /*
689  * Read-ahead the block, don't wait for it, don't return a buffer.
690  * Long-form addressing.
691  */
692 /* ARGSUSED */
693 void
694 xfs_btree_reada_bufl(
695         xfs_mount_t     *mp,            /* file system mount point */
696         xfs_fsblock_t   fsbno,          /* file system block number */
697         xfs_extlen_t    count)          /* count of filesystem blocks */
698 {
699         xfs_daddr_t             d;
700
701         ASSERT(fsbno != NULLFSBLOCK);
702         d = XFS_FSB_TO_DADDR(mp, fsbno);
703         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
704 }
705
706 /*
707  * Read-ahead the block, don't wait for it, don't return a buffer.
708  * Short-form addressing.
709  */
710 /* ARGSUSED */
711 void
712 xfs_btree_reada_bufs(
713         xfs_mount_t     *mp,            /* file system mount point */
714         xfs_agnumber_t  agno,           /* allocation group number */
715         xfs_agblock_t   agbno,          /* allocation group block number */
716         xfs_extlen_t    count)          /* count of filesystem blocks */
717 {
718         xfs_daddr_t             d;
719
720         ASSERT(agno != NULLAGNUMBER);
721         ASSERT(agbno != NULLAGBLOCK);
722         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
723         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
724 }
725
726 STATIC int
727 xfs_btree_readahead_lblock(
728         struct xfs_btree_cur    *cur,
729         int                     lr,
730         struct xfs_btree_block  *block)
731 {
732         int                     rval = 0;
733         xfs_dfsbno_t            left = be64_to_cpu(block->bb_u.l.bb_leftsib);
734         xfs_dfsbno_t            right = be64_to_cpu(block->bb_u.l.bb_rightsib);
735
736         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
737                 xfs_btree_reada_bufl(cur->bc_mp, left, 1);
738                 rval++;
739         }
740
741         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
742                 xfs_btree_reada_bufl(cur->bc_mp, right, 1);
743                 rval++;
744         }
745
746         return rval;
747 }
748
749 STATIC int
750 xfs_btree_readahead_sblock(
751         struct xfs_btree_cur    *cur,
752         int                     lr,
753         struct xfs_btree_block *block)
754 {
755         int                     rval = 0;
756         xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
757         xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
758
759
760         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
761                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
762                                      left, 1);
763                 rval++;
764         }
765
766         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
767                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
768                                      right, 1);
769                 rval++;
770         }
771
772         return rval;
773 }
774
775 /*
776  * Read-ahead btree blocks, at the given level.
777  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
778  */
779 STATIC int
780 xfs_btree_readahead(
781         struct xfs_btree_cur    *cur,           /* btree cursor */
782         int                     lev,            /* level in btree */
783         int                     lr)             /* left/right bits */
784 {
785         struct xfs_btree_block  *block;
786
787         /*
788          * No readahead needed if we are at the root level and the
789          * btree root is stored in the inode.
790          */
791         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
792             (lev == cur->bc_nlevels - 1))
793                 return 0;
794
795         if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
796                 return 0;
797
798         cur->bc_ra[lev] |= lr;
799         block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
800
801         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
802                 return xfs_btree_readahead_lblock(cur, lr, block);
803         return xfs_btree_readahead_sblock(cur, lr, block);
804 }
805
806 /*
807  * Set the buffer for level "lev" in the cursor to bp, releasing
808  * any previous buffer.
809  */
810 void
811 xfs_btree_setbuf(
812         xfs_btree_cur_t         *cur,   /* btree cursor */
813         int                     lev,    /* level in btree */
814         xfs_buf_t               *bp)    /* new buffer to set */
815 {
816         struct xfs_btree_block  *b;     /* btree block */
817         xfs_buf_t               *obp;   /* old buffer pointer */
818
819         obp = cur->bc_bufs[lev];
820         if (obp)
821                 xfs_trans_brelse(cur->bc_tp, obp);
822         cur->bc_bufs[lev] = bp;
823         cur->bc_ra[lev] = 0;
824         if (!bp)
825                 return;
826         b = XFS_BUF_TO_BLOCK(bp);
827         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
828                 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
829                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
830                 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
831                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
832         } else {
833                 if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
834                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
835                 if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
836                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
837         }
838 }
839
840 STATIC int
841 xfs_btree_ptr_is_null(
842         struct xfs_btree_cur    *cur,
843         union xfs_btree_ptr     *ptr)
844 {
845         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
846                 return be64_to_cpu(ptr->l) == NULLDFSBNO;
847         else
848                 return be32_to_cpu(ptr->s) == NULLAGBLOCK;
849 }
850
851 STATIC void
852 xfs_btree_set_ptr_null(
853         struct xfs_btree_cur    *cur,
854         union xfs_btree_ptr     *ptr)
855 {
856         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
857                 ptr->l = cpu_to_be64(NULLDFSBNO);
858         else
859                 ptr->s = cpu_to_be32(NULLAGBLOCK);
860 }
861
862 /*
863  * Get/set/init sibling pointers
864  */
865 STATIC void
866 xfs_btree_get_sibling(
867         struct xfs_btree_cur    *cur,
868         struct xfs_btree_block  *block,
869         union xfs_btree_ptr     *ptr,
870         int                     lr)
871 {
872         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
873
874         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
875                 if (lr == XFS_BB_RIGHTSIB)
876                         ptr->l = block->bb_u.l.bb_rightsib;
877                 else
878                         ptr->l = block->bb_u.l.bb_leftsib;
879         } else {
880                 if (lr == XFS_BB_RIGHTSIB)
881                         ptr->s = block->bb_u.s.bb_rightsib;
882                 else
883                         ptr->s = block->bb_u.s.bb_leftsib;
884         }
885 }
886
887 STATIC void
888 xfs_btree_set_sibling(
889         struct xfs_btree_cur    *cur,
890         struct xfs_btree_block  *block,
891         union xfs_btree_ptr     *ptr,
892         int                     lr)
893 {
894         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
895
896         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
897                 if (lr == XFS_BB_RIGHTSIB)
898                         block->bb_u.l.bb_rightsib = ptr->l;
899                 else
900                         block->bb_u.l.bb_leftsib = ptr->l;
901         } else {
902                 if (lr == XFS_BB_RIGHTSIB)
903                         block->bb_u.s.bb_rightsib = ptr->s;
904                 else
905                         block->bb_u.s.bb_leftsib = ptr->s;
906         }
907 }
908
909 STATIC void
910 xfs_btree_init_block(
911         struct xfs_btree_cur    *cur,
912         int                     level,
913         int                     numrecs,
914         struct xfs_btree_block  *new)   /* new block */
915 {
916         new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
917         new->bb_level = cpu_to_be16(level);
918         new->bb_numrecs = cpu_to_be16(numrecs);
919
920         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
921                 new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
922                 new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
923         } else {
924                 new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
925                 new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
926         }
927 }
928
929 /*
930  * Return true if ptr is the last record in the btree and
931  * we need to track updateÑ• to this record.  The decision
932  * will be further refined in the update_lastrec method.
933  */
934 STATIC int
935 xfs_btree_is_lastrec(
936         struct xfs_btree_cur    *cur,
937         struct xfs_btree_block  *block,
938         int                     level)
939 {
940         union xfs_btree_ptr     ptr;
941
942         if (level > 0)
943                 return 0;
944         if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
945                 return 0;
946
947         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
948         if (!xfs_btree_ptr_is_null(cur, &ptr))
949                 return 0;
950         return 1;
951 }
952
953 STATIC void
954 xfs_btree_buf_to_ptr(
955         struct xfs_btree_cur    *cur,
956         struct xfs_buf          *bp,
957         union xfs_btree_ptr     *ptr)
958 {
959         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
960                 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
961                                         XFS_BUF_ADDR(bp)));
962         else {
963                 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
964                                         XFS_BUF_ADDR(bp)));
965         }
966 }
967
968 STATIC xfs_daddr_t
969 xfs_btree_ptr_to_daddr(
970         struct xfs_btree_cur    *cur,
971         union xfs_btree_ptr     *ptr)
972 {
973         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
974                 ASSERT(be64_to_cpu(ptr->l) != NULLDFSBNO);
975
976                 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
977         } else {
978                 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
979                 ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
980
981                 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
982                                         be32_to_cpu(ptr->s));
983         }
984 }
985
986 STATIC void
987 xfs_btree_set_refs(
988         struct xfs_btree_cur    *cur,
989         struct xfs_buf          *bp)
990 {
991         switch (cur->bc_btnum) {
992         case XFS_BTNUM_BNO:
993         case XFS_BTNUM_CNT:
994                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
995                 break;
996         case XFS_BTNUM_INO:
997                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
998                 break;
999         case XFS_BTNUM_BMAP:
1000                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
1001                 break;
1002         default:
1003                 ASSERT(0);
1004         }
1005 }
1006
1007 STATIC int
1008 xfs_btree_get_buf_block(
1009         struct xfs_btree_cur    *cur,
1010         union xfs_btree_ptr     *ptr,
1011         int                     flags,
1012         struct xfs_btree_block  **block,
1013         struct xfs_buf          **bpp)
1014 {
1015         struct xfs_mount        *mp = cur->bc_mp;
1016         xfs_daddr_t             d;
1017
1018         /* need to sort out how callers deal with failures first */
1019         ASSERT(!(flags & XFS_BUF_TRYLOCK));
1020
1021         d = xfs_btree_ptr_to_daddr(cur, ptr);
1022         *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
1023                                  mp->m_bsize, flags);
1024
1025         ASSERT(*bpp);
1026         ASSERT(!XFS_BUF_GETERROR(*bpp));
1027
1028         *block = XFS_BUF_TO_BLOCK(*bpp);
1029         return 0;
1030 }
1031
1032 /*
1033  * Read in the buffer at the given ptr and return the buffer and
1034  * the block pointer within the buffer.
1035  */
1036 STATIC int
1037 xfs_btree_read_buf_block(
1038         struct xfs_btree_cur    *cur,
1039         union xfs_btree_ptr     *ptr,
1040         int                     level,
1041         int                     flags,
1042         struct xfs_btree_block  **block,
1043         struct xfs_buf          **bpp)
1044 {
1045         struct xfs_mount        *mp = cur->bc_mp;
1046         xfs_daddr_t             d;
1047         int                     error;
1048
1049         /* need to sort out how callers deal with failures first */
1050         ASSERT(!(flags & XFS_BUF_TRYLOCK));
1051
1052         d = xfs_btree_ptr_to_daddr(cur, ptr);
1053         error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1054                                    mp->m_bsize, flags, bpp);
1055         if (error)
1056                 return error;
1057
1058         ASSERT(*bpp != NULL);
1059         ASSERT(!XFS_BUF_GETERROR(*bpp));
1060
1061         xfs_btree_set_refs(cur, *bpp);
1062         *block = XFS_BUF_TO_BLOCK(*bpp);
1063
1064         error = xfs_btree_check_block(cur, *block, level, *bpp);
1065         if (error)
1066                 xfs_trans_brelse(cur->bc_tp, *bpp);
1067         return error;
1068 }
1069
1070 /*
1071  * Copy keys from one btree block to another.
1072  */
1073 STATIC void
1074 xfs_btree_copy_keys(
1075         struct xfs_btree_cur    *cur,
1076         union xfs_btree_key     *dst_key,
1077         union xfs_btree_key     *src_key,
1078         int                     numkeys)
1079 {
1080         ASSERT(numkeys >= 0);
1081         memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1082 }
1083
1084 /*
1085  * Copy records from one btree block to another.
1086  */
1087 STATIC void
1088 xfs_btree_copy_recs(
1089         struct xfs_btree_cur    *cur,
1090         union xfs_btree_rec     *dst_rec,
1091         union xfs_btree_rec     *src_rec,
1092         int                     numrecs)
1093 {
1094         ASSERT(numrecs >= 0);
1095         memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1096 }
1097
1098 /*
1099  * Copy block pointers from one btree block to another.
1100  */
1101 STATIC void
1102 xfs_btree_copy_ptrs(
1103         struct xfs_btree_cur    *cur,
1104         union xfs_btree_ptr     *dst_ptr,
1105         union xfs_btree_ptr     *src_ptr,
1106         int                     numptrs)
1107 {
1108         ASSERT(numptrs >= 0);
1109         memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1110 }
1111
1112 /*
1113  * Shift keys one index left/right inside a single btree block.
1114  */
1115 STATIC void
1116 xfs_btree_shift_keys(
1117         struct xfs_btree_cur    *cur,
1118         union xfs_btree_key     *key,
1119         int                     dir,
1120         int                     numkeys)
1121 {
1122         char                    *dst_key;
1123
1124         ASSERT(numkeys >= 0);
1125         ASSERT(dir == 1 || dir == -1);
1126
1127         dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1128         memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1129 }
1130
1131 /*
1132  * Shift records one index left/right inside a single btree block.
1133  */
1134 STATIC void
1135 xfs_btree_shift_recs(
1136         struct xfs_btree_cur    *cur,
1137         union xfs_btree_rec     *rec,
1138         int                     dir,
1139         int                     numrecs)
1140 {
1141         char                    *dst_rec;
1142
1143         ASSERT(numrecs >= 0);
1144         ASSERT(dir == 1 || dir == -1);
1145
1146         dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1147         memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1148 }
1149
1150 /*
1151  * Shift block pointers one index left/right inside a single btree block.
1152  */
1153 STATIC void
1154 xfs_btree_shift_ptrs(
1155         struct xfs_btree_cur    *cur,
1156         union xfs_btree_ptr     *ptr,
1157         int                     dir,
1158         int                     numptrs)
1159 {
1160         char                    *dst_ptr;
1161
1162         ASSERT(numptrs >= 0);
1163         ASSERT(dir == 1 || dir == -1);
1164
1165         dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1166         memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1167 }
1168
1169 /*
1170  * Log key values from the btree block.
1171  */
1172 STATIC void
1173 xfs_btree_log_keys(
1174         struct xfs_btree_cur    *cur,
1175         struct xfs_buf          *bp,
1176         int                     first,
1177         int                     last)
1178 {
1179         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1180         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1181
1182         if (bp) {
1183                 xfs_trans_log_buf(cur->bc_tp, bp,
1184                                   xfs_btree_key_offset(cur, first),
1185                                   xfs_btree_key_offset(cur, last + 1) - 1);
1186         } else {
1187                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1188                                 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1189         }
1190
1191         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1192 }
1193
1194 /*
1195  * Log record values from the btree block.
1196  */
1197 void
1198 xfs_btree_log_recs(
1199         struct xfs_btree_cur    *cur,
1200         struct xfs_buf          *bp,
1201         int                     first,
1202         int                     last)
1203 {
1204         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1205         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1206
1207         xfs_trans_log_buf(cur->bc_tp, bp,
1208                           xfs_btree_rec_offset(cur, first),
1209                           xfs_btree_rec_offset(cur, last + 1) - 1);
1210
1211         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1212 }
1213
1214 /*
1215  * Log block pointer fields from a btree block (nonleaf).
1216  */
1217 STATIC void
1218 xfs_btree_log_ptrs(
1219         struct xfs_btree_cur    *cur,   /* btree cursor */
1220         struct xfs_buf          *bp,    /* buffer containing btree block */
1221         int                     first,  /* index of first pointer to log */
1222         int                     last)   /* index of last pointer to log */
1223 {
1224         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1225         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1226
1227         if (bp) {
1228                 struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
1229                 int                     level = xfs_btree_get_level(block);
1230
1231                 xfs_trans_log_buf(cur->bc_tp, bp,
1232                                 xfs_btree_ptr_offset(cur, first, level),
1233                                 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1234         } else {
1235                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1236                         xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1237         }
1238
1239         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1240 }
1241
1242 /*
1243  * Log fields from a btree block header.
1244  */
1245 void
1246 xfs_btree_log_block(
1247         struct xfs_btree_cur    *cur,   /* btree cursor */
1248         struct xfs_buf          *bp,    /* buffer containing btree block */
1249         int                     fields) /* mask of fields: XFS_BB_... */
1250 {
1251         int                     first;  /* first byte offset logged */
1252         int                     last;   /* last byte offset logged */
1253         static const short      soffsets[] = {  /* table of offsets (short) */
1254                 offsetof(struct xfs_btree_block, bb_magic),
1255                 offsetof(struct xfs_btree_block, bb_level),
1256                 offsetof(struct xfs_btree_block, bb_numrecs),
1257                 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1258                 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1259                 XFS_BTREE_SBLOCK_LEN
1260         };
1261         static const short      loffsets[] = {  /* table of offsets (long) */
1262                 offsetof(struct xfs_btree_block, bb_magic),
1263                 offsetof(struct xfs_btree_block, bb_level),
1264                 offsetof(struct xfs_btree_block, bb_numrecs),
1265                 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1266                 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1267                 XFS_BTREE_LBLOCK_LEN
1268         };
1269
1270         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1271         XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1272
1273         if (bp) {
1274                 xfs_btree_offsets(fields,
1275                                   (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1276                                         loffsets : soffsets,
1277                                   XFS_BB_NUM_BITS, &first, &last);
1278                 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1279         } else {
1280                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1281                         xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1282         }
1283
1284         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1285 }
1286
1287 /*
1288  * Increment cursor by one record at the level.
1289  * For nonzero levels the leaf-ward information is untouched.
1290  */
1291 int                                             /* error */
1292 xfs_btree_increment(
1293         struct xfs_btree_cur    *cur,
1294         int                     level,
1295         int                     *stat)          /* success/failure */
1296 {
1297         struct xfs_btree_block  *block;
1298         union xfs_btree_ptr     ptr;
1299         struct xfs_buf          *bp;
1300         int                     error;          /* error return value */
1301         int                     lev;
1302
1303         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1304         XFS_BTREE_TRACE_ARGI(cur, level);
1305
1306         ASSERT(level < cur->bc_nlevels);
1307
1308         /* Read-ahead to the right at this level. */
1309         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1310
1311         /* Get a pointer to the btree block. */
1312         block = xfs_btree_get_block(cur, level, &bp);
1313
1314 #ifdef DEBUG
1315         error = xfs_btree_check_block(cur, block, level, bp);
1316         if (error)
1317                 goto error0;
1318 #endif
1319
1320         /* We're done if we remain in the block after the increment. */
1321         if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1322                 goto out1;
1323
1324         /* Fail if we just went off the right edge of the tree. */
1325         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1326         if (xfs_btree_ptr_is_null(cur, &ptr))
1327                 goto out0;
1328
1329         XFS_BTREE_STATS_INC(cur, increment);
1330
1331         /*
1332          * March up the tree incrementing pointers.
1333          * Stop when we don't go off the right edge of a block.
1334          */
1335         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1336                 block = xfs_btree_get_block(cur, lev, &bp);
1337
1338 #ifdef DEBUG
1339                 error = xfs_btree_check_block(cur, block, lev, bp);
1340                 if (error)
1341                         goto error0;
1342 #endif
1343
1344                 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1345                         break;
1346
1347                 /* Read-ahead the right block for the next loop. */
1348                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1349         }
1350
1351         /*
1352          * If we went off the root then we are either seriously
1353          * confused or have the tree root in an inode.
1354          */
1355         if (lev == cur->bc_nlevels) {
1356                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1357                         goto out0;
1358                 ASSERT(0);
1359                 error = EFSCORRUPTED;
1360                 goto error0;
1361         }
1362         ASSERT(lev < cur->bc_nlevels);
1363
1364         /*
1365          * Now walk back down the tree, fixing up the cursor's buffer
1366          * pointers and key numbers.
1367          */
1368         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1369                 union xfs_btree_ptr     *ptrp;
1370
1371                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1372                 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1373                                                         0, &block, &bp);
1374                 if (error)
1375                         goto error0;
1376
1377                 xfs_btree_setbuf(cur, lev, bp);
1378                 cur->bc_ptrs[lev] = 1;
1379         }
1380 out1:
1381         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1382         *stat = 1;
1383         return 0;
1384
1385 out0:
1386         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1387         *stat = 0;
1388         return 0;
1389
1390 error0:
1391         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1392         return error;
1393 }
1394
1395 /*
1396  * Decrement cursor by one record at the level.
1397  * For nonzero levels the leaf-ward information is untouched.
1398  */
1399 int                                             /* error */
1400 xfs_btree_decrement(
1401         struct xfs_btree_cur    *cur,
1402         int                     level,
1403         int                     *stat)          /* success/failure */
1404 {
1405         struct xfs_btree_block  *block;
1406         xfs_buf_t               *bp;
1407         int                     error;          /* error return value */
1408         int                     lev;
1409         union xfs_btree_ptr     ptr;
1410
1411         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1412         XFS_BTREE_TRACE_ARGI(cur, level);
1413
1414         ASSERT(level < cur->bc_nlevels);
1415
1416         /* Read-ahead to the left at this level. */
1417         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1418
1419         /* We're done if we remain in the block after the decrement. */
1420         if (--cur->bc_ptrs[level] > 0)
1421                 goto out1;
1422
1423         /* Get a pointer to the btree block. */
1424         block = xfs_btree_get_block(cur, level, &bp);
1425
1426 #ifdef DEBUG
1427         error = xfs_btree_check_block(cur, block, level, bp);
1428         if (error)
1429                 goto error0;
1430 #endif
1431
1432         /* Fail if we just went off the left edge of the tree. */
1433         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1434         if (xfs_btree_ptr_is_null(cur, &ptr))
1435                 goto out0;
1436
1437         XFS_BTREE_STATS_INC(cur, decrement);
1438
1439         /*
1440          * March up the tree decrementing pointers.
1441          * Stop when we don't go off the left edge of a block.
1442          */
1443         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1444                 if (--cur->bc_ptrs[lev] > 0)
1445                         break;
1446                 /* Read-ahead the left block for the next loop. */
1447                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1448         }
1449
1450         /*
1451          * If we went off the root then we are seriously confused.
1452          * or the root of the tree is in an inode.
1453          */
1454         if (lev == cur->bc_nlevels) {
1455                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1456                         goto out0;
1457                 ASSERT(0);
1458                 error = EFSCORRUPTED;
1459                 goto error0;
1460         }
1461         ASSERT(lev < cur->bc_nlevels);
1462
1463         /*
1464          * Now walk back down the tree, fixing up the cursor's buffer
1465          * pointers and key numbers.
1466          */
1467         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1468                 union xfs_btree_ptr     *ptrp;
1469
1470                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1471                 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1472                                                         0, &block, &bp);
1473                 if (error)
1474                         goto error0;
1475                 xfs_btree_setbuf(cur, lev, bp);
1476                 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1477         }
1478 out1:
1479         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1480         *stat = 1;
1481         return 0;
1482
1483 out0:
1484         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1485         *stat = 0;
1486         return 0;
1487
1488 error0:
1489         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1490         return error;
1491 }
1492
1493 STATIC int
1494 xfs_btree_lookup_get_block(
1495         struct xfs_btree_cur    *cur,   /* btree cursor */
1496         int                     level,  /* level in the btree */
1497         union xfs_btree_ptr     *pp,    /* ptr to btree block */
1498         struct xfs_btree_block  **blkp) /* return btree block */
1499 {
1500         struct xfs_buf          *bp;    /* buffer pointer for btree block */
1501         int                     error = 0;
1502
1503         /* special case the root block if in an inode */
1504         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1505             (level == cur->bc_nlevels - 1)) {
1506                 *blkp = xfs_btree_get_iroot(cur);
1507                 return 0;
1508         }
1509
1510         /*
1511          * If the old buffer at this level for the disk address we are
1512          * looking for re-use it.
1513          *
1514          * Otherwise throw it away and get a new one.
1515          */
1516         bp = cur->bc_bufs[level];
1517         if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1518                 *blkp = XFS_BUF_TO_BLOCK(bp);
1519                 return 0;
1520         }
1521
1522         error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
1523         if (error)
1524                 return error;
1525
1526         xfs_btree_setbuf(cur, level, bp);
1527         return 0;
1528 }
1529
1530 /*
1531  * Get current search key.  For level 0 we don't actually have a key
1532  * structure so we make one up from the record.  For all other levels
1533  * we just return the right key.
1534  */
1535 STATIC union xfs_btree_key *
1536 xfs_lookup_get_search_key(
1537         struct xfs_btree_cur    *cur,
1538         int                     level,
1539         int                     keyno,
1540         struct xfs_btree_block  *block,
1541         union xfs_btree_key     *kp)
1542 {
1543         if (level == 0) {
1544                 cur->bc_ops->init_key_from_rec(kp,
1545                                 xfs_btree_rec_addr(cur, keyno, block));
1546                 return kp;
1547         }
1548
1549         return xfs_btree_key_addr(cur, keyno, block);
1550 }
1551
1552 /*
1553  * Lookup the record.  The cursor is made to point to it, based on dir.
1554  * Return 0 if can't find any such record, 1 for success.
1555  */
1556 int                                     /* error */
1557 xfs_btree_lookup(
1558         struct xfs_btree_cur    *cur,   /* btree cursor */
1559         xfs_lookup_t            dir,    /* <=, ==, or >= */
1560         int                     *stat)  /* success/failure */
1561 {
1562         struct xfs_btree_block  *block; /* current btree block */
1563         __int64_t               diff;   /* difference for the current key */
1564         int                     error;  /* error return value */
1565         int                     keyno;  /* current key number */
1566         int                     level;  /* level in the btree */
1567         union xfs_btree_ptr     *pp;    /* ptr to btree block */
1568         union xfs_btree_ptr     ptr;    /* ptr to btree block */
1569
1570         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1571         XFS_BTREE_TRACE_ARGI(cur, dir);
1572
1573         XFS_BTREE_STATS_INC(cur, lookup);
1574
1575         block = NULL;
1576         keyno = 0;
1577
1578         /* initialise start pointer from cursor */
1579         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1580         pp = &ptr;
1581
1582         /*
1583          * Iterate over each level in the btree, starting at the root.
1584          * For each level above the leaves, find the key we need, based
1585          * on the lookup record, then follow the corresponding block
1586          * pointer down to the next level.
1587          */
1588         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1589                 /* Get the block we need to do the lookup on. */
1590                 error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1591                 if (error)
1592                         goto error0;
1593
1594                 if (diff == 0) {
1595                         /*
1596                          * If we already had a key match at a higher level, we
1597                          * know we need to use the first entry in this block.
1598                          */
1599                         keyno = 1;
1600                 } else {
1601                         /* Otherwise search this block. Do a binary search. */
1602
1603                         int     high;   /* high entry number */
1604                         int     low;    /* low entry number */
1605
1606                         /* Set low and high entry numbers, 1-based. */
1607                         low = 1;
1608                         high = xfs_btree_get_numrecs(block);
1609                         if (!high) {
1610                                 /* Block is empty, must be an empty leaf. */
1611                                 ASSERT(level == 0 && cur->bc_nlevels == 1);
1612
1613                                 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1614                                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1615                                 *stat = 0;
1616                                 return 0;
1617                         }
1618
1619                         /* Binary search the block. */
1620                         while (low <= high) {
1621                                 union xfs_btree_key     key;
1622                                 union xfs_btree_key     *kp;
1623
1624                                 XFS_BTREE_STATS_INC(cur, compare);
1625
1626                                 /* keyno is average of low and high. */
1627                                 keyno = (low + high) >> 1;
1628
1629                                 /* Get current search key */
1630                                 kp = xfs_lookup_get_search_key(cur, level,
1631                                                 keyno, block, &key);
1632
1633                                 /*
1634                                  * Compute difference to get next direction:
1635                                  *  - less than, move right
1636                                  *  - greater than, move left
1637                                  *  - equal, we're done
1638                                  */
1639                                 diff = cur->bc_ops->key_diff(cur, kp);
1640                                 if (diff < 0)
1641                                         low = keyno + 1;
1642                                 else if (diff > 0)
1643                                         high = keyno - 1;
1644                                 else
1645                                         break;
1646                         }
1647                 }
1648
1649                 /*
1650                  * If there are more levels, set up for the next level
1651                  * by getting the block number and filling in the cursor.
1652                  */
1653                 if (level > 0) {
1654                         /*
1655                          * If we moved left, need the previous key number,
1656                          * unless there isn't one.
1657                          */
1658                         if (diff > 0 && --keyno < 1)
1659                                 keyno = 1;
1660                         pp = xfs_btree_ptr_addr(cur, keyno, block);
1661
1662 #ifdef DEBUG
1663                         error = xfs_btree_check_ptr(cur, pp, 0, level);
1664                         if (error)
1665                                 goto error0;
1666 #endif
1667                         cur->bc_ptrs[level] = keyno;
1668                 }
1669         }
1670
1671         /* Done with the search. See if we need to adjust the results. */
1672         if (dir != XFS_LOOKUP_LE && diff < 0) {
1673                 keyno++;
1674                 /*
1675                  * If ge search and we went off the end of the block, but it's
1676                  * not the last block, we're in the wrong block.
1677                  */
1678                 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1679                 if (dir == XFS_LOOKUP_GE &&
1680                     keyno > xfs_btree_get_numrecs(block) &&
1681                     !xfs_btree_ptr_is_null(cur, &ptr)) {
1682                         int     i;
1683
1684                         cur->bc_ptrs[0] = keyno;
1685                         error = xfs_btree_increment(cur, 0, &i);
1686                         if (error)
1687                                 goto error0;
1688                         XFS_WANT_CORRUPTED_RETURN(i == 1);
1689                         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1690                         *stat = 1;
1691                         return 0;
1692                 }
1693         } else if (dir == XFS_LOOKUP_LE && diff > 0)
1694                 keyno--;
1695         cur->bc_ptrs[0] = keyno;
1696
1697         /* Return if we succeeded or not. */
1698         if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1699                 *stat = 0;
1700         else if (dir != XFS_LOOKUP_EQ || diff == 0)
1701                 *stat = 1;
1702         else
1703                 *stat = 0;
1704         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1705         return 0;
1706
1707 error0:
1708         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1709         return error;
1710 }
1711
1712 /*
1713  * Update keys at all levels from here to the root along the cursor's path.
1714  */
1715 STATIC int
1716 xfs_btree_updkey(
1717         struct xfs_btree_cur    *cur,
1718         union xfs_btree_key     *keyp,
1719         int                     level)
1720 {
1721         struct xfs_btree_block  *block;
1722         struct xfs_buf          *bp;
1723         union xfs_btree_key     *kp;
1724         int                     ptr;
1725
1726         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1727         XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1728
1729         ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1730
1731         /*
1732          * Go up the tree from this level toward the root.
1733          * At each level, update the key value to the value input.
1734          * Stop when we reach a level where the cursor isn't pointing
1735          * at the first entry in the block.
1736          */
1737         for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1738 #ifdef DEBUG
1739                 int             error;
1740 #endif
1741                 block = xfs_btree_get_block(cur, level, &bp);
1742 #ifdef DEBUG
1743                 error = xfs_btree_check_block(cur, block, level, bp);
1744                 if (error) {
1745                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1746                         return error;
1747                 }
1748 #endif
1749                 ptr = cur->bc_ptrs[level];
1750                 kp = xfs_btree_key_addr(cur, ptr, block);
1751                 xfs_btree_copy_keys(cur, kp, keyp, 1);
1752                 xfs_btree_log_keys(cur, bp, ptr, ptr);
1753         }
1754
1755         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1756         return 0;
1757 }
1758
1759 /*
1760  * Update the record referred to by cur to the value in the
1761  * given record. This either works (return 0) or gets an
1762  * EFSCORRUPTED error.
1763  */
1764 int
1765 xfs_btree_update(
1766         struct xfs_btree_cur    *cur,
1767         union xfs_btree_rec     *rec)
1768 {
1769         struct xfs_btree_block  *block;
1770         struct xfs_buf          *bp;
1771         int                     error;
1772         int                     ptr;
1773         union xfs_btree_rec     *rp;
1774
1775         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1776         XFS_BTREE_TRACE_ARGR(cur, rec);
1777
1778         /* Pick up the current block. */
1779         block = xfs_btree_get_block(cur, 0, &bp);
1780
1781 #ifdef DEBUG
1782         error = xfs_btree_check_block(cur, block, 0, bp);
1783         if (error)
1784                 goto error0;
1785 #endif
1786         /* Get the address of the rec to be updated. */
1787         ptr = cur->bc_ptrs[0];
1788         rp = xfs_btree_rec_addr(cur, ptr, block);
1789
1790         /* Fill in the new contents and log them. */
1791         xfs_btree_copy_recs(cur, rp, rec, 1);
1792         xfs_btree_log_recs(cur, bp, ptr, ptr);
1793
1794         /*
1795          * If we are tracking the last record in the tree and
1796          * we are at the far right edge of the tree, update it.
1797          */
1798         if (xfs_btree_is_lastrec(cur, block, 0)) {
1799                 cur->bc_ops->update_lastrec(cur, block, rec,
1800                                             ptr, LASTREC_UPDATE);
1801         }
1802
1803         /* Updating first rec in leaf. Pass new key value up to our parent. */
1804         if (ptr == 1) {
1805                 union xfs_btree_key     key;
1806
1807                 cur->bc_ops->init_key_from_rec(&key, rec);
1808                 error = xfs_btree_updkey(cur, &key, 1);
1809                 if (error)
1810                         goto error0;
1811         }
1812
1813         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1814         return 0;
1815
1816 error0:
1817         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1818         return error;
1819 }
1820
1821 /*
1822  * Move 1 record left from cur/level if possible.
1823  * Update cur to reflect the new path.
1824  */
1825 STATIC int                                      /* error */
1826 xfs_btree_lshift(
1827         struct xfs_btree_cur    *cur,
1828         int                     level,
1829         int                     *stat)          /* success/failure */
1830 {
1831         union xfs_btree_key     key;            /* btree key */
1832         struct xfs_buf          *lbp;           /* left buffer pointer */
1833         struct xfs_btree_block  *left;          /* left btree block */
1834         int                     lrecs;          /* left record count */
1835         struct xfs_buf          *rbp;           /* right buffer pointer */
1836         struct xfs_btree_block  *right;         /* right btree block */
1837         int                     rrecs;          /* right record count */
1838         union xfs_btree_ptr     lptr;           /* left btree pointer */
1839         union xfs_btree_key     *rkp = NULL;    /* right btree key */
1840         union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
1841         union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
1842         int                     error;          /* error return value */
1843
1844         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1845         XFS_BTREE_TRACE_ARGI(cur, level);
1846
1847         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1848             level == cur->bc_nlevels - 1)
1849                 goto out0;
1850
1851         /* Set up variables for this block as "right". */
1852         right = xfs_btree_get_block(cur, level, &rbp);
1853
1854 #ifdef DEBUG
1855         error = xfs_btree_check_block(cur, right, level, rbp);
1856         if (error)
1857                 goto error0;
1858 #endif
1859
1860         /* If we've got no left sibling then we can't shift an entry left. */
1861         xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
1862         if (xfs_btree_ptr_is_null(cur, &lptr))
1863                 goto out0;
1864
1865         /*
1866          * If the cursor entry is the one that would be moved, don't
1867          * do it... it's too complicated.
1868          */
1869         if (cur->bc_ptrs[level] <= 1)
1870                 goto out0;
1871
1872         /* Set up the left neighbor as "left". */
1873         error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
1874         if (error)
1875                 goto error0;
1876
1877         /* If it's full, it can't take another entry. */
1878         lrecs = xfs_btree_get_numrecs(left);
1879         if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
1880                 goto out0;
1881
1882         rrecs = xfs_btree_get_numrecs(right);
1883
1884         /*
1885          * We add one entry to the left side and remove one for the right side.
1886          * Account for it here, the changes will be updated on disk and logged
1887          * later.
1888          */
1889         lrecs++;
1890         rrecs--;
1891
1892         XFS_BTREE_STATS_INC(cur, lshift);
1893         XFS_BTREE_STATS_ADD(cur, moves, 1);
1894
1895         /*
1896          * If non-leaf, copy a key and a ptr to the left block.
1897          * Log the changes to the left block.
1898          */
1899         if (level > 0) {
1900                 /* It's a non-leaf.  Move keys and pointers. */
1901                 union xfs_btree_key     *lkp;   /* left btree key */
1902                 union xfs_btree_ptr     *lpp;   /* left address pointer */
1903
1904                 lkp = xfs_btree_key_addr(cur, lrecs, left);
1905                 rkp = xfs_btree_key_addr(cur, 1, right);
1906
1907                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
1908                 rpp = xfs_btree_ptr_addr(cur, 1, right);
1909 #ifdef DEBUG
1910                 error = xfs_btree_check_ptr(cur, rpp, 0, level);
1911                 if (error)
1912                         goto error0;
1913 #endif
1914                 xfs_btree_copy_keys(cur, lkp, rkp, 1);
1915                 xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
1916
1917                 xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
1918                 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
1919
1920                 ASSERT(cur->bc_ops->keys_inorder(cur,
1921                         xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
1922         } else {
1923                 /* It's a leaf.  Move records.  */
1924                 union xfs_btree_rec     *lrp;   /* left record pointer */
1925
1926                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
1927                 rrp = xfs_btree_rec_addr(cur, 1, right);
1928
1929                 xfs_btree_copy_recs(cur, lrp, rrp, 1);
1930                 xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
1931
1932                 ASSERT(cur->bc_ops->recs_inorder(cur,
1933                         xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
1934         }
1935
1936         xfs_btree_set_numrecs(left, lrecs);
1937         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
1938
1939         xfs_btree_set_numrecs(right, rrecs);
1940         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
1941
1942         /*
1943          * Slide the contents of right down one entry.
1944          */
1945         XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
1946         if (level > 0) {
1947                 /* It's a nonleaf. operate on keys and ptrs */
1948 #ifdef DEBUG
1949                 int                     i;              /* loop index */
1950
1951                 for (i = 0; i < rrecs; i++) {
1952                         error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
1953                         if (error)
1954                                 goto error0;
1955                 }
1956 #endif
1957                 xfs_btree_shift_keys(cur,
1958                                 xfs_btree_key_addr(cur, 2, right),
1959                                 -1, rrecs);
1960                 xfs_btree_shift_ptrs(cur,
1961                                 xfs_btree_ptr_addr(cur, 2, right),
1962                                 -1, rrecs);
1963
1964                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
1965                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
1966         } else {
1967                 /* It's a leaf. operate on records */
1968                 xfs_btree_shift_recs(cur,
1969                         xfs_btree_rec_addr(cur, 2, right),
1970                         -1, rrecs);
1971                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
1972
1973                 /*
1974                  * If it's the first record in the block, we'll need a key
1975                  * structure to pass up to the next level (updkey).
1976                  */
1977                 cur->bc_ops->init_key_from_rec(&key,
1978                         xfs_btree_rec_addr(cur, 1, right));
1979                 rkp = &key;
1980         }
1981
1982         /* Update the parent key values of right. */
1983         error = xfs_btree_updkey(cur, rkp, level + 1);
1984         if (error)
1985                 goto error0;
1986
1987         /* Slide the cursor value left one. */
1988         cur->bc_ptrs[level]--;
1989
1990         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1991         *stat = 1;
1992         return 0;
1993
1994 out0:
1995         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1996         *stat = 0;
1997         return 0;
1998
1999 error0:
2000         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2001         return error;
2002 }
2003
2004 /*
2005  * Move 1 record right from cur/level if possible.
2006  * Update cur to reflect the new path.
2007  */
2008 STATIC int                                      /* error */
2009 xfs_btree_rshift(
2010         struct xfs_btree_cur    *cur,
2011         int                     level,
2012         int                     *stat)          /* success/failure */
2013 {
2014         union xfs_btree_key     key;            /* btree key */
2015         struct xfs_buf          *lbp;           /* left buffer pointer */
2016         struct xfs_btree_block  *left;          /* left btree block */
2017         struct xfs_buf          *rbp;           /* right buffer pointer */
2018         struct xfs_btree_block  *right;         /* right btree block */
2019         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
2020         union xfs_btree_ptr     rptr;           /* right block pointer */
2021         union xfs_btree_key     *rkp;           /* right btree key */
2022         int                     rrecs;          /* right record count */
2023         int                     lrecs;          /* left record count */
2024         int                     error;          /* error return value */
2025         int                     i;              /* loop counter */
2026
2027         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2028         XFS_BTREE_TRACE_ARGI(cur, level);
2029
2030         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2031             (level == cur->bc_nlevels - 1))
2032                 goto out0;
2033
2034         /* Set up variables for this block as "left". */
2035         left = xfs_btree_get_block(cur, level, &lbp);
2036
2037 #ifdef DEBUG
2038         error = xfs_btree_check_block(cur, left, level, lbp);
2039         if (error)
2040                 goto error0;
2041 #endif
2042
2043         /* If we've got no right sibling then we can't shift an entry right. */
2044         xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2045         if (xfs_btree_ptr_is_null(cur, &rptr))
2046                 goto out0;
2047
2048         /*
2049          * If the cursor entry is the one that would be moved, don't
2050          * do it... it's too complicated.
2051          */
2052         lrecs = xfs_btree_get_numrecs(left);
2053         if (cur->bc_ptrs[level] >= lrecs)
2054                 goto out0;
2055
2056         /* Set up the right neighbor as "right". */
2057         error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
2058         if (error)
2059                 goto error0;
2060
2061         /* If it's full, it can't take another entry. */
2062         rrecs = xfs_btree_get_numrecs(right);
2063         if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2064                 goto out0;
2065
2066         XFS_BTREE_STATS_INC(cur, rshift);
2067         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2068
2069         /*
2070          * Make a hole at the start of the right neighbor block, then
2071          * copy the last left block entry to the hole.
2072          */
2073         if (level > 0) {
2074                 /* It's a nonleaf. make a hole in the keys and ptrs */
2075                 union xfs_btree_key     *lkp;
2076                 union xfs_btree_ptr     *lpp;
2077                 union xfs_btree_ptr     *rpp;
2078
2079                 lkp = xfs_btree_key_addr(cur, lrecs, left);
2080                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2081                 rkp = xfs_btree_key_addr(cur, 1, right);
2082                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2083
2084 #ifdef DEBUG
2085                 for (i = rrecs - 1; i >= 0; i--) {
2086                         error = xfs_btree_check_ptr(cur, rpp, i, level);
2087                         if (error)
2088                                 goto error0;
2089                 }
2090 #endif
2091
2092                 xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2093                 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2094
2095 #ifdef DEBUG
2096                 error = xfs_btree_check_ptr(cur, lpp, 0, level);
2097                 if (error)
2098                         goto error0;
2099 #endif
2100
2101                 /* Now put the new data in, and log it. */
2102                 xfs_btree_copy_keys(cur, rkp, lkp, 1);
2103                 xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2104
2105                 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2106                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2107
2108                 ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2109                         xfs_btree_key_addr(cur, 2, right)));
2110         } else {
2111                 /* It's a leaf. make a hole in the records */
2112                 union xfs_btree_rec     *lrp;
2113                 union xfs_btree_rec     *rrp;
2114
2115                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2116                 rrp = xfs_btree_rec_addr(cur, 1, right);
2117
2118                 xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2119
2120                 /* Now put the new data in, and log it. */
2121                 xfs_btree_copy_recs(cur, rrp, lrp, 1);
2122                 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2123
2124                 cur->bc_ops->init_key_from_rec(&key, rrp);
2125                 rkp = &key;
2126
2127                 ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
2128                         xfs_btree_rec_addr(cur, 2, right)));
2129         }
2130
2131         /*
2132          * Decrement and log left's numrecs, bump and log right's numrecs.
2133          */
2134         xfs_btree_set_numrecs(left, --lrecs);
2135         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2136
2137         xfs_btree_set_numrecs(right, ++rrecs);
2138         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2139
2140         /*
2141          * Using a temporary cursor, update the parent key values of the
2142          * block on the right.
2143          */
2144         error = xfs_btree_dup_cursor(cur, &tcur);
2145         if (error)
2146                 goto error0;
2147         i = xfs_btree_lastrec(tcur, level);
2148         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2149
2150         error = xfs_btree_increment(tcur, level, &i);
2151         if (error)
2152                 goto error1;
2153
2154         error = xfs_btree_updkey(tcur, rkp, level + 1);
2155         if (error)
2156                 goto error1;
2157
2158         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2159
2160         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2161         *stat = 1;
2162         return 0;
2163
2164 out0:
2165         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2166         *stat = 0;
2167         return 0;
2168
2169 error0:
2170         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2171         return error;
2172
2173 error1:
2174         XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2175         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2176         return error;
2177 }
2178
2179 /*
2180  * Split cur/level block in half.
2181  * Return new block number and the key to its first
2182  * record (to be inserted into parent).
2183  */
2184 STATIC int                                      /* error */
2185 xfs_btree_split(
2186         struct xfs_btree_cur    *cur,
2187         int                     level,
2188         union xfs_btree_ptr     *ptrp,
2189         union xfs_btree_key     *key,
2190         struct xfs_btree_cur    **curp,
2191         int                     *stat)          /* success/failure */
2192 {
2193         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
2194         struct xfs_buf          *lbp;           /* left buffer pointer */
2195         struct xfs_btree_block  *left;          /* left btree block */
2196         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
2197         struct xfs_buf          *rbp;           /* right buffer pointer */
2198         struct xfs_btree_block  *right;         /* right btree block */
2199         union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
2200         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
2201         struct xfs_btree_block  *rrblock;       /* right-right btree block */
2202         int                     lrecs;
2203         int                     rrecs;
2204         int                     src_index;
2205         int                     error;          /* error return value */
2206 #ifdef DEBUG
2207         int                     i;
2208 #endif
2209
2210         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2211         XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
2212
2213         XFS_BTREE_STATS_INC(cur, split);
2214
2215         /* Set up left block (current one). */
2216         left = xfs_btree_get_block(cur, level, &lbp);
2217
2218 #ifdef DEBUG
2219         error = xfs_btree_check_block(cur, left, level, lbp);
2220         if (error)
2221                 goto error0;
2222 #endif
2223
2224         xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2225
2226         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2227         error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
2228         if (error)
2229                 goto error0;
2230         if (*stat == 0)
2231                 goto out0;
2232         XFS_BTREE_STATS_INC(cur, alloc);
2233
2234         /* Set up the new block as "right". */
2235         error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2236         if (error)
2237                 goto error0;
2238
2239         /* Fill in the btree header for the new right block. */
2240         xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right);
2241
2242         /*
2243          * Split the entries between the old and the new block evenly.
2244          * Make sure that if there's an odd number of entries now, that
2245          * each new block will have the same number of entries.
2246          */
2247         lrecs = xfs_btree_get_numrecs(left);
2248         rrecs = lrecs / 2;
2249         if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2250                 rrecs++;
2251         src_index = (lrecs - rrecs + 1);
2252
2253         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2254
2255         /*
2256          * Copy btree block entries from the left block over to the
2257          * new block, the right. Update the right block and log the
2258          * changes.
2259          */
2260         if (level > 0) {
2261                 /* It's a non-leaf.  Move keys and pointers. */
2262                 union xfs_btree_key     *lkp;   /* left btree key */
2263                 union xfs_btree_ptr     *lpp;   /* left address pointer */
2264                 union xfs_btree_key     *rkp;   /* right btree key */
2265                 union xfs_btree_ptr     *rpp;   /* right address pointer */
2266
2267                 lkp = xfs_btree_key_addr(cur, src_index, left);
2268                 lpp = xfs_btree_ptr_addr(cur, src_index, left);
2269                 rkp = xfs_btree_key_addr(cur, 1, right);
2270                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2271
2272 #ifdef DEBUG
2273                 for (i = src_index; i < rrecs; i++) {
2274                         error = xfs_btree_check_ptr(cur, lpp, i, level);
2275                         if (error)
2276                                 goto error0;
2277                 }
2278 #endif
2279
2280                 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2281                 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2282
2283                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2284                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2285
2286                 /* Grab the keys to the entries moved to the right block */
2287                 xfs_btree_copy_keys(cur, key, rkp, 1);
2288         } else {
2289                 /* It's a leaf.  Move records.  */
2290                 union xfs_btree_rec     *lrp;   /* left record pointer */
2291                 union xfs_btree_rec     *rrp;   /* right record pointer */
2292
2293                 lrp = xfs_btree_rec_addr(cur, src_index, left);
2294                 rrp = xfs_btree_rec_addr(cur, 1, right);
2295
2296                 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2297                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2298
2299                 cur->bc_ops->init_key_from_rec(key,
2300                         xfs_btree_rec_addr(cur, 1, right));
2301         }
2302
2303
2304         /*
2305          * Find the left block number by looking in the buffer.
2306          * Adjust numrecs, sibling pointers.
2307          */
2308         xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2309         xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2310         xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2311         xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2312
2313         lrecs -= rrecs;
2314         xfs_btree_set_numrecs(left, lrecs);
2315         xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2316
2317         xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2318         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2319
2320         /*
2321          * If there's a block to the new block's right, make that block
2322          * point back to right instead of to left.
2323          */
2324         if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2325                 error = xfs_btree_read_buf_block(cur, &rrptr, level,
2326                                                         0, &rrblock, &rrbp);
2327                 if (error)
2328                         goto error0;
2329                 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2330                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2331         }
2332         /*
2333          * If the cursor is really in the right block, move it there.
2334          * If it's just pointing past the last entry in left, then we'll
2335          * insert there, so don't change anything in that case.
2336          */
2337         if (cur->bc_ptrs[level] > lrecs + 1) {
2338                 xfs_btree_setbuf(cur, level, rbp);
2339                 cur->bc_ptrs[level] -= lrecs;
2340         }
2341         /*
2342          * If there are more levels, we'll need another cursor which refers
2343          * the right block, no matter where this cursor was.
2344          */
2345         if (level + 1 < cur->bc_nlevels) {
2346                 error = xfs_btree_dup_cursor(cur, curp);
2347                 if (error)
2348                         goto error0;
2349                 (*curp)->bc_ptrs[level + 1]++;
2350         }
2351         *ptrp = rptr;
2352         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2353         *stat = 1;
2354         return 0;
2355 out0:
2356         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2357         *stat = 0;
2358         return 0;
2359
2360 error0:
2361         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2362         return error;
2363 }
2364
2365 /*
2366  * Copy the old inode root contents into a real block and make the
2367  * broot point to it.
2368  */
2369 int                                             /* error */
2370 xfs_btree_new_iroot(
2371         struct xfs_btree_cur    *cur,           /* btree cursor */
2372         int                     *logflags,      /* logging flags for inode */
2373         int                     *stat)          /* return status - 0 fail */
2374 {
2375         struct xfs_buf          *cbp;           /* buffer for cblock */
2376         struct xfs_btree_block  *block;         /* btree block */
2377         struct xfs_btree_block  *cblock;        /* child btree block */
2378         union xfs_btree_key     *ckp;           /* child key pointer */
2379         union xfs_btree_ptr     *cpp;           /* child ptr pointer */
2380         union xfs_btree_key     *kp;            /* pointer to btree key */
2381         union xfs_btree_ptr     *pp;            /* pointer to block addr */
2382         union xfs_btree_ptr     nptr;           /* new block addr */
2383         int                     level;          /* btree level */
2384         int                     error;          /* error return code */
2385 #ifdef DEBUG
2386         int                     i;              /* loop counter */
2387 #endif
2388
2389         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2390         XFS_BTREE_STATS_INC(cur, newroot);
2391
2392         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2393
2394         level = cur->bc_nlevels - 1;
2395
2396         block = xfs_btree_get_iroot(cur);
2397         pp = xfs_btree_ptr_addr(cur, 1, block);
2398
2399         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2400         error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
2401         if (error)
2402                 goto error0;
2403         if (*stat == 0) {
2404                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2405                 return 0;
2406         }
2407         XFS_BTREE_STATS_INC(cur, alloc);
2408
2409         /* Copy the root into a real block. */
2410         error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
2411         if (error)
2412                 goto error0;
2413
2414         memcpy(cblock, block, xfs_btree_block_len(cur));
2415
2416         be16_add_cpu(&block->bb_level, 1);
2417         xfs_btree_set_numrecs(block, 1);
2418         cur->bc_nlevels++;
2419         cur->bc_ptrs[level + 1] = 1;
2420
2421         kp = xfs_btree_key_addr(cur, 1, block);
2422         ckp = xfs_btree_key_addr(cur, 1, cblock);
2423         xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
2424
2425         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2426 #ifdef DEBUG
2427         for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
2428                 error = xfs_btree_check_ptr(cur, pp, i, level);
2429                 if (error)
2430                         goto error0;
2431         }
2432 #endif
2433         xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
2434
2435 #ifdef DEBUG
2436         error = xfs_btree_check_ptr(cur, &nptr, 0, level);
2437         if (error)
2438                 goto error0;
2439 #endif
2440         xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
2441
2442         xfs_iroot_realloc(cur->bc_private.b.ip,
2443                           1 - xfs_btree_get_numrecs(cblock),
2444                           cur->bc_private.b.whichfork);
2445
2446         xfs_btree_setbuf(cur, level, cbp);
2447
2448         /*
2449          * Do all this logging at the end so that
2450          * the root is at the right level.
2451          */
2452         xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
2453         xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2454         xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2455
2456         *logflags |=
2457                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
2458         *stat = 1;
2459         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2460         return 0;
2461 error0:
2462         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2463         return error;
2464 }
2465
2466 /*
2467  * Allocate a new root block, fill it in.
2468  */
2469 STATIC int                              /* error */
2470 xfs_btree_new_root(
2471         struct xfs_btree_cur    *cur,   /* btree cursor */
2472         int                     *stat)  /* success/failure */
2473 {
2474         struct xfs_btree_block  *block; /* one half of the old root block */
2475         struct xfs_buf          *bp;    /* buffer containing block */
2476         int                     error;  /* error return value */
2477         struct xfs_buf          *lbp;   /* left buffer pointer */
2478         struct xfs_btree_block  *left;  /* left btree block */
2479         struct xfs_buf          *nbp;   /* new (root) buffer */
2480         struct xfs_btree_block  *new;   /* new (root) btree block */
2481         int                     nptr;   /* new value for key index, 1 or 2 */
2482         struct xfs_buf          *rbp;   /* right buffer pointer */
2483         struct xfs_btree_block  *right; /* right btree block */
2484         union xfs_btree_ptr     rptr;
2485         union xfs_btree_ptr     lptr;
2486
2487         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2488         XFS_BTREE_STATS_INC(cur, newroot);
2489
2490         /* initialise our start point from the cursor */
2491         cur->bc_ops->init_ptr_from_cur(cur, &rptr);
2492
2493         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2494         error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
2495         if (error)
2496                 goto error0;
2497         if (*stat == 0)
2498                 goto out0;
2499         XFS_BTREE_STATS_INC(cur, alloc);
2500
2501         /* Set up the new block. */
2502         error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
2503         if (error)
2504                 goto error0;
2505
2506         /* Set the root in the holding structure  increasing the level by 1. */
2507         cur->bc_ops->set_root(cur, &lptr, 1);
2508
2509         /*
2510          * At the previous root level there are now two blocks: the old root,
2511          * and the new block generated when it was split.  We don't know which
2512          * one the cursor is pointing at, so we set up variables "left" and
2513          * "right" for each case.
2514          */
2515         block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
2516
2517 #ifdef DEBUG
2518         error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
2519         if (error)
2520                 goto error0;
2521 #endif
2522
2523         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
2524         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
2525                 /* Our block is left, pick up the right block. */
2526                 lbp = bp;
2527                 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2528                 left = block;
2529                 error = xfs_btree_read_buf_block(cur, &rptr,
2530                                         cur->bc_nlevels - 1, 0, &right, &rbp);
2531                 if (error)
2532                         goto error0;
2533                 bp = rbp;
2534                 nptr = 1;
2535         } else {
2536                 /* Our block is right, pick up the left block. */
2537                 rbp = bp;
2538                 xfs_btree_buf_to_ptr(cur, rbp, &rptr);
2539                 right = block;
2540                 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2541                 error = xfs_btree_read_buf_block(cur, &lptr,
2542                                         cur->bc_nlevels - 1, 0, &left, &lbp);
2543                 if (error)
2544                         goto error0;
2545                 bp = lbp;
2546                 nptr = 2;
2547         }
2548         /* Fill in the new block's btree header and log it. */
2549         xfs_btree_init_block(cur, cur->bc_nlevels, 2, new);
2550         xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
2551         ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
2552                         !xfs_btree_ptr_is_null(cur, &rptr));
2553
2554         /* Fill in the key data in the new root. */
2555         if (xfs_btree_get_level(left) > 0) {
2556                 xfs_btree_copy_keys(cur,
2557                                 xfs_btree_key_addr(cur, 1, new),
2558                                 xfs_btree_key_addr(cur, 1, left), 1);
2559                 xfs_btree_copy_keys(cur,
2560                                 xfs_btree_key_addr(cur, 2, new),
2561                                 xfs_btree_key_addr(cur, 1, right), 1);
2562         } else {
2563                 cur->bc_ops->init_key_from_rec(
2564                                 xfs_btree_key_addr(cur, 1, new),
2565                                 xfs_btree_rec_addr(cur, 1, left));
2566                 cur->bc_ops->init_key_from_rec(
2567                                 xfs_btree_key_addr(cur, 2, new),
2568                                 xfs_btree_rec_addr(cur, 1, right));
2569         }
2570         xfs_btree_log_keys(cur, nbp, 1, 2);
2571
2572         /* Fill in the pointer data in the new root. */
2573         xfs_btree_copy_ptrs(cur,
2574                 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
2575         xfs_btree_copy_ptrs(cur,
2576                 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
2577         xfs_btree_log_ptrs(cur, nbp, 1, 2);
2578
2579         /* Fix up the cursor. */
2580         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
2581         cur->bc_ptrs[cur->bc_nlevels] = nptr;
2582         cur->bc_nlevels++;
2583         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2584         *stat = 1;
2585         return 0;
2586 error0:
2587         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2588         return error;
2589 out0:
2590         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2591         *stat = 0;
2592         return 0;
2593 }
2594
2595 STATIC int
2596 xfs_btree_make_block_unfull(
2597         struct xfs_btree_cur    *cur,   /* btree cursor */
2598         int                     level,  /* btree level */
2599         int                     numrecs,/* # of recs in block */
2600         int                     *oindex,/* old tree index */
2601         int                     *index, /* new tree index */
2602         union xfs_btree_ptr     *nptr,  /* new btree ptr */
2603         struct xfs_btree_cur    **ncur, /* new btree cursor */
2604         union xfs_btree_rec     *nrec,  /* new record */
2605         int                     *stat)
2606 {
2607         union xfs_btree_key     key;    /* new btree key value */
2608         int                     error = 0;
2609
2610         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2611             level == cur->bc_nlevels - 1) {
2612                 struct xfs_inode *ip = cur->bc_private.b.ip;
2613
2614                 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
2615                         /* A root block that can be made bigger. */
2616
2617                         xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
2618                 } else {
2619                         /* A root block that needs replacing */
2620                         int     logflags = 0;
2621
2622                         error = xfs_btree_new_iroot(cur, &logflags, stat);
2623                         if (error || *stat == 0)
2624                                 return error;
2625
2626                         xfs_trans_log_inode(cur->bc_tp, ip, logflags);
2627                 }
2628
2629                 return 0;
2630         }
2631
2632         /* First, try shifting an entry to the right neighbor. */
2633         error = xfs_btree_rshift(cur, level, stat);
2634         if (error || *stat)
2635                 return error;
2636
2637         /* Next, try shifting an entry to the left neighbor. */
2638         error = xfs_btree_lshift(cur, level, stat);
2639         if (error)
2640                 return error;
2641
2642         if (*stat) {
2643                 *oindex = *index = cur->bc_ptrs[level];
2644                 return 0;
2645         }
2646
2647         /*
2648          * Next, try splitting the current block in half.
2649          *
2650          * If this works we have to re-set our variables because we
2651          * could be in a different block now.
2652          */
2653         error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
2654         if (error || *stat == 0)
2655                 return error;
2656
2657
2658         *index = cur->bc_ptrs[level];
2659         cur->bc_ops->init_rec_from_key(&key, nrec);
2660         return 0;
2661 }
2662
2663 /*
2664  * Insert one record/level.  Return information to the caller
2665  * allowing the next level up to proceed if necessary.
2666  */
2667 STATIC int
2668 xfs_btree_insrec(
2669         struct xfs_btree_cur    *cur,   /* btree cursor */
2670         int                     level,  /* level to insert record at */
2671         union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
2672         union xfs_btree_rec     *recp,  /* i/o: record data inserted */
2673         struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
2674         int                     *stat)  /* success/failure */
2675 {
2676         struct xfs_btree_block  *block; /* btree block */
2677         struct xfs_buf          *bp;    /* buffer for block */
2678         union xfs_btree_key     key;    /* btree key */
2679         union xfs_btree_ptr     nptr;   /* new block ptr */
2680         struct xfs_btree_cur    *ncur;  /* new btree cursor */
2681         union xfs_btree_rec     nrec;   /* new record count */
2682         int                     optr;   /* old key/record index */
2683         int                     ptr;    /* key/record index */
2684         int                     numrecs;/* number of records */
2685         int                     error;  /* error return value */
2686 #ifdef DEBUG
2687         int                     i;
2688 #endif
2689
2690         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2691         XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
2692
2693         ncur = NULL;
2694
2695         /*
2696          * If we have an external root pointer, and we've made it to the
2697          * root level, allocate a new root block and we're done.
2698          */
2699         if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2700             (level >= cur->bc_nlevels)) {
2701                 error = xfs_btree_new_root(cur, stat);
2702                 xfs_btree_set_ptr_null(cur, ptrp);
2703
2704                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2705                 return error;
2706         }
2707
2708         /* If we're off the left edge, return failure. */
2709         ptr = cur->bc_ptrs[level];
2710         if (ptr == 0) {
2711                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2712                 *stat = 0;
2713                 return 0;
2714         }
2715
2716         /* Make a key out of the record data to be inserted, and save it. */
2717         cur->bc_ops->init_key_from_rec(&key, recp);
2718
2719         optr = ptr;
2720
2721         XFS_BTREE_STATS_INC(cur, insrec);
2722
2723         /* Get pointers to the btree buffer and block. */
2724         block = xfs_btree_get_block(cur, level, &bp);
2725         numrecs = xfs_btree_get_numrecs(block);
2726
2727 #ifdef DEBUG
2728         error = xfs_btree_check_block(cur, block, level, bp);
2729         if (error)
2730                 goto error0;
2731
2732         /* Check that the new entry is being inserted in the right place. */
2733         if (ptr <= numrecs) {
2734                 if (level == 0) {
2735                         ASSERT(cur->bc_ops->recs_inorder(cur, recp,
2736                                 xfs_btree_rec_addr(cur, ptr, block)));
2737                 } else {
2738                         ASSERT(cur->bc_ops->keys_inorder(cur, &key,
2739                                 xfs_btree_key_addr(cur, ptr, block)));
2740                 }
2741         }
2742 #endif
2743
2744         /*
2745          * If the block is full, we can't insert the new entry until we
2746          * make the block un-full.
2747          */
2748         xfs_btree_set_ptr_null(cur, &nptr);
2749         if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
2750                 error = xfs_btree_make_block_unfull(cur, level, numrecs,
2751                                         &optr, &ptr, &nptr, &ncur, &nrec, stat);
2752                 if (error || *stat == 0)
2753                         goto error0;
2754         }
2755
2756         /*
2757          * The current block may have changed if the block was
2758          * previously full and we have just made space in it.
2759          */
2760         block = xfs_btree_get_block(cur, level, &bp);
2761         numrecs = xfs_btree_get_numrecs(block);
2762
2763 #ifdef DEBUG
2764         error = xfs_btree_check_block(cur, block, level, bp);
2765         if (error)
2766                 return error;
2767 #endif
2768
2769         /*
2770          * At this point we know there's room for our new entry in the block
2771          * we're pointing at.
2772          */
2773         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
2774
2775         if (level > 0) {
2776                 /* It's a nonleaf. make a hole in the keys and ptrs */
2777                 union xfs_btree_key     *kp;
2778                 union xfs_btree_ptr     *pp;
2779
2780                 kp = xfs_btree_key_addr(cur, ptr, block);
2781                 pp = xfs_btree_ptr_addr(cur, ptr, block);
2782
2783 #ifdef DEBUG
2784                 for (i = numrecs - ptr; i >= 0; i--) {
2785                         error = xfs_btree_check_ptr(cur, pp, i, level);
2786                         if (error)
2787                                 return error;
2788                 }
2789 #endif
2790
2791                 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
2792                 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
2793
2794 #ifdef DEBUG
2795                 error = xfs_btree_check_ptr(cur, ptrp, 0, level);
2796                 if (error)
2797                         goto error0;
2798 #endif
2799
2800                 /* Now put the new data in, bump numrecs and log it. */
2801                 xfs_btree_copy_keys(cur, kp, &key, 1);
2802                 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
2803                 numrecs++;
2804                 xfs_btree_set_numrecs(block, numrecs);
2805                 xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
2806                 xfs_btree_log_keys(cur, bp, ptr, numrecs);
2807 #ifdef DEBUG
2808                 if (ptr < numrecs) {
2809                         ASSERT(cur->bc_ops->keys_inorder(cur, kp,
2810                                 xfs_btree_key_addr(cur, ptr + 1, block)));
2811                 }
2812 #endif
2813         } else {
2814                 /* It's a leaf. make a hole in the records */
2815                 union xfs_btree_rec             *rp;
2816
2817                 rp = xfs_btree_rec_addr(cur, ptr, block);
2818
2819                 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
2820
2821                 /* Now put the new data in, bump numrecs and log it. */
2822                 xfs_btree_copy_recs(cur, rp, recp, 1);
2823                 xfs_btree_set_numrecs(block, ++numrecs);
2824                 xfs_btree_log_recs(cur, bp, ptr, numrecs);
2825 #ifdef DEBUG
2826                 if (ptr < numrecs) {
2827                         ASSERT(cur->bc_ops->recs_inorder(cur, rp,
2828                                 xfs_btree_rec_addr(cur, ptr + 1, block)));
2829                 }
2830 #endif
2831         }
2832
2833         /* Log the new number of records in the btree header. */
2834         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
2835
2836         /* If we inserted at the start of a block, update the parents' keys. */
2837         if (optr == 1) {
2838                 error = xfs_btree_updkey(cur, &key, level + 1);
2839                 if (error)
2840                         goto error0;
2841         }
2842
2843         /*
2844          * If we are tracking the last record in the tree and
2845          * we are at the far right edge of the tree, update it.
2846          */
2847         if (xfs_btree_is_lastrec(cur, block, level)) {
2848                 cur->bc_ops->update_lastrec(cur, block, recp,
2849                                             ptr, LASTREC_INSREC);
2850         }
2851
2852         /*
2853          * Return the new block number, if any.
2854          * If there is one, give back a record value and a cursor too.
2855          */
2856         *ptrp = nptr;
2857         if (!xfs_btree_ptr_is_null(cur, &nptr)) {
2858                 *recp = nrec;
2859                 *curp = ncur;
2860         }
2861
2862         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2863         *stat = 1;
2864         return 0;
2865
2866 error0:
2867         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2868         return error;
2869 }
2870
2871 /*
2872  * Insert the record at the point referenced by cur.
2873  *
2874  * A multi-level split of the tree on insert will invalidate the original
2875  * cursor.  All callers of this function should assume that the cursor is
2876  * no longer valid and revalidate it.
2877  */
2878 int
2879 xfs_btree_insert(
2880         struct xfs_btree_cur    *cur,
2881         int                     *stat)
2882 {
2883         int                     error;  /* error return value */
2884         int                     i;      /* result value, 0 for failure */
2885         int                     level;  /* current level number in btree */
2886         union xfs_btree_ptr     nptr;   /* new block number (split result) */
2887         struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
2888         struct xfs_btree_cur    *pcur;  /* previous level's cursor */
2889         union xfs_btree_rec     rec;    /* record to insert */
2890
2891         level = 0;
2892         ncur = NULL;
2893         pcur = cur;
2894
2895         xfs_btree_set_ptr_null(cur, &nptr);
2896         cur->bc_ops->init_rec_from_cur(cur, &rec);
2897
2898         /*
2899          * Loop going up the tree, starting at the leaf level.
2900          * Stop when we don't get a split block, that must mean that
2901          * the insert is finished with this level.
2902          */
2903         do {
2904                 /*
2905                  * Insert nrec/nptr into this level of the tree.
2906                  * Note if we fail, nptr will be null.
2907                  */
2908                 error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
2909                 if (error) {
2910                         if (pcur != cur)
2911                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2912                         goto error0;
2913                 }
2914
2915                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2916                 level++;
2917
2918                 /*
2919                  * See if the cursor we just used is trash.
2920                  * Can't trash the caller's cursor, but otherwise we should
2921                  * if ncur is a new cursor or we're about to be done.
2922                  */
2923                 if (pcur != cur &&
2924                     (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
2925                         /* Save the state from the cursor before we trash it */
2926                         if (cur->bc_ops->update_cursor)
2927                                 cur->bc_ops->update_cursor(pcur, cur);
2928                         cur->bc_nlevels = pcur->bc_nlevels;
2929                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2930                 }
2931                 /* If we got a new cursor, switch to it. */
2932                 if (ncur) {
2933                         pcur = ncur;
2934                         ncur = NULL;
2935                 }
2936         } while (!xfs_btree_ptr_is_null(cur, &nptr));
2937
2938         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2939         *stat = i;
2940         return 0;
2941 error0:
2942         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2943         return error;
2944 }
2945
2946 /*
2947  * Try to merge a non-leaf block back into the inode root.
2948  *
2949  * Note: the killroot names comes from the fact that we're effectively
2950  * killing the old root block.  But because we can't just delete the
2951  * inode we have to copy the single block it was pointing to into the
2952  * inode.
2953  */
2954 int
2955 xfs_btree_kill_iroot(
2956         struct xfs_btree_cur    *cur)
2957 {
2958         int                     whichfork = cur->bc_private.b.whichfork;
2959         struct xfs_inode        *ip = cur->bc_private.b.ip;
2960         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
2961         struct xfs_btree_block  *block;
2962         struct xfs_btree_block  *cblock;
2963         union xfs_btree_key     *kp;
2964         union xfs_btree_key     *ckp;
2965         union xfs_btree_ptr     *pp;
2966         union xfs_btree_ptr     *cpp;
2967         struct xfs_buf          *cbp;
2968         int                     level;
2969         int                     index;
2970         int                     numrecs;
2971 #ifdef DEBUG
2972         union xfs_btree_ptr     ptr;
2973         int                     i;
2974 #endif
2975
2976         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2977
2978         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2979         ASSERT(cur->bc_nlevels > 1);
2980
2981         /*
2982          * Don't deal with the root block needs to be a leaf case.
2983          * We're just going to turn the thing back into extents anyway.
2984          */
2985         level = cur->bc_nlevels - 1;
2986         if (level == 1)
2987                 goto out0;
2988
2989         /*
2990          * Give up if the root has multiple children.
2991          */
2992         block = xfs_btree_get_iroot(cur);
2993         if (xfs_btree_get_numrecs(block) != 1)
2994                 goto out0;
2995
2996         cblock = xfs_btree_get_block(cur, level - 1, &cbp);
2997         numrecs = xfs_btree_get_numrecs(cblock);
2998
2999         /*
3000          * Only do this if the next level will fit.
3001          * Then the data must be copied up to the inode,
3002          * instead of freeing the root you free the next level.
3003          */
3004         if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
3005                 goto out0;
3006
3007         XFS_BTREE_STATS_INC(cur, killroot);
3008
3009 #ifdef DEBUG
3010         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
3011         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3012         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
3013         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3014 #endif
3015
3016         index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
3017         if (index) {
3018                 xfs_iroot_realloc(cur->bc_private.b.ip, index,
3019                                   cur->bc_private.b.whichfork);
3020                 block = ifp->if_broot;
3021         }
3022
3023         be16_add_cpu(&block->bb_numrecs, index);
3024         ASSERT(block->bb_numrecs == cblock->bb_numrecs);
3025
3026         kp = xfs_btree_key_addr(cur, 1, block);
3027         ckp = xfs_btree_key_addr(cur, 1, cblock);
3028         xfs_btree_copy_keys(cur, kp, ckp, numrecs);
3029
3030         pp = xfs_btree_ptr_addr(cur, 1, block);
3031         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3032 #ifdef DEBUG
3033         for (i = 0; i < numrecs; i++) {
3034                 int             error;
3035
3036                 error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
3037                 if (error) {
3038                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3039                         return error;
3040                 }
3041         }
3042 #endif
3043         xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
3044
3045         cur->bc_ops->free_block(cur, cbp);
3046         XFS_BTREE_STATS_INC(cur, free);
3047
3048         cur->bc_bufs[level - 1] = NULL;
3049         be16_add_cpu(&block->bb_level, -1);
3050         xfs_trans_log_inode(cur->bc_tp, ip,
3051                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
3052         cur->bc_nlevels--;
3053 out0:
3054         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3055         return 0;
3056 }
3057
3058 STATIC int
3059 xfs_btree_dec_cursor(
3060         struct xfs_btree_cur    *cur,
3061         int                     level,
3062         int                     *stat)
3063 {
3064         int                     error;
3065         int                     i;
3066
3067         if (level > 0) {
3068                 error = xfs_btree_decrement(cur, level, &i);
3069                 if (error)
3070                         return error;
3071         }
3072
3073         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3074         *stat = 1;
3075         return 0;
3076 }
3077
3078 /*
3079  * Single level of the btree record deletion routine.
3080  * Delete record pointed to by cur/level.
3081  * Remove the record from its block then rebalance the tree.
3082  * Return 0 for error, 1 for done, 2 to go on to the next level.
3083  */
3084 STATIC int                                      /* error */
3085 xfs_btree_delrec(
3086         struct xfs_btree_cur    *cur,           /* btree cursor */
3087         int                     level,          /* level removing record from */
3088         int                     *stat)          /* fail/done/go-on */
3089 {
3090         struct xfs_btree_block  *block;         /* btree block */
3091         union xfs_btree_ptr     cptr;           /* current block ptr */
3092         struct xfs_buf          *bp;            /* buffer for block */
3093         int                     error;          /* error return value */
3094         int                     i;              /* loop counter */
3095         union xfs_btree_key     key;            /* storage for keyp */
3096         union xfs_btree_key     *keyp = &key;   /* passed to the next level */
3097         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
3098         struct xfs_buf          *lbp;           /* left buffer pointer */
3099         struct xfs_btree_block  *left;          /* left btree block */
3100         int                     lrecs = 0;      /* left record count */
3101         int                     ptr;            /* key/record index */
3102         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
3103         struct xfs_buf          *rbp;           /* right buffer pointer */
3104         struct xfs_btree_block  *right;         /* right btree block */
3105         struct xfs_btree_block  *rrblock;       /* right-right btree block */
3106         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
3107         int                     rrecs = 0;      /* right record count */
3108         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
3109         int                     numrecs;        /* temporary numrec count */
3110
3111         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3112         XFS_BTREE_TRACE_ARGI(cur, level);
3113
3114         tcur = NULL;
3115
3116         /* Get the index of the entry being deleted, check for nothing there. */
3117         ptr = cur->bc_ptrs[level];
3118         if (ptr == 0) {
3119                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3120                 *stat = 0;
3121                 return 0;
3122         }
3123
3124         /* Get the buffer & block containing the record or key/ptr. */
3125         block = xfs_btree_get_block(cur, level, &bp);
3126         numrecs = xfs_btree_get_numrecs(block);
3127
3128 #ifdef DEBUG
3129         error = xfs_btree_check_block(cur, block, level, bp);
3130         if (error)
3131                 goto error0;
3132 #endif
3133
3134         /* Fail if we're off the end of the block. */
3135         if (ptr > numrecs) {
3136                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3137                 *stat = 0;
3138                 return 0;
3139         }
3140
3141         XFS_BTREE_STATS_INC(cur, delrec);
3142         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3143
3144         /* Excise the entries being deleted. */
3145         if (level > 0) {
3146                 /* It's a nonleaf. operate on keys and ptrs */
3147                 union xfs_btree_key     *lkp;
3148                 union xfs_btree_ptr     *lpp;
3149
3150                 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3151                 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3152
3153 #ifdef DEBUG
3154                 for (i = 0; i < numrecs - ptr; i++) {
3155                         error = xfs_btree_check_ptr(cur, lpp, i, level);
3156                         if (error)
3157                                 goto error0;
3158                 }
3159 #endif
3160
3161                 if (ptr < numrecs) {
3162                         xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3163                         xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3164                         xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3165                         xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3166                 }
3167
3168                 /*
3169                  * If it's the first record in the block, we'll need to pass a
3170                  * key up to the next level (updkey).
3171                  */
3172                 if (ptr == 1)
3173                         keyp = xfs_btree_key_addr(cur, 1, block);
3174         } else {
3175                 /* It's a leaf. operate on records */
3176                 if (ptr < numrecs) {
3177                         xfs_btree_shift_recs(cur,
3178                                 xfs_btree_rec_addr(cur, ptr + 1, block),
3179                                 -1, numrecs - ptr);
3180                         xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3181                 }
3182
3183                 /*
3184                  * If it's the first record in the block, we'll need a key
3185                  * structure to pass up to the next level (updkey).
3186                  */
3187                 if (ptr == 1) {
3188                         cur->bc_ops->init_key_from_rec(&key,
3189                                         xfs_btree_rec_addr(cur, 1, block));
3190                         keyp = &key;
3191                 }
3192         }
3193
3194         /*
3195          * Decrement and log the number of entries in the block.
3196          */
3197         xfs_btree_set_numrecs(block, --numrecs);
3198         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3199
3200         /*
3201          * If we are tracking the last record in the tree and
3202          * we are at the far right edge of the tree, update it.
3203          */
3204         if (xfs_btree_is_lastrec(cur, block, level)) {
3205                 cur->bc_ops->update_lastrec(cur, block, NULL,
3206                                             ptr, LASTREC_DELREC);
3207         }
3208
3209         /*
3210          * We're at the root level.  First, shrink the root block in-memory.
3211          * Try to get rid of the next level down.  If we can't then there's
3212          * nothing left to do.
3213          */
3214         if (level == cur->bc_nlevels - 1) {
3215                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3216                         xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3217                                           cur->bc_private.b.whichfork);
3218
3219                         error = xfs_btree_kill_iroot(cur);
3220                         if (error)
3221                                 goto error0;
3222
3223                         error = xfs_btree_dec_cursor(cur, level, stat);
3224                         if (error)
3225                                 goto error0;
3226                         *stat = 1;
3227                         return 0;
3228                 }
3229
3230                 /*
3231                  * If this is the root level, and there's only one entry left,
3232                  * and it's NOT the leaf level, then we can get rid of this
3233                  * level.
3234                  */
3235                 if (numrecs == 1 && level > 0) {
3236                         union xfs_btree_ptr     *pp;
3237                         /*
3238                          * pp is still set to the first pointer in the block.
3239                          * Make it the new root of the btree.
3240                          */
3241                         pp = xfs_btree_ptr_addr(cur, 1, block);
3242                         error = cur->bc_ops->kill_root(cur, bp, level, pp);
3243                         if (error)
3244                                 goto error0;
3245                 } else if (level > 0) {
3246                         error = xfs_btree_dec_cursor(cur, level, stat);
3247                         if (error)
3248                                 goto error0;
3249                 }
3250                 *stat = 1;
3251                 return 0;
3252         }
3253
3254         /*
3255          * If we deleted the leftmost entry in the block, update the
3256          * key values above us in the tree.
3257          */
3258         if (ptr == 1) {
3259                 error = xfs_btree_updkey(cur, keyp, level + 1);
3260                 if (error)
3261                         goto error0;
3262         }
3263
3264         /*
3265          * If the number of records remaining in the block is at least
3266          * the minimum, we're done.
3267          */
3268         if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3269                 error = xfs_btree_dec_cursor(cur, level, stat);
3270                 if (error)
3271                         goto error0;
3272                 return 0;
3273         }
3274
3275         /*
3276          * Otherwise, we have to move some records around to keep the
3277          * tree balanced.  Look at the left and right sibling blocks to
3278          * see if we can re-balance by moving only one record.
3279          */
3280         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3281         xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3282
3283         if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3284                 /*
3285                  * One child of root, need to get a chance to copy its contents
3286                  * into the root and delete it. Can't go up to next level,
3287                  * there's nothing to delete there.
3288                  */
3289                 if (xfs_btree_ptr_is_null(cur, &rptr) &&
3290                     xfs_btree_ptr_is_null(cur, &lptr) &&
3291                     level == cur->bc_nlevels - 2) {
3292                         error = xfs_btree_kill_iroot(cur);
3293                         if (!error)
3294                                 error = xfs_btree_dec_cursor(cur, level, stat);
3295                         if (error)
3296                                 goto error0;
3297                         return 0;
3298                 }
3299         }
3300
3301         ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3302                !xfs_btree_ptr_is_null(cur, &lptr));
3303
3304         /*
3305          * Duplicate the cursor so our btree manipulations here won't
3306          * disrupt the next level up.
3307          */
3308         error = xfs_btree_dup_cursor(cur, &tcur);
3309         if (error)
3310                 goto error0;
3311
3312         /*
3313          * If there's a right sibling, see if it's ok to shift an entry
3314          * out of it.
3315          */
3316         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3317                 /*
3318                  * Move the temp cursor to the last entry in the next block.
3319                  * Actually any entry but the first would suffice.
3320                  */
3321                 i = xfs_btree_lastrec(tcur, level);
3322                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3323
3324                 error = xfs_btree_increment(tcur, level, &i);
3325                 if (error)
3326                         goto error0;
3327                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3328
3329                 i = xfs_btree_lastrec(tcur, level);
3330                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3331
3332                 /* Grab a pointer to the block. */
3333                 right = xfs_btree_get_block(tcur, level, &rbp);
3334 #ifdef DEBUG
3335                 error = xfs_btree_check_block(tcur, right, level, rbp);
3336                 if (error)
3337                         goto error0;
3338 #endif
3339                 /* Grab the current block number, for future use. */
3340                 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3341
3342                 /*
3343                  * If right block is full enough so that removing one entry
3344                  * won't make it too empty, and left-shifting an entry out
3345                  * of right to us works, we're done.
3346                  */
3347                 if (xfs_btree_get_numrecs(right) - 1 >=
3348                     cur->bc_ops->get_minrecs(tcur, level)) {
3349                         error = xfs_btree_lshift(tcur, level, &i);
3350                         if (error)
3351                                 goto error0;
3352                         if (i) {
3353                                 ASSERT(xfs_btree_get_numrecs(block) >=
3354                                        cur->bc_ops->get_minrecs(tcur, level));
3355
3356                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3357                                 tcur = NULL;
3358
3359                                 error = xfs_btree_dec_cursor(cur, level, stat);
3360                                 if (error)
3361                                         goto error0;
3362                                 return 0;
3363                         }
3364                 }
3365
3366                 /*
3367                  * Otherwise, grab the number of records in right for
3368                  * future reference, and fix up the temp cursor to point
3369                  * to our block again (last record).
3370                  */
3371                 rrecs = xfs_btree_get_numrecs(right);
3372                 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3373                         i = xfs_btree_firstrec(tcur, level);
3374                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3375
3376                         error = xfs_btree_decrement(tcur, level, &i);
3377                         if (error)
3378                                 goto error0;
3379                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3380                 }
3381         }
3382
3383         /*
3384          * If there's a left sibling, see if it's ok to shift an entry
3385          * out of it.
3386          */
3387         if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3388                 /*
3389                  * Move the temp cursor to the first entry in the
3390                  * previous block.
3391                  */
3392                 i = xfs_btree_firstrec(tcur, level);
3393                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3394
3395                 error = xfs_btree_decrement(tcur, level, &i);
3396                 if (error)
3397                         goto error0;
3398                 i = xfs_btree_firstrec(tcur, level);
3399                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3400
3401                 /* Grab a pointer to the block. */
3402                 left = xfs_btree_get_block(tcur, level, &lbp);
3403 #ifdef DEBUG
3404                 error = xfs_btree_check_block(cur, left, level, lbp);
3405                 if (error)
3406                         goto error0;
3407 #endif
3408                 /* Grab the current block number, for future use. */
3409                 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
3410
3411                 /*
3412                  * If left block is full enough so that removing one entry
3413                  * won't make it too empty, and right-shifting an entry out
3414                  * of left to us works, we're done.
3415                  */
3416                 if (xfs_btree_get_numrecs(left) - 1 >=
3417                     cur->bc_ops->get_minrecs(tcur, level)) {
3418                         error = xfs_btree_rshift(tcur, level, &i);
3419                         if (error)
3420                                 goto error0;
3421                         if (i) {
3422                                 ASSERT(xfs_btree_get_numrecs(block) >=
3423                                        cur->bc_ops->get_minrecs(tcur, level));
3424                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3425                                 tcur = NULL;
3426                                 if (level == 0)
3427                                         cur->bc_ptrs[0]++;
3428                                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3429                                 *stat = 1;
3430                                 return 0;
3431                         }
3432                 }
3433
3434                 /*
3435                  * Otherwise, grab the number of records in right for
3436                  * future reference.
3437                  */
3438                 lrecs = xfs_btree_get_numrecs(left);
3439         }
3440
3441         /* Delete the temp cursor, we're done with it. */
3442         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3443         tcur = NULL;
3444
3445         /* If here, we need to do a join to keep the tree balanced. */
3446         ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
3447
3448         if (!xfs_btree_ptr_is_null(cur, &lptr) &&
3449             lrecs + xfs_btree_get_numrecs(block) <=
3450                         cur->bc_ops->get_maxrecs(cur, level)) {
3451                 /*
3452                  * Set "right" to be the starting block,
3453                  * "left" to be the left neighbor.
3454                  */
3455                 rptr = cptr;
3456                 right = block;
3457                 rbp = bp;
3458                 error = xfs_btree_read_buf_block(cur, &lptr, level,
3459                                                         0, &left, &lbp);
3460                 if (error)
3461                         goto error0;
3462
3463         /*
3464          * If that won't work, see if we can join with the right neighbor block.
3465          */
3466         } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
3467                    rrecs + xfs_btree_get_numrecs(block) <=
3468                         cur->bc_ops->get_maxrecs(cur, level)) {
3469                 /*
3470                  * Set "left" to be the starting block,
3471                  * "right" to be the right neighbor.
3472                  */
3473                 lptr = cptr;
3474                 left = block;
3475                 lbp = bp;
3476                 error = xfs_btree_read_buf_block(cur, &rptr, level,
3477                                                         0, &right, &rbp);
3478                 if (error)
3479                         goto error0;
3480
3481         /*
3482          * Otherwise, we can't fix the imbalance.
3483          * Just return.  This is probably a logic error, but it's not fatal.
3484          */
3485         } else {
3486                 error = xfs_btree_dec_cursor(cur, level, stat);
3487                 if (error)
3488                         goto error0;
3489                 return 0;
3490         }
3491
3492         rrecs = xfs_btree_get_numrecs(right);
3493         lrecs = xfs_btree_get_numrecs(left);
3494
3495         /*
3496          * We're now going to join "left" and "right" by moving all the stuff
3497          * in "right" to "left" and deleting "right".
3498          */
3499         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
3500         if (level > 0) {
3501                 /* It's a non-leaf.  Move keys and pointers. */
3502                 union xfs_btree_key     *lkp;   /* left btree key */
3503                 union xfs_btree_ptr     *lpp;   /* left address pointer */
3504                 union xfs_btree_key     *rkp;   /* right btree key */
3505                 union xfs_btree_ptr     *rpp;   /* right address pointer */
3506
3507                 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
3508                 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
3509                 rkp = xfs_btree_key_addr(cur, 1, right);
3510                 rpp = xfs_btree_ptr_addr(cur, 1, right);
3511 #ifdef DEBUG
3512                 for (i = 1; i < rrecs; i++) {
3513                         error = xfs_btree_check_ptr(cur, rpp, i, level);
3514                         if (error)
3515                                 goto error0;
3516                 }
3517 #endif
3518                 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
3519                 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
3520
3521                 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
3522                 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
3523         } else {
3524                 /* It's a leaf.  Move records.  */
3525                 union xfs_btree_rec     *lrp;   /* left record pointer */
3526                 union xfs_btree_rec     *rrp;   /* right record pointer */
3527
3528                 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
3529                 rrp = xfs_btree_rec_addr(cur, 1, right);
3530
3531                 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
3532                 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
3533         }
3534
3535         XFS_BTREE_STATS_INC(cur, join);
3536
3537         /*
3538          * Fix up the number of records and right block pointer in the
3539          * surviving block, and log it.
3540          */
3541         xfs_btree_set_numrecs(left, lrecs + rrecs);
3542         xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
3543         xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3544         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
3545
3546         /* If there is a right sibling, point it to the remaining block. */
3547         xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3548         if (!xfs_btree_ptr_is_null(cur, &cptr)) {
3549                 error = xfs_btree_read_buf_block(cur, &cptr, level,
3550                                                         0, &rrblock, &rrbp);
3551                 if (error)
3552                         goto error0;
3553                 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
3554                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
3555         }
3556
3557         /* Free the deleted block. */
3558         error = cur->bc_ops->free_block(cur, rbp);
3559         if (error)
3560                 goto error0;
3561         XFS_BTREE_STATS_INC(cur, free);
3562
3563         /*
3564          * If we joined with the left neighbor, set the buffer in the
3565          * cursor to the left block, and fix up the index.
3566          */
3567         if (bp != lbp) {
3568                 cur->bc_bufs[level] = lbp;
3569                 cur->bc_ptrs[level] += lrecs;
3570                 cur->bc_ra[level] = 0;
3571         }
3572         /*
3573          * If we joined with the right neighbor and there's a level above
3574          * us, increment the cursor at that level.
3575          */
3576         else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
3577                    (level + 1 < cur->bc_nlevels)) {
3578                 error = xfs_btree_increment(cur, level + 1, &i);
3579                 if (error)
3580                         goto error0;
3581         }
3582
3583         /*
3584          * Readjust the ptr at this level if it's not a leaf, since it's
3585          * still pointing at the deletion point, which makes the cursor
3586          * inconsistent.  If this makes the ptr 0, the caller fixes it up.
3587          * We can't use decrement because it would change the next level up.
3588          */
3589         if (level > 0)
3590                 cur->bc_ptrs[level]--;
3591
3592         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3593         /* Return value means the next level up has something to do. */
3594         *stat = 2;
3595         return 0;
3596
3597 error0:
3598         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3599         if (tcur)
3600                 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
3601         return error;
3602 }
3603
3604 /*
3605  * Delete the record pointed to by cur.
3606  * The cursor refers to the place where the record was (could be inserted)
3607  * when the operation returns.
3608  */
3609 int                                     /* error */
3610 xfs_btree_delete(
3611         struct xfs_btree_cur    *cur,
3612         int                     *stat)  /* success/failure */
3613 {
3614         int                     error;  /* error return value */
3615         int                     level;
3616         int                     i;
3617
3618         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3619
3620         /*
3621          * Go up the tree, starting at leaf level.
3622          *
3623          * If 2 is returned then a join was done; go to the next level.
3624          * Otherwise we are done.
3625          */
3626         for (level = 0, i = 2; i == 2; level++) {
3627                 error = xfs_btree_delrec(cur, level, &i);
3628                 if (error)
3629                         goto error0;
3630         }
3631
3632         if (i == 0) {
3633                 for (level = 1; level < cur->bc_nlevels; level++) {
3634                         if (cur->bc_ptrs[level] == 0) {
3635                                 error = xfs_btree_decrement(cur, level, &i);
3636                                 if (error)
3637                                         goto error0;
3638                                 break;
3639                         }
3640                 }
3641         }
3642
3643         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3644         *stat = i;
3645         return 0;
3646 error0:
3647         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3648         return error;
3649 }
3650
3651 /*
3652  * Get the data from the pointed-to record.
3653  */
3654 int                                     /* error */
3655 xfs_btree_get_rec(
3656         struct xfs_btree_cur    *cur,   /* btree cursor */
3657         union xfs_btree_rec     **recp, /* output: btree record */
3658         int                     *stat)  /* output: success/failure */
3659 {
3660         struct xfs_btree_block  *block; /* btree block */
3661         struct xfs_buf          *bp;    /* buffer pointer */
3662         int                     ptr;    /* record number */
3663 #ifdef DEBUG
3664         int                     error;  /* error return value */
3665 #endif
3666
3667         ptr = cur->bc_ptrs[0];
3668         block = xfs_btree_get_block(cur, 0, &bp);
3669
3670 #ifdef DEBUG
3671         error = xfs_btree_check_block(cur, block, 0, bp);
3672         if (error)
3673                 return error;
3674 #endif
3675
3676         /*
3677          * Off the right end or left end, return failure.
3678          */
3679         if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
3680                 *stat = 0;
3681                 return 0;
3682         }
3683
3684         /*
3685          * Point to the record and extract its data.
3686          */
3687         *recp = xfs_btree_rec_addr(cur, ptr, block);
3688         *stat = 1;
3689         return 0;
3690 }