Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
[pandora-kernel.git] / fs / xfs / xfs_btree.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_mount.h"
28 #include "xfs_bmap_btree.h"
29 #include "xfs_alloc_btree.h"
30 #include "xfs_ialloc_btree.h"
31 #include "xfs_dinode.h"
32 #include "xfs_inode.h"
33 #include "xfs_inode_item.h"
34 #include "xfs_btree.h"
35 #include "xfs_error.h"
36 #include "xfs_trace.h"
37
38 /*
39  * Cursor allocation zone.
40  */
41 kmem_zone_t     *xfs_btree_cur_zone;
42
43 /*
44  * Btree magic numbers.
45  */
46 const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
47         XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
48 };
49
50
51 STATIC int                              /* error (0 or EFSCORRUPTED) */
52 xfs_btree_check_lblock(
53         struct xfs_btree_cur    *cur,   /* btree cursor */
54         struct xfs_btree_block  *block, /* btree long form block pointer */
55         int                     level,  /* level of the btree block */
56         struct xfs_buf          *bp)    /* buffer for block, if any */
57 {
58         int                     lblock_ok; /* block passes checks */
59         struct xfs_mount        *mp;    /* file system mount point */
60
61         mp = cur->bc_mp;
62         lblock_ok =
63                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
64                 be16_to_cpu(block->bb_level) == level &&
65                 be16_to_cpu(block->bb_numrecs) <=
66                         cur->bc_ops->get_maxrecs(cur, level) &&
67                 block->bb_u.l.bb_leftsib &&
68                 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
69                  XFS_FSB_SANITY_CHECK(mp,
70                         be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
71                 block->bb_u.l.bb_rightsib &&
72                 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
73                  XFS_FSB_SANITY_CHECK(mp,
74                         be64_to_cpu(block->bb_u.l.bb_rightsib)));
75         if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
76                         XFS_ERRTAG_BTREE_CHECK_LBLOCK,
77                         XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
78                 if (bp)
79                         trace_xfs_btree_corrupt(bp, _RET_IP_);
80                 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
81                                  mp);
82                 return XFS_ERROR(EFSCORRUPTED);
83         }
84         return 0;
85 }
86
87 STATIC int                              /* error (0 or EFSCORRUPTED) */
88 xfs_btree_check_sblock(
89         struct xfs_btree_cur    *cur,   /* btree cursor */
90         struct xfs_btree_block  *block, /* btree short form block pointer */
91         int                     level,  /* level of the btree block */
92         struct xfs_buf          *bp)    /* buffer containing block */
93 {
94         struct xfs_buf          *agbp;  /* buffer for ag. freespace struct */
95         struct xfs_agf          *agf;   /* ag. freespace structure */
96         xfs_agblock_t           agflen; /* native ag. freespace length */
97         int                     sblock_ok; /* block passes checks */
98
99         agbp = cur->bc_private.a.agbp;
100         agf = XFS_BUF_TO_AGF(agbp);
101         agflen = be32_to_cpu(agf->agf_length);
102         sblock_ok =
103                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
104                 be16_to_cpu(block->bb_level) == level &&
105                 be16_to_cpu(block->bb_numrecs) <=
106                         cur->bc_ops->get_maxrecs(cur, level) &&
107                 (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
108                  be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
109                 block->bb_u.s.bb_leftsib &&
110                 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
111                  be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
112                 block->bb_u.s.bb_rightsib;
113         if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
114                         XFS_ERRTAG_BTREE_CHECK_SBLOCK,
115                         XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
116                 if (bp)
117                         trace_xfs_btree_corrupt(bp, _RET_IP_);
118                 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
119                         XFS_ERRLEVEL_LOW, cur->bc_mp, block);
120                 return XFS_ERROR(EFSCORRUPTED);
121         }
122         return 0;
123 }
124
125 /*
126  * Debug routine: check that block header is ok.
127  */
128 int
129 xfs_btree_check_block(
130         struct xfs_btree_cur    *cur,   /* btree cursor */
131         struct xfs_btree_block  *block, /* generic btree block pointer */
132         int                     level,  /* level of the btree block */
133         struct xfs_buf          *bp)    /* buffer containing block, if any */
134 {
135         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
136                 return xfs_btree_check_lblock(cur, block, level, bp);
137         else
138                 return xfs_btree_check_sblock(cur, block, level, bp);
139 }
140
141 /*
142  * Check that (long) pointer is ok.
143  */
144 int                                     /* error (0 or EFSCORRUPTED) */
145 xfs_btree_check_lptr(
146         struct xfs_btree_cur    *cur,   /* btree cursor */
147         xfs_dfsbno_t            bno,    /* btree block disk address */
148         int                     level)  /* btree block level */
149 {
150         XFS_WANT_CORRUPTED_RETURN(
151                 level > 0 &&
152                 bno != NULLDFSBNO &&
153                 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
154         return 0;
155 }
156
157 #ifdef DEBUG
158 /*
159  * Check that (short) pointer is ok.
160  */
161 STATIC int                              /* error (0 or EFSCORRUPTED) */
162 xfs_btree_check_sptr(
163         struct xfs_btree_cur    *cur,   /* btree cursor */
164         xfs_agblock_t           bno,    /* btree block disk address */
165         int                     level)  /* btree block level */
166 {
167         xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
168
169         XFS_WANT_CORRUPTED_RETURN(
170                 level > 0 &&
171                 bno != NULLAGBLOCK &&
172                 bno != 0 &&
173                 bno < agblocks);
174         return 0;
175 }
176
177 /*
178  * Check that block ptr is ok.
179  */
180 STATIC int                              /* error (0 or EFSCORRUPTED) */
181 xfs_btree_check_ptr(
182         struct xfs_btree_cur    *cur,   /* btree cursor */
183         union xfs_btree_ptr     *ptr,   /* btree block disk address */
184         int                     index,  /* offset from ptr to check */
185         int                     level)  /* btree block level */
186 {
187         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
188                 return xfs_btree_check_lptr(cur,
189                                 be64_to_cpu((&ptr->l)[index]), level);
190         } else {
191                 return xfs_btree_check_sptr(cur,
192                                 be32_to_cpu((&ptr->s)[index]), level);
193         }
194 }
195 #endif
196
197 /*
198  * Delete the btree cursor.
199  */
200 void
201 xfs_btree_del_cursor(
202         xfs_btree_cur_t *cur,           /* btree cursor */
203         int             error)          /* del because of error */
204 {
205         int             i;              /* btree level */
206
207         /*
208          * Clear the buffer pointers, and release the buffers.
209          * If we're doing this in the face of an error, we
210          * need to make sure to inspect all of the entries
211          * in the bc_bufs array for buffers to be unlocked.
212          * This is because some of the btree code works from
213          * level n down to 0, and if we get an error along
214          * the way we won't have initialized all the entries
215          * down to 0.
216          */
217         for (i = 0; i < cur->bc_nlevels; i++) {
218                 if (cur->bc_bufs[i])
219                         xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
220                 else if (!error)
221                         break;
222         }
223         /*
224          * Can't free a bmap cursor without having dealt with the
225          * allocated indirect blocks' accounting.
226          */
227         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
228                cur->bc_private.b.allocated == 0);
229         /*
230          * Free the cursor.
231          */
232         kmem_zone_free(xfs_btree_cur_zone, cur);
233 }
234
235 /*
236  * Duplicate the btree cursor.
237  * Allocate a new one, copy the record, re-get the buffers.
238  */
239 int                                     /* error */
240 xfs_btree_dup_cursor(
241         xfs_btree_cur_t *cur,           /* input cursor */
242         xfs_btree_cur_t **ncur)         /* output cursor */
243 {
244         xfs_buf_t       *bp;            /* btree block's buffer pointer */
245         int             error;          /* error return value */
246         int             i;              /* level number of btree block */
247         xfs_mount_t     *mp;            /* mount structure for filesystem */
248         xfs_btree_cur_t *new;           /* new cursor value */
249         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
250
251         tp = cur->bc_tp;
252         mp = cur->bc_mp;
253
254         /*
255          * Allocate a new cursor like the old one.
256          */
257         new = cur->bc_ops->dup_cursor(cur);
258
259         /*
260          * Copy the record currently in the cursor.
261          */
262         new->bc_rec = cur->bc_rec;
263
264         /*
265          * For each level current, re-get the buffer and copy the ptr value.
266          */
267         for (i = 0; i < new->bc_nlevels; i++) {
268                 new->bc_ptrs[i] = cur->bc_ptrs[i];
269                 new->bc_ra[i] = cur->bc_ra[i];
270                 if ((bp = cur->bc_bufs[i])) {
271                         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
272                                 XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
273                                 xfs_btree_del_cursor(new, error);
274                                 *ncur = NULL;
275                                 return error;
276                         }
277                         new->bc_bufs[i] = bp;
278                         ASSERT(bp);
279                         ASSERT(!XFS_BUF_GETERROR(bp));
280                 } else
281                         new->bc_bufs[i] = NULL;
282         }
283         *ncur = new;
284         return 0;
285 }
286
287 /*
288  * XFS btree block layout and addressing:
289  *
290  * There are two types of blocks in the btree: leaf and non-leaf blocks.
291  *
292  * The leaf record start with a header then followed by records containing
293  * the values.  A non-leaf block also starts with the same header, and
294  * then first contains lookup keys followed by an equal number of pointers
295  * to the btree blocks at the previous level.
296  *
297  *              +--------+-------+-------+-------+-------+-------+-------+
298  * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
299  *              +--------+-------+-------+-------+-------+-------+-------+
300  *
301  *              +--------+-------+-------+-------+-------+-------+-------+
302  * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
303  *              +--------+-------+-------+-------+-------+-------+-------+
304  *
305  * The header is called struct xfs_btree_block for reasons better left unknown
306  * and comes in different versions for short (32bit) and long (64bit) block
307  * pointers.  The record and key structures are defined by the btree instances
308  * and opaque to the btree core.  The block pointers are simple disk endian
309  * integers, available in a short (32bit) and long (64bit) variant.
310  *
311  * The helpers below calculate the offset of a given record, key or pointer
312  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
313  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
314  * inside the btree block is done using indices starting at one, not zero!
315  */
316
317 /*
318  * Return size of the btree block header for this btree instance.
319  */
320 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
321 {
322         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
323                 XFS_BTREE_LBLOCK_LEN :
324                 XFS_BTREE_SBLOCK_LEN;
325 }
326
327 /*
328  * Return size of btree block pointers for this btree instance.
329  */
330 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
331 {
332         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
333                 sizeof(__be64) : sizeof(__be32);
334 }
335
336 /*
337  * Calculate offset of the n-th record in a btree block.
338  */
339 STATIC size_t
340 xfs_btree_rec_offset(
341         struct xfs_btree_cur    *cur,
342         int                     n)
343 {
344         return xfs_btree_block_len(cur) +
345                 (n - 1) * cur->bc_ops->rec_len;
346 }
347
348 /*
349  * Calculate offset of the n-th key in a btree block.
350  */
351 STATIC size_t
352 xfs_btree_key_offset(
353         struct xfs_btree_cur    *cur,
354         int                     n)
355 {
356         return xfs_btree_block_len(cur) +
357                 (n - 1) * cur->bc_ops->key_len;
358 }
359
360 /*
361  * Calculate offset of the n-th block pointer in a btree block.
362  */
363 STATIC size_t
364 xfs_btree_ptr_offset(
365         struct xfs_btree_cur    *cur,
366         int                     n,
367         int                     level)
368 {
369         return xfs_btree_block_len(cur) +
370                 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
371                 (n - 1) * xfs_btree_ptr_len(cur);
372 }
373
374 /*
375  * Return a pointer to the n-th record in the btree block.
376  */
377 STATIC union xfs_btree_rec *
378 xfs_btree_rec_addr(
379         struct xfs_btree_cur    *cur,
380         int                     n,
381         struct xfs_btree_block  *block)
382 {
383         return (union xfs_btree_rec *)
384                 ((char *)block + xfs_btree_rec_offset(cur, n));
385 }
386
387 /*
388  * Return a pointer to the n-th key in the btree block.
389  */
390 STATIC union xfs_btree_key *
391 xfs_btree_key_addr(
392         struct xfs_btree_cur    *cur,
393         int                     n,
394         struct xfs_btree_block  *block)
395 {
396         return (union xfs_btree_key *)
397                 ((char *)block + xfs_btree_key_offset(cur, n));
398 }
399
400 /*
401  * Return a pointer to the n-th block pointer in the btree block.
402  */
403 STATIC union xfs_btree_ptr *
404 xfs_btree_ptr_addr(
405         struct xfs_btree_cur    *cur,
406         int                     n,
407         struct xfs_btree_block  *block)
408 {
409         int                     level = xfs_btree_get_level(block);
410
411         ASSERT(block->bb_level != 0);
412
413         return (union xfs_btree_ptr *)
414                 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
415 }
416
417 /*
418  * Get a the root block which is stored in the inode.
419  *
420  * For now this btree implementation assumes the btree root is always
421  * stored in the if_broot field of an inode fork.
422  */
423 STATIC struct xfs_btree_block *
424 xfs_btree_get_iroot(
425        struct xfs_btree_cur    *cur)
426 {
427        struct xfs_ifork        *ifp;
428
429        ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
430        return (struct xfs_btree_block *)ifp->if_broot;
431 }
432
433 /*
434  * Retrieve the block pointer from the cursor at the given level.
435  * This may be an inode btree root or from a buffer.
436  */
437 STATIC struct xfs_btree_block *         /* generic btree block pointer */
438 xfs_btree_get_block(
439         struct xfs_btree_cur    *cur,   /* btree cursor */
440         int                     level,  /* level in btree */
441         struct xfs_buf          **bpp)  /* buffer containing the block */
442 {
443         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
444             (level == cur->bc_nlevels - 1)) {
445                 *bpp = NULL;
446                 return xfs_btree_get_iroot(cur);
447         }
448
449         *bpp = cur->bc_bufs[level];
450         return XFS_BUF_TO_BLOCK(*bpp);
451 }
452
453 /*
454  * Get a buffer for the block, return it with no data read.
455  * Long-form addressing.
456  */
457 xfs_buf_t *                             /* buffer for fsbno */
458 xfs_btree_get_bufl(
459         xfs_mount_t     *mp,            /* file system mount point */
460         xfs_trans_t     *tp,            /* transaction pointer */
461         xfs_fsblock_t   fsbno,          /* file system block number */
462         uint            lock)           /* lock flags for get_buf */
463 {
464         xfs_buf_t       *bp;            /* buffer pointer (return value) */
465         xfs_daddr_t             d;              /* real disk block address */
466
467         ASSERT(fsbno != NULLFSBLOCK);
468         d = XFS_FSB_TO_DADDR(mp, fsbno);
469         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
470         ASSERT(bp);
471         ASSERT(!XFS_BUF_GETERROR(bp));
472         return bp;
473 }
474
475 /*
476  * Get a buffer for the block, return it with no data read.
477  * Short-form addressing.
478  */
479 xfs_buf_t *                             /* buffer for agno/agbno */
480 xfs_btree_get_bufs(
481         xfs_mount_t     *mp,            /* file system mount point */
482         xfs_trans_t     *tp,            /* transaction pointer */
483         xfs_agnumber_t  agno,           /* allocation group number */
484         xfs_agblock_t   agbno,          /* allocation group block number */
485         uint            lock)           /* lock flags for get_buf */
486 {
487         xfs_buf_t       *bp;            /* buffer pointer (return value) */
488         xfs_daddr_t             d;              /* real disk block address */
489
490         ASSERT(agno != NULLAGNUMBER);
491         ASSERT(agbno != NULLAGBLOCK);
492         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
493         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
494         ASSERT(bp);
495         ASSERT(!XFS_BUF_GETERROR(bp));
496         return bp;
497 }
498
499 /*
500  * Check for the cursor referring to the last block at the given level.
501  */
502 int                                     /* 1=is last block, 0=not last block */
503 xfs_btree_islastblock(
504         xfs_btree_cur_t         *cur,   /* btree cursor */
505         int                     level)  /* level to check */
506 {
507         struct xfs_btree_block  *block; /* generic btree block pointer */
508         xfs_buf_t               *bp;    /* buffer containing block */
509
510         block = xfs_btree_get_block(cur, level, &bp);
511         xfs_btree_check_block(cur, block, level, bp);
512         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
513                 return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
514         else
515                 return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
516 }
517
518 /*
519  * Change the cursor to point to the first record at the given level.
520  * Other levels are unaffected.
521  */
522 STATIC int                              /* success=1, failure=0 */
523 xfs_btree_firstrec(
524         xfs_btree_cur_t         *cur,   /* btree cursor */
525         int                     level)  /* level to change */
526 {
527         struct xfs_btree_block  *block; /* generic btree block pointer */
528         xfs_buf_t               *bp;    /* buffer containing block */
529
530         /*
531          * Get the block pointer for this level.
532          */
533         block = xfs_btree_get_block(cur, level, &bp);
534         xfs_btree_check_block(cur, block, level, bp);
535         /*
536          * It's empty, there is no such record.
537          */
538         if (!block->bb_numrecs)
539                 return 0;
540         /*
541          * Set the ptr value to 1, that's the first record/key.
542          */
543         cur->bc_ptrs[level] = 1;
544         return 1;
545 }
546
547 /*
548  * Change the cursor to point to the last record in the current block
549  * at the given level.  Other levels are unaffected.
550  */
551 STATIC int                              /* success=1, failure=0 */
552 xfs_btree_lastrec(
553         xfs_btree_cur_t         *cur,   /* btree cursor */
554         int                     level)  /* level to change */
555 {
556         struct xfs_btree_block  *block; /* generic btree block pointer */
557         xfs_buf_t               *bp;    /* buffer containing block */
558
559         /*
560          * Get the block pointer for this level.
561          */
562         block = xfs_btree_get_block(cur, level, &bp);
563         xfs_btree_check_block(cur, block, level, bp);
564         /*
565          * It's empty, there is no such record.
566          */
567         if (!block->bb_numrecs)
568                 return 0;
569         /*
570          * Set the ptr value to numrecs, that's the last record/key.
571          */
572         cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
573         return 1;
574 }
575
576 /*
577  * Compute first and last byte offsets for the fields given.
578  * Interprets the offsets table, which contains struct field offsets.
579  */
580 void
581 xfs_btree_offsets(
582         __int64_t       fields,         /* bitmask of fields */
583         const short     *offsets,       /* table of field offsets */
584         int             nbits,          /* number of bits to inspect */
585         int             *first,         /* output: first byte offset */
586         int             *last)          /* output: last byte offset */
587 {
588         int             i;              /* current bit number */
589         __int64_t       imask;          /* mask for current bit number */
590
591         ASSERT(fields != 0);
592         /*
593          * Find the lowest bit, so the first byte offset.
594          */
595         for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
596                 if (imask & fields) {
597                         *first = offsets[i];
598                         break;
599                 }
600         }
601         /*
602          * Find the highest bit, so the last byte offset.
603          */
604         for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
605                 if (imask & fields) {
606                         *last = offsets[i + 1] - 1;
607                         break;
608                 }
609         }
610 }
611
612 /*
613  * Get a buffer for the block, return it read in.
614  * Long-form addressing.
615  */
616 int                                     /* error */
617 xfs_btree_read_bufl(
618         xfs_mount_t     *mp,            /* file system mount point */
619         xfs_trans_t     *tp,            /* transaction pointer */
620         xfs_fsblock_t   fsbno,          /* file system block number */
621         uint            lock,           /* lock flags for read_buf */
622         xfs_buf_t       **bpp,          /* buffer for fsbno */
623         int             refval)         /* ref count value for buffer */
624 {
625         xfs_buf_t       *bp;            /* return value */
626         xfs_daddr_t             d;              /* real disk block address */
627         int             error;
628
629         ASSERT(fsbno != NULLFSBLOCK);
630         d = XFS_FSB_TO_DADDR(mp, fsbno);
631         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
632                         mp->m_bsize, lock, &bp))) {
633                 return error;
634         }
635         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
636         if (bp)
637                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
638         *bpp = bp;
639         return 0;
640 }
641
642 /*
643  * Read-ahead the block, don't wait for it, don't return a buffer.
644  * Long-form addressing.
645  */
646 /* ARGSUSED */
647 void
648 xfs_btree_reada_bufl(
649         xfs_mount_t     *mp,            /* file system mount point */
650         xfs_fsblock_t   fsbno,          /* file system block number */
651         xfs_extlen_t    count)          /* count of filesystem blocks */
652 {
653         xfs_daddr_t             d;
654
655         ASSERT(fsbno != NULLFSBLOCK);
656         d = XFS_FSB_TO_DADDR(mp, fsbno);
657         xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
658 }
659
660 /*
661  * Read-ahead the block, don't wait for it, don't return a buffer.
662  * Short-form addressing.
663  */
664 /* ARGSUSED */
665 void
666 xfs_btree_reada_bufs(
667         xfs_mount_t     *mp,            /* file system mount point */
668         xfs_agnumber_t  agno,           /* allocation group number */
669         xfs_agblock_t   agbno,          /* allocation group block number */
670         xfs_extlen_t    count)          /* count of filesystem blocks */
671 {
672         xfs_daddr_t             d;
673
674         ASSERT(agno != NULLAGNUMBER);
675         ASSERT(agbno != NULLAGBLOCK);
676         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
677         xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
678 }
679
680 STATIC int
681 xfs_btree_readahead_lblock(
682         struct xfs_btree_cur    *cur,
683         int                     lr,
684         struct xfs_btree_block  *block)
685 {
686         int                     rval = 0;
687         xfs_dfsbno_t            left = be64_to_cpu(block->bb_u.l.bb_leftsib);
688         xfs_dfsbno_t            right = be64_to_cpu(block->bb_u.l.bb_rightsib);
689
690         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
691                 xfs_btree_reada_bufl(cur->bc_mp, left, 1);
692                 rval++;
693         }
694
695         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
696                 xfs_btree_reada_bufl(cur->bc_mp, right, 1);
697                 rval++;
698         }
699
700         return rval;
701 }
702
703 STATIC int
704 xfs_btree_readahead_sblock(
705         struct xfs_btree_cur    *cur,
706         int                     lr,
707         struct xfs_btree_block *block)
708 {
709         int                     rval = 0;
710         xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
711         xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
712
713
714         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
715                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
716                                      left, 1);
717                 rval++;
718         }
719
720         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
721                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
722                                      right, 1);
723                 rval++;
724         }
725
726         return rval;
727 }
728
729 /*
730  * Read-ahead btree blocks, at the given level.
731  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
732  */
733 STATIC int
734 xfs_btree_readahead(
735         struct xfs_btree_cur    *cur,           /* btree cursor */
736         int                     lev,            /* level in btree */
737         int                     lr)             /* left/right bits */
738 {
739         struct xfs_btree_block  *block;
740
741         /*
742          * No readahead needed if we are at the root level and the
743          * btree root is stored in the inode.
744          */
745         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
746             (lev == cur->bc_nlevels - 1))
747                 return 0;
748
749         if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
750                 return 0;
751
752         cur->bc_ra[lev] |= lr;
753         block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
754
755         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
756                 return xfs_btree_readahead_lblock(cur, lr, block);
757         return xfs_btree_readahead_sblock(cur, lr, block);
758 }
759
760 /*
761  * Set the buffer for level "lev" in the cursor to bp, releasing
762  * any previous buffer.
763  */
764 STATIC void
765 xfs_btree_setbuf(
766         xfs_btree_cur_t         *cur,   /* btree cursor */
767         int                     lev,    /* level in btree */
768         xfs_buf_t               *bp)    /* new buffer to set */
769 {
770         struct xfs_btree_block  *b;     /* btree block */
771
772         if (cur->bc_bufs[lev])
773                 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
774         cur->bc_bufs[lev] = bp;
775         cur->bc_ra[lev] = 0;
776
777         b = XFS_BUF_TO_BLOCK(bp);
778         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
779                 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
780                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
781                 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
782                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
783         } else {
784                 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
785                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
786                 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
787                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
788         }
789 }
790
791 STATIC int
792 xfs_btree_ptr_is_null(
793         struct xfs_btree_cur    *cur,
794         union xfs_btree_ptr     *ptr)
795 {
796         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
797                 return ptr->l == cpu_to_be64(NULLDFSBNO);
798         else
799                 return ptr->s == cpu_to_be32(NULLAGBLOCK);
800 }
801
802 STATIC void
803 xfs_btree_set_ptr_null(
804         struct xfs_btree_cur    *cur,
805         union xfs_btree_ptr     *ptr)
806 {
807         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
808                 ptr->l = cpu_to_be64(NULLDFSBNO);
809         else
810                 ptr->s = cpu_to_be32(NULLAGBLOCK);
811 }
812
813 /*
814  * Get/set/init sibling pointers
815  */
816 STATIC void
817 xfs_btree_get_sibling(
818         struct xfs_btree_cur    *cur,
819         struct xfs_btree_block  *block,
820         union xfs_btree_ptr     *ptr,
821         int                     lr)
822 {
823         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
824
825         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
826                 if (lr == XFS_BB_RIGHTSIB)
827                         ptr->l = block->bb_u.l.bb_rightsib;
828                 else
829                         ptr->l = block->bb_u.l.bb_leftsib;
830         } else {
831                 if (lr == XFS_BB_RIGHTSIB)
832                         ptr->s = block->bb_u.s.bb_rightsib;
833                 else
834                         ptr->s = block->bb_u.s.bb_leftsib;
835         }
836 }
837
838 STATIC void
839 xfs_btree_set_sibling(
840         struct xfs_btree_cur    *cur,
841         struct xfs_btree_block  *block,
842         union xfs_btree_ptr     *ptr,
843         int                     lr)
844 {
845         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
846
847         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
848                 if (lr == XFS_BB_RIGHTSIB)
849                         block->bb_u.l.bb_rightsib = ptr->l;
850                 else
851                         block->bb_u.l.bb_leftsib = ptr->l;
852         } else {
853                 if (lr == XFS_BB_RIGHTSIB)
854                         block->bb_u.s.bb_rightsib = ptr->s;
855                 else
856                         block->bb_u.s.bb_leftsib = ptr->s;
857         }
858 }
859
860 STATIC void
861 xfs_btree_init_block(
862         struct xfs_btree_cur    *cur,
863         int                     level,
864         int                     numrecs,
865         struct xfs_btree_block  *new)   /* new block */
866 {
867         new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
868         new->bb_level = cpu_to_be16(level);
869         new->bb_numrecs = cpu_to_be16(numrecs);
870
871         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
872                 new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
873                 new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
874         } else {
875                 new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
876                 new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
877         }
878 }
879
880 /*
881  * Return true if ptr is the last record in the btree and
882  * we need to track updateÑ• to this record.  The decision
883  * will be further refined in the update_lastrec method.
884  */
885 STATIC int
886 xfs_btree_is_lastrec(
887         struct xfs_btree_cur    *cur,
888         struct xfs_btree_block  *block,
889         int                     level)
890 {
891         union xfs_btree_ptr     ptr;
892
893         if (level > 0)
894                 return 0;
895         if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
896                 return 0;
897
898         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
899         if (!xfs_btree_ptr_is_null(cur, &ptr))
900                 return 0;
901         return 1;
902 }
903
904 STATIC void
905 xfs_btree_buf_to_ptr(
906         struct xfs_btree_cur    *cur,
907         struct xfs_buf          *bp,
908         union xfs_btree_ptr     *ptr)
909 {
910         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
911                 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
912                                         XFS_BUF_ADDR(bp)));
913         else {
914                 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
915                                         XFS_BUF_ADDR(bp)));
916         }
917 }
918
919 STATIC xfs_daddr_t
920 xfs_btree_ptr_to_daddr(
921         struct xfs_btree_cur    *cur,
922         union xfs_btree_ptr     *ptr)
923 {
924         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
925                 ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
926
927                 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
928         } else {
929                 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
930                 ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
931
932                 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
933                                         be32_to_cpu(ptr->s));
934         }
935 }
936
937 STATIC void
938 xfs_btree_set_refs(
939         struct xfs_btree_cur    *cur,
940         struct xfs_buf          *bp)
941 {
942         switch (cur->bc_btnum) {
943         case XFS_BTNUM_BNO:
944         case XFS_BTNUM_CNT:
945                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
946                 break;
947         case XFS_BTNUM_INO:
948                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF);
949                 break;
950         case XFS_BTNUM_BMAP:
951                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF);
952                 break;
953         default:
954                 ASSERT(0);
955         }
956 }
957
958 STATIC int
959 xfs_btree_get_buf_block(
960         struct xfs_btree_cur    *cur,
961         union xfs_btree_ptr     *ptr,
962         int                     flags,
963         struct xfs_btree_block  **block,
964         struct xfs_buf          **bpp)
965 {
966         struct xfs_mount        *mp = cur->bc_mp;
967         xfs_daddr_t             d;
968
969         /* need to sort out how callers deal with failures first */
970         ASSERT(!(flags & XBF_TRYLOCK));
971
972         d = xfs_btree_ptr_to_daddr(cur, ptr);
973         *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
974                                  mp->m_bsize, flags);
975
976         ASSERT(*bpp);
977         ASSERT(!XFS_BUF_GETERROR(*bpp));
978
979         *block = XFS_BUF_TO_BLOCK(*bpp);
980         return 0;
981 }
982
983 /*
984  * Read in the buffer at the given ptr and return the buffer and
985  * the block pointer within the buffer.
986  */
987 STATIC int
988 xfs_btree_read_buf_block(
989         struct xfs_btree_cur    *cur,
990         union xfs_btree_ptr     *ptr,
991         int                     level,
992         int                     flags,
993         struct xfs_btree_block  **block,
994         struct xfs_buf          **bpp)
995 {
996         struct xfs_mount        *mp = cur->bc_mp;
997         xfs_daddr_t             d;
998         int                     error;
999
1000         /* need to sort out how callers deal with failures first */
1001         ASSERT(!(flags & XBF_TRYLOCK));
1002
1003         d = xfs_btree_ptr_to_daddr(cur, ptr);
1004         error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1005                                    mp->m_bsize, flags, bpp);
1006         if (error)
1007                 return error;
1008
1009         ASSERT(*bpp != NULL);
1010         ASSERT(!XFS_BUF_GETERROR(*bpp));
1011
1012         xfs_btree_set_refs(cur, *bpp);
1013         *block = XFS_BUF_TO_BLOCK(*bpp);
1014
1015         error = xfs_btree_check_block(cur, *block, level, *bpp);
1016         if (error)
1017                 xfs_trans_brelse(cur->bc_tp, *bpp);
1018         return error;
1019 }
1020
1021 /*
1022  * Copy keys from one btree block to another.
1023  */
1024 STATIC void
1025 xfs_btree_copy_keys(
1026         struct xfs_btree_cur    *cur,
1027         union xfs_btree_key     *dst_key,
1028         union xfs_btree_key     *src_key,
1029         int                     numkeys)
1030 {
1031         ASSERT(numkeys >= 0);
1032         memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1033 }
1034
1035 /*
1036  * Copy records from one btree block to another.
1037  */
1038 STATIC void
1039 xfs_btree_copy_recs(
1040         struct xfs_btree_cur    *cur,
1041         union xfs_btree_rec     *dst_rec,
1042         union xfs_btree_rec     *src_rec,
1043         int                     numrecs)
1044 {
1045         ASSERT(numrecs >= 0);
1046         memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1047 }
1048
1049 /*
1050  * Copy block pointers from one btree block to another.
1051  */
1052 STATIC void
1053 xfs_btree_copy_ptrs(
1054         struct xfs_btree_cur    *cur,
1055         union xfs_btree_ptr     *dst_ptr,
1056         union xfs_btree_ptr     *src_ptr,
1057         int                     numptrs)
1058 {
1059         ASSERT(numptrs >= 0);
1060         memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1061 }
1062
1063 /*
1064  * Shift keys one index left/right inside a single btree block.
1065  */
1066 STATIC void
1067 xfs_btree_shift_keys(
1068         struct xfs_btree_cur    *cur,
1069         union xfs_btree_key     *key,
1070         int                     dir,
1071         int                     numkeys)
1072 {
1073         char                    *dst_key;
1074
1075         ASSERT(numkeys >= 0);
1076         ASSERT(dir == 1 || dir == -1);
1077
1078         dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1079         memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1080 }
1081
1082 /*
1083  * Shift records one index left/right inside a single btree block.
1084  */
1085 STATIC void
1086 xfs_btree_shift_recs(
1087         struct xfs_btree_cur    *cur,
1088         union xfs_btree_rec     *rec,
1089         int                     dir,
1090         int                     numrecs)
1091 {
1092         char                    *dst_rec;
1093
1094         ASSERT(numrecs >= 0);
1095         ASSERT(dir == 1 || dir == -1);
1096
1097         dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1098         memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1099 }
1100
1101 /*
1102  * Shift block pointers one index left/right inside a single btree block.
1103  */
1104 STATIC void
1105 xfs_btree_shift_ptrs(
1106         struct xfs_btree_cur    *cur,
1107         union xfs_btree_ptr     *ptr,
1108         int                     dir,
1109         int                     numptrs)
1110 {
1111         char                    *dst_ptr;
1112
1113         ASSERT(numptrs >= 0);
1114         ASSERT(dir == 1 || dir == -1);
1115
1116         dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1117         memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1118 }
1119
1120 /*
1121  * Log key values from the btree block.
1122  */
1123 STATIC void
1124 xfs_btree_log_keys(
1125         struct xfs_btree_cur    *cur,
1126         struct xfs_buf          *bp,
1127         int                     first,
1128         int                     last)
1129 {
1130         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1131         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1132
1133         if (bp) {
1134                 xfs_trans_log_buf(cur->bc_tp, bp,
1135                                   xfs_btree_key_offset(cur, first),
1136                                   xfs_btree_key_offset(cur, last + 1) - 1);
1137         } else {
1138                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1139                                 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1140         }
1141
1142         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1143 }
1144
1145 /*
1146  * Log record values from the btree block.
1147  */
1148 void
1149 xfs_btree_log_recs(
1150         struct xfs_btree_cur    *cur,
1151         struct xfs_buf          *bp,
1152         int                     first,
1153         int                     last)
1154 {
1155         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1156         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1157
1158         xfs_trans_log_buf(cur->bc_tp, bp,
1159                           xfs_btree_rec_offset(cur, first),
1160                           xfs_btree_rec_offset(cur, last + 1) - 1);
1161
1162         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1163 }
1164
1165 /*
1166  * Log block pointer fields from a btree block (nonleaf).
1167  */
1168 STATIC void
1169 xfs_btree_log_ptrs(
1170         struct xfs_btree_cur    *cur,   /* btree cursor */
1171         struct xfs_buf          *bp,    /* buffer containing btree block */
1172         int                     first,  /* index of first pointer to log */
1173         int                     last)   /* index of last pointer to log */
1174 {
1175         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1176         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1177
1178         if (bp) {
1179                 struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
1180                 int                     level = xfs_btree_get_level(block);
1181
1182                 xfs_trans_log_buf(cur->bc_tp, bp,
1183                                 xfs_btree_ptr_offset(cur, first, level),
1184                                 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1185         } else {
1186                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1187                         xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1188         }
1189
1190         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1191 }
1192
1193 /*
1194  * Log fields from a btree block header.
1195  */
1196 void
1197 xfs_btree_log_block(
1198         struct xfs_btree_cur    *cur,   /* btree cursor */
1199         struct xfs_buf          *bp,    /* buffer containing btree block */
1200         int                     fields) /* mask of fields: XFS_BB_... */
1201 {
1202         int                     first;  /* first byte offset logged */
1203         int                     last;   /* last byte offset logged */
1204         static const short      soffsets[] = {  /* table of offsets (short) */
1205                 offsetof(struct xfs_btree_block, bb_magic),
1206                 offsetof(struct xfs_btree_block, bb_level),
1207                 offsetof(struct xfs_btree_block, bb_numrecs),
1208                 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1209                 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1210                 XFS_BTREE_SBLOCK_LEN
1211         };
1212         static const short      loffsets[] = {  /* table of offsets (long) */
1213                 offsetof(struct xfs_btree_block, bb_magic),
1214                 offsetof(struct xfs_btree_block, bb_level),
1215                 offsetof(struct xfs_btree_block, bb_numrecs),
1216                 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1217                 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1218                 XFS_BTREE_LBLOCK_LEN
1219         };
1220
1221         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1222         XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1223
1224         if (bp) {
1225                 xfs_btree_offsets(fields,
1226                                   (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1227                                         loffsets : soffsets,
1228                                   XFS_BB_NUM_BITS, &first, &last);
1229                 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1230         } else {
1231                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1232                         xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1233         }
1234
1235         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1236 }
1237
1238 /*
1239  * Increment cursor by one record at the level.
1240  * For nonzero levels the leaf-ward information is untouched.
1241  */
1242 int                                             /* error */
1243 xfs_btree_increment(
1244         struct xfs_btree_cur    *cur,
1245         int                     level,
1246         int                     *stat)          /* success/failure */
1247 {
1248         struct xfs_btree_block  *block;
1249         union xfs_btree_ptr     ptr;
1250         struct xfs_buf          *bp;
1251         int                     error;          /* error return value */
1252         int                     lev;
1253
1254         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1255         XFS_BTREE_TRACE_ARGI(cur, level);
1256
1257         ASSERT(level < cur->bc_nlevels);
1258
1259         /* Read-ahead to the right at this level. */
1260         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1261
1262         /* Get a pointer to the btree block. */
1263         block = xfs_btree_get_block(cur, level, &bp);
1264
1265 #ifdef DEBUG
1266         error = xfs_btree_check_block(cur, block, level, bp);
1267         if (error)
1268                 goto error0;
1269 #endif
1270
1271         /* We're done if we remain in the block after the increment. */
1272         if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1273                 goto out1;
1274
1275         /* Fail if we just went off the right edge of the tree. */
1276         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1277         if (xfs_btree_ptr_is_null(cur, &ptr))
1278                 goto out0;
1279
1280         XFS_BTREE_STATS_INC(cur, increment);
1281
1282         /*
1283          * March up the tree incrementing pointers.
1284          * Stop when we don't go off the right edge of a block.
1285          */
1286         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1287                 block = xfs_btree_get_block(cur, lev, &bp);
1288
1289 #ifdef DEBUG
1290                 error = xfs_btree_check_block(cur, block, lev, bp);
1291                 if (error)
1292                         goto error0;
1293 #endif
1294
1295                 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1296                         break;
1297
1298                 /* Read-ahead the right block for the next loop. */
1299                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1300         }
1301
1302         /*
1303          * If we went off the root then we are either seriously
1304          * confused or have the tree root in an inode.
1305          */
1306         if (lev == cur->bc_nlevels) {
1307                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1308                         goto out0;
1309                 ASSERT(0);
1310                 error = EFSCORRUPTED;
1311                 goto error0;
1312         }
1313         ASSERT(lev < cur->bc_nlevels);
1314
1315         /*
1316          * Now walk back down the tree, fixing up the cursor's buffer
1317          * pointers and key numbers.
1318          */
1319         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1320                 union xfs_btree_ptr     *ptrp;
1321
1322                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1323                 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1324                                                         0, &block, &bp);
1325                 if (error)
1326                         goto error0;
1327
1328                 xfs_btree_setbuf(cur, lev, bp);
1329                 cur->bc_ptrs[lev] = 1;
1330         }
1331 out1:
1332         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1333         *stat = 1;
1334         return 0;
1335
1336 out0:
1337         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1338         *stat = 0;
1339         return 0;
1340
1341 error0:
1342         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1343         return error;
1344 }
1345
1346 /*
1347  * Decrement cursor by one record at the level.
1348  * For nonzero levels the leaf-ward information is untouched.
1349  */
1350 int                                             /* error */
1351 xfs_btree_decrement(
1352         struct xfs_btree_cur    *cur,
1353         int                     level,
1354         int                     *stat)          /* success/failure */
1355 {
1356         struct xfs_btree_block  *block;
1357         xfs_buf_t               *bp;
1358         int                     error;          /* error return value */
1359         int                     lev;
1360         union xfs_btree_ptr     ptr;
1361
1362         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1363         XFS_BTREE_TRACE_ARGI(cur, level);
1364
1365         ASSERT(level < cur->bc_nlevels);
1366
1367         /* Read-ahead to the left at this level. */
1368         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1369
1370         /* We're done if we remain in the block after the decrement. */
1371         if (--cur->bc_ptrs[level] > 0)
1372                 goto out1;
1373
1374         /* Get a pointer to the btree block. */
1375         block = xfs_btree_get_block(cur, level, &bp);
1376
1377 #ifdef DEBUG
1378         error = xfs_btree_check_block(cur, block, level, bp);
1379         if (error)
1380                 goto error0;
1381 #endif
1382
1383         /* Fail if we just went off the left edge of the tree. */
1384         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1385         if (xfs_btree_ptr_is_null(cur, &ptr))
1386                 goto out0;
1387
1388         XFS_BTREE_STATS_INC(cur, decrement);
1389
1390         /*
1391          * March up the tree decrementing pointers.
1392          * Stop when we don't go off the left edge of a block.
1393          */
1394         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1395                 if (--cur->bc_ptrs[lev] > 0)
1396                         break;
1397                 /* Read-ahead the left block for the next loop. */
1398                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1399         }
1400
1401         /*
1402          * If we went off the root then we are seriously confused.
1403          * or the root of the tree is in an inode.
1404          */
1405         if (lev == cur->bc_nlevels) {
1406                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1407                         goto out0;
1408                 ASSERT(0);
1409                 error = EFSCORRUPTED;
1410                 goto error0;
1411         }
1412         ASSERT(lev < cur->bc_nlevels);
1413
1414         /*
1415          * Now walk back down the tree, fixing up the cursor's buffer
1416          * pointers and key numbers.
1417          */
1418         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1419                 union xfs_btree_ptr     *ptrp;
1420
1421                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1422                 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1423                                                         0, &block, &bp);
1424                 if (error)
1425                         goto error0;
1426                 xfs_btree_setbuf(cur, lev, bp);
1427                 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1428         }
1429 out1:
1430         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1431         *stat = 1;
1432         return 0;
1433
1434 out0:
1435         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1436         *stat = 0;
1437         return 0;
1438
1439 error0:
1440         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1441         return error;
1442 }
1443
1444 STATIC int
1445 xfs_btree_lookup_get_block(
1446         struct xfs_btree_cur    *cur,   /* btree cursor */
1447         int                     level,  /* level in the btree */
1448         union xfs_btree_ptr     *pp,    /* ptr to btree block */
1449         struct xfs_btree_block  **blkp) /* return btree block */
1450 {
1451         struct xfs_buf          *bp;    /* buffer pointer for btree block */
1452         int                     error = 0;
1453
1454         /* special case the root block if in an inode */
1455         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1456             (level == cur->bc_nlevels - 1)) {
1457                 *blkp = xfs_btree_get_iroot(cur);
1458                 return 0;
1459         }
1460
1461         /*
1462          * If the old buffer at this level for the disk address we are
1463          * looking for re-use it.
1464          *
1465          * Otherwise throw it away and get a new one.
1466          */
1467         bp = cur->bc_bufs[level];
1468         if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1469                 *blkp = XFS_BUF_TO_BLOCK(bp);
1470                 return 0;
1471         }
1472
1473         error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
1474         if (error)
1475                 return error;
1476
1477         xfs_btree_setbuf(cur, level, bp);
1478         return 0;
1479 }
1480
1481 /*
1482  * Get current search key.  For level 0 we don't actually have a key
1483  * structure so we make one up from the record.  For all other levels
1484  * we just return the right key.
1485  */
1486 STATIC union xfs_btree_key *
1487 xfs_lookup_get_search_key(
1488         struct xfs_btree_cur    *cur,
1489         int                     level,
1490         int                     keyno,
1491         struct xfs_btree_block  *block,
1492         union xfs_btree_key     *kp)
1493 {
1494         if (level == 0) {
1495                 cur->bc_ops->init_key_from_rec(kp,
1496                                 xfs_btree_rec_addr(cur, keyno, block));
1497                 return kp;
1498         }
1499
1500         return xfs_btree_key_addr(cur, keyno, block);
1501 }
1502
1503 /*
1504  * Lookup the record.  The cursor is made to point to it, based on dir.
1505  * Return 0 if can't find any such record, 1 for success.
1506  */
1507 int                                     /* error */
1508 xfs_btree_lookup(
1509         struct xfs_btree_cur    *cur,   /* btree cursor */
1510         xfs_lookup_t            dir,    /* <=, ==, or >= */
1511         int                     *stat)  /* success/failure */
1512 {
1513         struct xfs_btree_block  *block; /* current btree block */
1514         __int64_t               diff;   /* difference for the current key */
1515         int                     error;  /* error return value */
1516         int                     keyno;  /* current key number */
1517         int                     level;  /* level in the btree */
1518         union xfs_btree_ptr     *pp;    /* ptr to btree block */
1519         union xfs_btree_ptr     ptr;    /* ptr to btree block */
1520
1521         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1522         XFS_BTREE_TRACE_ARGI(cur, dir);
1523
1524         XFS_BTREE_STATS_INC(cur, lookup);
1525
1526         block = NULL;
1527         keyno = 0;
1528
1529         /* initialise start pointer from cursor */
1530         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1531         pp = &ptr;
1532
1533         /*
1534          * Iterate over each level in the btree, starting at the root.
1535          * For each level above the leaves, find the key we need, based
1536          * on the lookup record, then follow the corresponding block
1537          * pointer down to the next level.
1538          */
1539         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1540                 /* Get the block we need to do the lookup on. */
1541                 error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1542                 if (error)
1543                         goto error0;
1544
1545                 if (diff == 0) {
1546                         /*
1547                          * If we already had a key match at a higher level, we
1548                          * know we need to use the first entry in this block.
1549                          */
1550                         keyno = 1;
1551                 } else {
1552                         /* Otherwise search this block. Do a binary search. */
1553
1554                         int     high;   /* high entry number */
1555                         int     low;    /* low entry number */
1556
1557                         /* Set low and high entry numbers, 1-based. */
1558                         low = 1;
1559                         high = xfs_btree_get_numrecs(block);
1560                         if (!high) {
1561                                 /* Block is empty, must be an empty leaf. */
1562                                 ASSERT(level == 0 && cur->bc_nlevels == 1);
1563
1564                                 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1565                                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1566                                 *stat = 0;
1567                                 return 0;
1568                         }
1569
1570                         /* Binary search the block. */
1571                         while (low <= high) {
1572                                 union xfs_btree_key     key;
1573                                 union xfs_btree_key     *kp;
1574
1575                                 XFS_BTREE_STATS_INC(cur, compare);
1576
1577                                 /* keyno is average of low and high. */
1578                                 keyno = (low + high) >> 1;
1579
1580                                 /* Get current search key */
1581                                 kp = xfs_lookup_get_search_key(cur, level,
1582                                                 keyno, block, &key);
1583
1584                                 /*
1585                                  * Compute difference to get next direction:
1586                                  *  - less than, move right
1587                                  *  - greater than, move left
1588                                  *  - equal, we're done
1589                                  */
1590                                 diff = cur->bc_ops->key_diff(cur, kp);
1591                                 if (diff < 0)
1592                                         low = keyno + 1;
1593                                 else if (diff > 0)
1594                                         high = keyno - 1;
1595                                 else
1596                                         break;
1597                         }
1598                 }
1599
1600                 /*
1601                  * If there are more levels, set up for the next level
1602                  * by getting the block number and filling in the cursor.
1603                  */
1604                 if (level > 0) {
1605                         /*
1606                          * If we moved left, need the previous key number,
1607                          * unless there isn't one.
1608                          */
1609                         if (diff > 0 && --keyno < 1)
1610                                 keyno = 1;
1611                         pp = xfs_btree_ptr_addr(cur, keyno, block);
1612
1613 #ifdef DEBUG
1614                         error = xfs_btree_check_ptr(cur, pp, 0, level);
1615                         if (error)
1616                                 goto error0;
1617 #endif
1618                         cur->bc_ptrs[level] = keyno;
1619                 }
1620         }
1621
1622         /* Done with the search. See if we need to adjust the results. */
1623         if (dir != XFS_LOOKUP_LE && diff < 0) {
1624                 keyno++;
1625                 /*
1626                  * If ge search and we went off the end of the block, but it's
1627                  * not the last block, we're in the wrong block.
1628                  */
1629                 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1630                 if (dir == XFS_LOOKUP_GE &&
1631                     keyno > xfs_btree_get_numrecs(block) &&
1632                     !xfs_btree_ptr_is_null(cur, &ptr)) {
1633                         int     i;
1634
1635                         cur->bc_ptrs[0] = keyno;
1636                         error = xfs_btree_increment(cur, 0, &i);
1637                         if (error)
1638                                 goto error0;
1639                         XFS_WANT_CORRUPTED_RETURN(i == 1);
1640                         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1641                         *stat = 1;
1642                         return 0;
1643                 }
1644         } else if (dir == XFS_LOOKUP_LE && diff > 0)
1645                 keyno--;
1646         cur->bc_ptrs[0] = keyno;
1647
1648         /* Return if we succeeded or not. */
1649         if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1650                 *stat = 0;
1651         else if (dir != XFS_LOOKUP_EQ || diff == 0)
1652                 *stat = 1;
1653         else
1654                 *stat = 0;
1655         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1656         return 0;
1657
1658 error0:
1659         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1660         return error;
1661 }
1662
1663 /*
1664  * Update keys at all levels from here to the root along the cursor's path.
1665  */
1666 STATIC int
1667 xfs_btree_updkey(
1668         struct xfs_btree_cur    *cur,
1669         union xfs_btree_key     *keyp,
1670         int                     level)
1671 {
1672         struct xfs_btree_block  *block;
1673         struct xfs_buf          *bp;
1674         union xfs_btree_key     *kp;
1675         int                     ptr;
1676
1677         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1678         XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1679
1680         ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1681
1682         /*
1683          * Go up the tree from this level toward the root.
1684          * At each level, update the key value to the value input.
1685          * Stop when we reach a level where the cursor isn't pointing
1686          * at the first entry in the block.
1687          */
1688         for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1689 #ifdef DEBUG
1690                 int             error;
1691 #endif
1692                 block = xfs_btree_get_block(cur, level, &bp);
1693 #ifdef DEBUG
1694                 error = xfs_btree_check_block(cur, block, level, bp);
1695                 if (error) {
1696                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1697                         return error;
1698                 }
1699 #endif
1700                 ptr = cur->bc_ptrs[level];
1701                 kp = xfs_btree_key_addr(cur, ptr, block);
1702                 xfs_btree_copy_keys(cur, kp, keyp, 1);
1703                 xfs_btree_log_keys(cur, bp, ptr, ptr);
1704         }
1705
1706         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1707         return 0;
1708 }
1709
1710 /*
1711  * Update the record referred to by cur to the value in the
1712  * given record. This either works (return 0) or gets an
1713  * EFSCORRUPTED error.
1714  */
1715 int
1716 xfs_btree_update(
1717         struct xfs_btree_cur    *cur,
1718         union xfs_btree_rec     *rec)
1719 {
1720         struct xfs_btree_block  *block;
1721         struct xfs_buf          *bp;
1722         int                     error;
1723         int                     ptr;
1724         union xfs_btree_rec     *rp;
1725
1726         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1727         XFS_BTREE_TRACE_ARGR(cur, rec);
1728
1729         /* Pick up the current block. */
1730         block = xfs_btree_get_block(cur, 0, &bp);
1731
1732 #ifdef DEBUG
1733         error = xfs_btree_check_block(cur, block, 0, bp);
1734         if (error)
1735                 goto error0;
1736 #endif
1737         /* Get the address of the rec to be updated. */
1738         ptr = cur->bc_ptrs[0];
1739         rp = xfs_btree_rec_addr(cur, ptr, block);
1740
1741         /* Fill in the new contents and log them. */
1742         xfs_btree_copy_recs(cur, rp, rec, 1);
1743         xfs_btree_log_recs(cur, bp, ptr, ptr);
1744
1745         /*
1746          * If we are tracking the last record in the tree and
1747          * we are at the far right edge of the tree, update it.
1748          */
1749         if (xfs_btree_is_lastrec(cur, block, 0)) {
1750                 cur->bc_ops->update_lastrec(cur, block, rec,
1751                                             ptr, LASTREC_UPDATE);
1752         }
1753
1754         /* Updating first rec in leaf. Pass new key value up to our parent. */
1755         if (ptr == 1) {
1756                 union xfs_btree_key     key;
1757
1758                 cur->bc_ops->init_key_from_rec(&key, rec);
1759                 error = xfs_btree_updkey(cur, &key, 1);
1760                 if (error)
1761                         goto error0;
1762         }
1763
1764         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1765         return 0;
1766
1767 error0:
1768         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1769         return error;
1770 }
1771
1772 /*
1773  * Move 1 record left from cur/level if possible.
1774  * Update cur to reflect the new path.
1775  */
1776 STATIC int                                      /* error */
1777 xfs_btree_lshift(
1778         struct xfs_btree_cur    *cur,
1779         int                     level,
1780         int                     *stat)          /* success/failure */
1781 {
1782         union xfs_btree_key     key;            /* btree key */
1783         struct xfs_buf          *lbp;           /* left buffer pointer */
1784         struct xfs_btree_block  *left;          /* left btree block */
1785         int                     lrecs;          /* left record count */
1786         struct xfs_buf          *rbp;           /* right buffer pointer */
1787         struct xfs_btree_block  *right;         /* right btree block */
1788         int                     rrecs;          /* right record count */
1789         union xfs_btree_ptr     lptr;           /* left btree pointer */
1790         union xfs_btree_key     *rkp = NULL;    /* right btree key */
1791         union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
1792         union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
1793         int                     error;          /* error return value */
1794
1795         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1796         XFS_BTREE_TRACE_ARGI(cur, level);
1797
1798         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1799             level == cur->bc_nlevels - 1)
1800                 goto out0;
1801
1802         /* Set up variables for this block as "right". */
1803         right = xfs_btree_get_block(cur, level, &rbp);
1804
1805 #ifdef DEBUG
1806         error = xfs_btree_check_block(cur, right, level, rbp);
1807         if (error)
1808                 goto error0;
1809 #endif
1810
1811         /* If we've got no left sibling then we can't shift an entry left. */
1812         xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
1813         if (xfs_btree_ptr_is_null(cur, &lptr))
1814                 goto out0;
1815
1816         /*
1817          * If the cursor entry is the one that would be moved, don't
1818          * do it... it's too complicated.
1819          */
1820         if (cur->bc_ptrs[level] <= 1)
1821                 goto out0;
1822
1823         /* Set up the left neighbor as "left". */
1824         error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
1825         if (error)
1826                 goto error0;
1827
1828         /* If it's full, it can't take another entry. */
1829         lrecs = xfs_btree_get_numrecs(left);
1830         if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
1831                 goto out0;
1832
1833         rrecs = xfs_btree_get_numrecs(right);
1834
1835         /*
1836          * We add one entry to the left side and remove one for the right side.
1837          * Account for it here, the changes will be updated on disk and logged
1838          * later.
1839          */
1840         lrecs++;
1841         rrecs--;
1842
1843         XFS_BTREE_STATS_INC(cur, lshift);
1844         XFS_BTREE_STATS_ADD(cur, moves, 1);
1845
1846         /*
1847          * If non-leaf, copy a key and a ptr to the left block.
1848          * Log the changes to the left block.
1849          */
1850         if (level > 0) {
1851                 /* It's a non-leaf.  Move keys and pointers. */
1852                 union xfs_btree_key     *lkp;   /* left btree key */
1853                 union xfs_btree_ptr     *lpp;   /* left address pointer */
1854
1855                 lkp = xfs_btree_key_addr(cur, lrecs, left);
1856                 rkp = xfs_btree_key_addr(cur, 1, right);
1857
1858                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
1859                 rpp = xfs_btree_ptr_addr(cur, 1, right);
1860 #ifdef DEBUG
1861                 error = xfs_btree_check_ptr(cur, rpp, 0, level);
1862                 if (error)
1863                         goto error0;
1864 #endif
1865                 xfs_btree_copy_keys(cur, lkp, rkp, 1);
1866                 xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
1867
1868                 xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
1869                 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
1870
1871                 ASSERT(cur->bc_ops->keys_inorder(cur,
1872                         xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
1873         } else {
1874                 /* It's a leaf.  Move records.  */
1875                 union xfs_btree_rec     *lrp;   /* left record pointer */
1876
1877                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
1878                 rrp = xfs_btree_rec_addr(cur, 1, right);
1879
1880                 xfs_btree_copy_recs(cur, lrp, rrp, 1);
1881                 xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
1882
1883                 ASSERT(cur->bc_ops->recs_inorder(cur,
1884                         xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
1885         }
1886
1887         xfs_btree_set_numrecs(left, lrecs);
1888         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
1889
1890         xfs_btree_set_numrecs(right, rrecs);
1891         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
1892
1893         /*
1894          * Slide the contents of right down one entry.
1895          */
1896         XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
1897         if (level > 0) {
1898                 /* It's a nonleaf. operate on keys and ptrs */
1899 #ifdef DEBUG
1900                 int                     i;              /* loop index */
1901
1902                 for (i = 0; i < rrecs; i++) {
1903                         error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
1904                         if (error)
1905                                 goto error0;
1906                 }
1907 #endif
1908                 xfs_btree_shift_keys(cur,
1909                                 xfs_btree_key_addr(cur, 2, right),
1910                                 -1, rrecs);
1911                 xfs_btree_shift_ptrs(cur,
1912                                 xfs_btree_ptr_addr(cur, 2, right),
1913                                 -1, rrecs);
1914
1915                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
1916                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
1917         } else {
1918                 /* It's a leaf. operate on records */
1919                 xfs_btree_shift_recs(cur,
1920                         xfs_btree_rec_addr(cur, 2, right),
1921                         -1, rrecs);
1922                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
1923
1924                 /*
1925                  * If it's the first record in the block, we'll need a key
1926                  * structure to pass up to the next level (updkey).
1927                  */
1928                 cur->bc_ops->init_key_from_rec(&key,
1929                         xfs_btree_rec_addr(cur, 1, right));
1930                 rkp = &key;
1931         }
1932
1933         /* Update the parent key values of right. */
1934         error = xfs_btree_updkey(cur, rkp, level + 1);
1935         if (error)
1936                 goto error0;
1937
1938         /* Slide the cursor value left one. */
1939         cur->bc_ptrs[level]--;
1940
1941         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1942         *stat = 1;
1943         return 0;
1944
1945 out0:
1946         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1947         *stat = 0;
1948         return 0;
1949
1950 error0:
1951         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1952         return error;
1953 }
1954
1955 /*
1956  * Move 1 record right from cur/level if possible.
1957  * Update cur to reflect the new path.
1958  */
1959 STATIC int                                      /* error */
1960 xfs_btree_rshift(
1961         struct xfs_btree_cur    *cur,
1962         int                     level,
1963         int                     *stat)          /* success/failure */
1964 {
1965         union xfs_btree_key     key;            /* btree key */
1966         struct xfs_buf          *lbp;           /* left buffer pointer */
1967         struct xfs_btree_block  *left;          /* left btree block */
1968         struct xfs_buf          *rbp;           /* right buffer pointer */
1969         struct xfs_btree_block  *right;         /* right btree block */
1970         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
1971         union xfs_btree_ptr     rptr;           /* right block pointer */
1972         union xfs_btree_key     *rkp;           /* right btree key */
1973         int                     rrecs;          /* right record count */
1974         int                     lrecs;          /* left record count */
1975         int                     error;          /* error return value */
1976         int                     i;              /* loop counter */
1977
1978         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1979         XFS_BTREE_TRACE_ARGI(cur, level);
1980
1981         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1982             (level == cur->bc_nlevels - 1))
1983                 goto out0;
1984
1985         /* Set up variables for this block as "left". */
1986         left = xfs_btree_get_block(cur, level, &lbp);
1987
1988 #ifdef DEBUG
1989         error = xfs_btree_check_block(cur, left, level, lbp);
1990         if (error)
1991                 goto error0;
1992 #endif
1993
1994         /* If we've got no right sibling then we can't shift an entry right. */
1995         xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
1996         if (xfs_btree_ptr_is_null(cur, &rptr))
1997                 goto out0;
1998
1999         /*
2000          * If the cursor entry is the one that would be moved, don't
2001          * do it... it's too complicated.
2002          */
2003         lrecs = xfs_btree_get_numrecs(left);
2004         if (cur->bc_ptrs[level] >= lrecs)
2005                 goto out0;
2006
2007         /* Set up the right neighbor as "right". */
2008         error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
2009         if (error)
2010                 goto error0;
2011
2012         /* If it's full, it can't take another entry. */
2013         rrecs = xfs_btree_get_numrecs(right);
2014         if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2015                 goto out0;
2016
2017         XFS_BTREE_STATS_INC(cur, rshift);
2018         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2019
2020         /*
2021          * Make a hole at the start of the right neighbor block, then
2022          * copy the last left block entry to the hole.
2023          */
2024         if (level > 0) {
2025                 /* It's a nonleaf. make a hole in the keys and ptrs */
2026                 union xfs_btree_key     *lkp;
2027                 union xfs_btree_ptr     *lpp;
2028                 union xfs_btree_ptr     *rpp;
2029
2030                 lkp = xfs_btree_key_addr(cur, lrecs, left);
2031                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2032                 rkp = xfs_btree_key_addr(cur, 1, right);
2033                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2034
2035 #ifdef DEBUG
2036                 for (i = rrecs - 1; i >= 0; i--) {
2037                         error = xfs_btree_check_ptr(cur, rpp, i, level);
2038                         if (error)
2039                                 goto error0;
2040                 }
2041 #endif
2042
2043                 xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2044                 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2045
2046 #ifdef DEBUG
2047                 error = xfs_btree_check_ptr(cur, lpp, 0, level);
2048                 if (error)
2049                         goto error0;
2050 #endif
2051
2052                 /* Now put the new data in, and log it. */
2053                 xfs_btree_copy_keys(cur, rkp, lkp, 1);
2054                 xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2055
2056                 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2057                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2058
2059                 ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2060                         xfs_btree_key_addr(cur, 2, right)));
2061         } else {
2062                 /* It's a leaf. make a hole in the records */
2063                 union xfs_btree_rec     *lrp;
2064                 union xfs_btree_rec     *rrp;
2065
2066                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2067                 rrp = xfs_btree_rec_addr(cur, 1, right);
2068
2069                 xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2070
2071                 /* Now put the new data in, and log it. */
2072                 xfs_btree_copy_recs(cur, rrp, lrp, 1);
2073                 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2074
2075                 cur->bc_ops->init_key_from_rec(&key, rrp);
2076                 rkp = &key;
2077
2078                 ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
2079                         xfs_btree_rec_addr(cur, 2, right)));
2080         }
2081
2082         /*
2083          * Decrement and log left's numrecs, bump and log right's numrecs.
2084          */
2085         xfs_btree_set_numrecs(left, --lrecs);
2086         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2087
2088         xfs_btree_set_numrecs(right, ++rrecs);
2089         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2090
2091         /*
2092          * Using a temporary cursor, update the parent key values of the
2093          * block on the right.
2094          */
2095         error = xfs_btree_dup_cursor(cur, &tcur);
2096         if (error)
2097                 goto error0;
2098         i = xfs_btree_lastrec(tcur, level);
2099         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2100
2101         error = xfs_btree_increment(tcur, level, &i);
2102         if (error)
2103                 goto error1;
2104
2105         error = xfs_btree_updkey(tcur, rkp, level + 1);
2106         if (error)
2107                 goto error1;
2108
2109         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2110
2111         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2112         *stat = 1;
2113         return 0;
2114
2115 out0:
2116         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2117         *stat = 0;
2118         return 0;
2119
2120 error0:
2121         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2122         return error;
2123
2124 error1:
2125         XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2126         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2127         return error;
2128 }
2129
2130 /*
2131  * Split cur/level block in half.
2132  * Return new block number and the key to its first
2133  * record (to be inserted into parent).
2134  */
2135 STATIC int                                      /* error */
2136 xfs_btree_split(
2137         struct xfs_btree_cur    *cur,
2138         int                     level,
2139         union xfs_btree_ptr     *ptrp,
2140         union xfs_btree_key     *key,
2141         struct xfs_btree_cur    **curp,
2142         int                     *stat)          /* success/failure */
2143 {
2144         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
2145         struct xfs_buf          *lbp;           /* left buffer pointer */
2146         struct xfs_btree_block  *left;          /* left btree block */
2147         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
2148         struct xfs_buf          *rbp;           /* right buffer pointer */
2149         struct xfs_btree_block  *right;         /* right btree block */
2150         union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
2151         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
2152         struct xfs_btree_block  *rrblock;       /* right-right btree block */
2153         int                     lrecs;
2154         int                     rrecs;
2155         int                     src_index;
2156         int                     error;          /* error return value */
2157 #ifdef DEBUG
2158         int                     i;
2159 #endif
2160
2161         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2162         XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
2163
2164         XFS_BTREE_STATS_INC(cur, split);
2165
2166         /* Set up left block (current one). */
2167         left = xfs_btree_get_block(cur, level, &lbp);
2168
2169 #ifdef DEBUG
2170         error = xfs_btree_check_block(cur, left, level, lbp);
2171         if (error)
2172                 goto error0;
2173 #endif
2174
2175         xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2176
2177         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2178         error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
2179         if (error)
2180                 goto error0;
2181         if (*stat == 0)
2182                 goto out0;
2183         XFS_BTREE_STATS_INC(cur, alloc);
2184
2185         /* Set up the new block as "right". */
2186         error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2187         if (error)
2188                 goto error0;
2189
2190         /* Fill in the btree header for the new right block. */
2191         xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right);
2192
2193         /*
2194          * Split the entries between the old and the new block evenly.
2195          * Make sure that if there's an odd number of entries now, that
2196          * each new block will have the same number of entries.
2197          */
2198         lrecs = xfs_btree_get_numrecs(left);
2199         rrecs = lrecs / 2;
2200         if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2201                 rrecs++;
2202         src_index = (lrecs - rrecs + 1);
2203
2204         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2205
2206         /*
2207          * Copy btree block entries from the left block over to the
2208          * new block, the right. Update the right block and log the
2209          * changes.
2210          */
2211         if (level > 0) {
2212                 /* It's a non-leaf.  Move keys and pointers. */
2213                 union xfs_btree_key     *lkp;   /* left btree key */
2214                 union xfs_btree_ptr     *lpp;   /* left address pointer */
2215                 union xfs_btree_key     *rkp;   /* right btree key */
2216                 union xfs_btree_ptr     *rpp;   /* right address pointer */
2217
2218                 lkp = xfs_btree_key_addr(cur, src_index, left);
2219                 lpp = xfs_btree_ptr_addr(cur, src_index, left);
2220                 rkp = xfs_btree_key_addr(cur, 1, right);
2221                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2222
2223 #ifdef DEBUG
2224                 for (i = src_index; i < rrecs; i++) {
2225                         error = xfs_btree_check_ptr(cur, lpp, i, level);
2226                         if (error)
2227                                 goto error0;
2228                 }
2229 #endif
2230
2231                 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2232                 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2233
2234                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2235                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2236
2237                 /* Grab the keys to the entries moved to the right block */
2238                 xfs_btree_copy_keys(cur, key, rkp, 1);
2239         } else {
2240                 /* It's a leaf.  Move records.  */
2241                 union xfs_btree_rec     *lrp;   /* left record pointer */
2242                 union xfs_btree_rec     *rrp;   /* right record pointer */
2243
2244                 lrp = xfs_btree_rec_addr(cur, src_index, left);
2245                 rrp = xfs_btree_rec_addr(cur, 1, right);
2246
2247                 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2248                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2249
2250                 cur->bc_ops->init_key_from_rec(key,
2251                         xfs_btree_rec_addr(cur, 1, right));
2252         }
2253
2254
2255         /*
2256          * Find the left block number by looking in the buffer.
2257          * Adjust numrecs, sibling pointers.
2258          */
2259         xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2260         xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2261         xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2262         xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2263
2264         lrecs -= rrecs;
2265         xfs_btree_set_numrecs(left, lrecs);
2266         xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2267
2268         xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2269         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2270
2271         /*
2272          * If there's a block to the new block's right, make that block
2273          * point back to right instead of to left.
2274          */
2275         if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2276                 error = xfs_btree_read_buf_block(cur, &rrptr, level,
2277                                                         0, &rrblock, &rrbp);
2278                 if (error)
2279                         goto error0;
2280                 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2281                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2282         }
2283         /*
2284          * If the cursor is really in the right block, move it there.
2285          * If it's just pointing past the last entry in left, then we'll
2286          * insert there, so don't change anything in that case.
2287          */
2288         if (cur->bc_ptrs[level] > lrecs + 1) {
2289                 xfs_btree_setbuf(cur, level, rbp);
2290                 cur->bc_ptrs[level] -= lrecs;
2291         }
2292         /*
2293          * If there are more levels, we'll need another cursor which refers
2294          * the right block, no matter where this cursor was.
2295          */
2296         if (level + 1 < cur->bc_nlevels) {
2297                 error = xfs_btree_dup_cursor(cur, curp);
2298                 if (error)
2299                         goto error0;
2300                 (*curp)->bc_ptrs[level + 1]++;
2301         }
2302         *ptrp = rptr;
2303         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2304         *stat = 1;
2305         return 0;
2306 out0:
2307         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2308         *stat = 0;
2309         return 0;
2310
2311 error0:
2312         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2313         return error;
2314 }
2315
2316 /*
2317  * Copy the old inode root contents into a real block and make the
2318  * broot point to it.
2319  */
2320 int                                             /* error */
2321 xfs_btree_new_iroot(
2322         struct xfs_btree_cur    *cur,           /* btree cursor */
2323         int                     *logflags,      /* logging flags for inode */
2324         int                     *stat)          /* return status - 0 fail */
2325 {
2326         struct xfs_buf          *cbp;           /* buffer for cblock */
2327         struct xfs_btree_block  *block;         /* btree block */
2328         struct xfs_btree_block  *cblock;        /* child btree block */
2329         union xfs_btree_key     *ckp;           /* child key pointer */
2330         union xfs_btree_ptr     *cpp;           /* child ptr pointer */
2331         union xfs_btree_key     *kp;            /* pointer to btree key */
2332         union xfs_btree_ptr     *pp;            /* pointer to block addr */
2333         union xfs_btree_ptr     nptr;           /* new block addr */
2334         int                     level;          /* btree level */
2335         int                     error;          /* error return code */
2336 #ifdef DEBUG
2337         int                     i;              /* loop counter */
2338 #endif
2339
2340         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2341         XFS_BTREE_STATS_INC(cur, newroot);
2342
2343         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2344
2345         level = cur->bc_nlevels - 1;
2346
2347         block = xfs_btree_get_iroot(cur);
2348         pp = xfs_btree_ptr_addr(cur, 1, block);
2349
2350         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2351         error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
2352         if (error)
2353                 goto error0;
2354         if (*stat == 0) {
2355                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2356                 return 0;
2357         }
2358         XFS_BTREE_STATS_INC(cur, alloc);
2359
2360         /* Copy the root into a real block. */
2361         error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
2362         if (error)
2363                 goto error0;
2364
2365         memcpy(cblock, block, xfs_btree_block_len(cur));
2366
2367         be16_add_cpu(&block->bb_level, 1);
2368         xfs_btree_set_numrecs(block, 1);
2369         cur->bc_nlevels++;
2370         cur->bc_ptrs[level + 1] = 1;
2371
2372         kp = xfs_btree_key_addr(cur, 1, block);
2373         ckp = xfs_btree_key_addr(cur, 1, cblock);
2374         xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
2375
2376         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2377 #ifdef DEBUG
2378         for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
2379                 error = xfs_btree_check_ptr(cur, pp, i, level);
2380                 if (error)
2381                         goto error0;
2382         }
2383 #endif
2384         xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
2385
2386 #ifdef DEBUG
2387         error = xfs_btree_check_ptr(cur, &nptr, 0, level);
2388         if (error)
2389                 goto error0;
2390 #endif
2391         xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
2392
2393         xfs_iroot_realloc(cur->bc_private.b.ip,
2394                           1 - xfs_btree_get_numrecs(cblock),
2395                           cur->bc_private.b.whichfork);
2396
2397         xfs_btree_setbuf(cur, level, cbp);
2398
2399         /*
2400          * Do all this logging at the end so that
2401          * the root is at the right level.
2402          */
2403         xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
2404         xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2405         xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2406
2407         *logflags |=
2408                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
2409         *stat = 1;
2410         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2411         return 0;
2412 error0:
2413         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2414         return error;
2415 }
2416
2417 /*
2418  * Allocate a new root block, fill it in.
2419  */
2420 STATIC int                              /* error */
2421 xfs_btree_new_root(
2422         struct xfs_btree_cur    *cur,   /* btree cursor */
2423         int                     *stat)  /* success/failure */
2424 {
2425         struct xfs_btree_block  *block; /* one half of the old root block */
2426         struct xfs_buf          *bp;    /* buffer containing block */
2427         int                     error;  /* error return value */
2428         struct xfs_buf          *lbp;   /* left buffer pointer */
2429         struct xfs_btree_block  *left;  /* left btree block */
2430         struct xfs_buf          *nbp;   /* new (root) buffer */
2431         struct xfs_btree_block  *new;   /* new (root) btree block */
2432         int                     nptr;   /* new value for key index, 1 or 2 */
2433         struct xfs_buf          *rbp;   /* right buffer pointer */
2434         struct xfs_btree_block  *right; /* right btree block */
2435         union xfs_btree_ptr     rptr;
2436         union xfs_btree_ptr     lptr;
2437
2438         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2439         XFS_BTREE_STATS_INC(cur, newroot);
2440
2441         /* initialise our start point from the cursor */
2442         cur->bc_ops->init_ptr_from_cur(cur, &rptr);
2443
2444         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2445         error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
2446         if (error)
2447                 goto error0;
2448         if (*stat == 0)
2449                 goto out0;
2450         XFS_BTREE_STATS_INC(cur, alloc);
2451
2452         /* Set up the new block. */
2453         error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
2454         if (error)
2455                 goto error0;
2456
2457         /* Set the root in the holding structure  increasing the level by 1. */
2458         cur->bc_ops->set_root(cur, &lptr, 1);
2459
2460         /*
2461          * At the previous root level there are now two blocks: the old root,
2462          * and the new block generated when it was split.  We don't know which
2463          * one the cursor is pointing at, so we set up variables "left" and
2464          * "right" for each case.
2465          */
2466         block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
2467
2468 #ifdef DEBUG
2469         error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
2470         if (error)
2471                 goto error0;
2472 #endif
2473
2474         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
2475         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
2476                 /* Our block is left, pick up the right block. */
2477                 lbp = bp;
2478                 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2479                 left = block;
2480                 error = xfs_btree_read_buf_block(cur, &rptr,
2481                                         cur->bc_nlevels - 1, 0, &right, &rbp);
2482                 if (error)
2483                         goto error0;
2484                 bp = rbp;
2485                 nptr = 1;
2486         } else {
2487                 /* Our block is right, pick up the left block. */
2488                 rbp = bp;
2489                 xfs_btree_buf_to_ptr(cur, rbp, &rptr);
2490                 right = block;
2491                 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2492                 error = xfs_btree_read_buf_block(cur, &lptr,
2493                                         cur->bc_nlevels - 1, 0, &left, &lbp);
2494                 if (error)
2495                         goto error0;
2496                 bp = lbp;
2497                 nptr = 2;
2498         }
2499         /* Fill in the new block's btree header and log it. */
2500         xfs_btree_init_block(cur, cur->bc_nlevels, 2, new);
2501         xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
2502         ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
2503                         !xfs_btree_ptr_is_null(cur, &rptr));
2504
2505         /* Fill in the key data in the new root. */
2506         if (xfs_btree_get_level(left) > 0) {
2507                 xfs_btree_copy_keys(cur,
2508                                 xfs_btree_key_addr(cur, 1, new),
2509                                 xfs_btree_key_addr(cur, 1, left), 1);
2510                 xfs_btree_copy_keys(cur,
2511                                 xfs_btree_key_addr(cur, 2, new),
2512                                 xfs_btree_key_addr(cur, 1, right), 1);
2513         } else {
2514                 cur->bc_ops->init_key_from_rec(
2515                                 xfs_btree_key_addr(cur, 1, new),
2516                                 xfs_btree_rec_addr(cur, 1, left));
2517                 cur->bc_ops->init_key_from_rec(
2518                                 xfs_btree_key_addr(cur, 2, new),
2519                                 xfs_btree_rec_addr(cur, 1, right));
2520         }
2521         xfs_btree_log_keys(cur, nbp, 1, 2);
2522
2523         /* Fill in the pointer data in the new root. */
2524         xfs_btree_copy_ptrs(cur,
2525                 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
2526         xfs_btree_copy_ptrs(cur,
2527                 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
2528         xfs_btree_log_ptrs(cur, nbp, 1, 2);
2529
2530         /* Fix up the cursor. */
2531         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
2532         cur->bc_ptrs[cur->bc_nlevels] = nptr;
2533         cur->bc_nlevels++;
2534         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2535         *stat = 1;
2536         return 0;
2537 error0:
2538         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2539         return error;
2540 out0:
2541         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2542         *stat = 0;
2543         return 0;
2544 }
2545
2546 STATIC int
2547 xfs_btree_make_block_unfull(
2548         struct xfs_btree_cur    *cur,   /* btree cursor */
2549         int                     level,  /* btree level */
2550         int                     numrecs,/* # of recs in block */
2551         int                     *oindex,/* old tree index */
2552         int                     *index, /* new tree index */
2553         union xfs_btree_ptr     *nptr,  /* new btree ptr */
2554         struct xfs_btree_cur    **ncur, /* new btree cursor */
2555         union xfs_btree_rec     *nrec,  /* new record */
2556         int                     *stat)
2557 {
2558         union xfs_btree_key     key;    /* new btree key value */
2559         int                     error = 0;
2560
2561         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2562             level == cur->bc_nlevels - 1) {
2563                 struct xfs_inode *ip = cur->bc_private.b.ip;
2564
2565                 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
2566                         /* A root block that can be made bigger. */
2567
2568                         xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
2569                 } else {
2570                         /* A root block that needs replacing */
2571                         int     logflags = 0;
2572
2573                         error = xfs_btree_new_iroot(cur, &logflags, stat);
2574                         if (error || *stat == 0)
2575                                 return error;
2576
2577                         xfs_trans_log_inode(cur->bc_tp, ip, logflags);
2578                 }
2579
2580                 return 0;
2581         }
2582
2583         /* First, try shifting an entry to the right neighbor. */
2584         error = xfs_btree_rshift(cur, level, stat);
2585         if (error || *stat)
2586                 return error;
2587
2588         /* Next, try shifting an entry to the left neighbor. */
2589         error = xfs_btree_lshift(cur, level, stat);
2590         if (error)
2591                 return error;
2592
2593         if (*stat) {
2594                 *oindex = *index = cur->bc_ptrs[level];
2595                 return 0;
2596         }
2597
2598         /*
2599          * Next, try splitting the current block in half.
2600          *
2601          * If this works we have to re-set our variables because we
2602          * could be in a different block now.
2603          */
2604         error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
2605         if (error || *stat == 0)
2606                 return error;
2607
2608
2609         *index = cur->bc_ptrs[level];
2610         cur->bc_ops->init_rec_from_key(&key, nrec);
2611         return 0;
2612 }
2613
2614 /*
2615  * Insert one record/level.  Return information to the caller
2616  * allowing the next level up to proceed if necessary.
2617  */
2618 STATIC int
2619 xfs_btree_insrec(
2620         struct xfs_btree_cur    *cur,   /* btree cursor */
2621         int                     level,  /* level to insert record at */
2622         union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
2623         union xfs_btree_rec     *recp,  /* i/o: record data inserted */
2624         struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
2625         int                     *stat)  /* success/failure */
2626 {
2627         struct xfs_btree_block  *block; /* btree block */
2628         struct xfs_buf          *bp;    /* buffer for block */
2629         union xfs_btree_key     key;    /* btree key */
2630         union xfs_btree_ptr     nptr;   /* new block ptr */
2631         struct xfs_btree_cur    *ncur;  /* new btree cursor */
2632         union xfs_btree_rec     nrec;   /* new record count */
2633         int                     optr;   /* old key/record index */
2634         int                     ptr;    /* key/record index */
2635         int                     numrecs;/* number of records */
2636         int                     error;  /* error return value */
2637 #ifdef DEBUG
2638         int                     i;
2639 #endif
2640
2641         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2642         XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
2643
2644         ncur = NULL;
2645
2646         /*
2647          * If we have an external root pointer, and we've made it to the
2648          * root level, allocate a new root block and we're done.
2649          */
2650         if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2651             (level >= cur->bc_nlevels)) {
2652                 error = xfs_btree_new_root(cur, stat);
2653                 xfs_btree_set_ptr_null(cur, ptrp);
2654
2655                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2656                 return error;
2657         }
2658
2659         /* If we're off the left edge, return failure. */
2660         ptr = cur->bc_ptrs[level];
2661         if (ptr == 0) {
2662                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2663                 *stat = 0;
2664                 return 0;
2665         }
2666
2667         /* Make a key out of the record data to be inserted, and save it. */
2668         cur->bc_ops->init_key_from_rec(&key, recp);
2669
2670         optr = ptr;
2671
2672         XFS_BTREE_STATS_INC(cur, insrec);
2673
2674         /* Get pointers to the btree buffer and block. */
2675         block = xfs_btree_get_block(cur, level, &bp);
2676         numrecs = xfs_btree_get_numrecs(block);
2677
2678 #ifdef DEBUG
2679         error = xfs_btree_check_block(cur, block, level, bp);
2680         if (error)
2681                 goto error0;
2682
2683         /* Check that the new entry is being inserted in the right place. */
2684         if (ptr <= numrecs) {
2685                 if (level == 0) {
2686                         ASSERT(cur->bc_ops->recs_inorder(cur, recp,
2687                                 xfs_btree_rec_addr(cur, ptr, block)));
2688                 } else {
2689                         ASSERT(cur->bc_ops->keys_inorder(cur, &key,
2690                                 xfs_btree_key_addr(cur, ptr, block)));
2691                 }
2692         }
2693 #endif
2694
2695         /*
2696          * If the block is full, we can't insert the new entry until we
2697          * make the block un-full.
2698          */
2699         xfs_btree_set_ptr_null(cur, &nptr);
2700         if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
2701                 error = xfs_btree_make_block_unfull(cur, level, numrecs,
2702                                         &optr, &ptr, &nptr, &ncur, &nrec, stat);
2703                 if (error || *stat == 0)
2704                         goto error0;
2705         }
2706
2707         /*
2708          * The current block may have changed if the block was
2709          * previously full and we have just made space in it.
2710          */
2711         block = xfs_btree_get_block(cur, level, &bp);
2712         numrecs = xfs_btree_get_numrecs(block);
2713
2714 #ifdef DEBUG
2715         error = xfs_btree_check_block(cur, block, level, bp);
2716         if (error)
2717                 return error;
2718 #endif
2719
2720         /*
2721          * At this point we know there's room for our new entry in the block
2722          * we're pointing at.
2723          */
2724         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
2725
2726         if (level > 0) {
2727                 /* It's a nonleaf. make a hole in the keys and ptrs */
2728                 union xfs_btree_key     *kp;
2729                 union xfs_btree_ptr     *pp;
2730
2731                 kp = xfs_btree_key_addr(cur, ptr, block);
2732                 pp = xfs_btree_ptr_addr(cur, ptr, block);
2733
2734 #ifdef DEBUG
2735                 for (i = numrecs - ptr; i >= 0; i--) {
2736                         error = xfs_btree_check_ptr(cur, pp, i, level);
2737                         if (error)
2738                                 return error;
2739                 }
2740 #endif
2741
2742                 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
2743                 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
2744
2745 #ifdef DEBUG
2746                 error = xfs_btree_check_ptr(cur, ptrp, 0, level);
2747                 if (error)
2748                         goto error0;
2749 #endif
2750
2751                 /* Now put the new data in, bump numrecs and log it. */
2752                 xfs_btree_copy_keys(cur, kp, &key, 1);
2753                 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
2754                 numrecs++;
2755                 xfs_btree_set_numrecs(block, numrecs);
2756                 xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
2757                 xfs_btree_log_keys(cur, bp, ptr, numrecs);
2758 #ifdef DEBUG
2759                 if (ptr < numrecs) {
2760                         ASSERT(cur->bc_ops->keys_inorder(cur, kp,
2761                                 xfs_btree_key_addr(cur, ptr + 1, block)));
2762                 }
2763 #endif
2764         } else {
2765                 /* It's a leaf. make a hole in the records */
2766                 union xfs_btree_rec             *rp;
2767
2768                 rp = xfs_btree_rec_addr(cur, ptr, block);
2769
2770                 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
2771
2772                 /* Now put the new data in, bump numrecs and log it. */
2773                 xfs_btree_copy_recs(cur, rp, recp, 1);
2774                 xfs_btree_set_numrecs(block, ++numrecs);
2775                 xfs_btree_log_recs(cur, bp, ptr, numrecs);
2776 #ifdef DEBUG
2777                 if (ptr < numrecs) {
2778                         ASSERT(cur->bc_ops->recs_inorder(cur, rp,
2779                                 xfs_btree_rec_addr(cur, ptr + 1, block)));
2780                 }
2781 #endif
2782         }
2783
2784         /* Log the new number of records in the btree header. */
2785         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
2786
2787         /* If we inserted at the start of a block, update the parents' keys. */
2788         if (optr == 1) {
2789                 error = xfs_btree_updkey(cur, &key, level + 1);
2790                 if (error)
2791                         goto error0;
2792         }
2793
2794         /*
2795          * If we are tracking the last record in the tree and
2796          * we are at the far right edge of the tree, update it.
2797          */
2798         if (xfs_btree_is_lastrec(cur, block, level)) {
2799                 cur->bc_ops->update_lastrec(cur, block, recp,
2800                                             ptr, LASTREC_INSREC);
2801         }
2802
2803         /*
2804          * Return the new block number, if any.
2805          * If there is one, give back a record value and a cursor too.
2806          */
2807         *ptrp = nptr;
2808         if (!xfs_btree_ptr_is_null(cur, &nptr)) {
2809                 *recp = nrec;
2810                 *curp = ncur;
2811         }
2812
2813         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2814         *stat = 1;
2815         return 0;
2816
2817 error0:
2818         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2819         return error;
2820 }
2821
2822 /*
2823  * Insert the record at the point referenced by cur.
2824  *
2825  * A multi-level split of the tree on insert will invalidate the original
2826  * cursor.  All callers of this function should assume that the cursor is
2827  * no longer valid and revalidate it.
2828  */
2829 int
2830 xfs_btree_insert(
2831         struct xfs_btree_cur    *cur,
2832         int                     *stat)
2833 {
2834         int                     error;  /* error return value */
2835         int                     i;      /* result value, 0 for failure */
2836         int                     level;  /* current level number in btree */
2837         union xfs_btree_ptr     nptr;   /* new block number (split result) */
2838         struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
2839         struct xfs_btree_cur    *pcur;  /* previous level's cursor */
2840         union xfs_btree_rec     rec;    /* record to insert */
2841
2842         level = 0;
2843         ncur = NULL;
2844         pcur = cur;
2845
2846         xfs_btree_set_ptr_null(cur, &nptr);
2847         cur->bc_ops->init_rec_from_cur(cur, &rec);
2848
2849         /*
2850          * Loop going up the tree, starting at the leaf level.
2851          * Stop when we don't get a split block, that must mean that
2852          * the insert is finished with this level.
2853          */
2854         do {
2855                 /*
2856                  * Insert nrec/nptr into this level of the tree.
2857                  * Note if we fail, nptr will be null.
2858                  */
2859                 error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
2860                 if (error) {
2861                         if (pcur != cur)
2862                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2863                         goto error0;
2864                 }
2865
2866                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2867                 level++;
2868
2869                 /*
2870                  * See if the cursor we just used is trash.
2871                  * Can't trash the caller's cursor, but otherwise we should
2872                  * if ncur is a new cursor or we're about to be done.
2873                  */
2874                 if (pcur != cur &&
2875                     (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
2876                         /* Save the state from the cursor before we trash it */
2877                         if (cur->bc_ops->update_cursor)
2878                                 cur->bc_ops->update_cursor(pcur, cur);
2879                         cur->bc_nlevels = pcur->bc_nlevels;
2880                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2881                 }
2882                 /* If we got a new cursor, switch to it. */
2883                 if (ncur) {
2884                         pcur = ncur;
2885                         ncur = NULL;
2886                 }
2887         } while (!xfs_btree_ptr_is_null(cur, &nptr));
2888
2889         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2890         *stat = i;
2891         return 0;
2892 error0:
2893         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2894         return error;
2895 }
2896
2897 /*
2898  * Try to merge a non-leaf block back into the inode root.
2899  *
2900  * Note: the killroot names comes from the fact that we're effectively
2901  * killing the old root block.  But because we can't just delete the
2902  * inode we have to copy the single block it was pointing to into the
2903  * inode.
2904  */
2905 STATIC int
2906 xfs_btree_kill_iroot(
2907         struct xfs_btree_cur    *cur)
2908 {
2909         int                     whichfork = cur->bc_private.b.whichfork;
2910         struct xfs_inode        *ip = cur->bc_private.b.ip;
2911         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
2912         struct xfs_btree_block  *block;
2913         struct xfs_btree_block  *cblock;
2914         union xfs_btree_key     *kp;
2915         union xfs_btree_key     *ckp;
2916         union xfs_btree_ptr     *pp;
2917         union xfs_btree_ptr     *cpp;
2918         struct xfs_buf          *cbp;
2919         int                     level;
2920         int                     index;
2921         int                     numrecs;
2922 #ifdef DEBUG
2923         union xfs_btree_ptr     ptr;
2924         int                     i;
2925 #endif
2926
2927         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2928
2929         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2930         ASSERT(cur->bc_nlevels > 1);
2931
2932         /*
2933          * Don't deal with the root block needs to be a leaf case.
2934          * We're just going to turn the thing back into extents anyway.
2935          */
2936         level = cur->bc_nlevels - 1;
2937         if (level == 1)
2938                 goto out0;
2939
2940         /*
2941          * Give up if the root has multiple children.
2942          */
2943         block = xfs_btree_get_iroot(cur);
2944         if (xfs_btree_get_numrecs(block) != 1)
2945                 goto out0;
2946
2947         cblock = xfs_btree_get_block(cur, level - 1, &cbp);
2948         numrecs = xfs_btree_get_numrecs(cblock);
2949
2950         /*
2951          * Only do this if the next level will fit.
2952          * Then the data must be copied up to the inode,
2953          * instead of freeing the root you free the next level.
2954          */
2955         if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
2956                 goto out0;
2957
2958         XFS_BTREE_STATS_INC(cur, killroot);
2959
2960 #ifdef DEBUG
2961         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
2962         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
2963         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
2964         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
2965 #endif
2966
2967         index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
2968         if (index) {
2969                 xfs_iroot_realloc(cur->bc_private.b.ip, index,
2970                                   cur->bc_private.b.whichfork);
2971                 block = ifp->if_broot;
2972         }
2973
2974         be16_add_cpu(&block->bb_numrecs, index);
2975         ASSERT(block->bb_numrecs == cblock->bb_numrecs);
2976
2977         kp = xfs_btree_key_addr(cur, 1, block);
2978         ckp = xfs_btree_key_addr(cur, 1, cblock);
2979         xfs_btree_copy_keys(cur, kp, ckp, numrecs);
2980
2981         pp = xfs_btree_ptr_addr(cur, 1, block);
2982         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2983 #ifdef DEBUG
2984         for (i = 0; i < numrecs; i++) {
2985                 int             error;
2986
2987                 error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
2988                 if (error) {
2989                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2990                         return error;
2991                 }
2992         }
2993 #endif
2994         xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
2995
2996         cur->bc_ops->free_block(cur, cbp);
2997         XFS_BTREE_STATS_INC(cur, free);
2998
2999         cur->bc_bufs[level - 1] = NULL;
3000         be16_add_cpu(&block->bb_level, -1);
3001         xfs_trans_log_inode(cur->bc_tp, ip,
3002                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
3003         cur->bc_nlevels--;
3004 out0:
3005         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3006         return 0;
3007 }
3008
3009 /*
3010  * Kill the current root node, and replace it with it's only child node.
3011  */
3012 STATIC int
3013 xfs_btree_kill_root(
3014         struct xfs_btree_cur    *cur,
3015         struct xfs_buf          *bp,
3016         int                     level,
3017         union xfs_btree_ptr     *newroot)
3018 {
3019         int                     error;
3020
3021         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3022         XFS_BTREE_STATS_INC(cur, killroot);
3023
3024         /*
3025          * Update the root pointer, decreasing the level by 1 and then
3026          * free the old root.
3027          */
3028         cur->bc_ops->set_root(cur, newroot, -1);
3029
3030         error = cur->bc_ops->free_block(cur, bp);
3031         if (error) {
3032                 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3033                 return error;
3034         }
3035
3036         XFS_BTREE_STATS_INC(cur, free);
3037
3038         cur->bc_bufs[level] = NULL;
3039         cur->bc_ra[level] = 0;
3040         cur->bc_nlevels--;
3041
3042         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3043         return 0;
3044 }
3045
3046 STATIC int
3047 xfs_btree_dec_cursor(
3048         struct xfs_btree_cur    *cur,
3049         int                     level,
3050         int                     *stat)
3051 {
3052         int                     error;
3053         int                     i;
3054
3055         if (level > 0) {
3056                 error = xfs_btree_decrement(cur, level, &i);
3057                 if (error)
3058                         return error;
3059         }
3060
3061         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3062         *stat = 1;
3063         return 0;
3064 }
3065
3066 /*
3067  * Single level of the btree record deletion routine.
3068  * Delete record pointed to by cur/level.
3069  * Remove the record from its block then rebalance the tree.
3070  * Return 0 for error, 1 for done, 2 to go on to the next level.
3071  */
3072 STATIC int                                      /* error */
3073 xfs_btree_delrec(
3074         struct xfs_btree_cur    *cur,           /* btree cursor */
3075         int                     level,          /* level removing record from */
3076         int                     *stat)          /* fail/done/go-on */
3077 {
3078         struct xfs_btree_block  *block;         /* btree block */
3079         union xfs_btree_ptr     cptr;           /* current block ptr */
3080         struct xfs_buf          *bp;            /* buffer for block */
3081         int                     error;          /* error return value */
3082         int                     i;              /* loop counter */
3083         union xfs_btree_key     key;            /* storage for keyp */
3084         union xfs_btree_key     *keyp = &key;   /* passed to the next level */
3085         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
3086         struct xfs_buf          *lbp;           /* left buffer pointer */
3087         struct xfs_btree_block  *left;          /* left btree block */
3088         int                     lrecs = 0;      /* left record count */
3089         int                     ptr;            /* key/record index */
3090         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
3091         struct xfs_buf          *rbp;           /* right buffer pointer */
3092         struct xfs_btree_block  *right;         /* right btree block */
3093         struct xfs_btree_block  *rrblock;       /* right-right btree block */
3094         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
3095         int                     rrecs = 0;      /* right record count */
3096         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
3097         int                     numrecs;        /* temporary numrec count */
3098
3099         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3100         XFS_BTREE_TRACE_ARGI(cur, level);
3101
3102         tcur = NULL;
3103
3104         /* Get the index of the entry being deleted, check for nothing there. */
3105         ptr = cur->bc_ptrs[level];
3106         if (ptr == 0) {
3107                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3108                 *stat = 0;
3109                 return 0;
3110         }
3111
3112         /* Get the buffer & block containing the record or key/ptr. */
3113         block = xfs_btree_get_block(cur, level, &bp);
3114         numrecs = xfs_btree_get_numrecs(block);
3115
3116 #ifdef DEBUG
3117         error = xfs_btree_check_block(cur, block, level, bp);
3118         if (error)
3119                 goto error0;
3120 #endif
3121
3122         /* Fail if we're off the end of the block. */
3123         if (ptr > numrecs) {
3124                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3125                 *stat = 0;
3126                 return 0;
3127         }
3128
3129         XFS_BTREE_STATS_INC(cur, delrec);
3130         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3131
3132         /* Excise the entries being deleted. */
3133         if (level > 0) {
3134                 /* It's a nonleaf. operate on keys and ptrs */
3135                 union xfs_btree_key     *lkp;
3136                 union xfs_btree_ptr     *lpp;
3137
3138                 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3139                 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3140
3141 #ifdef DEBUG
3142                 for (i = 0; i < numrecs - ptr; i++) {
3143                         error = xfs_btree_check_ptr(cur, lpp, i, level);
3144                         if (error)
3145                                 goto error0;
3146                 }
3147 #endif
3148
3149                 if (ptr < numrecs) {
3150                         xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3151                         xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3152                         xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3153                         xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3154                 }
3155
3156                 /*
3157                  * If it's the first record in the block, we'll need to pass a
3158                  * key up to the next level (updkey).
3159                  */
3160                 if (ptr == 1)
3161                         keyp = xfs_btree_key_addr(cur, 1, block);
3162         } else {
3163                 /* It's a leaf. operate on records */
3164                 if (ptr < numrecs) {
3165                         xfs_btree_shift_recs(cur,
3166                                 xfs_btree_rec_addr(cur, ptr + 1, block),
3167                                 -1, numrecs - ptr);
3168                         xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3169                 }
3170
3171                 /*
3172                  * If it's the first record in the block, we'll need a key
3173                  * structure to pass up to the next level (updkey).
3174                  */
3175                 if (ptr == 1) {
3176                         cur->bc_ops->init_key_from_rec(&key,
3177                                         xfs_btree_rec_addr(cur, 1, block));
3178                         keyp = &key;
3179                 }
3180         }
3181
3182         /*
3183          * Decrement and log the number of entries in the block.
3184          */
3185         xfs_btree_set_numrecs(block, --numrecs);
3186         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3187
3188         /*
3189          * If we are tracking the last record in the tree and
3190          * we are at the far right edge of the tree, update it.
3191          */
3192         if (xfs_btree_is_lastrec(cur, block, level)) {
3193                 cur->bc_ops->update_lastrec(cur, block, NULL,
3194                                             ptr, LASTREC_DELREC);
3195         }
3196
3197         /*
3198          * We're at the root level.  First, shrink the root block in-memory.
3199          * Try to get rid of the next level down.  If we can't then there's
3200          * nothing left to do.
3201          */
3202         if (level == cur->bc_nlevels - 1) {
3203                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3204                         xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3205                                           cur->bc_private.b.whichfork);
3206
3207                         error = xfs_btree_kill_iroot(cur);
3208                         if (error)
3209                                 goto error0;
3210
3211                         error = xfs_btree_dec_cursor(cur, level, stat);
3212                         if (error)
3213                                 goto error0;
3214                         *stat = 1;
3215                         return 0;
3216                 }
3217
3218                 /*
3219                  * If this is the root level, and there's only one entry left,
3220                  * and it's NOT the leaf level, then we can get rid of this
3221                  * level.
3222                  */
3223                 if (numrecs == 1 && level > 0) {
3224                         union xfs_btree_ptr     *pp;
3225                         /*
3226                          * pp is still set to the first pointer in the block.
3227                          * Make it the new root of the btree.
3228                          */
3229                         pp = xfs_btree_ptr_addr(cur, 1, block);
3230                         error = xfs_btree_kill_root(cur, bp, level, pp);
3231                         if (error)
3232                                 goto error0;
3233                 } else if (level > 0) {
3234                         error = xfs_btree_dec_cursor(cur, level, stat);
3235                         if (error)
3236                                 goto error0;
3237                 }
3238                 *stat = 1;
3239                 return 0;
3240         }
3241
3242         /*
3243          * If we deleted the leftmost entry in the block, update the
3244          * key values above us in the tree.
3245          */
3246         if (ptr == 1) {
3247                 error = xfs_btree_updkey(cur, keyp, level + 1);
3248                 if (error)
3249                         goto error0;
3250         }
3251
3252         /*
3253          * If the number of records remaining in the block is at least
3254          * the minimum, we're done.
3255          */
3256         if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3257                 error = xfs_btree_dec_cursor(cur, level, stat);
3258                 if (error)
3259                         goto error0;
3260                 return 0;
3261         }
3262
3263         /*
3264          * Otherwise, we have to move some records around to keep the
3265          * tree balanced.  Look at the left and right sibling blocks to
3266          * see if we can re-balance by moving only one record.
3267          */
3268         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3269         xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3270
3271         if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3272                 /*
3273                  * One child of root, need to get a chance to copy its contents
3274                  * into the root and delete it. Can't go up to next level,
3275                  * there's nothing to delete there.
3276                  */
3277                 if (xfs_btree_ptr_is_null(cur, &rptr) &&
3278                     xfs_btree_ptr_is_null(cur, &lptr) &&
3279                     level == cur->bc_nlevels - 2) {
3280                         error = xfs_btree_kill_iroot(cur);
3281                         if (!error)
3282                                 error = xfs_btree_dec_cursor(cur, level, stat);
3283                         if (error)
3284                                 goto error0;
3285                         return 0;
3286                 }
3287         }
3288
3289         ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3290                !xfs_btree_ptr_is_null(cur, &lptr));
3291
3292         /*
3293          * Duplicate the cursor so our btree manipulations here won't
3294          * disrupt the next level up.
3295          */
3296         error = xfs_btree_dup_cursor(cur, &tcur);
3297         if (error)
3298                 goto error0;
3299
3300         /*
3301          * If there's a right sibling, see if it's ok to shift an entry
3302          * out of it.
3303          */
3304         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3305                 /*
3306                  * Move the temp cursor to the last entry in the next block.
3307                  * Actually any entry but the first would suffice.
3308                  */
3309                 i = xfs_btree_lastrec(tcur, level);
3310                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3311
3312                 error = xfs_btree_increment(tcur, level, &i);
3313                 if (error)
3314                         goto error0;
3315                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3316
3317                 i = xfs_btree_lastrec(tcur, level);
3318                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3319
3320                 /* Grab a pointer to the block. */
3321                 right = xfs_btree_get_block(tcur, level, &rbp);
3322 #ifdef DEBUG
3323                 error = xfs_btree_check_block(tcur, right, level, rbp);
3324                 if (error)
3325                         goto error0;
3326 #endif
3327                 /* Grab the current block number, for future use. */
3328                 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3329
3330                 /*
3331                  * If right block is full enough so that removing one entry
3332                  * won't make it too empty, and left-shifting an entry out
3333                  * of right to us works, we're done.
3334                  */
3335                 if (xfs_btree_get_numrecs(right) - 1 >=
3336                     cur->bc_ops->get_minrecs(tcur, level)) {
3337                         error = xfs_btree_lshift(tcur, level, &i);
3338                         if (error)
3339                                 goto error0;
3340                         if (i) {
3341                                 ASSERT(xfs_btree_get_numrecs(block) >=
3342                                        cur->bc_ops->get_minrecs(tcur, level));
3343
3344                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3345                                 tcur = NULL;
3346
3347                                 error = xfs_btree_dec_cursor(cur, level, stat);
3348                                 if (error)
3349                                         goto error0;
3350                                 return 0;
3351                         }
3352                 }
3353
3354                 /*
3355                  * Otherwise, grab the number of records in right for
3356                  * future reference, and fix up the temp cursor to point
3357                  * to our block again (last record).
3358                  */
3359                 rrecs = xfs_btree_get_numrecs(right);
3360                 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3361                         i = xfs_btree_firstrec(tcur, level);
3362                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3363
3364                         error = xfs_btree_decrement(tcur, level, &i);
3365                         if (error)
3366                                 goto error0;
3367                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3368                 }
3369         }
3370
3371         /*
3372          * If there's a left sibling, see if it's ok to shift an entry
3373          * out of it.
3374          */
3375         if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3376                 /*
3377                  * Move the temp cursor to the first entry in the
3378                  * previous block.
3379                  */
3380                 i = xfs_btree_firstrec(tcur, level);
3381                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3382
3383                 error = xfs_btree_decrement(tcur, level, &i);
3384                 if (error)
3385                         goto error0;
3386                 i = xfs_btree_firstrec(tcur, level);
3387                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3388
3389                 /* Grab a pointer to the block. */
3390                 left = xfs_btree_get_block(tcur, level, &lbp);
3391 #ifdef DEBUG
3392                 error = xfs_btree_check_block(cur, left, level, lbp);
3393                 if (error)
3394                         goto error0;
3395 #endif
3396                 /* Grab the current block number, for future use. */
3397                 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
3398
3399                 /*
3400                  * If left block is full enough so that removing one entry
3401                  * won't make it too empty, and right-shifting an entry out
3402                  * of left to us works, we're done.
3403                  */
3404                 if (xfs_btree_get_numrecs(left) - 1 >=
3405                     cur->bc_ops->get_minrecs(tcur, level)) {
3406                         error = xfs_btree_rshift(tcur, level, &i);
3407                         if (error)
3408                                 goto error0;
3409                         if (i) {
3410                                 ASSERT(xfs_btree_get_numrecs(block) >=
3411                                        cur->bc_ops->get_minrecs(tcur, level));
3412                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3413                                 tcur = NULL;
3414                                 if (level == 0)
3415                                         cur->bc_ptrs[0]++;
3416                                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3417                                 *stat = 1;
3418                                 return 0;
3419                         }
3420                 }
3421
3422                 /*
3423                  * Otherwise, grab the number of records in right for
3424                  * future reference.
3425                  */
3426                 lrecs = xfs_btree_get_numrecs(left);
3427         }
3428
3429         /* Delete the temp cursor, we're done with it. */
3430         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3431         tcur = NULL;
3432
3433         /* If here, we need to do a join to keep the tree balanced. */
3434         ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
3435
3436         if (!xfs_btree_ptr_is_null(cur, &lptr) &&
3437             lrecs + xfs_btree_get_numrecs(block) <=
3438                         cur->bc_ops->get_maxrecs(cur, level)) {
3439                 /*
3440                  * Set "right" to be the starting block,
3441                  * "left" to be the left neighbor.
3442                  */
3443                 rptr = cptr;
3444                 right = block;
3445                 rbp = bp;
3446                 error = xfs_btree_read_buf_block(cur, &lptr, level,
3447                                                         0, &left, &lbp);
3448                 if (error)
3449                         goto error0;
3450
3451         /*
3452          * If that won't work, see if we can join with the right neighbor block.
3453          */
3454         } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
3455                    rrecs + xfs_btree_get_numrecs(block) <=
3456                         cur->bc_ops->get_maxrecs(cur, level)) {
3457                 /*
3458                  * Set "left" to be the starting block,
3459                  * "right" to be the right neighbor.
3460                  */
3461                 lptr = cptr;
3462                 left = block;
3463                 lbp = bp;
3464                 error = xfs_btree_read_buf_block(cur, &rptr, level,
3465                                                         0, &right, &rbp);
3466                 if (error)
3467                         goto error0;
3468
3469         /*
3470          * Otherwise, we can't fix the imbalance.
3471          * Just return.  This is probably a logic error, but it's not fatal.
3472          */
3473         } else {
3474                 error = xfs_btree_dec_cursor(cur, level, stat);
3475                 if (error)
3476                         goto error0;
3477                 return 0;
3478         }
3479
3480         rrecs = xfs_btree_get_numrecs(right);
3481         lrecs = xfs_btree_get_numrecs(left);
3482
3483         /*
3484          * We're now going to join "left" and "right" by moving all the stuff
3485          * in "right" to "left" and deleting "right".
3486          */
3487         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
3488         if (level > 0) {
3489                 /* It's a non-leaf.  Move keys and pointers. */
3490                 union xfs_btree_key     *lkp;   /* left btree key */
3491                 union xfs_btree_ptr     *lpp;   /* left address pointer */
3492                 union xfs_btree_key     *rkp;   /* right btree key */
3493                 union xfs_btree_ptr     *rpp;   /* right address pointer */
3494
3495                 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
3496                 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
3497                 rkp = xfs_btree_key_addr(cur, 1, right);
3498                 rpp = xfs_btree_ptr_addr(cur, 1, right);
3499 #ifdef DEBUG
3500                 for (i = 1; i < rrecs; i++) {
3501                         error = xfs_btree_check_ptr(cur, rpp, i, level);
3502                         if (error)
3503                                 goto error0;
3504                 }
3505 #endif
3506                 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
3507                 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
3508
3509                 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
3510                 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
3511         } else {
3512                 /* It's a leaf.  Move records.  */
3513                 union xfs_btree_rec     *lrp;   /* left record pointer */
3514                 union xfs_btree_rec     *rrp;   /* right record pointer */
3515
3516                 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
3517                 rrp = xfs_btree_rec_addr(cur, 1, right);
3518
3519                 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
3520                 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
3521         }
3522
3523         XFS_BTREE_STATS_INC(cur, join);
3524
3525         /*
3526          * Fix up the number of records and right block pointer in the
3527          * surviving block, and log it.
3528          */
3529         xfs_btree_set_numrecs(left, lrecs + rrecs);
3530         xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
3531         xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3532         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
3533
3534         /* If there is a right sibling, point it to the remaining block. */
3535         xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3536         if (!xfs_btree_ptr_is_null(cur, &cptr)) {
3537                 error = xfs_btree_read_buf_block(cur, &cptr, level,
3538                                                         0, &rrblock, &rrbp);
3539                 if (error)
3540                         goto error0;
3541                 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
3542                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
3543         }
3544
3545         /* Free the deleted block. */
3546         error = cur->bc_ops->free_block(cur, rbp);
3547         if (error)
3548                 goto error0;
3549         XFS_BTREE_STATS_INC(cur, free);
3550
3551         /*
3552          * If we joined with the left neighbor, set the buffer in the
3553          * cursor to the left block, and fix up the index.
3554          */
3555         if (bp != lbp) {
3556                 cur->bc_bufs[level] = lbp;
3557                 cur->bc_ptrs[level] += lrecs;
3558                 cur->bc_ra[level] = 0;
3559         }
3560         /*
3561          * If we joined with the right neighbor and there's a level above
3562          * us, increment the cursor at that level.
3563          */
3564         else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
3565                    (level + 1 < cur->bc_nlevels)) {
3566                 error = xfs_btree_increment(cur, level + 1, &i);
3567                 if (error)
3568                         goto error0;
3569         }
3570
3571         /*
3572          * Readjust the ptr at this level if it's not a leaf, since it's
3573          * still pointing at the deletion point, which makes the cursor
3574          * inconsistent.  If this makes the ptr 0, the caller fixes it up.
3575          * We can't use decrement because it would change the next level up.
3576          */
3577         if (level > 0)
3578                 cur->bc_ptrs[level]--;
3579
3580         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3581         /* Return value means the next level up has something to do. */
3582         *stat = 2;
3583         return 0;
3584
3585 error0:
3586         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3587         if (tcur)
3588                 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
3589         return error;
3590 }
3591
3592 /*
3593  * Delete the record pointed to by cur.
3594  * The cursor refers to the place where the record was (could be inserted)
3595  * when the operation returns.
3596  */
3597 int                                     /* error */
3598 xfs_btree_delete(
3599         struct xfs_btree_cur    *cur,
3600         int                     *stat)  /* success/failure */
3601 {
3602         int                     error;  /* error return value */
3603         int                     level;
3604         int                     i;
3605
3606         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3607
3608         /*
3609          * Go up the tree, starting at leaf level.
3610          *
3611          * If 2 is returned then a join was done; go to the next level.
3612          * Otherwise we are done.
3613          */
3614         for (level = 0, i = 2; i == 2; level++) {
3615                 error = xfs_btree_delrec(cur, level, &i);
3616                 if (error)
3617                         goto error0;
3618         }
3619
3620         if (i == 0) {
3621                 for (level = 1; level < cur->bc_nlevels; level++) {
3622                         if (cur->bc_ptrs[level] == 0) {
3623                                 error = xfs_btree_decrement(cur, level, &i);
3624                                 if (error)
3625                                         goto error0;
3626                                 break;
3627                         }
3628                 }
3629         }
3630
3631         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3632         *stat = i;
3633         return 0;
3634 error0:
3635         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3636         return error;
3637 }
3638
3639 /*
3640  * Get the data from the pointed-to record.
3641  */
3642 int                                     /* error */
3643 xfs_btree_get_rec(
3644         struct xfs_btree_cur    *cur,   /* btree cursor */
3645         union xfs_btree_rec     **recp, /* output: btree record */
3646         int                     *stat)  /* output: success/failure */
3647 {
3648         struct xfs_btree_block  *block; /* btree block */
3649         struct xfs_buf          *bp;    /* buffer pointer */
3650         int                     ptr;    /* record number */
3651 #ifdef DEBUG
3652         int                     error;  /* error return value */
3653 #endif
3654
3655         ptr = cur->bc_ptrs[0];
3656         block = xfs_btree_get_block(cur, 0, &bp);
3657
3658 #ifdef DEBUG
3659         error = xfs_btree_check_block(cur, block, 0, bp);
3660         if (error)
3661                 return error;
3662 #endif
3663
3664         /*
3665          * Off the right end or left end, return failure.
3666          */
3667         if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
3668                 *stat = 0;
3669                 return 0;
3670         }
3671
3672         /*
3673          * Point to the record and extract its data.
3674          */
3675         *recp = xfs_btree_rec_addr(cur, ptr, block);
3676         *stat = 1;
3677         return 0;
3678 }