[PATCH] de_thread: fix lockless do_each_thread
[pandora-kernel.git] / fs / xfs / xfs_btree.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_btree.h"
38 #include "xfs_ialloc.h"
39 #include "xfs_error.h"
40
41 /*
42  * Cursor allocation zone.
43  */
44 kmem_zone_t     *xfs_btree_cur_zone;
45
46 /*
47  * Btree magic numbers.
48  */
49 const __uint32_t xfs_magics[XFS_BTNUM_MAX] =
50 {
51         XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
52 };
53
54 /*
55  * Prototypes for internal routines.
56  */
57
58 /*
59  * Checking routine: return maxrecs for the block.
60  */
61 STATIC int                              /* number of records fitting in block */
62 xfs_btree_maxrecs(
63         xfs_btree_cur_t         *cur,   /* btree cursor */
64         xfs_btree_block_t       *block);/* generic btree block pointer */
65
66 /*
67  * Internal routines.
68  */
69
70 /*
71  * Retrieve the block pointer from the cursor at the given level.
72  * This may be a bmap btree root or from a buffer.
73  */
74 STATIC xfs_btree_block_t *                      /* generic btree block pointer */
75 xfs_btree_get_block(
76         xfs_btree_cur_t         *cur,   /* btree cursor */
77         int                     level,  /* level in btree */
78         struct xfs_buf          **bpp); /* buffer containing the block */
79
80 /*
81  * Checking routine: return maxrecs for the block.
82  */
83 STATIC int                              /* number of records fitting in block */
84 xfs_btree_maxrecs(
85         xfs_btree_cur_t         *cur,   /* btree cursor */
86         xfs_btree_block_t       *block) /* generic btree block pointer */
87 {
88         switch (cur->bc_btnum) {
89         case XFS_BTNUM_BNO:
90         case XFS_BTNUM_CNT:
91                 return (int)XFS_ALLOC_BLOCK_MAXRECS(
92                                 be16_to_cpu(block->bb_h.bb_level), cur);
93         case XFS_BTNUM_BMAP:
94                 return (int)XFS_BMAP_BLOCK_IMAXRECS(
95                                 be16_to_cpu(block->bb_h.bb_level), cur);
96         case XFS_BTNUM_INO:
97                 return (int)XFS_INOBT_BLOCK_MAXRECS(
98                                 be16_to_cpu(block->bb_h.bb_level), cur);
99         default:
100                 ASSERT(0);
101                 return 0;
102         }
103 }
104
105 /*
106  * External routines.
107  */
108
109 #ifdef DEBUG
110 /*
111  * Debug routine: check that block header is ok.
112  */
113 void
114 xfs_btree_check_block(
115         xfs_btree_cur_t         *cur,   /* btree cursor */
116         xfs_btree_block_t       *block, /* generic btree block pointer */
117         int                     level,  /* level of the btree block */
118         xfs_buf_t               *bp)    /* buffer containing block, if any */
119 {
120         if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
121                 xfs_btree_check_lblock(cur, (xfs_btree_lblock_t *)block, level,
122                         bp);
123         else
124                 xfs_btree_check_sblock(cur, (xfs_btree_sblock_t *)block, level,
125                         bp);
126 }
127
128 /*
129  * Debug routine: check that keys are in the right order.
130  */
131 void
132 xfs_btree_check_key(
133         xfs_btnum_t     btnum,          /* btree identifier */
134         void            *ak1,           /* pointer to left (lower) key */
135         void            *ak2)           /* pointer to right (higher) key */
136 {
137         switch (btnum) {
138         case XFS_BTNUM_BNO: {
139                 xfs_alloc_key_t *k1;
140                 xfs_alloc_key_t *k2;
141
142                 k1 = ak1;
143                 k2 = ak2;
144                 ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock));
145                 break;
146             }
147         case XFS_BTNUM_CNT: {
148                 xfs_alloc_key_t *k1;
149                 xfs_alloc_key_t *k2;
150
151                 k1 = ak1;
152                 k2 = ak2;
153                 ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) ||
154                        (k1->ar_blockcount == k2->ar_blockcount &&
155                         be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)));
156                 break;
157             }
158         case XFS_BTNUM_BMAP: {
159                 xfs_bmbt_key_t  *k1;
160                 xfs_bmbt_key_t  *k2;
161
162                 k1 = ak1;
163                 k2 = ak2;
164                 ASSERT(INT_GET(k1->br_startoff, ARCH_CONVERT) < INT_GET(k2->br_startoff, ARCH_CONVERT));
165                 break;
166             }
167         case XFS_BTNUM_INO: {
168                 xfs_inobt_key_t *k1;
169                 xfs_inobt_key_t *k2;
170
171                 k1 = ak1;
172                 k2 = ak2;
173                 ASSERT(INT_GET(k1->ir_startino, ARCH_CONVERT) < INT_GET(k2->ir_startino, ARCH_CONVERT));
174                 break;
175             }
176         default:
177                 ASSERT(0);
178         }
179 }
180 #endif  /* DEBUG */
181
182 /*
183  * Checking routine: check that long form block header is ok.
184  */
185 /* ARGSUSED */
186 int                                     /* error (0 or EFSCORRUPTED) */
187 xfs_btree_check_lblock(
188         xfs_btree_cur_t         *cur,   /* btree cursor */
189         xfs_btree_lblock_t      *block, /* btree long form block pointer */
190         int                     level,  /* level of the btree block */
191         xfs_buf_t               *bp)    /* buffer for block, if any */
192 {
193         int                     lblock_ok; /* block passes checks */
194         xfs_mount_t             *mp;    /* file system mount point */
195
196         mp = cur->bc_mp;
197         lblock_ok =
198                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
199                 be16_to_cpu(block->bb_level) == level &&
200                 be16_to_cpu(block->bb_numrecs) <=
201                         xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
202                 block->bb_leftsib &&
203                 (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO ||
204                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) &&
205                 block->bb_rightsib &&
206                 (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO ||
207                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib)));
208         if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK,
209                         XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
210                 if (bp)
211                         xfs_buftrace("LBTREE ERROR", bp);
212                 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
213                                  mp);
214                 return XFS_ERROR(EFSCORRUPTED);
215         }
216         return 0;
217 }
218
219 /*
220  * Checking routine: check that (long) pointer is ok.
221  */
222 int                                     /* error (0 or EFSCORRUPTED) */
223 xfs_btree_check_lptr(
224         xfs_btree_cur_t *cur,           /* btree cursor */
225         xfs_dfsbno_t    ptr,            /* btree block disk address */
226         int             level)          /* btree block level */
227 {
228         xfs_mount_t     *mp;            /* file system mount point */
229
230         mp = cur->bc_mp;
231         XFS_WANT_CORRUPTED_RETURN(
232                 level > 0 &&
233                 ptr != NULLDFSBNO &&
234                 XFS_FSB_SANITY_CHECK(mp, ptr));
235         return 0;
236 }
237
238 #ifdef DEBUG
239 /*
240  * Debug routine: check that records are in the right order.
241  */
242 void
243 xfs_btree_check_rec(
244         xfs_btnum_t     btnum,          /* btree identifier */
245         void            *ar1,           /* pointer to left (lower) record */
246         void            *ar2)           /* pointer to right (higher) record */
247 {
248         switch (btnum) {
249         case XFS_BTNUM_BNO: {
250                 xfs_alloc_rec_t *r1;
251                 xfs_alloc_rec_t *r2;
252
253                 r1 = ar1;
254                 r2 = ar2;
255                 ASSERT(be32_to_cpu(r1->ar_startblock) +
256                        be32_to_cpu(r1->ar_blockcount) <=
257                        be32_to_cpu(r2->ar_startblock));
258                 break;
259             }
260         case XFS_BTNUM_CNT: {
261                 xfs_alloc_rec_t *r1;
262                 xfs_alloc_rec_t *r2;
263
264                 r1 = ar1;
265                 r2 = ar2;
266                 ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) ||
267                        (r1->ar_blockcount == r2->ar_blockcount &&
268                         be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock)));
269                 break;
270             }
271         case XFS_BTNUM_BMAP: {
272                 xfs_bmbt_rec_t  *r1;
273                 xfs_bmbt_rec_t  *r2;
274
275                 r1 = ar1;
276                 r2 = ar2;
277                 ASSERT(xfs_bmbt_disk_get_startoff(r1) +
278                        xfs_bmbt_disk_get_blockcount(r1) <=
279                        xfs_bmbt_disk_get_startoff(r2));
280                 break;
281             }
282         case XFS_BTNUM_INO: {
283                 xfs_inobt_rec_t *r1;
284                 xfs_inobt_rec_t *r2;
285
286                 r1 = ar1;
287                 r2 = ar2;
288                 ASSERT(INT_GET(r1->ir_startino, ARCH_CONVERT) + XFS_INODES_PER_CHUNK <=
289                        INT_GET(r2->ir_startino, ARCH_CONVERT));
290                 break;
291             }
292         default:
293                 ASSERT(0);
294         }
295 }
296 #endif  /* DEBUG */
297
298 /*
299  * Checking routine: check that block header is ok.
300  */
301 /* ARGSUSED */
302 int                                     /* error (0 or EFSCORRUPTED) */
303 xfs_btree_check_sblock(
304         xfs_btree_cur_t         *cur,   /* btree cursor */
305         xfs_btree_sblock_t      *block, /* btree short form block pointer */
306         int                     level,  /* level of the btree block */
307         xfs_buf_t               *bp)    /* buffer containing block */
308 {
309         xfs_buf_t               *agbp;  /* buffer for ag. freespace struct */
310         xfs_agf_t               *agf;   /* ag. freespace structure */
311         xfs_agblock_t           agflen; /* native ag. freespace length */
312         int                     sblock_ok; /* block passes checks */
313
314         agbp = cur->bc_private.a.agbp;
315         agf = XFS_BUF_TO_AGF(agbp);
316         agflen = be32_to_cpu(agf->agf_length);
317         sblock_ok =
318                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
319                 be16_to_cpu(block->bb_level) == level &&
320                 be16_to_cpu(block->bb_numrecs) <=
321                         xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
322                 (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK ||
323                  be32_to_cpu(block->bb_leftsib) < agflen) &&
324                 block->bb_leftsib &&
325                 (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK ||
326                  be32_to_cpu(block->bb_rightsib) < agflen) &&
327                 block->bb_rightsib;
328         if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
329                         XFS_ERRTAG_BTREE_CHECK_SBLOCK,
330                         XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
331                 if (bp)
332                         xfs_buftrace("SBTREE ERROR", bp);
333                 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW,
334                                  cur->bc_mp);
335                 return XFS_ERROR(EFSCORRUPTED);
336         }
337         return 0;
338 }
339
340 /*
341  * Checking routine: check that (short) pointer is ok.
342  */
343 int                                     /* error (0 or EFSCORRUPTED) */
344 xfs_btree_check_sptr(
345         xfs_btree_cur_t *cur,           /* btree cursor */
346         xfs_agblock_t   ptr,            /* btree block disk address */
347         int             level)          /* btree block level */
348 {
349         xfs_buf_t       *agbp;          /* buffer for ag. freespace struct */
350         xfs_agf_t       *agf;           /* ag. freespace structure */
351
352         agbp = cur->bc_private.a.agbp;
353         agf = XFS_BUF_TO_AGF(agbp);
354         XFS_WANT_CORRUPTED_RETURN(
355                 level > 0 &&
356                 ptr != NULLAGBLOCK && ptr != 0 &&
357                 ptr < be32_to_cpu(agf->agf_length));
358         return 0;
359 }
360
361 /*
362  * Delete the btree cursor.
363  */
364 void
365 xfs_btree_del_cursor(
366         xfs_btree_cur_t *cur,           /* btree cursor */
367         int             error)          /* del because of error */
368 {
369         int             i;              /* btree level */
370
371         /*
372          * Clear the buffer pointers, and release the buffers.
373          * If we're doing this in the face of an error, we
374          * need to make sure to inspect all of the entries
375          * in the bc_bufs array for buffers to be unlocked.
376          * This is because some of the btree code works from
377          * level n down to 0, and if we get an error along
378          * the way we won't have initialized all the entries
379          * down to 0.
380          */
381         for (i = 0; i < cur->bc_nlevels; i++) {
382                 if (cur->bc_bufs[i])
383                         xfs_btree_setbuf(cur, i, NULL);
384                 else if (!error)
385                         break;
386         }
387         /*
388          * Can't free a bmap cursor without having dealt with the
389          * allocated indirect blocks' accounting.
390          */
391         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
392                cur->bc_private.b.allocated == 0);
393         /*
394          * Free the cursor.
395          */
396         kmem_zone_free(xfs_btree_cur_zone, cur);
397 }
398
399 /*
400  * Duplicate the btree cursor.
401  * Allocate a new one, copy the record, re-get the buffers.
402  */
403 int                                     /* error */
404 xfs_btree_dup_cursor(
405         xfs_btree_cur_t *cur,           /* input cursor */
406         xfs_btree_cur_t **ncur)         /* output cursor */
407 {
408         xfs_buf_t       *bp;            /* btree block's buffer pointer */
409         int             error;          /* error return value */
410         int             i;              /* level number of btree block */
411         xfs_mount_t     *mp;            /* mount structure for filesystem */
412         xfs_btree_cur_t *new;           /* new cursor value */
413         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
414
415         tp = cur->bc_tp;
416         mp = cur->bc_mp;
417         /*
418          * Allocate a new cursor like the old one.
419          */
420         new = xfs_btree_init_cursor(mp, tp, cur->bc_private.a.agbp,
421                 cur->bc_private.a.agno, cur->bc_btnum, cur->bc_private.b.ip,
422                 cur->bc_private.b.whichfork);
423         /*
424          * Copy the record currently in the cursor.
425          */
426         new->bc_rec = cur->bc_rec;
427         /*
428          * For each level current, re-get the buffer and copy the ptr value.
429          */
430         for (i = 0; i < new->bc_nlevels; i++) {
431                 new->bc_ptrs[i] = cur->bc_ptrs[i];
432                 new->bc_ra[i] = cur->bc_ra[i];
433                 if ((bp = cur->bc_bufs[i])) {
434                         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
435                                 XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
436                                 xfs_btree_del_cursor(new, error);
437                                 *ncur = NULL;
438                                 return error;
439                         }
440                         new->bc_bufs[i] = bp;
441                         ASSERT(bp);
442                         ASSERT(!XFS_BUF_GETERROR(bp));
443                 } else
444                         new->bc_bufs[i] = NULL;
445         }
446         /*
447          * For bmap btrees, copy the firstblock, flist, and flags values,
448          * since init cursor doesn't get them.
449          */
450         if (new->bc_btnum == XFS_BTNUM_BMAP) {
451                 new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
452                 new->bc_private.b.flist = cur->bc_private.b.flist;
453                 new->bc_private.b.flags = cur->bc_private.b.flags;
454         }
455         *ncur = new;
456         return 0;
457 }
458
459 /*
460  * Change the cursor to point to the first record at the given level.
461  * Other levels are unaffected.
462  */
463 int                                     /* success=1, failure=0 */
464 xfs_btree_firstrec(
465         xfs_btree_cur_t         *cur,   /* btree cursor */
466         int                     level)  /* level to change */
467 {
468         xfs_btree_block_t       *block; /* generic btree block pointer */
469         xfs_buf_t               *bp;    /* buffer containing block */
470
471         /*
472          * Get the block pointer for this level.
473          */
474         block = xfs_btree_get_block(cur, level, &bp);
475         xfs_btree_check_block(cur, block, level, bp);
476         /*
477          * It's empty, there is no such record.
478          */
479         if (!block->bb_h.bb_numrecs)
480                 return 0;
481         /*
482          * Set the ptr value to 1, that's the first record/key.
483          */
484         cur->bc_ptrs[level] = 1;
485         return 1;
486 }
487
488 /*
489  * Retrieve the block pointer from the cursor at the given level.
490  * This may be a bmap btree root or from a buffer.
491  */
492 STATIC xfs_btree_block_t *              /* generic btree block pointer */
493 xfs_btree_get_block(
494         xfs_btree_cur_t         *cur,   /* btree cursor */
495         int                     level,  /* level in btree */
496         xfs_buf_t               **bpp)  /* buffer containing the block */
497 {
498         xfs_btree_block_t       *block; /* return value */
499         xfs_buf_t               *bp;    /* return buffer */
500         xfs_ifork_t             *ifp;   /* inode fork pointer */
501         int                     whichfork; /* data or attr fork */
502
503         if (cur->bc_btnum == XFS_BTNUM_BMAP && level == cur->bc_nlevels - 1) {
504                 whichfork = cur->bc_private.b.whichfork;
505                 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, whichfork);
506                 block = (xfs_btree_block_t *)ifp->if_broot;
507                 bp = NULL;
508         } else {
509                 bp = cur->bc_bufs[level];
510                 block = XFS_BUF_TO_BLOCK(bp);
511         }
512         ASSERT(block != NULL);
513         *bpp = bp;
514         return block;
515 }
516
517 /*
518  * Get a buffer for the block, return it with no data read.
519  * Long-form addressing.
520  */
521 xfs_buf_t *                             /* buffer for fsbno */
522 xfs_btree_get_bufl(
523         xfs_mount_t     *mp,            /* file system mount point */
524         xfs_trans_t     *tp,            /* transaction pointer */
525         xfs_fsblock_t   fsbno,          /* file system block number */
526         uint            lock)           /* lock flags for get_buf */
527 {
528         xfs_buf_t       *bp;            /* buffer pointer (return value) */
529         xfs_daddr_t             d;              /* real disk block address */
530
531         ASSERT(fsbno != NULLFSBLOCK);
532         d = XFS_FSB_TO_DADDR(mp, fsbno);
533         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
534         ASSERT(bp);
535         ASSERT(!XFS_BUF_GETERROR(bp));
536         return bp;
537 }
538
539 /*
540  * Get a buffer for the block, return it with no data read.
541  * Short-form addressing.
542  */
543 xfs_buf_t *                             /* buffer for agno/agbno */
544 xfs_btree_get_bufs(
545         xfs_mount_t     *mp,            /* file system mount point */
546         xfs_trans_t     *tp,            /* transaction pointer */
547         xfs_agnumber_t  agno,           /* allocation group number */
548         xfs_agblock_t   agbno,          /* allocation group block number */
549         uint            lock)           /* lock flags for get_buf */
550 {
551         xfs_buf_t       *bp;            /* buffer pointer (return value) */
552         xfs_daddr_t             d;              /* real disk block address */
553
554         ASSERT(agno != NULLAGNUMBER);
555         ASSERT(agbno != NULLAGBLOCK);
556         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
557         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
558         ASSERT(bp);
559         ASSERT(!XFS_BUF_GETERROR(bp));
560         return bp;
561 }
562
563 /*
564  * Allocate a new btree cursor.
565  * The cursor is either for allocation (A) or bmap (B) or inodes (I).
566  */
567 xfs_btree_cur_t *                       /* new btree cursor */
568 xfs_btree_init_cursor(
569         xfs_mount_t     *mp,            /* file system mount point */
570         xfs_trans_t     *tp,            /* transaction pointer */
571         xfs_buf_t       *agbp,          /* (A only) buffer for agf structure */
572                                         /* (I only) buffer for agi structure */
573         xfs_agnumber_t  agno,           /* (AI only) allocation group number */
574         xfs_btnum_t     btnum,          /* btree identifier */
575         xfs_inode_t     *ip,            /* (B only) inode owning the btree */
576         int             whichfork)      /* (B only) data or attr fork */
577 {
578         xfs_agf_t       *agf;           /* (A) allocation group freespace */
579         xfs_agi_t       *agi;           /* (I) allocation group inodespace */
580         xfs_btree_cur_t *cur;           /* return value */
581         xfs_ifork_t     *ifp;           /* (I) inode fork pointer */
582         int             nlevels=0;      /* number of levels in the btree */
583
584         ASSERT(xfs_btree_cur_zone != NULL);
585         /*
586          * Allocate a new cursor.
587          */
588         cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
589         /*
590          * Deduce the number of btree levels from the arguments.
591          */
592         switch (btnum) {
593         case XFS_BTNUM_BNO:
594         case XFS_BTNUM_CNT:
595                 agf = XFS_BUF_TO_AGF(agbp);
596                 nlevels = be32_to_cpu(agf->agf_levels[btnum]);
597                 break;
598         case XFS_BTNUM_BMAP:
599                 ifp = XFS_IFORK_PTR(ip, whichfork);
600                 nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
601                 break;
602         case XFS_BTNUM_INO:
603                 agi = XFS_BUF_TO_AGI(agbp);
604                 nlevels = be32_to_cpu(agi->agi_level);
605                 break;
606         default:
607                 ASSERT(0);
608         }
609         /*
610          * Fill in the common fields.
611          */
612         cur->bc_tp = tp;
613         cur->bc_mp = mp;
614         cur->bc_nlevels = nlevels;
615         cur->bc_btnum = btnum;
616         cur->bc_blocklog = mp->m_sb.sb_blocklog;
617         /*
618          * Fill in private fields.
619          */
620         switch (btnum) {
621         case XFS_BTNUM_BNO:
622         case XFS_BTNUM_CNT:
623                 /*
624                  * Allocation btree fields.
625                  */
626                 cur->bc_private.a.agbp = agbp;
627                 cur->bc_private.a.agno = agno;
628                 break;
629         case XFS_BTNUM_BMAP:
630                 /*
631                  * Bmap btree fields.
632                  */
633                 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
634                 cur->bc_private.b.ip = ip;
635                 cur->bc_private.b.firstblock = NULLFSBLOCK;
636                 cur->bc_private.b.flist = NULL;
637                 cur->bc_private.b.allocated = 0;
638                 cur->bc_private.b.flags = 0;
639                 cur->bc_private.b.whichfork = whichfork;
640                 break;
641         case XFS_BTNUM_INO:
642                 /*
643                  * Inode allocation btree fields.
644                  */
645                 cur->bc_private.i.agbp = agbp;
646                 cur->bc_private.i.agno = agno;
647                 break;
648         default:
649                 ASSERT(0);
650         }
651         return cur;
652 }
653
654 /*
655  * Check for the cursor referring to the last block at the given level.
656  */
657 int                                     /* 1=is last block, 0=not last block */
658 xfs_btree_islastblock(
659         xfs_btree_cur_t         *cur,   /* btree cursor */
660         int                     level)  /* level to check */
661 {
662         xfs_btree_block_t       *block; /* generic btree block pointer */
663         xfs_buf_t               *bp;    /* buffer containing block */
664
665         block = xfs_btree_get_block(cur, level, &bp);
666         xfs_btree_check_block(cur, block, level, bp);
667         if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
668                 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
669         else
670                 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
671 }
672
673 /*
674  * Change the cursor to point to the last record in the current block
675  * at the given level.  Other levels are unaffected.
676  */
677 int                                     /* success=1, failure=0 */
678 xfs_btree_lastrec(
679         xfs_btree_cur_t         *cur,   /* btree cursor */
680         int                     level)  /* level to change */
681 {
682         xfs_btree_block_t       *block; /* generic btree block pointer */
683         xfs_buf_t               *bp;    /* buffer containing block */
684
685         /*
686          * Get the block pointer for this level.
687          */
688         block = xfs_btree_get_block(cur, level, &bp);
689         xfs_btree_check_block(cur, block, level, bp);
690         /*
691          * It's empty, there is no such record.
692          */
693         if (!block->bb_h.bb_numrecs)
694                 return 0;
695         /*
696          * Set the ptr value to numrecs, that's the last record/key.
697          */
698         cur->bc_ptrs[level] = be16_to_cpu(block->bb_h.bb_numrecs);
699         return 1;
700 }
701
702 /*
703  * Compute first and last byte offsets for the fields given.
704  * Interprets the offsets table, which contains struct field offsets.
705  */
706 void
707 xfs_btree_offsets(
708         __int64_t       fields,         /* bitmask of fields */
709         const short     *offsets,       /* table of field offsets */
710         int             nbits,          /* number of bits to inspect */
711         int             *first,         /* output: first byte offset */
712         int             *last)          /* output: last byte offset */
713 {
714         int             i;              /* current bit number */
715         __int64_t       imask;          /* mask for current bit number */
716
717         ASSERT(fields != 0);
718         /*
719          * Find the lowest bit, so the first byte offset.
720          */
721         for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
722                 if (imask & fields) {
723                         *first = offsets[i];
724                         break;
725                 }
726         }
727         /*
728          * Find the highest bit, so the last byte offset.
729          */
730         for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
731                 if (imask & fields) {
732                         *last = offsets[i + 1] - 1;
733                         break;
734                 }
735         }
736 }
737
738 /*
739  * Get a buffer for the block, return it read in.
740  * Long-form addressing.
741  */
742 int                                     /* error */
743 xfs_btree_read_bufl(
744         xfs_mount_t     *mp,            /* file system mount point */
745         xfs_trans_t     *tp,            /* transaction pointer */
746         xfs_fsblock_t   fsbno,          /* file system block number */
747         uint            lock,           /* lock flags for read_buf */
748         xfs_buf_t       **bpp,          /* buffer for fsbno */
749         int             refval)         /* ref count value for buffer */
750 {
751         xfs_buf_t       *bp;            /* return value */
752         xfs_daddr_t             d;              /* real disk block address */
753         int             error;
754
755         ASSERT(fsbno != NULLFSBLOCK);
756         d = XFS_FSB_TO_DADDR(mp, fsbno);
757         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
758                         mp->m_bsize, lock, &bp))) {
759                 return error;
760         }
761         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
762         if (bp != NULL) {
763                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
764         }
765         *bpp = bp;
766         return 0;
767 }
768
769 /*
770  * Get a buffer for the block, return it read in.
771  * Short-form addressing.
772  */
773 int                                     /* error */
774 xfs_btree_read_bufs(
775         xfs_mount_t     *mp,            /* file system mount point */
776         xfs_trans_t     *tp,            /* transaction pointer */
777         xfs_agnumber_t  agno,           /* allocation group number */
778         xfs_agblock_t   agbno,          /* allocation group block number */
779         uint            lock,           /* lock flags for read_buf */
780         xfs_buf_t       **bpp,          /* buffer for agno/agbno */
781         int             refval)         /* ref count value for buffer */
782 {
783         xfs_buf_t       *bp;            /* return value */
784         xfs_daddr_t     d;              /* real disk block address */
785         int             error;
786
787         ASSERT(agno != NULLAGNUMBER);
788         ASSERT(agbno != NULLAGBLOCK);
789         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
790         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
791                                         mp->m_bsize, lock, &bp))) {
792                 return error;
793         }
794         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
795         if (bp != NULL) {
796                 switch (refval) {
797                 case XFS_ALLOC_BTREE_REF:
798                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
799                         break;
800                 case XFS_INO_BTREE_REF:
801                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
802                         break;
803                 }
804         }
805         *bpp = bp;
806         return 0;
807 }
808
809 /*
810  * Read-ahead the block, don't wait for it, don't return a buffer.
811  * Long-form addressing.
812  */
813 /* ARGSUSED */
814 void
815 xfs_btree_reada_bufl(
816         xfs_mount_t     *mp,            /* file system mount point */
817         xfs_fsblock_t   fsbno,          /* file system block number */
818         xfs_extlen_t    count)          /* count of filesystem blocks */
819 {
820         xfs_daddr_t             d;
821
822         ASSERT(fsbno != NULLFSBLOCK);
823         d = XFS_FSB_TO_DADDR(mp, fsbno);
824         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
825 }
826
827 /*
828  * Read-ahead the block, don't wait for it, don't return a buffer.
829  * Short-form addressing.
830  */
831 /* ARGSUSED */
832 void
833 xfs_btree_reada_bufs(
834         xfs_mount_t     *mp,            /* file system mount point */
835         xfs_agnumber_t  agno,           /* allocation group number */
836         xfs_agblock_t   agbno,          /* allocation group block number */
837         xfs_extlen_t    count)          /* count of filesystem blocks */
838 {
839         xfs_daddr_t             d;
840
841         ASSERT(agno != NULLAGNUMBER);
842         ASSERT(agbno != NULLAGBLOCK);
843         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
844         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
845 }
846
847 /*
848  * Read-ahead btree blocks, at the given level.
849  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
850  */
851 int
852 xfs_btree_readahead_core(
853         xfs_btree_cur_t         *cur,           /* btree cursor */
854         int                     lev,            /* level in btree */
855         int                     lr)             /* left/right bits */
856 {
857         xfs_alloc_block_t       *a;
858         xfs_bmbt_block_t        *b;
859         xfs_inobt_block_t       *i;
860         int                     rval = 0;
861
862         ASSERT(cur->bc_bufs[lev] != NULL);
863         cur->bc_ra[lev] |= lr;
864         switch (cur->bc_btnum) {
865         case XFS_BTNUM_BNO:
866         case XFS_BTNUM_CNT:
867                 a = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]);
868                 if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(a->bb_leftsib) != NULLAGBLOCK) {
869                         xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
870                                 be32_to_cpu(a->bb_leftsib), 1);
871                         rval++;
872                 }
873                 if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(a->bb_rightsib) != NULLAGBLOCK) {
874                         xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
875                                 be32_to_cpu(a->bb_rightsib), 1);
876                         rval++;
877                 }
878                 break;
879         case XFS_BTNUM_BMAP:
880                 b = XFS_BUF_TO_BMBT_BLOCK(cur->bc_bufs[lev]);
881                 if ((lr & XFS_BTCUR_LEFTRA) && be64_to_cpu(b->bb_leftsib) != NULLDFSBNO) {
882                         xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_leftsib), 1);
883                         rval++;
884                 }
885                 if ((lr & XFS_BTCUR_RIGHTRA) && be64_to_cpu(b->bb_rightsib) != NULLDFSBNO) {
886                         xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_rightsib), 1);
887                         rval++;
888                 }
889                 break;
890         case XFS_BTNUM_INO:
891                 i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]);
892                 if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) {
893                         xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno,
894                                 be32_to_cpu(i->bb_leftsib), 1);
895                         rval++;
896                 }
897                 if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) {
898                         xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno,
899                                 be32_to_cpu(i->bb_rightsib), 1);
900                         rval++;
901                 }
902                 break;
903         default:
904                 ASSERT(0);
905         }
906         return rval;
907 }
908
909 /*
910  * Set the buffer for level "lev" in the cursor to bp, releasing
911  * any previous buffer.
912  */
913 void
914 xfs_btree_setbuf(
915         xfs_btree_cur_t         *cur,   /* btree cursor */
916         int                     lev,    /* level in btree */
917         xfs_buf_t               *bp)    /* new buffer to set */
918 {
919         xfs_btree_block_t       *b;     /* btree block */
920         xfs_buf_t               *obp;   /* old buffer pointer */
921
922         obp = cur->bc_bufs[lev];
923         if (obp)
924                 xfs_trans_brelse(cur->bc_tp, obp);
925         cur->bc_bufs[lev] = bp;
926         cur->bc_ra[lev] = 0;
927         if (!bp)
928                 return;
929         b = XFS_BUF_TO_BLOCK(bp);
930         if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) {
931                 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
932                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
933                 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
934                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
935         } else {
936                 if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
937                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
938                 if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
939                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
940         }
941 }