efb906f8a9c92de0680617fd00a25c8828f6426b
[pandora-kernel.git] / fs / xfs / xfs_da_btree.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_mount.h"
29 #include "xfs_da_btree.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_dir2_sf.h"
32 #include "xfs_dinode.h"
33 #include "xfs_inode.h"
34 #include "xfs_inode_item.h"
35 #include "xfs_alloc.h"
36 #include "xfs_bmap.h"
37 #include "xfs_attr.h"
38 #include "xfs_attr_leaf.h"
39 #include "xfs_dir2_data.h"
40 #include "xfs_dir2_leaf.h"
41 #include "xfs_dir2_block.h"
42 #include "xfs_dir2_node.h"
43 #include "xfs_error.h"
44 #include "xfs_trace.h"
45
46 /*
47  * xfs_da_btree.c
48  *
49  * Routines to implement directories as Btrees of hashed names.
50  */
51
52 /*========================================================================
53  * Function prototypes for the kernel.
54  *========================================================================*/
55
56 /*
57  * Routines used for growing the Btree.
58  */
59 STATIC int xfs_da_root_split(xfs_da_state_t *state,
60                                             xfs_da_state_blk_t *existing_root,
61                                             xfs_da_state_blk_t *new_child);
62 STATIC int xfs_da_node_split(xfs_da_state_t *state,
63                                             xfs_da_state_blk_t *existing_blk,
64                                             xfs_da_state_blk_t *split_blk,
65                                             xfs_da_state_blk_t *blk_to_add,
66                                             int treelevel,
67                                             int *result);
68 STATIC void xfs_da_node_rebalance(xfs_da_state_t *state,
69                                          xfs_da_state_blk_t *node_blk_1,
70                                          xfs_da_state_blk_t *node_blk_2);
71 STATIC void xfs_da_node_add(xfs_da_state_t *state,
72                                    xfs_da_state_blk_t *old_node_blk,
73                                    xfs_da_state_blk_t *new_node_blk);
74
75 /*
76  * Routines used for shrinking the Btree.
77  */
78 STATIC int xfs_da_root_join(xfs_da_state_t *state,
79                                            xfs_da_state_blk_t *root_blk);
80 STATIC int xfs_da_node_toosmall(xfs_da_state_t *state, int *retval);
81 STATIC void xfs_da_node_remove(xfs_da_state_t *state,
82                                               xfs_da_state_blk_t *drop_blk);
83 STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
84                                          xfs_da_state_blk_t *src_node_blk,
85                                          xfs_da_state_blk_t *dst_node_blk);
86
87 /*
88  * Utility routines.
89  */
90 STATIC uint     xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
91 STATIC int      xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
92 STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra);
93 STATIC int      xfs_da_blk_unlink(xfs_da_state_t *state,
94                                   xfs_da_state_blk_t *drop_blk,
95                                   xfs_da_state_blk_t *save_blk);
96 STATIC void     xfs_da_state_kill_altpath(xfs_da_state_t *state);
97
98 /*========================================================================
99  * Routines used for growing the Btree.
100  *========================================================================*/
101
102 /*
103  * Create the initial contents of an intermediate node.
104  */
105 int
106 xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
107                                  xfs_dabuf_t **bpp, int whichfork)
108 {
109         xfs_da_intnode_t *node;
110         xfs_dabuf_t *bp;
111         int error;
112         xfs_trans_t *tp;
113
114         tp = args->trans;
115         error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
116         if (error)
117                 return(error);
118         ASSERT(bp != NULL);
119         node = bp->data;
120         node->hdr.info.forw = 0;
121         node->hdr.info.back = 0;
122         node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC);
123         node->hdr.info.pad = 0;
124         node->hdr.count = 0;
125         node->hdr.level = cpu_to_be16(level);
126
127         xfs_da_log_buf(tp, bp,
128                 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
129
130         *bpp = bp;
131         return(0);
132 }
133
134 /*
135  * Split a leaf node, rebalance, then possibly split
136  * intermediate nodes, rebalance, etc.
137  */
int							/* error */
xfs_da_split(xfs_da_state_t *state)
{
	xfs_da_state_blk_t *oldblk, *newblk, *addblk;
	xfs_da_intnode_t *node;
	xfs_dabuf_t *bp;
	int max, action, error, i;

	/*
	 * Walk back up the tree splitting/inserting/adjusting as necessary.
	 * If we need to insert and there isn't room, split the node, then
	 * decide which fragment to insert the new block from below into.
	 * Note that we may split the root this way, but we need more fixup.
	 */
	max = state->path.active - 1;
	ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
	ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
	       state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);

	/*
	 * addblk tracks the block (if any) that still needs to be linked
	 * into the level above; a non-NULL value keeps the loop walking
	 * upwards.  The initial value is a dummy just to enter the loop.
	 */
	addblk = &state->path.blk[max];		/* initial dummy value */
	for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
		oldblk = &state->path.blk[i];
		newblk = &state->altpath.blk[i];

		/*
		 * If a leaf node then
		 *     Allocate a new leaf node, then rebalance across them.
		 * else if an intermediate node then
		 *     We split on the last layer, must we split the node?
		 */
		switch (oldblk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
			error = xfs_attr_leaf_split(state, oldblk, newblk);
			if ((error != 0) && (error != ENOSPC)) {
				return(error);	/* GROT: attr is inconsistent */
			}
			if (!error) {
				addblk = newblk;
				break;
			}
			/*
			 * Entry wouldn't fit, split the leaf again.  The
			 * second split's result is remembered in
			 * state->extrablk so the parent can add both new
			 * blocks (a "double split").
			 */
			state->extravalid = 1;
			if (state->inleaf) {
				state->extraafter = 0;	/* before newblk */
				error = xfs_attr_leaf_split(state, oldblk,
							    &state->extrablk);
			} else {
				state->extraafter = 1;	/* after newblk */
				error = xfs_attr_leaf_split(state, newblk,
							    &state->extrablk);
			}
			if (error)
				return(error);	/* GROT: attr inconsistent */
			addblk = newblk;
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			error = xfs_dir2_leafn_split(state, oldblk, newblk);
			if (error)
				return error;
			addblk = newblk;
			break;
		case XFS_DA_NODE_MAGIC:
			/*
			 * Insert the child produced below (addblk) into this
			 * node, possibly splitting the node itself.  The
			 * child's entry is now in a parent, so we are done
			 * with our reference to its buffer.
			 */
			error = xfs_da_node_split(state, oldblk, newblk, addblk,
							 max - i, &action);
			xfs_da_buf_done(addblk->bp);
			addblk->bp = NULL;
			if (error)
				return(error);	/* GROT: dir is inconsistent */
			/*
			 * Record the newly split block for the next time thru?
			 */
			if (action)
				addblk = newblk;
			else
				addblk = NULL;
			break;
		}

		/*
		 * Update the btree to show the new hashval for this child.
		 */
		xfs_da_fixhashpath(state, &state->path);
		/*
		 * If we won't need this block again, it's getting dropped
		 * from the active path by the loop control, so we need
		 * to mark it done now.
		 */
		if (i > 0 || !addblk)
			xfs_da_buf_done(oldblk->bp);
	}
	if (!addblk)
		return(0);

	/*
	 * Split the root node.  Reaching here means the split propagated
	 * all the way up: the old root has been split and the new half
	 * (addblk) has no parent yet.
	 */
	ASSERT(state->path.active == 0);
	oldblk = &state->path.blk[0];
	error = xfs_da_root_split(state, oldblk, addblk);
	if (error) {
		xfs_da_buf_done(oldblk->bp);
		xfs_da_buf_done(addblk->bp);
		addblk->bp = NULL;
		return(error);	/* GROT: dir is inconsistent */
	}

	/*
	 * Update pointers to the node which used to be block 0 and
	 * just got bumped because of the addition of a new root node.
	 * There might be three blocks involved if a double split occurred,
	 * and the original block 0 could be at any position in the list.
	 */

	/*
	 * Fix the sibling that points back at the relocated old root:
	 * it is either addblk itself or the extra block from a double
	 * split.  Its "back" pointer must now carry the new blkno.
	 */
	node = oldblk->bp->data;
	if (node->hdr.info.forw) {
		if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) {
			bp = addblk->bp;
		} else {
			ASSERT(state->extravalid);
			bp = state->extrablk.bp;
		}
		node = bp->data;
		node->hdr.info.back = cpu_to_be32(oldblk->blkno);
		xfs_da_log_buf(state->args->trans, bp,
		    XFS_DA_LOGRANGE(node, &node->hdr.info,
		    sizeof(node->hdr.info)));
	}
	/* Same fixup for the sibling on the other side ("forw" pointer). */
	node = oldblk->bp->data;
	if (node->hdr.info.back) {
		if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) {
			bp = addblk->bp;
		} else {
			ASSERT(state->extravalid);
			bp = state->extrablk.bp;
		}
		node = bp->data;
		node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
		xfs_da_log_buf(state->args->trans, bp,
		    XFS_DA_LOGRANGE(node, &node->hdr.info,
		    sizeof(node->hdr.info)));
	}
	xfs_da_buf_done(oldblk->bp);
	xfs_da_buf_done(addblk->bp);
	addblk->bp = NULL;
	return(0);
}
286
287 /*
288  * Split the root.  We have to create a new root and point to the two
289  * parts (the split old root) that we just created.  Copy block zero to
290  * the EOF, extending the inode in process.
291  */
STATIC int						/* error */
xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
				 xfs_da_state_blk_t *blk2)
{
	xfs_da_intnode_t *node, *oldroot;
	xfs_da_args_t *args;
	xfs_dablk_t blkno;
	xfs_dabuf_t *bp;
	int error, size;
	xfs_inode_t *dp;
	xfs_trans_t *tp;
	xfs_mount_t *mp;
	xfs_dir2_leaf_t *leaf;

	/*
	 * Copy the existing (incorrect) block from the root node position
	 * to a free space somewhere.
	 */
	args = state->args;
	ASSERT(args != NULL);
	error = xfs_da_grow_inode(args, &blkno);
	if (error)
		return(error);
	dp = args->dp;
	tp = args->trans;
	mp = state->mp;
	error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
	if (error)
		return(error);
	ASSERT(bp != NULL);
	node = bp->data;
	oldroot = blk1->bp->data;
	/*
	 * Compute how many bytes of the old root are in use so only the
	 * live portion is copied.  Only da nodes and dir2 leaf-n blocks
	 * are handled; anything else trips the ASSERT.
	 * NOTE(review): attr-fork leaf roots presumably never reach this
	 * point (converted to node form earlier) -- confirm against the
	 * attr code paths.
	 */
	if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) {
		size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
			     (char *)oldroot);
	} else {
		ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
		leaf = (xfs_dir2_leaf_t *)oldroot;
		size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] -
			     (char *)leaf);
	}
	memcpy(node, oldroot, size);
	xfs_da_log_buf(tp, bp, 0, size - 1);
	/* blk1 now refers to the relocated copy of the old root. */
	xfs_da_buf_done(blk1->bp);
	blk1->bp = bp;
	blk1->blkno = blkno;

	/*
	 * Set up the new root node.  For the data fork the root lives at
	 * m_dirleafblk; for the attr fork it lives at block 0.
	 */
	error = xfs_da_node_create(args,
		(args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0,
		be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork);
	if (error)
		return(error);
	node = bp->data;
	/* The new root has exactly two entries: the two split halves. */
	node->btree[0].hashval = cpu_to_be32(blk1->hashval);
	node->btree[0].before = cpu_to_be32(blk1->blkno);
	node->btree[1].hashval = cpu_to_be32(blk2->hashval);
	node->btree[1].before = cpu_to_be32(blk2->blkno);
	node->hdr.count = cpu_to_be16(2);

#ifdef DEBUG
	/*
	 * NOTE(review): oldroot is dereferenced here after
	 * xfs_da_buf_done(blk1->bp) released that dabuf above -- verify
	 * the underlying buffer data remains valid for the transaction.
	 */
	if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) {
		ASSERT(blk1->blkno >= mp->m_dirleafblk &&
		       blk1->blkno < mp->m_dirfreeblk);
		ASSERT(blk2->blkno >= mp->m_dirleafblk &&
		       blk2->blkno < mp->m_dirfreeblk);
	}
#endif

	/* Header is already logged by xfs_da_node_create */
	xfs_da_log_buf(tp, bp,
		XFS_DA_LOGRANGE(node, node->btree,
			sizeof(xfs_da_node_entry_t) * 2));
	xfs_da_buf_done(bp);

	return(0);
}
371
372 /*
373  * Split the node, rebalance, then add the new entry.
374  */
STATIC int						/* error */
xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
				 xfs_da_state_blk_t *newblk,
				 xfs_da_state_blk_t *addblk,
				 int treelevel, int *result)
{
	xfs_da_intnode_t *node;
	xfs_dablk_t blkno;
	int newcount, error;
	int useextra;

	node = oldblk->bp->data;
	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);

	/*
	 * With V2 dirs the extra block is data or freespace.
	 * So the parent only has to absorb a second entry (the
	 * double-split extra block) for the attr fork.
	 */
	useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
	newcount = 1 + useextra;
	/*
	 * Do we have to split the node?
	 */
	if ((be16_to_cpu(node->hdr.count) + newcount) > state->node_ents) {
		/*
		 * Allocate a new node, add to the doubly linked chain of
		 * nodes, then move some of our excess entries into it.
		 */
		error = xfs_da_grow_inode(state->args, &blkno);
		if (error)
			return(error);	/* GROT: dir is inconsistent */

		error = xfs_da_node_create(state->args, blkno, treelevel,
					   &newblk->bp, state->args->whichfork);
		if (error)
			return(error);	/* GROT: dir is inconsistent */
		newblk->blkno = blkno;
		newblk->magic = XFS_DA_NODE_MAGIC;
		xfs_da_node_rebalance(state, oldblk, newblk);
		error = xfs_da_blk_link(state, oldblk, newblk);
		if (error)
			return(error);
		/* Tell the caller a new sibling needs linking above us. */
		*result = 1;
	} else {
		*result = 0;
	}

	/*
	 * Insert the new entry(s) into the correct block
	 * (updating last hashval in the process).
	 *
	 * xfs_da_node_add() inserts BEFORE the given index,
	 * and as a result of using node_lookup_int() we always
	 * point to a valid entry (not after one), but a split
	 * operation always results in a new block whose hashvals
	 * FOLLOW the current block.
	 *
	 * If we had double-split op below us, then add the extra block too.
	 */
	node = oldblk->bp->data;
	/*
	 * Rebalance may have made oldblk->index exceed oldblk's new
	 * count; that signals the insertion belongs in newblk instead.
	 */
	if (oldblk->index <= be16_to_cpu(node->hdr.count)) {
		oldblk->index++;
		xfs_da_node_add(state, oldblk, addblk);
		if (useextra) {
			if (state->extraafter)
				oldblk->index++;
			xfs_da_node_add(state, oldblk, &state->extrablk);
			state->extravalid = 0;
		}
	} else {
		newblk->index++;
		xfs_da_node_add(state, newblk, addblk);
		if (useextra) {
			if (state->extraafter)
				newblk->index++;
			xfs_da_node_add(state, newblk, &state->extrablk);
			state->extravalid = 0;
		}
	}

	return(0);
}
456
457 /*
458  * Balance the btree elements between two intermediate nodes,
459  * usually one full and one empty.
460  *
461  * NOTE: if blk2 is empty, then it will get the upper half of blk1.
462  */
STATIC void
xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
				     xfs_da_state_blk_t *blk2)
{
	xfs_da_intnode_t *node1, *node2, *tmpnode;
	xfs_da_node_entry_t *btree_s, *btree_d;
	int count, tmp;
	xfs_trans_t *tp;

	node1 = blk1->bp->data;
	node2 = blk2->bp->data;
	/*
	 * Figure out how many entries need to move, and in which direction.
	 * Swap the nodes around if that makes it simpler.
	 * Note only the local node1/node2 pointers are swapped; blk1/blk2
	 * themselves are untouched (see the re-read at the bottom).
	 */
	if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
	    ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) ||
	     (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) <
	      be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) {
		tmpnode = node1;
		node1 = node2;
		node2 = tmpnode;
	}
	ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC);
	ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC);
	/* Positive count: move node1 -> node2; negative: node2 -> node1. */
	count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2;
	if (count == 0)
		return;
	tp = state->args->trans;
	/*
	 * Two cases: high-to-low and low-to-high.
	 */
	if (count > 0) {
		/*
		 * Move elements in node2 up to make a hole.
		 * memmove is required: source and destination overlap.
		 */
		if ((tmp = be16_to_cpu(node2->hdr.count)) > 0) {
			tmp *= (uint)sizeof(xfs_da_node_entry_t);
			btree_s = &node2->btree[0];
			btree_d = &node2->btree[count];
			memmove(btree_d, btree_s, tmp);
		}

		/*
		 * Move the req'd B-tree elements from high in node1 to
		 * low in node2.
		 */
		be16_add_cpu(&node2->hdr.count, count);
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node1->btree[be16_to_cpu(node1->hdr.count) - count];
		btree_d = &node2->btree[0];
		memcpy(btree_d, btree_s, tmp);
		be16_add_cpu(&node1->hdr.count, -count);
	} else {
		/*
		 * Move the req'd B-tree elements from low in node2 to
		 * high in node1.
		 */
		count = -count;
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node2->btree[0];
		btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)];
		memcpy(btree_d, btree_s, tmp);
		be16_add_cpu(&node1->hdr.count, count);
		/* Log the range of node1 that received new entries. */
		xfs_da_log_buf(tp, blk1->bp,
			XFS_DA_LOGRANGE(node1, btree_d, tmp));

		/*
		 * Move elements in node2 down to fill the hole.
		 */
		tmp  = be16_to_cpu(node2->hdr.count) - count;
		tmp *= (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node2->btree[count];
		btree_d = &node2->btree[0];
		memmove(btree_d, btree_s, tmp);
		be16_add_cpu(&node2->hdr.count, -count);
	}

	/*
	 * Log header of node 1 and all current bits of node 2.
	 */
	xfs_da_log_buf(tp, blk1->bp,
		XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
	xfs_da_log_buf(tp, blk2->bp,
		XFS_DA_LOGRANGE(node2, &node2->hdr,
			sizeof(node2->hdr) +
			sizeof(node2->btree[0]) * be16_to_cpu(node2->hdr.count)));

	/*
	 * Record the last hashval from each block for upward propagation.
	 * (note: don't use the swapped node pointers)
	 */
	node1 = blk1->bp->data;
	node2 = blk2->bp->data;
	blk1->hashval = be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval);
	blk2->hashval = be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval);

	/*
	 * Adjust the expected index for insertion.  If blk1's insertion
	 * point migrated into blk2, translate it and invalidate blk1's
	 * index (the "+ 1" makes it out of range by construction).
	 */
	if (blk1->index >= be16_to_cpu(node1->hdr.count)) {
		blk2->index = blk1->index - be16_to_cpu(node1->hdr.count);
		blk1->index = be16_to_cpu(node1->hdr.count) + 1;	/* make it invalid */
	}
}
568
569 /*
570  * Add a new entry to an intermediate node.
571  */
572 STATIC void
573 xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
574                                xfs_da_state_blk_t *newblk)
575 {
576         xfs_da_intnode_t *node;
577         xfs_da_node_entry_t *btree;
578         int tmp;
579
580         node = oldblk->bp->data;
581         ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
582         ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
583         ASSERT(newblk->blkno != 0);
584         if (state->args->whichfork == XFS_DATA_FORK)
585                 ASSERT(newblk->blkno >= state->mp->m_dirleafblk &&
586                        newblk->blkno < state->mp->m_dirfreeblk);
587
588         /*
589          * We may need to make some room before we insert the new node.
590          */
591         tmp = 0;
592         btree = &node->btree[ oldblk->index ];
593         if (oldblk->index < be16_to_cpu(node->hdr.count)) {
594                 tmp = (be16_to_cpu(node->hdr.count) - oldblk->index) * (uint)sizeof(*btree);
595                 memmove(btree + 1, btree, tmp);
596         }
597         btree->hashval = cpu_to_be32(newblk->hashval);
598         btree->before = cpu_to_be32(newblk->blkno);
599         xfs_da_log_buf(state->args->trans, oldblk->bp,
600                 XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
601         be16_add_cpu(&node->hdr.count, 1);
602         xfs_da_log_buf(state->args->trans, oldblk->bp,
603                 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
604
605         /*
606          * Copy the last hash value from the oldblk to propagate upwards.
607          */
608         oldblk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1 ].hashval);
609 }
610
611 /*========================================================================
612  * Routines used for shrinking the Btree.
613  *========================================================================*/
614
615 /*
616  * Deallocate an empty leaf node, remove it from its parent,
617  * possibly deallocating that block, etc...
618  */
int
xfs_da_join(xfs_da_state_t *state)
{
	xfs_da_state_blk_t *drop_blk, *save_blk;
	int action, error;

	action = 0;
	/*
	 * Start at the bottom of the path; drop_blk/save_blk walk the
	 * path and altpath arrays backwards in lockstep with
	 * path.active in the loop below.
	 */
	drop_blk = &state->path.blk[ state->path.active-1 ];
	save_blk = &state->altpath.blk[ state->path.active-1 ];
	ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
	ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
	       drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);

	/*
	 * Walk back up the tree joining/deallocating as necessary.
	 * When we stop dropping blocks, break out.
	 */
	for (  ; state->path.active >= 2; drop_blk--, save_blk--,
		 state->path.active--) {
		/*
		 * See if we can combine the block with a neighbor.
		 *   (action == 0) => no options, just leave
		 *   (action == 1) => coalesce, then unlink
		 *   (action == 2) => block empty, unlink it
		 */
		switch (drop_blk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
			error = xfs_attr_leaf_toosmall(state, &action);
			if (error)
				return(error);
			if (action == 0)
				return(0);
			xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			error = xfs_dir2_leafn_toosmall(state, &action);
			if (error)
				return error;
			if (action == 0)
				return 0;
			xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
			break;
		case XFS_DA_NODE_MAGIC:
			/*
			 * Remove the offending node, fixup hashvals,
			 * check for a toosmall neighbor.
			 */
			xfs_da_node_remove(state, drop_blk);
			xfs_da_fixhashpath(state, &state->path);
			error = xfs_da_node_toosmall(state, &action);
			if (error)
				return(error);
			if (action == 0)
				return 0;
			xfs_da_node_unbalance(state, drop_blk, save_blk);
			break;
		}
		/*
		 * The surviving block now carries everything; fix its
		 * hash path, unlink the emptied block from the sibling
		 * chain, and give its disk space back to the inode.
		 * xfs_da_shrink_inode consumes drop_blk->bp.
		 */
		xfs_da_fixhashpath(state, &state->altpath);
		error = xfs_da_blk_unlink(state, drop_blk, save_blk);
		xfs_da_state_kill_altpath(state);
		if (error)
			return(error);
		error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
							 drop_blk->bp);
		drop_blk->bp = NULL;
		if (error)
			return(error);
	}
	/*
	 * We joined all the way to the top.  If it turns out that
	 * we only have one entry in the root, make the child block
	 * the new root.
	 */
	xfs_da_node_remove(state, drop_blk);
	xfs_da_fixhashpath(state, &state->path);
	error = xfs_da_root_join(state, &state->path.blk[0]);
	return(error);
}
697
698 /*
699  * We have only one entry in the root.  Copy the only remaining child of
700  * the old root to block 0 as the new root node.
701  */
STATIC int
xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
{
	xfs_da_intnode_t *oldroot;
	/* REFERENCED */
	xfs_da_blkinfo_t *blkinfo;
	xfs_da_args_t *args;
	xfs_dablk_t child;
	xfs_dabuf_t *bp;
	int error;

	args = state->args;
	ASSERT(args != NULL);
	ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
	oldroot = root_blk->bp->data;
	ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC);
	/* A root never has siblings. */
	ASSERT(!oldroot->hdr.info.forw);
	ASSERT(!oldroot->hdr.info.back);

	/*
	 * If the root has more than one child, then don't do anything.
	 */
	if (be16_to_cpu(oldroot->hdr.count) > 1)
		return(0);

	/*
	 * Read in the (only) child block, then copy those bytes into
	 * the root block's buffer and free the original child block.
	 */
	child = be32_to_cpu(oldroot->btree[0].before);
	ASSERT(child != 0);
	error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp,
					     args->whichfork);
	if (error)
		return(error);
	ASSERT(bp != NULL);
	blkinfo = bp->data;
	/*
	 * Sanity check: at level 1 the child must be a leaf (dir2 or
	 * attr flavor); above level 1 it must be another node.
	 */
	if (be16_to_cpu(oldroot->hdr.level) == 1) {
		ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIR2_LEAFN_MAGIC ||
		       be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC);
	} else {
		ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC);
	}
	ASSERT(!blkinfo->forw);
	ASSERT(!blkinfo->back);
	/*
	 * Overwrite the root block with the child's contents (the whole
	 * da block is logged), then give the child's space back.
	 * xfs_da_shrink_inode consumes bp.
	 */
	memcpy(root_blk->bp->data, bp->data, state->blocksize);
	xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
	error = xfs_da_shrink_inode(args, child, bp);
	return(error);
}
752
/*
 * Check a node block and its neighbors to see if the block should be
 * collapsed into one or the other neighbor.  Always keep the block
 * with the smaller block number.
 * If the current block is over 50% full, don't try to join it, return 0.
 * If the block is empty, fill in the state structure and return 2.
 * If it can be collapsed, fill in the state structure and return 1.
 * If nothing can be done, return 0.
 *
 * On success *action holds the verdict (0/1/2 as above) and, for the
 * non-zero cases, state->path points at the block to drop while
 * state->altpath points at the block to keep.
 */
STATIC int
xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
{
	xfs_da_intnode_t *node;
	xfs_da_state_blk_t *blk;
	xfs_da_blkinfo_t *info;
	int count, forward, error, retval, i;
	xfs_dablk_t blkno;
	xfs_dabuf_t *bp;

	/*
	 * Check for the degenerate case of the block being over 50% full.
	 * If so, it's not worth even looking to see if we might be able
	 * to coalesce with a sibling.
	 */
	blk = &state->path.blk[ state->path.active-1 ];
	info = blk->bp->data;
	ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC);
	node = (xfs_da_intnode_t *)info;
	count = be16_to_cpu(node->hdr.count);
	if (count > (state->node_ents >> 1)) {
		*action = 0;	/* blk over 50%, don't try to join */
		return(0);
	}

	/*
	 * Check for the degenerate case of the block being empty.
	 * If the block is empty, we'll simply delete it, no need to
	 * coalesce it with a sibling block.  We choose (arbitrarily)
	 * to merge with the forward block unless it is NULL.
	 */
	if (count == 0) {
		/*
		 * Make altpath point to the block we want to keep and
		 * path point to the block we want to drop (this one).
		 */
		forward = (info->forw != 0);
		memcpy(&state->altpath, &state->path, sizeof(state->path));
		error = xfs_da_path_shift(state, &state->altpath, forward,
						 0, &retval);
		if (error)
			return(error);
		/* retval != 0 means no sibling in that direction: nothing to do */
		if (retval) {
			*action = 0;
		} else {
			*action = 2;
		}
		return(0);
	}

	/*
	 * Examine each sibling block to see if we can coalesce with
	 * at least 25% free space to spare.  We need to figure out
	 * whether to merge with the forward or the backward block.
	 * We prefer coalescing with the lower numbered sibling so as
	 * to shrink a directory over time.
	 */
	/* start with smaller blk num */
	forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back));
	for (i = 0; i < 2; forward = !forward, i++) {
		if (forward)
			blkno = be32_to_cpu(info->forw);
		else
			blkno = be32_to_cpu(info->back);
		if (blkno == 0)
			continue;
		error = xfs_da_read_buf(state->args->trans, state->args->dp,
					blkno, -1, &bp, state->args->whichfork);
		if (error)
			return(error);
		ASSERT(bp != NULL);

		/*
		 * count = max entries - 25% reserve - our entries
		 *         - sibling entries; >= 0 means the merged block
		 * would still have at least 25% free space.
		 * Repoint node at the current block first to recount it.
		 */
		node = (xfs_da_intnode_t *)info;
		count  = state->node_ents;
		count -= state->node_ents >> 2;
		count -= be16_to_cpu(node->hdr.count);
		node = bp->data;
		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
		count -= be16_to_cpu(node->hdr.count);
		xfs_da_brelse(state->args->trans, bp);
		if (count >= 0)
			break;	/* fits with at least 25% to spare */
	}
	if (i >= 2) {
		/* neither sibling can absorb us */
		*action = 0;
		return(0);
	}

	/*
	 * Make altpath point to the block we want to keep (the lower
	 * numbered block) and path point to the block we want to drop.
	 */
	memcpy(&state->altpath, &state->path, sizeof(state->path));
	if (blkno < blk->blkno) {
		error = xfs_da_path_shift(state, &state->altpath, forward,
						 0, &retval);
		if (error) {
			return(error);
		}
		if (retval) {
			*action = 0;
			return(0);
		}
	} else {
		error = xfs_da_path_shift(state, &state->path, forward,
						 0, &retval);
		if (error) {
			return(error);
		}
		if (retval) {
			*action = 0;
			return(0);
		}
	}
	*action = 1;
	return(0);
}
879
/*
 * Walk back up the tree adjusting hash values as necessary,
 * when we stop making changes, return.
 *
 * The bottom block of @path supplies the new "last hashval"; each
 * ancestor's entry for its child is updated (and logged) until an
 * ancestor already holds the right value.
 */
void
xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
{
	xfs_da_state_blk_t *blk;
	xfs_da_intnode_t *node;
	xfs_da_node_entry_t *btree;
	xfs_dahash_t lasthash=0;
	int level, count;

	/* start at the bottom of the path */
	level = path->active-1;
	blk = &path->blk[ level ];
	switch (blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
		lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DIR2_LEAFN_MAGIC:
		lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DA_NODE_MAGIC:
		lasthash = xfs_da_node_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	}
	/* propagate upward until an ancestor already matches */
	for (blk--, level--; level >= 0; blk--, level--) {
		node = blk->bp->data;
		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
		btree = &node->btree[ blk->index ];
		if (be32_to_cpu(btree->hashval) == lasthash)
			break;		/* no change needed; ancestors are fine too */
		blk->hashval = lasthash;
		btree->hashval = cpu_to_be32(lasthash);
		xfs_da_log_buf(state->args->trans, blk->bp,
				  XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));

		/* this node's own last hashval feeds the next level up */
		lasthash = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
	}
}
926
/*
 * Remove an entry from an intermediate node.
 *
 * Compacts the entry array over the removed slot, zeroes the vacated
 * last slot, logs the modified ranges, and records the block's new
 * last hashval in drop_blk for upward propagation.
 */
STATIC void
xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
{
	xfs_da_intnode_t *node;
	xfs_da_node_entry_t *btree;
	int tmp;

	node = drop_blk->bp->data;
	ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count));
	ASSERT(drop_blk->index >= 0);

	/*
	 * Copy over the offending entry, or just zero it out.
	 */
	btree = &node->btree[drop_blk->index];
	if (drop_blk->index < (be16_to_cpu(node->hdr.count)-1)) {
		/* shift the following entries down one slot */
		tmp  = be16_to_cpu(node->hdr.count) - drop_blk->index - 1;
		tmp *= (uint)sizeof(xfs_da_node_entry_t);
		memmove(btree, btree + 1, tmp);
		xfs_da_log_buf(state->args->trans, drop_blk->bp,
		    XFS_DA_LOGRANGE(node, btree, tmp));
		/* last slot is now stale; point btree at it for the memset */
		btree = &node->btree[be16_to_cpu(node->hdr.count)-1];
	}
	memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
	xfs_da_log_buf(state->args->trans, drop_blk->bp,
	    XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
	be16_add_cpu(&node->hdr.count, -1);
	xfs_da_log_buf(state->args->trans, drop_blk->bp,
	    XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));

	/*
	 * Copy the last hash value from the block to propagate upwards.
	 */
	btree--;
	drop_blk->hashval = be32_to_cpu(btree->hashval);
}
966
/*
 * Unbalance the btree elements between two intermediate nodes,
 * move all Btree elements from one node into another.
 *
 * drop_blk's entries are merged into save_blk; the caller is expected
 * to free drop_blk afterwards.  Entries are inserted before or after
 * save_blk's existing entries depending on hash order, so the merged
 * array stays sorted.
 */
STATIC void
xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
				     xfs_da_state_blk_t *save_blk)
{
	xfs_da_intnode_t *drop_node, *save_node;
	xfs_da_node_entry_t *btree;
	int tmp;
	xfs_trans_t *tp;

	drop_node = drop_blk->bp->data;
	save_node = save_blk->bp->data;
	ASSERT(be16_to_cpu(drop_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
	ASSERT(be16_to_cpu(save_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
	tp = state->args->trans;

	/*
	 * If the dying block has lower hashvals, then move all the
	 * elements in the remaining block up to make a hole.
	 */
	if ((be32_to_cpu(drop_node->btree[0].hashval) < be32_to_cpu(save_node->btree[ 0 ].hashval)) ||
	    (be32_to_cpu(drop_node->btree[be16_to_cpu(drop_node->hdr.count)-1].hashval) <
	     be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval)))
	{
		/* open a hole at the front of save_node for drop_node's entries */
		btree = &save_node->btree[be16_to_cpu(drop_node->hdr.count)];
		tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
		memmove(btree, &save_node->btree[0], tmp);
		btree = &save_node->btree[0];
		/* log the whole merged range: moved entries plus the hole */
		xfs_da_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, btree,
				(be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) *
				sizeof(xfs_da_node_entry_t)));
	} else {
		/* append after save_node's existing entries */
		btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)];
		xfs_da_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, btree,
				be16_to_cpu(drop_node->hdr.count) *
				sizeof(xfs_da_node_entry_t)));
	}

	/*
	 * Move all the B-tree elements from drop_blk to save_blk.
	 */
	tmp = be16_to_cpu(drop_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
	memcpy(btree, &drop_node->btree[0], tmp);
	be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count));

	xfs_da_log_buf(tp, save_blk->bp,
		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
			sizeof(save_node->hdr)));

	/*
	 * Save the last hashval in the remaining block for upward propagation.
	 */
	save_blk->hashval = be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval);
}
1026
1027 /*========================================================================
1028  * Routines used for finding things in the Btree.
1029  *========================================================================*/
1030
/*
 * Walk down the Btree looking for a particular filename, filling
 * in the state structure as we go.
 *
 * We will set the state structure to point to each of the elements
 * in each of the nodes where either the hashval is or should be.
 *
 * We support duplicate hashval's so for each entry in the current
 * node that could contain the desired hashval, descend.  This is a
 * pruned depth-first tree search.
 *
 * Returns 0 with the lookup verdict (e.g. EEXIST/ENOENT/ENOATTR from
 * the leaf lookup) in *result, or an errno on I/O failure.
 */
int							/* error */
xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
{
	xfs_da_state_blk_t *blk;
	xfs_da_blkinfo_t *curr;
	xfs_da_intnode_t *node;
	xfs_da_node_entry_t *btree;
	xfs_dablk_t blkno;
	int probe, span, max, error, retval;
	xfs_dahash_t hashval, btreehashval;
	xfs_da_args_t *args;

	args = state->args;

	/*
	 * Descend thru the B-tree searching each level for the right
	 * node to use, until the right hashval is found.
	 */
	/* directory trees are rooted at m_dirleafblk, attr trees at 0 */
	blkno = (args->whichfork == XFS_DATA_FORK)? state->mp->m_dirleafblk : 0;
	for (blk = &state->path.blk[0], state->path.active = 1;
			 state->path.active <= XFS_DA_NODE_MAXDEPTH;
			 blk++, state->path.active++) {
		/*
		 * Read the next node down in the tree.
		 */
		blk->blkno = blkno;
		error = xfs_da_read_buf(args->trans, args->dp, blkno,
					-1, &blk->bp, args->whichfork);
		if (error) {
			blk->blkno = 0;
			state->path.active--;
			return(error);
		}
		curr = blk->bp->data;
		blk->magic = be16_to_cpu(curr->magic);
		ASSERT(blk->magic == XFS_DA_NODE_MAGIC ||
		       blk->magic == XFS_DIR2_LEAFN_MAGIC ||
		       blk->magic == XFS_ATTR_LEAF_MAGIC);

		/*
		 * Search an intermediate node for a match.
		 */
		if (blk->magic == XFS_DA_NODE_MAGIC) {
			node = blk->bp->data;
			max = be16_to_cpu(node->hdr.count);
			blk->hashval = be32_to_cpu(node->btree[max-1].hashval);

			/*
			 * Binary search.  (note: small blocks will skip loop)
			 */
			probe = span = max / 2;
			hashval = args->hashval;
			for (btree = &node->btree[probe]; span > 4;
				   btree = &node->btree[probe]) {
				span /= 2;
				btreehashval = be32_to_cpu(btree->hashval);
				if (btreehashval < hashval)
					probe += span;
				else if (btreehashval > hashval)
					probe -= span;
				else
					break;
			}
			ASSERT((probe >= 0) && (probe < max));
			ASSERT((span <= 4) || (be32_to_cpu(btree->hashval) == hashval));

			/*
			 * Since we may have duplicate hashval's, find the first
			 * matching hashval in the node.
			 */
			while ((probe > 0) && (be32_to_cpu(btree->hashval) >= hashval)) {
				btree--;
				probe--;
			}
			while ((probe < max) && (be32_to_cpu(btree->hashval) < hashval)) {
				btree++;
				probe++;
			}

			/*
			 * Pick the right block to descend on.
			 */
			if (probe == max) {
				/* hashval beyond this node: descend rightmost */
				blk->index = max-1;
				blkno = be32_to_cpu(node->btree[max-1].before);
			} else {
				blk->index = probe;
				blkno = be32_to_cpu(btree->before);
			}
		} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
			break;
		} else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
			blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
			break;
		}
	}

	/*
	 * A leaf block that ends in the hashval that we are interested in
	 * (final hashval == search hashval) means that the next block may
	 * contain more entries with the same hashval, shift upward to the
	 * next leaf and keep searching.
	 */
	for (;;) {
		if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
			retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
							&blk->index, state);
		} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
			retval = xfs_attr_leaf_lookup_int(blk->bp, args);
			blk->index = args->index;
			args->blkno = blk->blkno;
		} else {
			ASSERT(0);
			return XFS_ERROR(EFSCORRUPTED);
		}
		if (((retval == ENOENT) || (retval == ENOATTR)) &&
		    (blk->hashval == args->hashval)) {
			error = xfs_da_path_shift(state, &state->path, 1, 1,
							 &retval);
			if (error)
				return(error);
			if (retval == 0) {
				/* shifted to the next leaf: search it too */
				continue;
			} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
				/* path_shift() gives ENOENT */
				retval = XFS_ERROR(ENOATTR);
			}
		}
		break;
	}
	*result = retval;
	return(0);
}
1176
1177 /*========================================================================
1178  * Utility routines.
1179  *========================================================================*/
1180
1181 /*
1182  * Link a new block into a doubly linked list of blocks (of whatever type).
1183  */
1184 int                                                     /* error */
1185 xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1186                                xfs_da_state_blk_t *new_blk)
1187 {
1188         xfs_da_blkinfo_t *old_info, *new_info, *tmp_info;
1189         xfs_da_args_t *args;
1190         int before=0, error;
1191         xfs_dabuf_t *bp;
1192
1193         /*
1194          * Set up environment.
1195          */
1196         args = state->args;
1197         ASSERT(args != NULL);
1198         old_info = old_blk->bp->data;
1199         new_info = new_blk->bp->data;
1200         ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
1201                old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1202                old_blk->magic == XFS_ATTR_LEAF_MAGIC);
1203         ASSERT(old_blk->magic == be16_to_cpu(old_info->magic));
1204         ASSERT(new_blk->magic == be16_to_cpu(new_info->magic));
1205         ASSERT(old_blk->magic == new_blk->magic);
1206
1207         switch (old_blk->magic) {
1208         case XFS_ATTR_LEAF_MAGIC:
1209                 before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
1210                 break;
1211         case XFS_DIR2_LEAFN_MAGIC:
1212                 before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
1213                 break;
1214         case XFS_DA_NODE_MAGIC:
1215                 before = xfs_da_node_order(old_blk->bp, new_blk->bp);
1216                 break;
1217         }
1218
1219         /*
1220          * Link blocks in appropriate order.
1221          */
1222         if (before) {
1223                 /*
1224                  * Link new block in before existing block.
1225                  */
1226                 new_info->forw = cpu_to_be32(old_blk->blkno);
1227                 new_info->back = old_info->back;
1228                 if (old_info->back) {
1229                         error = xfs_da_read_buf(args->trans, args->dp,
1230                                                 be32_to_cpu(old_info->back),
1231                                                 -1, &bp, args->whichfork);
1232                         if (error)
1233                                 return(error);
1234                         ASSERT(bp != NULL);
1235                         tmp_info = bp->data;
1236                         ASSERT(be16_to_cpu(tmp_info->magic) == be16_to_cpu(old_info->magic));
1237                         ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
1238                         tmp_info->forw = cpu_to_be32(new_blk->blkno);
1239                         xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1240                         xfs_da_buf_done(bp);
1241                 }
1242                 old_info->back = cpu_to_be32(new_blk->blkno);
1243         } else {
1244                 /*
1245                  * Link new block in after existing block.
1246                  */
1247                 new_info->forw = old_info->forw;
1248                 new_info->back = cpu_to_be32(old_blk->blkno);
1249                 if (old_info->forw) {
1250                         error = xfs_da_read_buf(args->trans, args->dp,
1251                                                 be32_to_cpu(old_info->forw),
1252                                                 -1, &bp, args->whichfork);
1253                         if (error)
1254                                 return(error);
1255                         ASSERT(bp != NULL);
1256                         tmp_info = bp->data;
1257                         ASSERT(tmp_info->magic == old_info->magic);
1258                         ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
1259                         tmp_info->back = cpu_to_be32(new_blk->blkno);
1260                         xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1261                         xfs_da_buf_done(bp);
1262                 }
1263                 old_info->forw = cpu_to_be32(new_blk->blkno);
1264         }
1265
1266         xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
1267         xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
1268         return(0);
1269 }
1270
1271 /*
1272  * Compare two intermediate nodes for "order".
1273  */
1274 STATIC int
1275 xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
1276 {
1277         xfs_da_intnode_t *node1, *node2;
1278
1279         node1 = node1_bp->data;
1280         node2 = node2_bp->data;
1281         ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) &&
1282                (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC));
1283         if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
1284             ((be32_to_cpu(node2->btree[0].hashval) <
1285               be32_to_cpu(node1->btree[0].hashval)) ||
1286              (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) <
1287               be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) {
1288                 return(1);
1289         }
1290         return(0);
1291 }
1292
1293 /*
1294  * Pick up the last hashvalue from an intermediate node.
1295  */
1296 STATIC uint
1297 xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
1298 {
1299         xfs_da_intnode_t *node;
1300
1301         node = bp->data;
1302         ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
1303         if (count)
1304                 *count = be16_to_cpu(node->hdr.count);
1305         if (!node->hdr.count)
1306                 return(0);
1307         return be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
1308 }
1309
1310 /*
1311  * Unlink a block from a doubly linked list of blocks.
1312  */
1313 STATIC int                                              /* error */
1314 xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1315                                  xfs_da_state_blk_t *save_blk)
1316 {
1317         xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
1318         xfs_da_args_t *args;
1319         xfs_dabuf_t *bp;
1320         int error;
1321
1322         /*
1323          * Set up environment.
1324          */
1325         args = state->args;
1326         ASSERT(args != NULL);
1327         save_info = save_blk->bp->data;
1328         drop_info = drop_blk->bp->data;
1329         ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
1330                save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1331                save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1332         ASSERT(save_blk->magic == be16_to_cpu(save_info->magic));
1333         ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic));
1334         ASSERT(save_blk->magic == drop_blk->magic);
1335         ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
1336                (be32_to_cpu(save_info->back) == drop_blk->blkno));
1337         ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
1338                (be32_to_cpu(drop_info->back) == save_blk->blkno));
1339
1340         /*
1341          * Unlink the leaf block from the doubly linked chain of leaves.
1342          */
1343         if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
1344                 save_info->back = drop_info->back;
1345                 if (drop_info->back) {
1346                         error = xfs_da_read_buf(args->trans, args->dp,
1347                                                 be32_to_cpu(drop_info->back),
1348                                                 -1, &bp, args->whichfork);
1349                         if (error)
1350                                 return(error);
1351                         ASSERT(bp != NULL);
1352                         tmp_info = bp->data;
1353                         ASSERT(tmp_info->magic == save_info->magic);
1354                         ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
1355                         tmp_info->forw = cpu_to_be32(save_blk->blkno);
1356                         xfs_da_log_buf(args->trans, bp, 0,
1357                                                     sizeof(*tmp_info) - 1);
1358                         xfs_da_buf_done(bp);
1359                 }
1360         } else {
1361                 save_info->forw = drop_info->forw;
1362                 if (drop_info->forw) {
1363                         error = xfs_da_read_buf(args->trans, args->dp,
1364                                                 be32_to_cpu(drop_info->forw),
1365                                                 -1, &bp, args->whichfork);
1366                         if (error)
1367                                 return(error);
1368                         ASSERT(bp != NULL);
1369                         tmp_info = bp->data;
1370                         ASSERT(tmp_info->magic == save_info->magic);
1371                         ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
1372                         tmp_info->back = cpu_to_be32(save_blk->blkno);
1373                         xfs_da_log_buf(args->trans, bp, 0,
1374                                                     sizeof(*tmp_info) - 1);
1375                         xfs_da_buf_done(bp);
1376                 }
1377         }
1378
1379         xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
1380         return(0);
1381 }
1382
1383 /*
1384  * Move a path "forward" or "!forward" one block at the current level.
1385  *
1386  * This routine will adjust a "path" to point to the next block
1387  * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
1388  * Btree, including updating pointers to the intermediate nodes between
1389  * the new bottom and the root.
1390  */
1391 int                                                     /* error */
1392 xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1393                                  int forward, int release, int *result)
1394 {
1395         xfs_da_state_blk_t *blk;
1396         xfs_da_blkinfo_t *info;
1397         xfs_da_intnode_t *node;
1398         xfs_da_args_t *args;
1399         xfs_dablk_t blkno=0;
1400         int level, error;
1401
1402         /*
1403          * Roll up the Btree looking for the first block where our
1404          * current index is not at the edge of the block.  Note that
1405          * we skip the bottom layer because we want the sibling block.
1406          */
1407         args = state->args;
1408         ASSERT(args != NULL);
1409         ASSERT(path != NULL);
1410         ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1411         level = (path->active-1) - 1;   /* skip bottom layer in path */
1412         for (blk = &path->blk[level]; level >= 0; blk--, level--) {
1413                 ASSERT(blk->bp != NULL);
1414                 node = blk->bp->data;
1415                 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
1416                 if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) {
1417                         blk->index++;
1418                         blkno = be32_to_cpu(node->btree[blk->index].before);
1419                         break;
1420                 } else if (!forward && (blk->index > 0)) {
1421                         blk->index--;
1422                         blkno = be32_to_cpu(node->btree[blk->index].before);
1423                         break;
1424                 }
1425         }
1426         if (level < 0) {
1427                 *result = XFS_ERROR(ENOENT);    /* we're out of our tree */
1428                 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1429                 return(0);
1430         }
1431
1432         /*
1433          * Roll down the edge of the subtree until we reach the
1434          * same depth we were at originally.
1435          */
1436         for (blk++, level++; level < path->active; blk++, level++) {
1437                 /*
1438                  * Release the old block.
1439                  * (if it's dirty, trans won't actually let go)
1440                  */
1441                 if (release)
1442                         xfs_da_brelse(args->trans, blk->bp);
1443
1444                 /*
1445                  * Read the next child block.
1446                  */
1447                 blk->blkno = blkno;
1448                 error = xfs_da_read_buf(args->trans, args->dp, blkno, -1,
1449                                                      &blk->bp, args->whichfork);
1450                 if (error)
1451                         return(error);
1452                 ASSERT(blk->bp != NULL);
1453                 info = blk->bp->data;
1454                 ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC ||
1455                        be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC ||
1456                        be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC);
1457                 blk->magic = be16_to_cpu(info->magic);
1458                 if (blk->magic == XFS_DA_NODE_MAGIC) {
1459                         node = (xfs_da_intnode_t *)info;
1460                         blk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
1461                         if (forward)
1462                                 blk->index = 0;
1463                         else
1464                                 blk->index = be16_to_cpu(node->hdr.count)-1;
1465                         blkno = be32_to_cpu(node->btree[blk->index].before);
1466                 } else {
1467                         ASSERT(level == path->active-1);
1468                         blk->index = 0;
1469                         switch(blk->magic) {
1470                         case XFS_ATTR_LEAF_MAGIC:
1471                                 blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
1472                                                                       NULL);
1473                                 break;
1474                         case XFS_DIR2_LEAFN_MAGIC:
1475                                 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
1476                                                                        NULL);
1477                                 break;
1478                         default:
1479                                 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC ||
1480                                        blk->magic == XFS_DIR2_LEAFN_MAGIC);
1481                                 break;
1482                         }
1483                 }
1484         }
1485         *result = 0;
1486         return(0);
1487 }
1488
1489
1490 /*========================================================================
1491  * Utility routines.
1492  *========================================================================*/
1493
1494 /*
1495  * Implement a simple hash on a character string.
1496  * Rotate the hash value by 7 bits, then XOR each character in.
1497  * This is implemented with some source-level loop unrolling.
1498  */
1499 xfs_dahash_t
1500 xfs_da_hashname(const __uint8_t *name, int namelen)
1501 {
1502         xfs_dahash_t hash;
1503
1504         /*
1505          * Do four characters at a time as long as we can.
1506          */
1507         for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
1508                 hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
1509                        (name[3] << 0) ^ rol32(hash, 7 * 4);
1510
1511         /*
1512          * Now do the rest of the characters.
1513          */
1514         switch (namelen) {
1515         case 3:
1516                 return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
1517                        rol32(hash, 7 * 3);
1518         case 2:
1519                 return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
1520         case 1:
1521                 return (name[0] << 0) ^ rol32(hash, 7 * 1);
1522         default: /* case 0: */
1523                 return hash;
1524         }
1525 }
1526
1527 enum xfs_dacmp
1528 xfs_da_compname(
1529         struct xfs_da_args *args,
1530         const unsigned char *name,
1531         int             len)
1532 {
1533         return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
1534                                         XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
1535 }
1536
1537 static xfs_dahash_t
1538 xfs_default_hashname(
1539         struct xfs_name *name)
1540 {
1541         return xfs_da_hashname(name->name, name->len);
1542 }
1543
/*
 * Default name operations: the standard da-btree hash and a plain
 * exact (length + byte-wise) comparison.
 */
const struct xfs_nameops xfs_default_nameops = {
	.hashname	= xfs_default_hashname,
	.compname	= xfs_da_compname
};
1548
1549 /*
1550  * Add a block to the btree ahead of the file.
1551  * Return the new block number to the caller.
1552  */
int
xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
{
	xfs_fileoff_t bno, b;
	xfs_bmbt_irec_t map;
	xfs_bmbt_irec_t *mapp;
	xfs_inode_t *dp;
	int nmap, error, w, count, c, got, i, mapi;
	xfs_trans_t *tp;
	xfs_mount_t *mp;
	xfs_drfsbno_t	nblks;

	dp = args->dp;
	mp = dp->i_mount;
	w = args->whichfork;
	tp = args->trans;
	/* snapshot di_nblocks so we can tell how many blocks we allocated */
	nblks = dp->i_d.di_nblocks;

	/*
	 * For new directories adjust the file offset and block count.
	 */
	if (w == XFS_DATA_FORK) {
		bno = mp->m_dirleafblk;
		count = mp->m_dirblkfsbs;
	} else {
		bno = 0;
		count = 1;
	}
	/*
	 * Find a spot in the file space to put the new block.
	 */
	if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w)))
		return error;
	if (w == XFS_DATA_FORK)
		ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
	/*
	 * Try mapping it in one filesystem block (CONTIG: all-or-nothing).
	 */
	nmap = 1;
	ASSERT(args->firstblock != NULL);
	if ((error = xfs_bmapi(tp, dp, bno, count,
			xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
			XFS_BMAPI_CONTIG,
			args->firstblock, args->total, &map, &nmap,
			args->flist))) {
		return error;
	}
	ASSERT(nmap <= 1);
	if (nmap == 1) {
		mapp = &map;
		mapi = 1;
	}
	/*
	 * If we didn't get it and the block might work if fragmented,
	 * try without the CONTIG flag.  Loop until we get it all.
	 */
	else if (nmap == 0 && count > 1) {
		mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
		for (b = bno, mapi = 0; b < bno + count; ) {
			nmap = MIN(XFS_BMAP_MAX_NMAP, count);
			c = (int)(bno + count - b);	/* blocks still unmapped */
			if ((error = xfs_bmapi(tp, dp, b, c,
					xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|
					XFS_BMAPI_METADATA,
					args->firstblock, args->total,
					&mapp[mapi], &nmap, args->flist))) {
				kmem_free(mapp);
				return error;
			}
			if (nmap < 1)
				break;
			/* resume mapping right after the last extent we got */
			mapi += nmap;
			b = mapp[mapi - 1].br_startoff +
			    mapp[mapi - 1].br_blockcount;
		}
	} else {
		/* no space at all; the checks below will return ENOSPC */
		mapi = 0;
		mapp = NULL;
	}
	/*
	 * Count the blocks we got, make sure it matches the total.
	 * Note: when mapi == 0, got != count short-circuits before any
	 * mapp[] dereference, so mapp == NULL is safe here.
	 */
	for (i = 0, got = 0; i < mapi; i++)
		got += mapp[i].br_blockcount;
	if (got != count || mapp[0].br_startoff != bno ||
	    mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
	    bno + count) {
		if (mapp != &map)
			kmem_free(mapp);
		return XFS_ERROR(ENOSPC);
	}
	if (mapp != &map)
		kmem_free(mapp);
	/* account for newly allocated blocks in reserved blocks total */
	args->total -= dp->i_d.di_nblocks - nblks;
	*new_blkno = (xfs_dablk_t)bno;
	return 0;
}
1651
1652 /*
1653  * Ick.  We need to always be able to remove a btree block, even
1654  * if there's no space reservation because the filesystem is full.
1655  * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
1656  * It swaps the target block with the last block in the file.  The
1657  * last block in the file can always be removed since it can't cause
1658  * a bmap btree split to do that.
1659  */
STATIC int
xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
		      xfs_dabuf_t **dead_bufp)
{
	xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno;
	xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf;
	xfs_fileoff_t lastoff;
	xfs_inode_t *ip;
	xfs_trans_t *tp;
	xfs_mount_t *mp;
	int error, w, entno, level, dead_level;
	xfs_da_blkinfo_t *dead_info, *sib_info;
	xfs_da_intnode_t *par_node, *dead_node;
	xfs_dir2_leaf_t *dead_leaf2;
	xfs_dahash_t dead_hash;

	dead_buf = *dead_bufp;
	dead_blkno = *dead_blknop;
	tp = args->trans;
	ip = args->dp;
	w = args->whichfork;
	ASSERT(w == XFS_DATA_FORK);	/* only called for directories */
	mp = ip->i_mount;
	/*
	 * Find the last existing block before the directory free space;
	 * its contents will be moved into the dead block's slot.
	 */
	lastoff = mp->m_dirfreeblk;
	error = xfs_bmap_last_before(tp, ip, &lastoff, w);
	if (error)
		return error;
	if (unlikely(lastoff == 0)) {
		XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
				 mp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	/*
	 * Read the last block in the btree space.
	 */
	last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
	if ((error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w)))
		return error;
	/*
	 * Copy the last block into the dead buffer and log it.
	 */
	memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize);
	xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
	dead_info = dead_buf->data;
	/*
	 * Get values from the moved block: its tree level and its last
	 * (highest) hash value, used below to locate its parent.
	 */
	if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) {
		/* moved block is a leaf, so its level is 0 */
		dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
		dead_level = 0;
		dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval);
	} else {
		ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC);
		dead_node = (xfs_da_intnode_t *)dead_info;
		dead_level = be16_to_cpu(dead_node->hdr.level);
		dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval);
	}
	sib_buf = par_buf = NULL;
	/*
	 * If the moved block has a left sibling, fix up the pointers.
	 */
	if ((sib_blkno = be32_to_cpu(dead_info->back))) {
		if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
			goto done;
		sib_info = sib_buf->data;
		/* sibling must point back at the block we just moved */
		if (unlikely(
		    be32_to_cpu(sib_info->forw) != last_blkno ||
		    sib_info->magic != dead_info->magic)) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		sib_info->forw = cpu_to_be32(dead_blkno);
		xfs_da_log_buf(tp, sib_buf,
			XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
					sizeof(sib_info->forw)));
		xfs_da_buf_done(sib_buf);
		sib_buf = NULL;
	}
	/*
	 * If the moved block has a right sibling, fix up the pointers.
	 */
	if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
		if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
			goto done;
		sib_info = sib_buf->data;
		/* sibling must point back at the block we just moved */
		if (unlikely(
		       be32_to_cpu(sib_info->back) != last_blkno ||
		       sib_info->magic != dead_info->magic)) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		sib_info->back = cpu_to_be32(dead_blkno);
		xfs_da_log_buf(tp, sib_buf,
			XFS_DA_LOGRANGE(sib_info, &sib_info->back,
					sizeof(sib_info->back)));
		xfs_da_buf_done(sib_buf);
		sib_buf = NULL;
	}
	par_blkno = mp->m_dirleafblk;	/* start the descent at the root */
	level = -1;			/* unknown until the root is read */
	/*
	 * Walk down the tree looking for the parent of the moved block,
	 * descending by the moved block's highest hash value.
	 */
	for (;;) {
		if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
			goto done;
		par_node = par_buf->data;
		/* each step down must drop exactly one level */
		if (unlikely(
		    be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC ||
		    (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		level = be16_to_cpu(par_node->hdr.level);
		/* find the first entry whose hash covers dead_hash */
		for (entno = 0;
		     entno < be16_to_cpu(par_node->hdr.count) &&
		     be32_to_cpu(par_node->btree[entno].hashval) < dead_hash;
		     entno++)
			continue;
		if (unlikely(entno == be16_to_cpu(par_node->hdr.count))) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		par_blkno = be32_to_cpu(par_node->btree[entno].before);
		if (level == dead_level + 1)
			break;
		xfs_da_brelse(tp, par_buf);
		par_buf = NULL;
	}
	/*
	 * We're in the right parent block.
	 * Look for the right entry, scanning right through sibling
	 * nodes if duplicate hash values pushed it past this one.
	 */
	for (;;) {
		for (;
		     entno < be16_to_cpu(par_node->hdr.count) &&
		     be32_to_cpu(par_node->btree[entno].before) != last_blkno;
		     entno++)
			continue;
		if (entno < be16_to_cpu(par_node->hdr.count))
			break;
		/* not in this node; move to its right sibling */
		par_blkno = be32_to_cpu(par_node->hdr.info.forw);
		xfs_da_brelse(tp, par_buf);
		par_buf = NULL;
		if (unlikely(par_blkno == 0)) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
			goto done;
		par_node = par_buf->data;
		/* the sibling must live at the same level */
		if (unlikely(
		    be16_to_cpu(par_node->hdr.level) != level ||
		    be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		entno = 0;
	}
	/*
	 * Update the parent entry pointing to the moved block.
	 */
	par_node->btree[entno].before = cpu_to_be32(dead_blkno);
	xfs_da_log_buf(tp, par_buf,
		XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before,
				sizeof(par_node->btree[entno].before)));
	xfs_da_buf_done(par_buf);
	xfs_da_buf_done(dead_buf);
	/* tell the caller which block is now the one to remove */
	*dead_blknop = last_blkno;
	*dead_bufp = last_buf;
	return 0;
done:
	if (par_buf)
		xfs_da_brelse(tp, par_buf);
	if (sib_buf)
		xfs_da_brelse(tp, sib_buf);
	xfs_da_brelse(tp, last_buf);
	return error;
}
1851
1852 /*
1853  * Remove a btree block from a directory or attribute.
1854  */
1855 int
1856 xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
1857                     xfs_dabuf_t *dead_buf)
1858 {
1859         xfs_inode_t *dp;
1860         int done, error, w, count;
1861         xfs_trans_t *tp;
1862         xfs_mount_t *mp;
1863
1864         dp = args->dp;
1865         w = args->whichfork;
1866         tp = args->trans;
1867         mp = dp->i_mount;
1868         if (w == XFS_DATA_FORK)
1869                 count = mp->m_dirblkfsbs;
1870         else
1871                 count = 1;
1872         for (;;) {
1873                 /*
1874                  * Remove extents.  If we get ENOSPC for a dir we have to move
1875                  * the last block to the place we want to kill.
1876                  */
1877                 if ((error = xfs_bunmapi(tp, dp, dead_blkno, count,
1878                                 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
1879                                 0, args->firstblock, args->flist,
1880                                 &done)) == ENOSPC) {
1881                         if (w != XFS_DATA_FORK)
1882                                 break;
1883                         if ((error = xfs_da_swap_lastblock(args, &dead_blkno,
1884                                         &dead_buf)))
1885                                 break;
1886                 } else {
1887                         break;
1888                 }
1889         }
1890         xfs_da_binval(tp, dead_buf);
1891         return error;
1892 }
1893
1894 /*
1895  * See if the mapping(s) for this btree block are valid, i.e.
1896  * don't contain holes, are logically contiguous, and cover the whole range.
1897  */
1898 STATIC int
1899 xfs_da_map_covers_blocks(
1900         int             nmap,
1901         xfs_bmbt_irec_t *mapp,
1902         xfs_dablk_t     bno,
1903         int             count)
1904 {
1905         int             i;
1906         xfs_fileoff_t   off;
1907
1908         for (i = 0, off = bno; i < nmap; i++) {
1909                 if (mapp[i].br_startblock == HOLESTARTBLOCK ||
1910                     mapp[i].br_startblock == DELAYSTARTBLOCK) {
1911                         return 0;
1912                 }
1913                 if (off != mapp[i].br_startoff) {
1914                         return 0;
1915                 }
1916                 off += mapp[i].br_blockcount;
1917         }
1918         return off == bno + count;
1919 }
1920
1921 /*
1922  * Make a dabuf.
1923  * Used for get_buf, read_buf, read_bufr, and reada_buf.
1924  */
STATIC int
xfs_da_do_buf(
	xfs_trans_t	*trans,
	xfs_inode_t	*dp,
	xfs_dablk_t	bno,		/* logical dir/attr block number */
	xfs_daddr_t	*mappedbnop,	/* in: daddr, or -1/-2 = map it here; out: first daddr */
	xfs_dabuf_t	**bpp,		/* out: dabuf built (may be NULL for readahead) */
	int		whichfork,
	int		caller,		/* 0=get, 1/2=read (magic check only for 1), 3=readahead */
	inst_t		*ra)
{
	xfs_buf_t	*bp = NULL;
	xfs_buf_t	**bplist;
	int		error=0;
	int		i;
	xfs_bmbt_irec_t	map;
	xfs_bmbt_irec_t	*mapp;
	xfs_daddr_t	mappedbno;
	xfs_mount_t	*mp;
	int		nbplist=0;
	int		nfsb;
	int		nfsb;
	int		nmap;
	xfs_dabuf_t	*rbp;

	mp = dp->i_mount;
	/* directory blocks may span several fs blocks; attr blocks don't */
	nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1;
	mappedbno = *mappedbnop;
	/*
	 * Caller doesn't have a mapping.  -2 means don't complain
	 * if we land in a hole.
	 */
	if (mappedbno == -1 || mappedbno == -2) {
		/*
		 * Optimize the one-block case.
		 */
		if (nfsb == 1) {
			xfs_fsblock_t	fsb;

			if ((error =
			    xfs_bmapi_single(trans, dp, whichfork, &fsb,
				    (xfs_fileoff_t)bno))) {
				return error;
			}
			mapp = &map;
			if (fsb == NULLFSBLOCK) {
				nmap = 0;
			} else {
				map.br_startblock = fsb;
				map.br_startoff = (xfs_fileoff_t)bno;
				map.br_blockcount = 1;
				nmap = 1;
			}
		} else {
			mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);
			nmap = nfsb;
			if ((error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno,
					nfsb,
					XFS_BMAPI_METADATA |
						xfs_bmapi_aflag(whichfork),
					NULL, 0, mapp, &nmap, NULL)))
				goto exit0;
		}
	} else {
		/* caller already knows the daddr; fake up a single mapping */
		map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
		map.br_startoff = (xfs_fileoff_t)bno;
		map.br_blockcount = nfsb;
		mapp = &map;
		nmap = 1;
	}
	if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) {
		/* landing in a hole is only OK for the -2 case */
		error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
		if (unlikely(error == EFSCORRUPTED)) {
			if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
				xfs_alert(mp, "%s: bno %lld dir: inode %lld",
					__func__, (long long)bno,
					(long long)dp->i_ino);
				for (i = 0; i < nmap; i++) {
					xfs_alert(mp,
"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
						i,
						(long long)mapp[i].br_startoff,
						(long long)mapp[i].br_startblock,
						(long long)mapp[i].br_blockcount,
						mapp[i].br_state);
				}
			}
			XFS_ERROR_REPORT("xfs_da_do_buf(1)",
					 XFS_ERRLEVEL_LOW, mp);
		}
		goto exit0;
	}
	/* readahead builds no buffers; a single map needs no list */
	if (caller != 3 && nmap > 1) {
		bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP);
		nbplist = 0;
	} else
		bplist = NULL;
	/*
	 * Turn the mapping(s) into buffer(s).
	 */
	for (i = 0; i < nmap; i++) {
		int	nmapped;

		mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock);
		if (i == 0)
			*mappedbnop = mappedbno;
		nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount);
		switch (caller) {
		case 0:
			bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
				mappedbno, nmapped, 0);
			error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO);
			break;
		case 1:
		case 2:
			bp = NULL;
			error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp,
				mappedbno, nmapped, 0, &bp);
			break;
		case 3:
			xfs_buf_readahead(mp->m_ddev_targp, mappedbno, nmapped);
			error = 0;
			bp = NULL;
			break;
		}
		if (error) {
			if (bp)
				xfs_trans_brelse(trans, bp);
			goto exit1;
		}
		if (!bp)
			continue;
		if (caller == 1) {
			if (whichfork == XFS_ATTR_FORK) {
				XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE,
						XFS_ATTR_BTREE_REF);
			} else {
				XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE,
						XFS_DIR_BTREE_REF);
			}
		}
		if (bplist) {
			bplist[nbplist++] = bp;
		}
	}
	/*
	 * Build a dabuf structure.
	 */
	if (bplist) {
		rbp = xfs_da_buf_make(nbplist, bplist, ra);
	} else if (bp)
		rbp = xfs_da_buf_make(1, &bp, ra);
	else
		rbp = NULL;
	/*
	 * For read_buf, check the magic number.
	 */
	if (caller == 1) {
		xfs_dir2_data_hdr_t	*hdr = rbp->data;
		xfs_dir2_free_t		*free = rbp->data;
		xfs_da_blkinfo_t	*info = rbp->data;
		uint			magic, magic1;

		magic = be16_to_cpu(info->magic);
		magic1 = be32_to_cpu(hdr->magic);
		if (unlikely(
		    XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
				   (magic != XFS_ATTR_LEAF_MAGIC) &&
				   (magic != XFS_DIR2_LEAF1_MAGIC) &&
				   (magic != XFS_DIR2_LEAFN_MAGIC) &&
				   (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
				   (magic1 != XFS_DIR2_DATA_MAGIC) &&
				   (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC),
				mp, XFS_ERRTAG_DA_READ_BUF,
				XFS_RANDOM_DA_READ_BUF))) {
			trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_);
			XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
					     XFS_ERRLEVEL_LOW, mp, info);
			error = XFS_ERROR(EFSCORRUPTED);
			xfs_da_brelse(trans, rbp);
			/* rbp release covered the buffers; stop exit1 releasing again */
			nbplist = 0;
			goto exit1;
		}
	}
	if (bplist) {
		kmem_free(bplist);
	}
	if (mapp != &map) {
		kmem_free(mapp);
	}
	if (bpp)
		*bpp = rbp;
	return 0;
exit1:
	if (bplist) {
		for (i = 0; i < nbplist; i++)
			xfs_trans_brelse(trans, bplist[i]);
		kmem_free(bplist);
	}
exit0:
	if (mapp != &map)
		kmem_free(mapp);
	if (bpp)
		*bpp = NULL;
	return error;
}
2130
2131 /*
2132  * Get a buffer for the dir/attr block.
2133  */
2134 int
2135 xfs_da_get_buf(
2136         xfs_trans_t     *trans,
2137         xfs_inode_t     *dp,
2138         xfs_dablk_t     bno,
2139         xfs_daddr_t             mappedbno,
2140         xfs_dabuf_t     **bpp,
2141         int             whichfork)
2142 {
2143         return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0,
2144                                                  (inst_t *)__return_address);
2145 }
2146
2147 /*
2148  * Get a buffer for the dir/attr block, fill in the contents.
2149  */
2150 int
2151 xfs_da_read_buf(
2152         xfs_trans_t     *trans,
2153         xfs_inode_t     *dp,
2154         xfs_dablk_t     bno,
2155         xfs_daddr_t             mappedbno,
2156         xfs_dabuf_t     **bpp,
2157         int             whichfork)
2158 {
2159         return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1,
2160                 (inst_t *)__return_address);
2161 }
2162
2163 /*
2164  * Readahead the dir/attr block.
2165  */
2166 xfs_daddr_t
2167 xfs_da_reada_buf(
2168         xfs_trans_t     *trans,
2169         xfs_inode_t     *dp,
2170         xfs_dablk_t     bno,
2171         int             whichfork)
2172 {
2173         xfs_daddr_t             rval;
2174
2175         rval = -1;
2176         if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3,
2177                         (inst_t *)__return_address))
2178                 return -1;
2179         else
2180                 return rval;
2181 }
2182
kmem_zone_t *xfs_da_state_zone;	/* zone for xfs_da_state_t allocations */
kmem_zone_t *xfs_dabuf_zone;            /* zone for single-buffer dabufs */
2185
2186 /*
2187  * Allocate a dir-state structure.
2188  * We don't put them on the stack since they're large.
2189  */
2190 xfs_da_state_t *
2191 xfs_da_state_alloc(void)
2192 {
2193         return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
2194 }
2195
2196 /*
2197  * Kill the altpath contents of a da-state structure.
2198  */
2199 STATIC void
2200 xfs_da_state_kill_altpath(xfs_da_state_t *state)
2201 {
2202         int     i;
2203
2204         for (i = 0; i < state->altpath.active; i++) {
2205                 if (state->altpath.blk[i].bp) {
2206                         if (state->altpath.blk[i].bp != state->path.blk[i].bp)
2207                                 xfs_da_buf_done(state->altpath.blk[i].bp);
2208                         state->altpath.blk[i].bp = NULL;
2209                 }
2210         }
2211         state->altpath.active = 0;
2212 }
2213
2214 /*
2215  * Free a da-state structure.
2216  */
2217 void
2218 xfs_da_state_free(xfs_da_state_t *state)
2219 {
2220         int     i;
2221
2222         xfs_da_state_kill_altpath(state);
2223         for (i = 0; i < state->path.active; i++) {
2224                 if (state->path.blk[i].bp)
2225                         xfs_da_buf_done(state->path.blk[i].bp);
2226         }
2227         if (state->extravalid && state->extrablk.bp)
2228                 xfs_da_buf_done(state->extrablk.bp);
2229 #ifdef DEBUG
2230         memset((char *)state, 0, sizeof(*state));
2231 #endif /* DEBUG */
2232         kmem_zone_free(xfs_da_state_zone, state);
2233 }
2234
#ifdef XFS_DABUF_DEBUG
/*
 * Debug-only global list of all live dabufs.  xfs_da_buf_make() links
 * new dabufs onto it and asserts that no two live dabufs map the same
 * disk address on the same target; xfs_da_buf_done() unlinks them.
 * Protected by xfs_dabuf_global_lock.
 */
xfs_dabuf_t	*xfs_dabuf_global_list;
static DEFINE_SPINLOCK(xfs_dabuf_global_lock);
#endif
2239
/*
 * Create a dabuf wrapping nbuf underlying xfs_buf_t's.
 *
 * For nbuf == 1 the dabuf's data pointer aliases the buffer's own memory.
 * For nbuf > 1 a contiguous shadow copy of the concatenated buffer
 * contents is allocated, so callers see a single flat block; writers
 * must log through xfs_da_log_buf() so the copy is flushed back.
 */
/* ARGSUSED */
STATIC xfs_dabuf_t *
xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
{
	xfs_buf_t	*bp;
	xfs_dabuf_t	*dabuf;
	int		i;
	int		off;

	/*
	 * Single-buffer dabufs come from their own zone; multi-buffer
	 * dabufs are variable-sized (XFS_DA_BUF_SIZE(nbuf)) and are
	 * allocated directly.
	 */
	if (nbuf == 1)
		dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS);
	else
		dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
	dabuf->dirty = 0;
#ifdef XFS_DABUF_DEBUG
	dabuf->ra = ra;
	dabuf->target = XFS_BUF_TARGET(bps[0]);
	dabuf->blkno = XFS_BUF_ADDR(bps[0]);
#endif
	if (nbuf == 1) {
		/* One underlying buffer: data points straight at it. */
		dabuf->nbuf = 1;
		bp = bps[0];
		dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
		dabuf->data = XFS_BUF_PTR(bp);
		dabuf->bps[0] = bp;
	} else {
		/* Sum the sizes, then copy each buffer into the shadow. */
		dabuf->nbuf = nbuf;
		for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) {
			dabuf->bps[i] = bp = bps[i];
			dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
		}
		dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
		for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
			bp = bps[i];
			memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
				XFS_BUF_COUNT(bp));
		}
	}
#ifdef XFS_DABUF_DEBUG
	{
		xfs_dabuf_t	*p;

		/*
		 * Debug builds track every live dabuf on a global list
		 * and assert that no other live dabuf already maps the
		 * same disk address on the same target.
		 */
		spin_lock(&xfs_dabuf_global_lock);
		for (p = xfs_dabuf_global_list; p; p = p->next) {
			ASSERT(p->blkno != dabuf->blkno ||
			       p->target != dabuf->target);
		}
		dabuf->prev = NULL;
		if (xfs_dabuf_global_list)
			xfs_dabuf_global_list->prev = dabuf;
		dabuf->next = xfs_dabuf_global_list;
		xfs_dabuf_global_list = dabuf;
		spin_unlock(&xfs_dabuf_global_lock);
	}
#endif
	return dabuf;
}
2300
2301 /*
2302  * Un-dirty a dabuf.
2303  */
2304 STATIC void
2305 xfs_da_buf_clean(xfs_dabuf_t *dabuf)
2306 {
2307         xfs_buf_t       *bp;
2308         int             i;
2309         int             off;
2310
2311         if (dabuf->dirty) {
2312                 ASSERT(dabuf->nbuf > 1);
2313                 dabuf->dirty = 0;
2314                 for (i = off = 0; i < dabuf->nbuf;
2315                                 i++, off += XFS_BUF_COUNT(bp)) {
2316                         bp = dabuf->bps[i];
2317                         memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
2318                                 XFS_BUF_COUNT(bp));
2319                 }
2320         }
2321 }
2322
2323 /*
2324  * Release a dabuf.
2325  */
2326 void
2327 xfs_da_buf_done(xfs_dabuf_t *dabuf)
2328 {
2329         ASSERT(dabuf);
2330         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2331         if (dabuf->dirty)
2332                 xfs_da_buf_clean(dabuf);
2333         if (dabuf->nbuf > 1)
2334                 kmem_free(dabuf->data);
2335 #ifdef XFS_DABUF_DEBUG
2336         {
2337                 spin_lock(&xfs_dabuf_global_lock);
2338                 if (dabuf->prev)
2339                         dabuf->prev->next = dabuf->next;
2340                 else
2341                         xfs_dabuf_global_list = dabuf->next;
2342                 if (dabuf->next)
2343                         dabuf->next->prev = dabuf->prev;
2344                 spin_unlock(&xfs_dabuf_global_lock);
2345         }
2346         memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf));
2347 #endif
2348         if (dabuf->nbuf == 1)
2349                 kmem_zone_free(xfs_dabuf_zone, dabuf);
2350         else
2351                 kmem_free(dabuf);
2352 }
2353
/*
 * Log a byte range [first, last] of a dabuf to the transaction.
 * For a multi-buffer dabuf the range is split and clamped to each
 * underlying buffer's extent before logging.
 */
void
xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
{
	xfs_buf_t	*bp;
	uint		f;
	int		i;
	uint		l;
	int		off;

	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
	if (dabuf->nbuf == 1) {
		/* data aliases the single buffer: log it directly. */
		ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0]));
		xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
		return;
	}
	/*
	 * The flat shadow copy now differs from the underlying buffers;
	 * mark dirty so xfs_da_buf_clean() writes it back before release.
	 */
	dabuf->dirty = 1;
	ASSERT(first <= last);
	/*
	 * Walk the buffers; [f, l] is [first, last] clamped to the byte
	 * span [off, off + count - 1] this buffer covers within the dabuf.
	 */
	for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) {
		bp = dabuf->bps[i];
		f = off;
		l = f + XFS_BUF_COUNT(bp) - 1;
		if (f < first)
			f = first;
		if (l > last)
			l = last;
		if (f <= l)
			xfs_trans_log_buf(tp, bp, f - off, l - off);
		/*
		 * B_DONE is set by xfs_trans_log_buf.
		 * If we don't set it on a new buffer (get not read)
		 * then if we don't put anything in the buffer it won't
		 * be set, and at commit it is released into the cache,
		 * and then a read will fail.
		 */
		else if (!(XFS_BUF_ISDONE(bp)))
		  XFS_BUF_DONE(bp);
	}
	/* The logged range must not extend past the end of the dabuf. */
	ASSERT(last < off);
}
2396
2397 /*
2398  * Release dabuf from a transaction.
2399  * Have to free up the dabuf before the buffers are released,
2400  * since the synchronization on the dabuf is really the lock on the buffer.
2401  */
2402 void
2403 xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2404 {
2405         xfs_buf_t       *bp;
2406         xfs_buf_t       **bplist;
2407         int             i;
2408         int             nbuf;
2409
2410         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2411         if ((nbuf = dabuf->nbuf) == 1) {
2412                 bplist = &bp;
2413                 bp = dabuf->bps[0];
2414         } else {
2415                 bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
2416                 memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
2417         }
2418         xfs_da_buf_done(dabuf);
2419         for (i = 0; i < nbuf; i++)
2420                 xfs_trans_brelse(tp, bplist[i]);
2421         if (bplist != &bp)
2422                 kmem_free(bplist);
2423 }
2424
2425 /*
2426  * Invalidate dabuf from a transaction.
2427  */
2428 void
2429 xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2430 {
2431         xfs_buf_t       *bp;
2432         xfs_buf_t       **bplist;
2433         int             i;
2434         int             nbuf;
2435
2436         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2437         if ((nbuf = dabuf->nbuf) == 1) {
2438                 bplist = &bp;
2439                 bp = dabuf->bps[0];
2440         } else {
2441                 bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
2442                 memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
2443         }
2444         xfs_da_buf_done(dabuf);
2445         for (i = 0; i < nbuf; i++)
2446                 xfs_trans_binval(tp, bplist[i]);
2447         if (bplist != &bp)
2448                 kmem_free(bplist);
2449 }
2450
2451 /*
2452  * Get the first daddr from a dabuf.
2453  */
2454 xfs_daddr_t
2455 xfs_da_blkno(xfs_dabuf_t *dabuf)
2456 {
2457         ASSERT(dabuf->nbuf);
2458         ASSERT(dabuf->data);
2459         return XFS_BUF_ADDR(dabuf->bps[0]);
2460 }