xfs: rename xfs_bmapi to xfs_bmapi_write
pandora-kernel.git: fs/xfs/xfs_bmap.c
1 /*
2  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_da_btree.h"
29 #include "xfs_bmap_btree.h"
30 #include "xfs_alloc_btree.h"
31 #include "xfs_ialloc_btree.h"
32 #include "xfs_dinode.h"
33 #include "xfs_inode.h"
34 #include "xfs_btree.h"
35 #include "xfs_mount.h"
36 #include "xfs_itable.h"
37 #include "xfs_inode_item.h"
38 #include "xfs_extfree_item.h"
39 #include "xfs_alloc.h"
40 #include "xfs_bmap.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_error.h"
43 #include "xfs_attr_leaf.h"
44 #include "xfs_rw.h"
45 #include "xfs_quota.h"
46 #include "xfs_trans_space.h"
47 #include "xfs_buf_item.h"
48 #include "xfs_filestream.h"
49 #include "xfs_vnodeops.h"
50 #include "xfs_trace.h"
51
52
53 #ifdef DEBUG
54 STATIC void
55 xfs_bmap_check_leaf_extents(xfs_btree_cur_t *cur, xfs_inode_t *ip, int whichfork);
56 #endif
57
58 kmem_zone_t             *xfs_bmap_free_item_zone;
59
60 /*
61  * Prototypes for internal bmap routines.
62  */
63
64
65 /*
66  * Called from xfs_bmap_add_attrfork to handle extents format files.
67  */
68 STATIC int                                      /* error */
69 xfs_bmap_add_attrfork_extents(
70         xfs_trans_t             *tp,            /* transaction pointer */
71         xfs_inode_t             *ip,            /* incore inode pointer */
72         xfs_fsblock_t           *firstblock,    /* first block allocated */
73         xfs_bmap_free_t         *flist,         /* blocks to free at commit */
74         int                     *flags);        /* inode logging flags */
75
76 /*
77  * Called from xfs_bmap_add_attrfork to handle local format files.
78  */
79 STATIC int                                      /* error */
80 xfs_bmap_add_attrfork_local(
81         xfs_trans_t             *tp,            /* transaction pointer */
82         xfs_inode_t             *ip,            /* incore inode pointer */
83         xfs_fsblock_t           *firstblock,    /* first block allocated */
84         xfs_bmap_free_t         *flist,         /* blocks to free at commit */
85         int                     *flags);        /* inode logging flags */
86
87 /*
88  * Called by xfs_bmap_add_extent to handle cases converting a delayed
89  * allocation to a real allocation.
90  */
91 STATIC int                              /* error */
92 xfs_bmap_add_extent_delay_real(
93         struct xfs_trans        *tp,    /* transaction pointer */
94         xfs_inode_t             *ip,    /* incore inode pointer */
95         xfs_extnum_t            *idx,   /* extent number to update/insert */
96         xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
97         xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
98         xfs_filblks_t           *dnew,  /* new delayed-alloc indirect blocks */
99         xfs_fsblock_t           *first, /* pointer to firstblock variable */
100         xfs_bmap_free_t         *flist, /* list of extents to be freed */
101         int                     *logflagsp); /* inode logging flags */
102
103 /*
104  * Called by xfs_bmap_add_extent to handle cases converting a hole
105  * to a real allocation.
106  */
107 STATIC int                              /* error */
108 xfs_bmap_add_extent_hole_real(
109         xfs_inode_t             *ip,    /* incore inode pointer */
110         xfs_extnum_t            *idx,   /* extent number to update/insert */
111         xfs_btree_cur_t         *cur,   /* if null, not a btree */
112         xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
113         int                     *logflagsp, /* inode logging flags */
114         int                     whichfork); /* data or attr fork */
115
116 /*
117  * Called by xfs_bmap_add_extent to handle cases converting an unwritten
118  * allocation to a real allocation or vice versa.
119  */
120 STATIC int                              /* error */
121 xfs_bmap_add_extent_unwritten_real(
122         xfs_inode_t             *ip,    /* incore inode pointer */
123         xfs_extnum_t            *idx,   /* extent number to update/insert */
124         xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
125         xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
126         int                     *logflagsp); /* inode logging flags */
127
128 /*
129  * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
130  * It figures out where to ask the underlying allocator to put the new extent.
131  */
132 STATIC int                              /* error */
133 xfs_bmap_alloc(
134         xfs_bmalloca_t          *ap);   /* bmap alloc argument struct */
135
136 /*
137  * Transform a btree format file with only one leaf node, where the
138  * extents list will fit in the inode, into an extents format file.
139  * Since the file extents are already in-core, all we have to do is
140  * give up the space for the btree root and pitch the leaf block.
141  */
142 STATIC int                              /* error */
143 xfs_bmap_btree_to_extents(
144         xfs_trans_t             *tp,    /* transaction pointer */
145         xfs_inode_t             *ip,    /* incore inode pointer */
146         xfs_btree_cur_t         *cur,   /* btree cursor */
147         int                     *logflagsp, /* inode logging flags */
148         int                     whichfork); /* data or attr fork */
149
150 /*
151  * Remove the entry "free" from the free item list.  Prev points to the
152  * previous entry, unless "free" is the head of the list.
153  */
154 STATIC void
155 xfs_bmap_del_free(
156         xfs_bmap_free_t         *flist, /* free item list header */
157         xfs_bmap_free_item_t    *prev,  /* previous item on list, if any */
158         xfs_bmap_free_item_t    *free); /* list item to be freed */
159
160 /*
161  * Convert an extents-format file into a btree-format file.
162  * The new file will have a root block (in the inode) and a single child block.
163  */
164 STATIC int                                      /* error */
165 xfs_bmap_extents_to_btree(
166         xfs_trans_t             *tp,            /* transaction pointer */
167         xfs_inode_t             *ip,            /* incore inode pointer */
168         xfs_fsblock_t           *firstblock,    /* first-block-allocated */
169         xfs_bmap_free_t         *flist,         /* blocks freed in xaction */
170         xfs_btree_cur_t         **curp,         /* cursor returned to caller */
171         int                     wasdel,         /* converting a delayed alloc */
172         int                     *logflagsp,     /* inode logging flags */
173         int                     whichfork);     /* data or attr fork */
174
175 /*
176  * Convert a local file to an extents file.
177  * This code is sort of bogus, since the file data needs to get
178  * logged so it won't be lost.  The bmap-level manipulations are ok, though.
179  */
180 STATIC int                              /* error */
181 xfs_bmap_local_to_extents(
182         xfs_trans_t     *tp,            /* transaction pointer */
183         xfs_inode_t     *ip,            /* incore inode pointer */
184         xfs_fsblock_t   *firstblock,    /* first block allocated in xaction */
185         xfs_extlen_t    total,          /* total blocks needed by transaction */
186         int             *logflagsp,     /* inode logging flags */
187         int             whichfork);     /* data or attr fork */
188
189 /*
190  * Search the extents list for the inode, for the extent containing bno.
191  * If bno lies in a hole, point to the next entry.  If bno lies past eof,
192  * *eofp will be set, and *prevp will contain the last entry (null if none).
193  * Else, *lastxp will be set to the index of the found
194  * entry; *gotp will contain the entry.
195  */
196 STATIC xfs_bmbt_rec_host_t *            /* pointer to found extent entry */
197 xfs_bmap_search_extents(
198         xfs_inode_t     *ip,            /* incore inode pointer */
199         xfs_fileoff_t   bno,            /* block number searched for */
200         int             whichfork,      /* data or attr fork */
201         int             *eofp,          /* out: end of file found */
202         xfs_extnum_t    *lastxp,        /* out: last extent index */
203         xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
204         xfs_bmbt_irec_t *prevp);        /* out: previous extent entry found */
205
206 /*
207  * Check the last inode extent to determine whether this allocation will result
208  * in blocks being allocated at the end of the file. When we allocate new data
209  * blocks at the end of the file which do not start at the previous data block,
210  * we will try to align the new blocks at stripe unit boundaries.
211  */
212 STATIC int                              /* error */
213 xfs_bmap_isaeof(
214         xfs_inode_t     *ip,            /* incore inode pointer */
215         xfs_fileoff_t   off,            /* file offset in fsblocks */
216         int             whichfork,      /* data or attribute fork */
217         char            *aeof);         /* return value */
218
219 /*
220  * Compute the worst-case number of indirect blocks that will be used
221  * for ip's delayed extent of length "len".
222  */
223 STATIC xfs_filblks_t
224 xfs_bmap_worst_indlen(
225         xfs_inode_t             *ip,    /* incore inode pointer */
226         xfs_filblks_t           len);   /* delayed extent length */
227
228 #ifdef DEBUG
229 /*
230  * Perform various validation checks on the values being returned
231  * from xfs_bmapi().
232  */
233 STATIC void
234 xfs_bmap_validate_ret(
235         xfs_fileoff_t           bno,
236         xfs_filblks_t           len,
237         int                     flags,
238         xfs_bmbt_irec_t         *mval,
239         int                     nmap,
240         int                     ret_nmap);
241 #else
242 #define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)
243 #endif /* DEBUG */
244
245 STATIC int
246 xfs_bmap_count_tree(
247         xfs_mount_t     *mp,
248         xfs_trans_t     *tp,
249         xfs_ifork_t     *ifp,
250         xfs_fsblock_t   blockno,
251         int             levelin,
252         int             *count);
253
254 STATIC void
255 xfs_bmap_count_leaves(
256         xfs_ifork_t             *ifp,
257         xfs_extnum_t            idx,
258         int                     numrecs,
259         int                     *count);
260
261 STATIC void
262 xfs_bmap_disk_count_leaves(
263         struct xfs_mount        *mp,
264         struct xfs_btree_block  *block,
265         int                     numrecs,
266         int                     *count);
267
268 /*
269  * Bmap internal routines.
270  */
271
272 STATIC int                              /* error */
273 xfs_bmbt_lookup_eq(
274         struct xfs_btree_cur    *cur,
275         xfs_fileoff_t           off,
276         xfs_fsblock_t           bno,
277         xfs_filblks_t           len,
278         int                     *stat)  /* success/failure */
279 {
280         cur->bc_rec.b.br_startoff = off;
281         cur->bc_rec.b.br_startblock = bno;
282         cur->bc_rec.b.br_blockcount = len;
283         return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
284 }
285
286 STATIC int                              /* error */
287 xfs_bmbt_lookup_ge(
288         struct xfs_btree_cur    *cur,
289         xfs_fileoff_t           off,
290         xfs_fsblock_t           bno,
291         xfs_filblks_t           len,
292         int                     *stat)  /* success/failure */
293 {
294         cur->bc_rec.b.br_startoff = off;
295         cur->bc_rec.b.br_startblock = bno;
296         cur->bc_rec.b.br_blockcount = len;
297         return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
298 }
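/*
 * For both lookup helpers above, *stat is set to 1 when the lookup
 * positions the cursor on a suitable record and 0 when it does not.
 * Callers below rely on this: they expect 1 before updating or deleting
 * a record and 0 before inserting a new one.
 */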
299
300 /*
301  * Update the record referred to by cur to the value given
302  * by [off, bno, len, state].
303  * This either works (return 0) or gets an EFSCORRUPTED error.
304  */
305 STATIC int
306 xfs_bmbt_update(
307         struct xfs_btree_cur    *cur,
308         xfs_fileoff_t           off,
309         xfs_fsblock_t           bno,
310         xfs_filblks_t           len,
311         xfs_exntst_t            state)
312 {
313         union xfs_btree_rec     rec;
314
315         xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state);
316         return xfs_btree_update(cur, &rec);
317 }
318
319 /*
320  * Called from xfs_bmap_add_attrfork to handle btree format files.
321  */
322 STATIC int                                      /* error */
323 xfs_bmap_add_attrfork_btree(
324         xfs_trans_t             *tp,            /* transaction pointer */
325         xfs_inode_t             *ip,            /* incore inode pointer */
326         xfs_fsblock_t           *firstblock,    /* first block allocated */
327         xfs_bmap_free_t         *flist,         /* blocks to free at commit */
328         int                     *flags)         /* inode logging flags */
329 {
330         xfs_btree_cur_t         *cur;           /* btree cursor */
331         int                     error;          /* error return value */
332         xfs_mount_t             *mp;            /* file system mount struct */
333         int                     stat;           /* newroot status */
334
335         mp = ip->i_mount;
336         if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
337                 *flags |= XFS_ILOG_DBROOT;
338         else {
339                 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
340                 cur->bc_private.b.flist = flist;
341                 cur->bc_private.b.firstblock = *firstblock;
342                 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
343                         goto error0;
344                 /* must be at least one entry */
345                 XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
346                 if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
347                         goto error0;
348                 if (stat == 0) {
349                         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
350                         return XFS_ERROR(ENOSPC);
351                 }
352                 *firstblock = cur->bc_private.b.firstblock;
353                 cur->bc_private.b.allocated = 0;
354                 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
355         }
356         return 0;
357 error0:
358         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
359         return error;
360 }
361
362 /*
363  * Called from xfs_bmap_add_attrfork to handle extents format files.
364  */
365 STATIC int                                      /* error */
366 xfs_bmap_add_attrfork_extents(
367         xfs_trans_t             *tp,            /* transaction pointer */
368         xfs_inode_t             *ip,            /* incore inode pointer */
369         xfs_fsblock_t           *firstblock,    /* first block allocated */
370         xfs_bmap_free_t         *flist,         /* blocks to free at commit */
371         int                     *flags)         /* inode logging flags */
372 {
373         xfs_btree_cur_t         *cur;           /* bmap btree cursor */
374         int                     error;          /* error return value */
375
376         if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
377                 return 0;
378         cur = NULL;
379         error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0,
380                 flags, XFS_DATA_FORK);
381         if (cur) {
382                 cur->bc_private.b.allocated = 0;
383                 xfs_btree_del_cursor(cur,
384                         error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
385         }
386         return error;
387 }
388
389 /*
390  * Called from xfs_bmap_add_attrfork to handle local format files.
391  */
392 STATIC int                                      /* error */
393 xfs_bmap_add_attrfork_local(
394         xfs_trans_t             *tp,            /* transaction pointer */
395         xfs_inode_t             *ip,            /* incore inode pointer */
396         xfs_fsblock_t           *firstblock,    /* first block allocated */
397         xfs_bmap_free_t         *flist,         /* blocks to free at commit */
398         int                     *flags)         /* inode logging flags */
399 {
400         xfs_da_args_t           dargs;          /* args for dir/attr code */
401         int                     error;          /* error return value */
402         xfs_mount_t             *mp;            /* mount structure pointer */
403
404         if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
405                 return 0;
406         if (S_ISDIR(ip->i_d.di_mode)) {
407                 mp = ip->i_mount;
408                 memset(&dargs, 0, sizeof(dargs));
409                 dargs.dp = ip;
410                 dargs.firstblock = firstblock;
411                 dargs.flist = flist;
412                 dargs.total = mp->m_dirblkfsbs;
413                 dargs.whichfork = XFS_DATA_FORK;
414                 dargs.trans = tp;
415                 error = xfs_dir2_sf_to_block(&dargs);
416         } else
417                 error = xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags,
418                         XFS_DATA_FORK);
419         return error;
420 }
421
422 /*
423  * Update file extent records and the btree after allocating space.
424  */
425 STATIC int                              /* error */
426 xfs_bmap_add_extent(
427         struct xfs_trans        *tp,    /* transaction pointer */
428         xfs_inode_t             *ip,    /* incore inode pointer */
429         xfs_extnum_t            *idx,   /* extent number to update/insert */
430         xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
431         xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
432         xfs_fsblock_t           *first, /* pointer to firstblock variable */
433         xfs_bmap_free_t         *flist, /* list of extents to be freed */
434         int                     *logflagsp, /* inode logging flags */
435         int                     whichfork) /* data or attr fork */
436 {
437         xfs_btree_cur_t         *cur;   /* btree cursor or null */
438         xfs_filblks_t           da_new; /* new count del alloc blocks used */
439         xfs_filblks_t           da_old; /* old count del alloc blocks used */
440         int                     error;  /* error return value */
441         xfs_ifork_t             *ifp;   /* inode fork ptr */
442         int                     logflags; /* returned value */
443         xfs_extnum_t            nextents; /* number of extents in file now */
444
445         XFS_STATS_INC(xs_add_exlist);
446
447         cur = *curp;
448         ifp = XFS_IFORK_PTR(ip, whichfork);
449         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
450         da_old = da_new = 0;
451         error = 0;
452
453         ASSERT(*idx >= 0);
454         ASSERT(*idx <= nextents);
455         ASSERT(!isnullstartblock(new->br_startblock));
456
457         /*
458          * Real allocation off the end of the file.
459          */
460         if (*idx == nextents) {
461                 if (cur)
462                         ASSERT((cur->bc_private.b.flags &
463                                 XFS_BTCUR_BPRV_WASDEL) == 0);
464                 error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new,
465                                 &logflags, whichfork);
466         } else {
467                 xfs_bmbt_irec_t prev;   /* old extent at offset idx */
468
469                 /*
470                  * Get the record referred to by idx.
471                  */
472                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &prev);
473                 /*
474                  * If it's a real allocation record, and the new allocation ends
475                  * after the start of the referred to record, then we're filling
476                  * in a delayed or unwritten allocation with a real one, or
477                  * converting real back to unwritten.
478                  */
479                 if (!isnullstartblock(new->br_startblock) &&
480                     new->br_startoff + new->br_blockcount > prev.br_startoff) {
481                         if (prev.br_state != XFS_EXT_UNWRITTEN &&
482                             isnullstartblock(prev.br_startblock)) {
483                                 da_old = startblockval(prev.br_startblock);
484                                 if (cur)
485                                         ASSERT(cur->bc_private.b.flags &
486                                                 XFS_BTCUR_BPRV_WASDEL);
487                                 error = xfs_bmap_add_extent_delay_real(tp, ip,
488                                                 idx, &cur, new, &da_new,
489                                                 first, flist, &logflags);
490                         } else {
491                                 ASSERT(new->br_state == XFS_EXT_NORM ||
492                                        new->br_state == XFS_EXT_UNWRITTEN);
493
494                                 error = xfs_bmap_add_extent_unwritten_real(ip,
495                                                 idx, &cur, new, &logflags);
496                                 if (error)
497                                         goto done;
498                         }
499                 }
500                 /*
501                  * Otherwise we're filling in a hole with an allocation.
502                  */
503                 else {
504                         if (cur)
505                                 ASSERT((cur->bc_private.b.flags &
506                                         XFS_BTCUR_BPRV_WASDEL) == 0);
507                         error = xfs_bmap_add_extent_hole_real(ip, idx, cur,
508                                         new, &logflags, whichfork);
509                 }
510         }
511
512         if (error)
513                 goto done;
514         ASSERT(*curp == cur || *curp == NULL);
515
516         /*
517          * Convert to a btree if necessary.
518          */
519         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
520             XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) {
521                 int     tmp_logflags;   /* partial log flag return val */
522
523                 ASSERT(cur == NULL);
524                 error = xfs_bmap_extents_to_btree(tp, ip, first,
525                         flist, &cur, da_old > 0, &tmp_logflags, whichfork);
526                 logflags |= tmp_logflags;
527                 if (error)
528                         goto done;
529         }
530         /*
531          * Adjust for changes in reserved delayed indirect blocks.
532          * Nothing to do for disk quotas here.
533          */
534         if (da_old || da_new) {
535                 xfs_filblks_t   nblks;
536
537                 nblks = da_new;
538                 if (cur)
539                         nblks += cur->bc_private.b.allocated;
540                 ASSERT(nblks <= da_old);
541                 if (nblks < da_old)
542                         xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
543                                 (int64_t)(da_old - nblks), 0);
544         }
545         /*
546          * Clear out the allocated field, done with it now in any case.
547          */
548         if (cur) {
549                 cur->bc_private.b.allocated = 0;
550                 *curp = cur;
551         }
552 done:
553 #ifdef DEBUG
554         if (!error)
555                 xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
556 #endif
557         *logflagsp = logflags;
558         return error;
559 }
560
561 /*
562  * Called by xfs_bmap_add_extent to handle cases converting a delayed
563  * allocation to a real allocation.
564  */
565 STATIC int                              /* error */
566 xfs_bmap_add_extent_delay_real(
567         struct xfs_trans        *tp,    /* transaction pointer */
568         xfs_inode_t             *ip,    /* incore inode pointer */
569         xfs_extnum_t            *idx,   /* extent number to update/insert */
570         xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
571         xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
572         xfs_filblks_t           *dnew,  /* new delayed-alloc indirect blocks */
573         xfs_fsblock_t           *first, /* pointer to firstblock variable */
574         xfs_bmap_free_t         *flist, /* list of extents to be freed */
575         int                     *logflagsp) /* inode logging flags */
576 {
577         xfs_btree_cur_t         *cur;   /* btree cursor */
578         int                     diff;   /* temp value */
579         xfs_bmbt_rec_host_t     *ep;    /* extent entry for idx */
580         int                     error;  /* error return value */
581         int                     i;      /* temp state */
582         xfs_ifork_t             *ifp;   /* inode fork pointer */
583         xfs_fileoff_t           new_endoff;     /* end offset of new entry */
584         xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
585                                         /* left is 0, right is 1, prev is 2 */
586         int                     rval=0; /* return value (logging flags) */
587         int                     state = 0;/* state bits, accessed thru macros */
588         xfs_filblks_t           temp=0; /* value for dnew calculations */
589         xfs_filblks_t           temp2=0;/* value for dnew calculations */
590         int                     tmp_rval;       /* partial logging flags */
591
592 #define LEFT            r[0]
593 #define RIGHT           r[1]
594 #define PREV            r[2]
595
596         /*
597          * Set up a bunch of variables to make the tests simpler.
598          */
599         cur = *curp;
600         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
601         ep = xfs_iext_get_ext(ifp, *idx);
602         xfs_bmbt_get_all(ep, &PREV);
603         new_endoff = new->br_startoff + new->br_blockcount;
604         ASSERT(PREV.br_startoff <= new->br_startoff);
605         ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
606
607         /*
608          * Set flags determining what part of the previous delayed allocation
609          * extent is being replaced by a real allocation.
610          */
611         if (PREV.br_startoff == new->br_startoff)
612                 state |= BMAP_LEFT_FILLING;
613         if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
614                 state |= BMAP_RIGHT_FILLING;
615
616         /*
617          * Check and set flags if this segment has a left neighbor.
618          * Don't set contiguous if the combined extent would be too large.
619          */
620         if (*idx > 0) {
621                 state |= BMAP_LEFT_VALID;
622                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT);
623
624                 if (isnullstartblock(LEFT.br_startblock))
625                         state |= BMAP_LEFT_DELAY;
626         }
627
628         if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
629             LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
630             LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
631             LEFT.br_state == new->br_state &&
632             LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
633                 state |= BMAP_LEFT_CONTIG;
634
635         /*
636          * Check and set flags if this segment has a right neighbor.
637          * Don't set contiguous if the combined extent would be too large.
638          * Also check for all-three-contiguous being too large.
639          */
640         if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
641                 state |= BMAP_RIGHT_VALID;
642                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
643
644                 if (isnullstartblock(RIGHT.br_startblock))
645                         state |= BMAP_RIGHT_DELAY;
646         }
647
648         if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
649             new_endoff == RIGHT.br_startoff &&
650             new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
651             new->br_state == RIGHT.br_state &&
652             new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
653             ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
654                        BMAP_RIGHT_FILLING)) !=
655                       (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
656                        BMAP_RIGHT_FILLING) ||
657              LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
658                         <= MAXEXTLEN))
659                 state |= BMAP_RIGHT_CONTIG;
660
661         error = 0;
662         /*
663          * Switch out based on the FILLING and CONTIG state bits.
664          */
665         switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
666                          BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
667         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
668              BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
669                 /*
670                  * Filling in all of a previously delayed allocation extent.
671                  * The left and right neighbors are both contiguous with new.
672                  */
673                 --*idx;
674                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
675                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
676                         LEFT.br_blockcount + PREV.br_blockcount +
677                         RIGHT.br_blockcount);
678                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
679
680                 xfs_iext_remove(ip, *idx + 1, 2, state);
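                /*
                 * Two in-core records go away, but only RIGHT was a real
                 * extent; the delayed PREV was never counted in di_nextents,
                 * so the on-disk extent count drops by just one.
                 */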
681                 ip->i_d.di_nextents--;
682                 if (cur == NULL)
683                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
684                 else {
685                         rval = XFS_ILOG_CORE;
686                         if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
687                                         RIGHT.br_startblock,
688                                         RIGHT.br_blockcount, &i)))
689                                 goto done;
690                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
691                         if ((error = xfs_btree_delete(cur, &i)))
692                                 goto done;
693                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
694                         if ((error = xfs_btree_decrement(cur, 0, &i)))
695                                 goto done;
696                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
697                         if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
698                                         LEFT.br_startblock,
699                                         LEFT.br_blockcount +
700                                         PREV.br_blockcount +
701                                         RIGHT.br_blockcount, LEFT.br_state)))
702                                 goto done;
703                 }
704                 *dnew = 0;
705                 break;
706
707         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
708                 /*
709                  * Filling in all of a previously delayed allocation extent.
710                  * The left neighbor is contiguous, the right is not.
711                  */
712                 --*idx;
713
714                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
715                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
716                         LEFT.br_blockcount + PREV.br_blockcount);
717                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
718
719                 xfs_iext_remove(ip, *idx + 1, 1, state);
720                 if (cur == NULL)
721                         rval = XFS_ILOG_DEXT;
722                 else {
723                         rval = 0;
724                         if ((error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff,
725                                         LEFT.br_startblock, LEFT.br_blockcount,
726                                         &i)))
727                                 goto done;
728                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
729                         if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
730                                         LEFT.br_startblock,
731                                         LEFT.br_blockcount +
732                                         PREV.br_blockcount, LEFT.br_state)))
733                                 goto done;
734                 }
735                 *dnew = 0;
736                 break;
737
738         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
739                 /*
740                  * Filling in all of a previously delayed allocation extent.
741                  * The right neighbor is contiguous, the left is not.
742                  */
743                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
744                 xfs_bmbt_set_startblock(ep, new->br_startblock);
745                 xfs_bmbt_set_blockcount(ep,
746                         PREV.br_blockcount + RIGHT.br_blockcount);
747                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
748
749                 xfs_iext_remove(ip, *idx + 1, 1, state);
750                 if (cur == NULL)
751                         rval = XFS_ILOG_DEXT;
752                 else {
753                         rval = 0;
754                         if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
755                                         RIGHT.br_startblock,
756                                         RIGHT.br_blockcount, &i)))
757                                 goto done;
758                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
759                         if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
760                                         new->br_startblock,
761                                         PREV.br_blockcount +
762                                         RIGHT.br_blockcount, PREV.br_state)))
763                                 goto done;
764                 }
765
766                 *dnew = 0;
767                 break;
768
769         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
770                 /*
771                  * Filling in all of a previously delayed allocation extent.
772                  * Neither the left nor right neighbors are contiguous with
773                  * the new one.
774                  */
775                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
776                 xfs_bmbt_set_startblock(ep, new->br_startblock);
777                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
778
779                 ip->i_d.di_nextents++;
780                 if (cur == NULL)
781                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
782                 else {
783                         rval = XFS_ILOG_CORE;
784                         if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
785                                         new->br_startblock, new->br_blockcount,
786                                         &i)))
787                                 goto done;
788                         XFS_WANT_CORRUPTED_GOTO(i == 0, done);
789                         cur->bc_rec.b.br_state = XFS_EXT_NORM;
790                         if ((error = xfs_btree_insert(cur, &i)))
791                                 goto done;
792                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
793                 }
794
795                 *dnew = 0;
796                 break;
797
798         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
799                 /*
800                  * Filling in the first part of a previous delayed allocation.
801                  * The left neighbor is contiguous.
802                  */
803                 trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_);
804                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1),
805                         LEFT.br_blockcount + new->br_blockcount);
806                 xfs_bmbt_set_startoff(ep,
807                         PREV.br_startoff + new->br_blockcount);
808                 trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_);
809
810                 temp = PREV.br_blockcount - new->br_blockcount;
811                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
812                 xfs_bmbt_set_blockcount(ep, temp);
813                 if (cur == NULL)
814                         rval = XFS_ILOG_DEXT;
815                 else {
816                         rval = 0;
817                         if ((error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff,
818                                         LEFT.br_startblock, LEFT.br_blockcount,
819                                         &i)))
820                                 goto done;
821                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
822                         if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
823                                         LEFT.br_startblock,
824                                         LEFT.br_blockcount +
825                                         new->br_blockcount,
826                                         LEFT.br_state)))
827                                 goto done;
828                 }
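                /*
                 * The unfilled tail of PREV stays delayed: recompute its
                 * worst-case indirect block reservation, capped at what the
                 * original extent already held, and stash it in the null
                 * start block.
                 */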
829                 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
830                         startblockval(PREV.br_startblock));
831                 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
832                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
833
834                 --*idx;
835                 *dnew = temp;
836                 break;
837
838         case BMAP_LEFT_FILLING:
839                 /*
840                  * Filling in the first part of a previous delayed allocation.
841                  * The left neighbor is not contiguous.
842                  */
843                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
844                 xfs_bmbt_set_startoff(ep, new_endoff);
845                 temp = PREV.br_blockcount - new->br_blockcount;
846                 xfs_bmbt_set_blockcount(ep, temp);
847                 xfs_iext_insert(ip, *idx, 1, new, state);
848                 ip->i_d.di_nextents++;
849                 if (cur == NULL)
850                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
851                 else {
852                         rval = XFS_ILOG_CORE;
853                         if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
854                                         new->br_startblock, new->br_blockcount,
855                                         &i)))
856                                 goto done;
857                         XFS_WANT_CORRUPTED_GOTO(i == 0, done);
858                         cur->bc_rec.b.br_state = XFS_EXT_NORM;
859                         if ((error = xfs_btree_insert(cur, &i)))
860                                 goto done;
861                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
862                 }
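                /*
                 * Inserting the new record may have pushed the data fork past
                 * the number of extents that fit in the inode, in which case
                 * the extent list has to be converted to a btree.
                 */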
863                 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
864                     ip->i_d.di_nextents > ip->i_df.if_ext_max) {
865                         error = xfs_bmap_extents_to_btree(tp, ip,
866                                         first, flist, &cur, 1, &tmp_rval,
867                                         XFS_DATA_FORK);
868                         rval |= tmp_rval;
869                         if (error)
870                                 goto done;
871                 }
872                 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
873                         startblockval(PREV.br_startblock) -
874                         (cur ? cur->bc_private.b.allocated : 0));
875                 ep = xfs_iext_get_ext(ifp, *idx + 1);
876                 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
877                 trace_xfs_bmap_post_update(ip, *idx + 1, state, _THIS_IP_);
878
879                 *dnew = temp;
880                 break;
881
882         case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
883                 /*
884                  * Filling in the last part of a previous delayed allocation.
885                  * The right neighbor is contiguous with the new allocation.
886                  */
887                 temp = PREV.br_blockcount - new->br_blockcount;
888                 trace_xfs_bmap_pre_update(ip, *idx + 1, state, _THIS_IP_);
889                 xfs_bmbt_set_blockcount(ep, temp);
890                 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx + 1),
891                         new->br_startoff, new->br_startblock,
892                         new->br_blockcount + RIGHT.br_blockcount,
893                         RIGHT.br_state);
894                 trace_xfs_bmap_post_update(ip, *idx + 1, state, _THIS_IP_);
895                 if (cur == NULL)
896                         rval = XFS_ILOG_DEXT;
897                 else {
898                         rval = 0;
899                         if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
900                                         RIGHT.br_startblock,
901                                         RIGHT.br_blockcount, &i)))
902                                 goto done;
903                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
904                         if ((error = xfs_bmbt_update(cur, new->br_startoff,
905                                         new->br_startblock,
906                                         new->br_blockcount +
907                                         RIGHT.br_blockcount,
908                                         RIGHT.br_state)))
909                                 goto done;
910                 }
911
912                 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
913                         startblockval(PREV.br_startblock));
914                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
915                 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
916                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
917
918                 ++*idx;
919                 *dnew = temp;
920                 break;
921
922         case BMAP_RIGHT_FILLING:
923                 /*
924                  * Filling in the last part of a previous delayed allocation.
925                  * The right neighbor is not contiguous.
926                  */
927                 temp = PREV.br_blockcount - new->br_blockcount;
928                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
929                 xfs_bmbt_set_blockcount(ep, temp);
930                 xfs_iext_insert(ip, *idx + 1, 1, new, state);
931                 ip->i_d.di_nextents++;
932                 if (cur == NULL)
933                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
934                 else {
935                         rval = XFS_ILOG_CORE;
936                         if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
937                                         new->br_startblock, new->br_blockcount,
938                                         &i)))
939                                 goto done;
940                         XFS_WANT_CORRUPTED_GOTO(i == 0, done);
941                         cur->bc_rec.b.br_state = XFS_EXT_NORM;
942                         if ((error = xfs_btree_insert(cur, &i)))
943                                 goto done;
944                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
945                 }
946                 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
947                     ip->i_d.di_nextents > ip->i_df.if_ext_max) {
948                         error = xfs_bmap_extents_to_btree(tp, ip,
949                                 first, flist, &cur, 1, &tmp_rval,
950                                 XFS_DATA_FORK);
951                         rval |= tmp_rval;
952                         if (error)
953                                 goto done;
954                 }
955                 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
956                         startblockval(PREV.br_startblock) -
957                         (cur ? cur->bc_private.b.allocated : 0));
958                 ep = xfs_iext_get_ext(ifp, *idx);
959                 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
960                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
961
962                 ++*idx;
963                 *dnew = temp;
964                 break;
965
966         case 0:
967                 /*
968                  * Filling in the middle part of a previous delayed allocation.
969                  * Contiguity is impossible here.
970                  * This case is avoided almost all the time.
971                  *
972                  * We start with a delayed allocation:
973                  *
974                  * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
975                  *  PREV @ idx
976                  *
977                  * and we are allocating:
978                  *                     +rrrrrrrrrrrrrrrrr+
979                  *                            new
980                  *
981                  * and we set it up for insertion as:
982                  * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
983                  *                            new
984                  *  PREV @ idx          LEFT              RIGHT
985                  *                      inserted at idx + 1
986                  */
987                 temp = new->br_startoff - PREV.br_startoff;
988                 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
989                 trace_xfs_bmap_pre_update(ip, *idx, 0, _THIS_IP_);
990                 xfs_bmbt_set_blockcount(ep, temp);      /* truncate PREV */
991                 LEFT = *new;
992                 RIGHT.br_state = PREV.br_state;
993                 RIGHT.br_startblock = nullstartblock(
994                                 (int)xfs_bmap_worst_indlen(ip, temp2));
995                 RIGHT.br_startoff = new_endoff;
996                 RIGHT.br_blockcount = temp2;
997                 /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
998                 xfs_iext_insert(ip, *idx + 1, 2, &LEFT, state);
999                 ip->i_d.di_nextents++;
1000                 if (cur == NULL)
1001                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1002                 else {
1003                         rval = XFS_ILOG_CORE;
1004                         if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
1005                                         new->br_startblock, new->br_blockcount,
1006                                         &i)))
1007                                 goto done;
1008                         XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1009                         cur->bc_rec.b.br_state = XFS_EXT_NORM;
1010                         if ((error = xfs_btree_insert(cur, &i)))
1011                                 goto done;
1012                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1013                 }
1014                 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1015                     ip->i_d.di_nextents > ip->i_df.if_ext_max) {
1016                         error = xfs_bmap_extents_to_btree(tp, ip,
1017                                         first, flist, &cur, 1, &tmp_rval,
1018                                         XFS_DATA_FORK);
1019                         rval |= tmp_rval;
1020                         if (error)
1021                                 goto done;
1022                 }
1023                 temp = xfs_bmap_worst_indlen(ip, temp);
1024                 temp2 = xfs_bmap_worst_indlen(ip, temp2);
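                /*
                 * If the two remaining delayed pieces need a larger combined
                 * worst-case indirect reservation than the original extent
                 * carried (less anything the btree cursor already allocated),
                 * take the extra blocks out of the in-core free space counter.
                 */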
1025                 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
1026                         (cur ? cur->bc_private.b.allocated : 0));
1027                 if (diff > 0) {
1028                         error = xfs_icsb_modify_counters(ip->i_mount,
1029                                         XFS_SBS_FDBLOCKS,
1030                                         -((int64_t)diff), 0);
1031                         ASSERT(!error);
1032                         if (error)
1033                                 goto done;
1034                 }
1035
1036                 ep = xfs_iext_get_ext(ifp, *idx);
1037                 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
1038                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
1039                 trace_xfs_bmap_pre_update(ip, *idx + 2, state, _THIS_IP_);
1040                 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx + 2),
1041                         nullstartblock((int)temp2));
1042                 trace_xfs_bmap_post_update(ip, *idx + 2, state, _THIS_IP_);
1043
1044                 ++*idx;
1045                 *dnew = temp + temp2;
1046                 break;
1047
1048         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1049         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1050         case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1051         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1052         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1053         case BMAP_LEFT_CONTIG:
1054         case BMAP_RIGHT_CONTIG:
1055                 /*
1056                  * These cases are all impossible.
1057                  */
1058                 ASSERT(0);
1059         }
1060         *curp = cur;
1061 done:
1062         *logflagsp = rval;
1063         return error;
1064 #undef  LEFT
1065 #undef  RIGHT
1066 #undef  PREV
1067 }
1068
1069 /*
1070  * Called by xfs_bmap_add_extent to handle cases converting an unwritten
1071  * allocation to a real allocation or vice versa.
1072  */
1073 STATIC int                              /* error */
1074 xfs_bmap_add_extent_unwritten_real(
1075         xfs_inode_t             *ip,    /* incore inode pointer */
1076         xfs_extnum_t            *idx,   /* extent number to update/insert */
1077         xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
1078         xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
1079         int                     *logflagsp) /* inode logging flags */
1080 {
1081         xfs_btree_cur_t         *cur;   /* btree cursor */
1082         xfs_bmbt_rec_host_t     *ep;    /* extent entry for idx */
1083         int                     error;  /* error return value */
1084         int                     i;      /* temp state */
1085         xfs_ifork_t             *ifp;   /* inode fork pointer */
1086         xfs_fileoff_t           new_endoff;     /* end offset of new entry */
1087         xfs_exntst_t            newext; /* new extent state */
1088         xfs_exntst_t            oldext; /* old extent state */
1089         xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
1090                                         /* left is 0, right is 1, prev is 2 */
1091         int                     rval=0; /* return value (logging flags) */
1092         int                     state = 0;/* state bits, accessed thru macros */
1093
1094 #define LEFT            r[0]
1095 #define RIGHT           r[1]
1096 #define PREV            r[2]
1097         /*
1098          * Set up a bunch of variables to make the tests simpler.
1099          */
1100         error = 0;
1101         cur = *curp;
1102         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1103         ep = xfs_iext_get_ext(ifp, *idx);
1104         xfs_bmbt_get_all(ep, &PREV);
1105         newext = new->br_state;
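        /*
         * The conversion always flips the extent state, so the extent being
         * modified must currently be in the opposite state to the new one.
         */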
1106         oldext = (newext == XFS_EXT_UNWRITTEN) ?
1107                 XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
1108         ASSERT(PREV.br_state == oldext);
1109         new_endoff = new->br_startoff + new->br_blockcount;
1110         ASSERT(PREV.br_startoff <= new->br_startoff);
1111         ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1112
1113         /*
1114          * Set flags determining what part of the previous oldext allocation
1115          * extent is being replaced by a newext allocation.
1116          */
1117         if (PREV.br_startoff == new->br_startoff)
1118                 state |= BMAP_LEFT_FILLING;
1119         if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1120                 state |= BMAP_RIGHT_FILLING;
1121
1122         /*
1123          * Check and set flags if this segment has a left neighbor.
1124          * Don't set contiguous if the combined extent would be too large.
1125          */
1126         if (*idx > 0) {
1127                 state |= BMAP_LEFT_VALID;
1128                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT);
1129
1130                 if (isnullstartblock(LEFT.br_startblock))
1131                         state |= BMAP_LEFT_DELAY;
1132         }
1133
1134         if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1135             LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1136             LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1137             LEFT.br_state == newext &&
1138             LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1139                 state |= BMAP_LEFT_CONTIG;
1140
1141         /*
1142          * Check and set flags if this segment has a right neighbor.
1143          * Don't set contiguous if the combined extent would be too large.
1144          * Also check for all-three-contiguous being too large.
1145          */
1146         if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
1147                 state |= BMAP_RIGHT_VALID;
1148                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
1149                 if (isnullstartblock(RIGHT.br_startblock))
1150                         state |= BMAP_RIGHT_DELAY;
1151         }
1152
1153         if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1154             new_endoff == RIGHT.br_startoff &&
1155             new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1156             newext == RIGHT.br_state &&
1157             new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1158             ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1159                        BMAP_RIGHT_FILLING)) !=
1160                       (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1161                        BMAP_RIGHT_FILLING) ||
1162              LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1163                         <= MAXEXTLEN))
1164                 state |= BMAP_RIGHT_CONTIG;
1165
1166         /*
1167          * Switch out based on the FILLING and CONTIG state bits.
1168          */
1169         switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1170                          BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1171         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1172              BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1173                 /*
1174                  * Setting all of a previous oldext extent to newext.
1175                  * The left and right neighbors are both contiguous with new.
1176                  */
1177                 --*idx;
1178
1179                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
1180                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
1181                         LEFT.br_blockcount + PREV.br_blockcount +
1182                         RIGHT.br_blockcount);
1183                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
1184
1185                 xfs_iext_remove(ip, *idx + 1, 2, state);
1186                 ip->i_d.di_nextents -= 2;
1187                 if (cur == NULL)
1188                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1189                 else {
1190                         rval = XFS_ILOG_CORE;
1191                         if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
1192                                         RIGHT.br_startblock,
1193                                         RIGHT.br_blockcount, &i)))
1194                                 goto done;
1195                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1196                         if ((error = xfs_btree_delete(cur, &i)))
1197                                 goto done;
1198                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1199                         if ((error = xfs_btree_decrement(cur, 0, &i)))
1200                                 goto done;
1201                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1202                         if ((error = xfs_btree_delete(cur, &i)))
1203                                 goto done;
1204                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1205                         if ((error = xfs_btree_decrement(cur, 0, &i)))
1206                                 goto done;
1207                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1208                         if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
1209                                 LEFT.br_startblock,
1210                                 LEFT.br_blockcount + PREV.br_blockcount +
1211                                 RIGHT.br_blockcount, LEFT.br_state)))
1212                                 goto done;
1213                 }
1214                 break;
1215
1216         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1217                 /*
1218                  * Setting all of a previous oldext extent to newext.
1219                  * The left neighbor is contiguous, the right is not.
1220                  */
1221                 --*idx;
1222
1223                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
1224                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
1225                         LEFT.br_blockcount + PREV.br_blockcount);
1226                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
1227
1228                 xfs_iext_remove(ip, *idx + 1, 1, state);
1229                 ip->i_d.di_nextents--;
1230                 if (cur == NULL)
1231                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1232                 else {
1233                         rval = XFS_ILOG_CORE;
1234                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
1235                                         PREV.br_startblock, PREV.br_blockcount,
1236                                         &i)))
1237                                 goto done;
1238                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1239                         if ((error = xfs_btree_delete(cur, &i)))
1240                                 goto done;
1241                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1242                         if ((error = xfs_btree_decrement(cur, 0, &i)))
1243                                 goto done;
1244                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1245                         if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
1246                                 LEFT.br_startblock,
1247                                 LEFT.br_blockcount + PREV.br_blockcount,
1248                                 LEFT.br_state)))
1249                                 goto done;
1250                 }
1251                 break;
1252
1253         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1254                 /*
1255                  * Setting all of a previous oldext extent to newext.
1256                  * The right neighbor is contiguous, the left is not.
1257                  */
1258                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
1259                 xfs_bmbt_set_blockcount(ep,
1260                         PREV.br_blockcount + RIGHT.br_blockcount);
1261                 xfs_bmbt_set_state(ep, newext);
1262                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
1263                 xfs_iext_remove(ip, *idx + 1, 1, state);
1264                 ip->i_d.di_nextents--;
1265                 if (cur == NULL)
1266                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1267                 else {
1268                         rval = XFS_ILOG_CORE;
1269                         if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
1270                                         RIGHT.br_startblock,
1271                                         RIGHT.br_blockcount, &i)))
1272                                 goto done;
1273                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1274                         if ((error = xfs_btree_delete(cur, &i)))
1275                                 goto done;
1276                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1277                         if ((error = xfs_btree_decrement(cur, 0, &i)))
1278                                 goto done;
1279                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1280                         if ((error = xfs_bmbt_update(cur, new->br_startoff,
1281                                 new->br_startblock,
1282                                 new->br_blockcount + RIGHT.br_blockcount,
1283                                 newext)))
1284                                 goto done;
1285                 }
1286                 break;
1287
1288         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1289                 /*
1290                  * Setting all of a previous oldext extent to newext.
1291                  * Neither the left nor right neighbors are contiguous with
1292                  * the new one.
1293                  */
1294                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
1295                 xfs_bmbt_set_state(ep, newext);
1296                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
1297
1298                 if (cur == NULL)
1299                         rval = XFS_ILOG_DEXT;
1300                 else {
1301                         rval = 0;
1302                         if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
1303                                         new->br_startblock, new->br_blockcount,
1304                                         &i)))
1305                                 goto done;
1306                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1307                         if ((error = xfs_bmbt_update(cur, new->br_startoff,
1308                                 new->br_startblock, new->br_blockcount,
1309                                 newext)))
1310                                 goto done;
1311                 }
1312                 break;
1313
1314         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1315                 /*
1316                  * Setting the first part of a previous oldext extent to newext.
1317                  * The left neighbor is contiguous.
1318                  */
1319                 trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_);
1320                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1),
1321                         LEFT.br_blockcount + new->br_blockcount);
1322                 xfs_bmbt_set_startoff(ep,
1323                         PREV.br_startoff + new->br_blockcount);
1324                 trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_);
1325
1326                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
1327                 xfs_bmbt_set_startblock(ep,
1328                         new->br_startblock + new->br_blockcount);
1329                 xfs_bmbt_set_blockcount(ep,
1330                         PREV.br_blockcount - new->br_blockcount);
1331                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
1332
1333                 --*idx;
1334
1335                 if (cur == NULL)
1336                         rval = XFS_ILOG_DEXT;
1337                 else {
1338                         rval = 0;
1339                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
1340                                         PREV.br_startblock, PREV.br_blockcount,
1341                                         &i)))
1342                                 goto done;
1343                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1344                         if ((error = xfs_bmbt_update(cur,
1345                                 PREV.br_startoff + new->br_blockcount,
1346                                 PREV.br_startblock + new->br_blockcount,
1347                                 PREV.br_blockcount - new->br_blockcount,
1348                                 oldext)))
1349                                 goto done;
1350                         if ((error = xfs_btree_decrement(cur, 0, &i)))
1351                                 goto done;
1352                         if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
1353                                 LEFT.br_startblock,
1354                                 LEFT.br_blockcount + new->br_blockcount,
1355                                 LEFT.br_state)))
1356                                 goto done;
1357                 }
1358                 break;
1359
1360         case BMAP_LEFT_FILLING:
1361                 /*
1362                  * Setting the first part of a previous oldext extent to newext.
1363                  * The left neighbor is not contiguous.
1364                  */
1365                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
1366                 ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
1367                 xfs_bmbt_set_startoff(ep, new_endoff);
1368                 xfs_bmbt_set_blockcount(ep,
1369                         PREV.br_blockcount - new->br_blockcount);
1370                 xfs_bmbt_set_startblock(ep,
1371                         new->br_startblock + new->br_blockcount);
1372                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
1373
1374                 xfs_iext_insert(ip, *idx, 1, new, state);
1375                 ip->i_d.di_nextents++;
1376                 if (cur == NULL)
1377                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1378                 else {
1379                         rval = XFS_ILOG_CORE;
1380                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
1381                                         PREV.br_startblock, PREV.br_blockcount,
1382                                         &i)))
1383                                 goto done;
1384                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1385                         if ((error = xfs_bmbt_update(cur,
1386                                 PREV.br_startoff + new->br_blockcount,
1387                                 PREV.br_startblock + new->br_blockcount,
1388                                 PREV.br_blockcount - new->br_blockcount,
1389                                 oldext)))
1390                                 goto done;
1391                         cur->bc_rec.b = *new;
1392                         if ((error = xfs_btree_insert(cur, &i)))
1393                                 goto done;
1394                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1395                 }
1396                 break;
1397
1398         case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1399                 /*
1400                  * Setting the last part of a previous oldext extent to newext.
1401                  * The right neighbor is contiguous with the new allocation.
1402                  */
1403                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
1404                 xfs_bmbt_set_blockcount(ep,
1405                         PREV.br_blockcount - new->br_blockcount);
1406                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
1407
1408                 ++*idx;
1409
1410                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
1411                 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
1412                         new->br_startoff, new->br_startblock,
1413                         new->br_blockcount + RIGHT.br_blockcount, newext);
1414                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
1415
1416                 if (cur == NULL)
1417                         rval = XFS_ILOG_DEXT;
1418                 else {
1419                         rval = 0;
1420                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
1421                                         PREV.br_startblock,
1422                                         PREV.br_blockcount, &i)))
1423                                 goto done;
1424                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1425                         if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
1426                                 PREV.br_startblock,
1427                                 PREV.br_blockcount - new->br_blockcount,
1428                                 oldext)))
1429                                 goto done;
1430                         if ((error = xfs_btree_increment(cur, 0, &i)))
1431                                 goto done;
1432                         if ((error = xfs_bmbt_update(cur, new->br_startoff,
1433                                 new->br_startblock,
1434                                 new->br_blockcount + RIGHT.br_blockcount,
1435                                 newext)))
1436                                 goto done;
1437                 }
1438                 break;
1439
1440         case BMAP_RIGHT_FILLING:
1441                 /*
1442                  * Setting the last part of a previous oldext extent to newext.
1443                  * The right neighbor is not contiguous.
1444                  */
1445                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
1446                 xfs_bmbt_set_blockcount(ep,
1447                         PREV.br_blockcount - new->br_blockcount);
1448                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
1449
1450                 ++*idx;
1451                 xfs_iext_insert(ip, *idx, 1, new, state);
1452
1453                 ip->i_d.di_nextents++;
1454                 if (cur == NULL)
1455                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1456                 else {
1457                         rval = XFS_ILOG_CORE;
1458                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
1459                                         PREV.br_startblock, PREV.br_blockcount,
1460                                         &i)))
1461                                 goto done;
1462                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1463                         if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
1464                                 PREV.br_startblock,
1465                                 PREV.br_blockcount - new->br_blockcount,
1466                                 oldext)))
1467                                 goto done;
1468                         if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
1469                                         new->br_startblock, new->br_blockcount,
1470                                         &i)))
1471                                 goto done;
1472                         XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1473                         cur->bc_rec.b.br_state = XFS_EXT_NORM;
1474                         if ((error = xfs_btree_insert(cur, &i)))
1475                                 goto done;
1476                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1477                 }
1478                 break;
1479
1480         case 0:
1481                 /*
1482                  * Setting the middle part of a previous oldext extent to
1483                  * newext.  Contiguity is impossible here.
1484                  * One extent becomes three extents.
1485                  */
1486                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
1487                 xfs_bmbt_set_blockcount(ep,
1488                         new->br_startoff - PREV.br_startoff);
1489                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
1490
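                /*
                 * The record left in place at *idx keeps the oldext head of
                 * PREV; r[0] below is the newext middle piece and r[1] the
                 * oldext tail, both inserted after the head.
                 */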
1491                 r[0] = *new;
1492                 r[1].br_startoff = new_endoff;
1493                 r[1].br_blockcount =
1494                         PREV.br_startoff + PREV.br_blockcount - new_endoff;
1495                 r[1].br_startblock = new->br_startblock + new->br_blockcount;
1496                 r[1].br_state = oldext;
1497
1498                 ++*idx;
1499                 xfs_iext_insert(ip, *idx, 2, &r[0], state);
1500
1501                 ip->i_d.di_nextents += 2;
1502                 if (cur == NULL)
1503                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1504                 else {
1505                         rval = XFS_ILOG_CORE;
1506                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
1507                                         PREV.br_startblock, PREV.br_blockcount,
1508                                         &i)))
1509                                 goto done;
1510                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1511                         /* new right extent - oldext */
1512                         if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
1513                                 r[1].br_startblock, r[1].br_blockcount,
1514                                 r[1].br_state)))
1515                                 goto done;
1516                         /* new left extent - oldext */
1517                         cur->bc_rec.b = PREV;
1518                         cur->bc_rec.b.br_blockcount =
1519                                 new->br_startoff - PREV.br_startoff;
1520                         if ((error = xfs_btree_insert(cur, &i)))
1521                                 goto done;
1522                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1523                         /*
1524                          * Reset the cursor to the position of the new extent
1525                          * we are about to insert as we can't trust it after
1526                          * the previous insert.
1527                          */
1528                         if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
1529                                         new->br_startblock, new->br_blockcount,
1530                                         &i)))
1531                                 goto done;
1532                         XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1533                         /* new middle extent - newext */
1534                         cur->bc_rec.b.br_state = new->br_state;
1535                         if ((error = xfs_btree_insert(cur, &i)))
1536                                 goto done;
1537                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1538                 }
1539                 break;
1540
1541         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1542         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1543         case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1544         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1545         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1546         case BMAP_LEFT_CONTIG:
1547         case BMAP_RIGHT_CONTIG:
1548                 /*
1549                  * These cases are all impossible: a neighbour can only be
1550                  * contiguous with the new extent if the new extent fills
1551                  * PREV on that side, so CONTIG implies the matching FILLING.
1552                  */
1551                 ASSERT(0);
1552         }
1553         *curp = cur;
1554 done:
1555         *logflagsp = rval;
1556         return error;
1557 #undef  LEFT
1558 #undef  RIGHT
1559 #undef  PREV
1560 }
1561
1562 /*
1563  * Convert a hole to a delayed allocation.
1564  */
1565 STATIC void
1566 xfs_bmap_add_extent_hole_delay(
1567         xfs_inode_t             *ip,    /* incore inode pointer */
1568         xfs_extnum_t            *idx,   /* extent number to update/insert */
1569         xfs_bmbt_irec_t         *new)   /* new data to add to file extents */
1570 {
1571         xfs_ifork_t             *ifp;   /* inode fork pointer */
1572         xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
1573         xfs_filblks_t           newlen=0;       /* new indirect size */
1574         xfs_filblks_t           oldlen=0;       /* old indirect size */
1575         xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
1576         int                     state;  /* state flags (BMAP_*) */
1577         xfs_filblks_t           temp=0; /* temp for indirect calculations */
1578
1579         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1580         state = 0;
1581         ASSERT(isnullstartblock(new->br_startblock));
1582
1583         /*
1584          * Check and set flags if this segment has a left neighbor
1585          */
1586         if (*idx > 0) {
1587                 state |= BMAP_LEFT_VALID;
1588                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
1589
1590                 if (isnullstartblock(left.br_startblock))
1591                         state |= BMAP_LEFT_DELAY;
1592         }
1593
1594         /*
1595          * Check and set flags if the current (right) segment exists.
1596          * If it doesn't exist, we're converting the hole at end-of-file.
1597          */
1598         if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
1599                 state |= BMAP_RIGHT_VALID;
1600                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
1601
1602                 if (isnullstartblock(right.br_startblock))
1603                         state |= BMAP_RIGHT_DELAY;
1604         }
1605
1606         /*
1607          * Set contiguity flags on the left and right neighbors.
1608          * Don't let extents get too large, even if the pieces are contiguous.
1609          */
1610         if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
1611             left.br_startoff + left.br_blockcount == new->br_startoff &&
1612             left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1613                 state |= BMAP_LEFT_CONTIG;
1614
1615         if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
1616             new->br_startoff + new->br_blockcount == right.br_startoff &&
1617             new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
1618             (!(state & BMAP_LEFT_CONTIG) ||
1619              (left.br_blockcount + new->br_blockcount +
1620               right.br_blockcount <= MAXEXTLEN)))
1621                 state |= BMAP_RIGHT_CONTIG;
1622
1623         /*
1624          * Switch out based on the contiguity flags.
1625          */
1626         switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
1627         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1628                 /*
1629                  * New allocation is contiguous with delayed allocations
1630                  * on the left and on the right.
1631                  * Merge all three into a single extent record.
1632                  */
1633                 --*idx;
1634                 temp = left.br_blockcount + new->br_blockcount +
1635                         right.br_blockcount;
1636
1637                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
1638                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
1639                 oldlen = startblockval(left.br_startblock) +
1640                         startblockval(new->br_startblock) +
1641                         startblockval(right.br_startblock);
1642                 newlen = xfs_bmap_worst_indlen(ip, temp);
1643                 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
1644                         nullstartblock((int)newlen));
1645                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
1646
1647                 xfs_iext_remove(ip, *idx + 1, 1, state);
1648                 break;
1649
1650         case BMAP_LEFT_CONTIG:
1651                 /*
1652                  * New allocation is contiguous with a delayed allocation
1653                  * on the left.
1654                  * Merge the new allocation with the left neighbor.
1655                  */
1656                 --*idx;
1657                 temp = left.br_blockcount + new->br_blockcount;
1658
1659                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
1660                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
1661                 oldlen = startblockval(left.br_startblock) +
1662                         startblockval(new->br_startblock);
1663                 newlen = xfs_bmap_worst_indlen(ip, temp);
1664                 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
1665                         nullstartblock((int)newlen));
1666                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
1667                 break;
1668
1669         case BMAP_RIGHT_CONTIG:
1670                 /*
1671                  * New allocation is contiguous with a delayed allocation
1672                  * on the right.
1673                  * Merge the new allocation with the right neighbor.
1674                  */
1675                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
1676                 temp = new->br_blockcount + right.br_blockcount;
1677                 oldlen = startblockval(new->br_startblock) +
1678                         startblockval(right.br_startblock);
1679                 newlen = xfs_bmap_worst_indlen(ip, temp);
1680                 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
1681                         new->br_startoff,
1682                         nullstartblock((int)newlen), temp, right.br_state);
1683                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
1684                 break;
1685
1686         case 0:
1687                 /*
1688                  * New allocation is not contiguous with another
1689                  * delayed allocation.
1690                  * Insert a new entry.
1691                  */
1692                 oldlen = newlen = 0;
1693                 xfs_iext_insert(ip, *idx, 1, new, state);
1694                 break;
1695         }
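        /*
         * When extents were merged above, the worst-case indirect block
         * reservation for the single merged delalloc extent (newlen) is no
         * larger than the sum of the reservations the pieces were holding
         * (oldlen); the difference is returned to the free block count below.
         */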
1696         if (oldlen != newlen) {
1697                 ASSERT(oldlen > newlen);
1698                 xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
1699                         (int64_t)(oldlen - newlen), 0);
1700                 /*
1701                  * Nothing to do for disk quota accounting here.
1702                  */
1703         }
1704 }
1705
1706 /*
1707  * Called by xfs_bmap_add_extent to handle cases converting a hole
1708  * to a real allocation.
1709  */
1710 STATIC int                              /* error */
1711 xfs_bmap_add_extent_hole_real(
1712         xfs_inode_t             *ip,    /* incore inode pointer */
1713         xfs_extnum_t            *idx,   /* extent number to update/insert */
1714         xfs_btree_cur_t         *cur,   /* if null, not a btree */
1715         xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
1716         int                     *logflagsp, /* inode logging flags */
1717         int                     whichfork) /* data or attr fork */
1718 {
1719         int                     error;  /* error return value */
1720         int                     i;      /* temp state */
1721         xfs_ifork_t             *ifp;   /* inode fork pointer */
1722         xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
1723         xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
1724         int                     rval=0; /* return value (logging flags) */
1725         int                     state;  /* state flags (BMAP_*) */
1726
1727         ifp = XFS_IFORK_PTR(ip, whichfork);
1728         ASSERT(*idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
1729         state = 0;
1730
1731         if (whichfork == XFS_ATTR_FORK)
1732                 state |= BMAP_ATTRFORK;
1733
1734         /*
1735          * Check and set flags if this segment has a left neighbor.
1736          */
1737         if (*idx > 0) {
1738                 state |= BMAP_LEFT_VALID;
1739                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
1740                 if (isnullstartblock(left.br_startblock))
1741                         state |= BMAP_LEFT_DELAY;
1742         }
1743
1744         /*
1745          * Check and set flags if the current (right) segment exists.
1746          * If it doesn't, we're inserting into the "hole" at end-of-file.
1747          */
1748         if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
1749                 state |= BMAP_RIGHT_VALID;
1750                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
1751                 if (isnullstartblock(right.br_startblock))
1752                         state |= BMAP_RIGHT_DELAY;
1753         }
1754
1755         /*
1756          * We're inserting a real allocation between "left" and "right".
1757          * Set the contiguity flags.  Don't let extents get too large.
1758          */
1759         if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1760             left.br_startoff + left.br_blockcount == new->br_startoff &&
1761             left.br_startblock + left.br_blockcount == new->br_startblock &&
1762             left.br_state == new->br_state &&
1763             left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1764                 state |= BMAP_LEFT_CONTIG;
1765
1766         if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1767             new->br_startoff + new->br_blockcount == right.br_startoff &&
1768             new->br_startblock + new->br_blockcount == right.br_startblock &&
1769             new->br_state == right.br_state &&
1770             new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
1771             (!(state & BMAP_LEFT_CONTIG) ||
1772              left.br_blockcount + new->br_blockcount +
1773              right.br_blockcount <= MAXEXTLEN))
1774                 state |= BMAP_RIGHT_CONTIG;
1775
1776         error = 0;
1777         /*
1778          * Select which case we're in here, and implement it.
1779          */
1780         switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
1781         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1782                 /*
1783                  * New allocation is contiguous with real allocations on the
1784                  * left and on the right.
1785                  * Merge all three into a single extent record.
1786                  */
1787                 --*idx;
1788                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
1789                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
1790                         left.br_blockcount + new->br_blockcount +
1791                         right.br_blockcount);
1792                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
1793
1794                 xfs_iext_remove(ip, *idx + 1, 1, state);
1795
1796                 XFS_IFORK_NEXT_SET(ip, whichfork,
1797                         XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
1798                 if (cur == NULL) {
1799                         rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
1800                 } else {
1801                         rval = XFS_ILOG_CORE;
1802                         if ((error = xfs_bmbt_lookup_eq(cur,
1803                                         right.br_startoff,
1804                                         right.br_startblock,
1805                                         right.br_blockcount, &i)))
1806                                 goto done;
1807                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1808                         if ((error = xfs_btree_delete(cur, &i)))
1809                                 goto done;
1810                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1811                         if ((error = xfs_btree_decrement(cur, 0, &i)))
1812                                 goto done;
1813                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1814                         if ((error = xfs_bmbt_update(cur, left.br_startoff,
1815                                         left.br_startblock,
1816                                         left.br_blockcount +
1817                                                 new->br_blockcount +
1818                                                 right.br_blockcount,
1819                                         left.br_state)))
1820                                 goto done;
1821                 }
1822                 break;
1823
1824         case BMAP_LEFT_CONTIG:
1825                 /*
1826                  * New allocation is contiguous with a real allocation
1827                  * on the left.
1828                  * Merge the new allocation with the left neighbor.
1829                  */
1830                 --*idx;
1831                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
1832                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
1833                         left.br_blockcount + new->br_blockcount);
1834                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
1835
1836                 if (cur == NULL) {
1837                         rval = xfs_ilog_fext(whichfork);
1838                 } else {
1839                         rval = 0;
1840                         if ((error = xfs_bmbt_lookup_eq(cur,
1841                                         left.br_startoff,
1842                                         left.br_startblock,
1843                                         left.br_blockcount, &i)))
1844                                 goto done;
1845                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1846                         if ((error = xfs_bmbt_update(cur, left.br_startoff,
1847                                         left.br_startblock,
1848                                         left.br_blockcount +
1849                                                 new->br_blockcount,
1850                                         left.br_state)))
1851                                 goto done;
1852                 }
1853                 break;
1854
1855         case BMAP_RIGHT_CONTIG:
1856                 /*
1857                  * New allocation is contiguous with a real allocation
1858                  * on the right.
1859                  * Merge the new allocation with the right neighbor.
1860                  */
1861                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
1862                 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
1863                         new->br_startoff, new->br_startblock,
1864                         new->br_blockcount + right.br_blockcount,
1865                         right.br_state);
1866                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
1867
1868                 if (cur == NULL) {
1869                         rval = xfs_ilog_fext(whichfork);
1870                 } else {
1871                         rval = 0;
1872                         if ((error = xfs_bmbt_lookup_eq(cur,
1873                                         right.br_startoff,
1874                                         right.br_startblock,
1875                                         right.br_blockcount, &i)))
1876                                 goto done;
1877                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1878                         if ((error = xfs_bmbt_update(cur, new->br_startoff,
1879                                         new->br_startblock,
1880                                         new->br_blockcount +
1881                                                 right.br_blockcount,
1882                                         right.br_state)))
1883                                 goto done;
1884                 }
1885                 break;
1886
1887         case 0:
1888                 /*
1889                  * New allocation is not contiguous with another
1890                  * real allocation.
1891                  * Insert a new entry.
1892                  */
1893                 xfs_iext_insert(ip, *idx, 1, new, state);
1894                 XFS_IFORK_NEXT_SET(ip, whichfork,
1895                         XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
1896                 if (cur == NULL) {
1897                         rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
1898                 } else {
1899                         rval = XFS_ILOG_CORE;
1900                         if ((error = xfs_bmbt_lookup_eq(cur,
1901                                         new->br_startoff,
1902                                         new->br_startblock,
1903                                         new->br_blockcount, &i)))
1904                                 goto done;
1905                         XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1906                         cur->bc_rec.b.br_state = new->br_state;
1907                         if ((error = xfs_btree_insert(cur, &i)))
1908                                 goto done;
1909                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1910                 }
1911                 break;
1912         }
1913 done:
1914         *logflagsp = rval;
1915         return error;
1916 }
1917
1918 /*
1919  * Adjust the size of the new extent based on di_extsize and rt extsize.
1920  */
1921 STATIC int
1922 xfs_bmap_extsize_align(
1923         xfs_mount_t     *mp,
1924         xfs_bmbt_irec_t *gotp,          /* next extent pointer */
1925         xfs_bmbt_irec_t *prevp,         /* previous extent pointer */
1926         xfs_extlen_t    extsz,          /* align to this extent size */
1927         int             rt,             /* is this a realtime inode? */
1928         int             eof,            /* is extent at end-of-file? */
1929         int             delay,          /* creating delalloc extent? */
1930         int             convert,        /* overwriting unwritten extent? */
1931         xfs_fileoff_t   *offp,          /* in/out: aligned offset */
1932         xfs_extlen_t    *lenp)          /* in/out: aligned length */
1933 {
1934         xfs_fileoff_t   orig_off;       /* original offset */
1935         xfs_extlen_t    orig_alen;      /* original length */
1936         xfs_fileoff_t   orig_end;       /* original off+len */
1937         xfs_fileoff_t   nexto;          /* next file offset */
1938         xfs_fileoff_t   prevo;          /* previous file offset */
1939         xfs_fileoff_t   align_off;      /* temp for offset */
1940         xfs_extlen_t    align_alen;     /* temp for length */
1941         xfs_extlen_t    temp;           /* temp for calculations */
1942
1943         if (convert)
1944                 return 0;
1945
1946         orig_off = align_off = *offp;
1947         orig_alen = align_alen = *lenp;
1948         orig_end = orig_off + orig_alen;
1949
1950         /*
1951          * If this request overlaps an existing extent, then don't
1952          * attempt to perform any additional alignment.
1953          */
1954         if (!delay && !eof &&
1955             (orig_off >= gotp->br_startoff) &&
1956             (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
1957                 return 0;
1958         }
1959
1960         /*
1961          * If the file offset is unaligned vs. the extent size
1962          * we need to align it.  This will be possible unless
1963          * the file was previously written with a kernel that didn't
1964          * perform this alignment, or if a truncate shot us in the
1965          * foot.
1966          */
1967         temp = do_mod(orig_off, extsz);
1968         if (temp) {
1969                 align_alen += temp;
1970                 align_off -= temp;
1971         }
1972         /*
1973          * Same adjustment for the end of the requested area.
1974          */
1975         if ((temp = (align_alen % extsz))) {
1976                 align_alen += extsz - temp;
1977         }
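        /*
         * Worked example (illustrative): with extsz = 8, a request for
         * offset 13 and length 10 is widened to align_off = 8 and
         * align_alen = 16, i.e. blocks [8, 24) covering the original
         * [13, 23).
         */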
1978         /*
1979          * If the previous block overlaps with this proposed allocation
1980          * then move the start forward without adjusting the length.
1981          */
1982         if (prevp->br_startoff != NULLFILEOFF) {
1983                 if (prevp->br_startblock == HOLESTARTBLOCK)
1984                         prevo = prevp->br_startoff;
1985                 else
1986                         prevo = prevp->br_startoff + prevp->br_blockcount;
1987         } else
1988                 prevo = 0;
1989         if (align_off != orig_off && align_off < prevo)
1990                 align_off = prevo;
1991         /*
1992          * If the next block overlaps with this proposed allocation
1993          * then move the start back without adjusting the length,
1994          * but not before offset 0.
1995          * This may of course make the start overlap previous block,
1996          * and if we hit the offset 0 limit then the next block
1997          * can still overlap too.
1998          */
1999         if (!eof && gotp->br_startoff != NULLFILEOFF) {
2000                 if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
2001                     (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
2002                         nexto = gotp->br_startoff + gotp->br_blockcount;
2003                 else
2004                         nexto = gotp->br_startoff;
2005         } else
2006                 nexto = NULLFILEOFF;
2007         if (!eof &&
2008             align_off + align_alen != orig_end &&
2009             align_off + align_alen > nexto)
2010                 align_off = nexto > align_alen ? nexto - align_alen : 0;
2011         /*
2012          * If we're now overlapping the next or previous extent that
2013          * means we can't fit an extsz piece in this hole.  Just move
2014          * the start forward to the first valid spot and set
2015          * the length so we hit the end.
2016          */
2017         if (align_off != orig_off && align_off < prevo)
2018                 align_off = prevo;
2019         if (align_off + align_alen != orig_end &&
2020             align_off + align_alen > nexto &&
2021             nexto != NULLFILEOFF) {
2022                 ASSERT(nexto > prevo);
2023                 align_alen = nexto - align_off;
2024         }
2025
2026         /*
2027          * If realtime, and the result isn't a multiple of the realtime
2028          * extent size we need to remove blocks until it is.
2029          */
2030         if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
2031                 /*
2032                  * We're not covering the original request, or
2033                  * we won't be able to once we fix the length.
2034                  */
2035                 if (orig_off < align_off ||
2036                     orig_end > align_off + align_alen ||
2037                     align_alen - temp < orig_alen)
2038                         return XFS_ERROR(EINVAL);
2039                 /*
2040                  * Try to fix it by moving the start up.
2041                  */
2042                 if (align_off + temp <= orig_off) {
2043                         align_alen -= temp;
2044                         align_off += temp;
2045                 }
2046                 /*
2047                  * Try to fix it by moving the end in.
2048                  */
2049                 else if (align_off + align_alen - temp >= orig_end)
2050                         align_alen -= temp;
2051                 /*
2052                  * Set the start to the minimum then trim the length.
2053                  */
2054                 else {
2055                         align_alen -= orig_off - align_off;
2056                         align_off = orig_off;
2057                         align_alen -= align_alen % mp->m_sb.sb_rextsize;
2058                 }
2059                 /*
2060                  * Result doesn't cover the request, fail it.
2061                  */
2062                 if (orig_off < align_off || orig_end > align_off + align_alen)
2063                         return XFS_ERROR(EINVAL);
2064         } else {
2065                 ASSERT(orig_off >= align_off);
2066                 ASSERT(orig_end <= align_off + align_alen);
2067         }
2068
2069 #ifdef DEBUG
2070         if (!eof && gotp->br_startoff != NULLFILEOFF)
2071                 ASSERT(align_off + align_alen <= gotp->br_startoff);
2072         if (prevp->br_startoff != NULLFILEOFF)
2073                 ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
2074 #endif
2075
2076         *lenp = align_alen;
2077         *offp = align_off;
2078         return 0;
2079 }
2080
2081 #define XFS_ALLOC_GAP_UNITS     4
2082
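/*
 * Set ap->rval to a block adjacent to the inode's existing extents so that
 * the allocator tends to place new blocks physically contiguous with the
 * data already on disk.  XFS_ALLOC_GAP_UNITS bounds how large a gap,
 * relative to the allocation length, is still worth accounting for when
 * choosing that target.
 */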
2083 STATIC void
2084 xfs_bmap_adjacent(
2085         xfs_bmalloca_t  *ap)            /* bmap alloc argument struct */
2086 {
2087         xfs_fsblock_t   adjust;         /* adjustment to block numbers */
2088         xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
2089         xfs_mount_t     *mp;            /* mount point structure */
2090         int             nullfb;         /* true if ap->firstblock isn't set */
2091         int             rt;             /* true if inode is realtime */
2092
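        /*
         * ISVALID(x, y): x is usable as an allocation target relative to the
         * known-good block y - on the realtime device it only needs to be
         * inside the device, otherwise it must lie in the same AG as y and
         * within that AG's bounds.
         */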
2093 #define ISVALID(x,y)    \
2094         (rt ? \
2095                 (x) < mp->m_sb.sb_rblocks : \
2096                 XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
2097                 XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
2098                 XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
2099
2100         mp = ap->ip->i_mount;
2101         nullfb = ap->firstblock == NULLFSBLOCK;
2102         rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
2103         fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
2104         /*
2105          * If allocating at eof, and there's a previous real block,
2106          * try to use its last block as our starting point.
2107          */
2108         if (ap->eof && ap->prevp->br_startoff != NULLFILEOFF &&
2109             !isnullstartblock(ap->prevp->br_startblock) &&
2110             ISVALID(ap->prevp->br_startblock + ap->prevp->br_blockcount,
2111                     ap->prevp->br_startblock)) {
2112                 ap->rval = ap->prevp->br_startblock + ap->prevp->br_blockcount;
2113                 /*
2114                  * Adjust for the gap between prevp and us.
2115                  */
2116                 adjust = ap->off -
2117                         (ap->prevp->br_startoff + ap->prevp->br_blockcount);
2118                 if (adjust &&
2119                     ISVALID(ap->rval + adjust, ap->prevp->br_startblock))
2120                         ap->rval += adjust;
2121         }
2122         /*
2123          * If not at eof, then compare the two neighbor blocks.
2124          * Figure out whether either one gives us a good starting point,
2125          * and pick the better one.
2126          */
2127         else if (!ap->eof) {
2128                 xfs_fsblock_t   gotbno;         /* right side block number */
2129                 xfs_fsblock_t   gotdiff=0;      /* right side difference */
2130                 xfs_fsblock_t   prevbno;        /* left side block number */
2131                 xfs_fsblock_t   prevdiff=0;     /* left side difference */
2132
2133                 /*
2134                  * If there's a previous (left) block, select a requested
2135                  * start block based on it.
2136                  */
2137                 if (ap->prevp->br_startoff != NULLFILEOFF &&
2138                     !isnullstartblock(ap->prevp->br_startblock) &&
2139                     (prevbno = ap->prevp->br_startblock +
2140                                ap->prevp->br_blockcount) &&
2141                     ISVALID(prevbno, ap->prevp->br_startblock)) {
2142                         /*
2143                          * Calculate gap to end of previous block.
2144                          */
2145                         adjust = prevdiff = ap->off -
2146                                 (ap->prevp->br_startoff +
2147                                  ap->prevp->br_blockcount);
2148                         /*
2149                          * Figure the startblock based on the previous block's
2150                          * end and the gap size.
2151                          * Heuristic!
2152                          * If the gap is large relative to the piece we're
2153                          * allocating, or using it gives us an invalid block
2154                          * number, then just use the end of the previous block.
2155                          */
2156                         if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->alen &&
2157                             ISVALID(prevbno + prevdiff,
2158                                     ap->prevp->br_startblock))
2159                                 prevbno += adjust;
2160                         else
2161                                 prevdiff += adjust;
2162                         /*
2163                          * If the firstblock forbids it, can't use it,
2164                          * must use default.
2165                          */
2166                         if (!rt && !nullfb &&
2167                             XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
2168                                 prevbno = NULLFSBLOCK;
2169                 }
2170                 /*
2171                  * No previous block or can't follow it, just default.
2172                  */
2173                 else
2174                         prevbno = NULLFSBLOCK;
2175                 /*
2176                  * If there's a following (right) block, select a requested
2177                  * start block based on it.
2178                  */
2179                 if (!isnullstartblock(ap->gotp->br_startblock)) {
2180                         /*
2181                          * Calculate gap to start of next block.
2182                          */
2183                         adjust = gotdiff = ap->gotp->br_startoff - ap->off;
2184                         /*
2185                          * Figure the startblock based on the next block's
2186                          * start and the gap size.
2187                          */
2188                         gotbno = ap->gotp->br_startblock;
2189                         /*
2190                          * Heuristic!
2191                          * If the gap is large relative to the piece we're
2192                          * allocating, or using it gives us an invalid block
2193                          * number, then just use the start of the next block
2194                          * offset by our length.
2195                          */
2196                         if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->alen &&
2197                             ISVALID(gotbno - gotdiff, gotbno))
2198                                 gotbno -= adjust;
2199                         else if (ISVALID(gotbno - ap->alen, gotbno)) {
2200                                 gotbno -= ap->alen;
2201                                 gotdiff += adjust - ap->alen;
2202                         } else
2203                                 gotdiff += adjust;
2204                         /*
2205                          * If the firstblock forbids it, can't use it,
2206                          * must use default.
2207                          */
2208                         if (!rt && !nullfb &&
2209                             XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
2210                                 gotbno = NULLFSBLOCK;
2211                 }
2212                 /*
2213                  * No next block, just default.
2214                  */
2215                 else
2216                         gotbno = NULLFSBLOCK;
2217                 /*
2218                  * If both valid, pick the better one, else the only good
2219                  * one, else ap->rval is already set (to 0 or the inode block).
2220                  */
2221                 if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
2222                         ap->rval = prevdiff <= gotdiff ? prevbno : gotbno;
2223                 else if (prevbno != NULLFSBLOCK)
2224                         ap->rval = prevbno;
2225                 else if (gotbno != NULLFSBLOCK)
2226                         ap->rval = gotbno;
2227         }
2228 #undef ISVALID
2229 }
2230
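/*
 * Allocate space for a realtime file.  The request is aligned to the extent
 * size hint / realtime extent size, a target block is chosen with
 * xfs_bmap_adjacent(), and the allocation itself is done in units of
 * realtime extents via xfs_rtallocate_extent().
 */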
2231 STATIC int
2232 xfs_bmap_rtalloc(
2233         xfs_bmalloca_t  *ap)            /* bmap alloc argument struct */
2234 {
2235         xfs_alloctype_t atype = 0;      /* type for allocation routines */
2236         int             error;          /* error return value */
2237         xfs_mount_t     *mp;            /* mount point structure */
2238         xfs_extlen_t    prod = 0;       /* product factor for allocators */
2239         xfs_extlen_t    ralen = 0;      /* realtime allocation length */
2240         xfs_extlen_t    align;          /* minimum allocation alignment */
2241         xfs_rtblock_t   rtb;
2242
2243         mp = ap->ip->i_mount;
2244         align = xfs_get_extsz_hint(ap->ip);
2245         prod = align / mp->m_sb.sb_rextsize;
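        /*
         * prod is the extent size hint expressed in realtime extents, e.g. a
         * 64 block hint on a filesystem with 16 block rt extents gives
         * prod = 4, so the rt allocator is asked for lengths that are
         * multiples of 4 rtextents.
         */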
2246         error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
2247                                         align, 1, ap->eof, 0,
2248                                         ap->conv, &ap->off, &ap->alen);
2249         if (error)
2250                 return error;
2251         ASSERT(ap->alen);
2252         ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);
2253
2254         /*
2255          * If the offset & length are not perfectly aligned
2256          * then kill prod, it will just get us in trouble.
2257          */
2258         if (do_mod(ap->off, align) || ap->alen % align)
2259                 prod = 1;
2260         /*
2261          * Set ralen to be the actual requested length in rtextents.
2262          */
2263         ralen = ap->alen / mp->m_sb.sb_rextsize;
2264         /*
2265          * If the old value was close enough to MAXEXTLEN that
2266          * we rounded up to it, cut it back so it's valid again.
2267          * Note that a request larger than MAXEXTLEN has already been
2268          * clamped by the caller before we see it, so we never know the
2269          * original size and can't adjust the starting point to match it.
2270          */
2271         if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
2272                 ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
2273
2274         /*
2275          * Lock out other modifications to the RT bitmap inode.
2276          */
2277         xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
2278         xfs_trans_ijoin_ref(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
2279
2280         /*
2281          * If it's an allocation to an empty file at offset 0,
2282          * pick an extent that will space things out in the rt area.
2283          */
2284         if (ap->eof && ap->off == 0) {
2285                 xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */
2286
2287                 error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
2288                 if (error)
2289                         return error;
2290                 ap->rval = rtx * mp->m_sb.sb_rextsize;
2291         } else {
2292                 ap->rval = 0;
2293         }
2294
2295         xfs_bmap_adjacent(ap);
2296
2297         /*
2298          * Realtime allocation, done through xfs_rtallocate_extent.
2299          */
2300         atype = ap->rval == 0 ?  XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO;
2301         do_div(ap->rval, mp->m_sb.sb_rextsize);
2302         rtb = ap->rval;
2303         ap->alen = ralen;
2304         if ((error = xfs_rtallocate_extent(ap->tp, ap->rval, 1, ap->alen,
2305                                 &ralen, atype, ap->wasdel, prod, &rtb)))
2306                 return error;
2307         if (rtb == NULLFSBLOCK && prod > 1 &&
2308             (error = xfs_rtallocate_extent(ap->tp, ap->rval, 1,
2309                                            ap->alen, &ralen, atype,
2310                                            ap->wasdel, 1, &rtb)))
2311                 return error;
2312         ap->rval = rtb;
2313         if (ap->rval != NULLFSBLOCK) {
2314                 ap->rval *= mp->m_sb.sb_rextsize;
2315                 ralen *= mp->m_sb.sb_rextsize;
2316                 ap->alen = ralen;
2317                 ap->ip->i_d.di_nblocks += ralen;
2318                 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
2319                 if (ap->wasdel)
2320                         ap->ip->i_delayed_blks -= ralen;
2321                 /*
2322                  * Adjust the disk quota also. This was reserved
2323                  * earlier.
2324                  */
2325                 xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
2326                         ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
2327                                         XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
2328         } else {
2329                 ap->alen = 0;
2330         }
2331         return 0;
2332 }
2333
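/*
 * Helper for xfs_bmap_btalloc() when no block has been allocated in this
 * transaction yet (ap->firstblock == NULLFSBLOCK): choose the allocation
 * type and scan the AGs to work out a sensible minimum allocation length.
 */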
2334 STATIC int
2335 xfs_bmap_btalloc_nullfb(
2336         struct xfs_bmalloca     *ap,
2337         struct xfs_alloc_arg    *args,
2338         xfs_extlen_t            *blen)
2339 {
2340         struct xfs_mount        *mp = ap->ip->i_mount;
2341         struct xfs_perag        *pag;
2342         xfs_agnumber_t          ag, startag;
2343         int                     notinit = 0;
2344         int                     error;
2345
2346         if (ap->userdata && xfs_inode_is_filestream(ap->ip))
2347                 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2348         else
2349                 args->type = XFS_ALLOCTYPE_START_BNO;
2350         args->total = ap->total;
2351
2352         /*
2353          * Search for an allocation group with a single extent large enough
2354          * for the request.  If one isn't found, then adjust the minimum
2355          * allocation size to the largest space found.
2356          */
2357         startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
2358         if (startag == NULLAGNUMBER)
2359                 startag = ag = 0;
2360
2361         pag = xfs_perag_get(mp, ag);
2362         while (*blen < args->maxlen) {
2363                 if (!pag->pagf_init) {
2364                         error = xfs_alloc_pagf_init(mp, args->tp, ag,
2365                                                     XFS_ALLOC_FLAG_TRYLOCK);
2366                         if (error) {
2367                                 xfs_perag_put(pag);
2368                                 return error;
2369                         }
2370                 }
2371
2372                 /*
2373                  * See xfs_alloc_fix_freelist...
2374                  */
2375                 if (pag->pagf_init) {
2376                         xfs_extlen_t    longest;
2377                         longest = xfs_alloc_longest_free_extent(mp, pag);
2378                         if (*blen < longest)
2379                                 *blen = longest;
2380                 } else
2381                         notinit = 1;
2382
2383                 if (xfs_inode_is_filestream(ap->ip)) {
2384                         if (*blen >= args->maxlen)
2385                                 break;
2386
2387                         if (ap->userdata) {
2388                                 /*
2389                                  * If startag is an invalid AG, we've
2390                                  * come here once before and
2391                                  * xfs_filestream_new_ag picked the
2392                                  * best currently available.
2393                                  *
2394                                  * Don't continue looping, since we
2395                                  * could loop forever.
2396                                  */
2397                                 if (startag == NULLAGNUMBER)
2398                                         break;
2399
2400                                 error = xfs_filestream_new_ag(ap, &ag);
2401                                 xfs_perag_put(pag);
2402                                 if (error)
2403                                         return error;
2404
2405                                 /* loop again to set 'blen' */
2406                                 startag = NULLAGNUMBER;
2407                                 pag = xfs_perag_get(mp, ag);
2408                                 continue;
2409                         }
2410                 }
2411                 if (++ag == mp->m_sb.sb_agcount)
2412                         ag = 0;
2413                 if (ag == startag)
2414                         break;
2415                 xfs_perag_put(pag);
2416                 pag = xfs_perag_get(mp, ag);
2417         }
2418         xfs_perag_put(pag);
2419
2420         /*
2421          * Since the above loop used a trylock on the AGF buffers, some
2422          * AGs may have been skipped, so there may still be space here.
2423          */
2424         if (notinit || *blen < ap->minlen)
2425                 args->minlen = ap->minlen;
2426         /*
2427          * If the best seen length is less than the request
2428          * length, use the best as the minimum.
2429          */
2430         else if (*blen < args->maxlen)
2431                 args->minlen = *blen;
2432         /*
2433          * Otherwise we've seen an extent as big as maxlen,
2434          * use that as the minimum.
2435          */
2436         else
2437                 args->minlen = args->maxlen;
2438
2439         /*
2440          * set the failure fallback case to look in the selected
2441          * AG as the stream may have moved.
2442          */
2443         if (xfs_inode_is_filestream(ap->ip))
2444                 ap->rval = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
2445
2446         return 0;
2447 }
2448
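/*
 * Allocate space on the regular (non-realtime) data device via
 * xfs_alloc_vextent().  Applies extent size hints and stripe alignment,
 * then falls back through progressively less restrictive allocation
 * attempts if the initial request cannot be satisfied.
 */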
2449 STATIC int
2450 xfs_bmap_btalloc(
2451         xfs_bmalloca_t  *ap)            /* bmap alloc argument struct */
2452 {
2453         xfs_mount_t     *mp;            /* mount point structure */
2454         xfs_alloctype_t atype = 0;      /* type for allocation routines */
2455         xfs_extlen_t    align;          /* minimum allocation alignment */
2456         xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
2457         xfs_agnumber_t  ag;
2458         xfs_alloc_arg_t args;
2459         xfs_extlen_t    blen;
2460         xfs_extlen_t    nextminlen = 0;
2461         int             nullfb;         /* true if ap->firstblock isn't set */
2462         int             isaligned;
2463         int             tryagain;
2464         int             error;
2465
2466         mp = ap->ip->i_mount;
2467         align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
2468         if (unlikely(align)) {
2469                 error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
2470                                                 align, 0, ap->eof, 0, ap->conv,
2471                                                 &ap->off, &ap->alen);
2472                 ASSERT(!error);
2473                 ASSERT(ap->alen);
2474         }
2475         nullfb = ap->firstblock == NULLFSBLOCK;
2476         fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
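        /*
         * Pick an initial target block: with no firstblock yet, start near
         * the inode (or in the filestream's AG for filestream userdata);
         * otherwise start from the previously chosen firstblock.
         */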
2477         if (nullfb) {
2478                 if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
2479                         ag = xfs_filestream_lookup_ag(ap->ip);
2480                         ag = (ag != NULLAGNUMBER) ? ag : 0;
2481                         ap->rval = XFS_AGB_TO_FSB(mp, ag, 0);
2482                 } else {
2483                         ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
2484                 }
2485         } else
2486                 ap->rval = ap->firstblock;
2487
2488         xfs_bmap_adjacent(ap);
2489
2490         /*
2491          * If allowed, use ap->rval; otherwise must use firstblock since
2492          * it's in the right allocation group.
2493          */
2494         if (nullfb || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno)
2495                 ;
2496         else
2497                 ap->rval = ap->firstblock;
2498         /*
2499          * Normal allocation, done through xfs_alloc_vextent.
2500          */
2501         tryagain = isaligned = 0;
2502         args.tp = ap->tp;
2503         args.mp = mp;
2504         args.fsbno = ap->rval;
2505
2506         /* Trim the allocation back to the maximum an AG can fit. */
2507         args.maxlen = MIN(ap->alen, XFS_ALLOC_AG_MAX_USABLE(mp));
2508         args.firstblock = ap->firstblock;
2509         blen = 0;
2510         if (nullfb) {
2511                 error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
2512                 if (error)
2513                         return error;
2514         } else if (ap->low) {
2515                 if (xfs_inode_is_filestream(ap->ip))
2516                         args.type = XFS_ALLOCTYPE_FIRST_AG;
2517                 else
2518                         args.type = XFS_ALLOCTYPE_START_BNO;
2519                 args.total = args.minlen = ap->minlen;
2520         } else {
2521                 args.type = XFS_ALLOCTYPE_NEAR_BNO;
2522                 args.total = ap->total;
2523                 args.minlen = ap->minlen;
2524         }
2525         /* apply extent size hints if obtained earlier */
2526         if (unlikely(align)) {
2527                 args.prod = align;
2528                 if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
2529                         args.mod = (xfs_extlen_t)(args.prod - args.mod);
2530         } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) {
2531                 args.prod = 1;
2532                 args.mod = 0;
2533         } else {
2534                 args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog;
2535                 if ((args.mod = (xfs_extlen_t)(do_mod(ap->off, args.prod))))
2536                         args.mod = (xfs_extlen_t)(args.prod - args.mod);
2537         }
2538         /*
2539          * If we are not low on available data blocks, and the
2540          * underlying logical volume manager is a stripe, and
2541          * the file offset is zero then try to allocate data
2542          * blocks on stripe unit boundary.
2543          * NOTE: ap->aeof is only set if the allocation length
2544          * is >= the stripe unit and the allocation offset is
2545          * at the end of file.
2546          */
2547         if (!ap->low && ap->aeof) {
2548                 if (!ap->off) {
2549                         args.alignment = mp->m_dalign;
2550                         atype = args.type;
2551                         isaligned = 1;
2552                         /*
2553                          * Adjust for alignment
2554                          */
2555                         if (blen > args.alignment && blen <= args.maxlen)
2556                                 args.minlen = blen - args.alignment;
2557                         args.minalignslop = 0;
2558                 } else {
2559                         /*
2560                          * First try an exact bno allocation.
2561                          * If it fails then do a near or start bno
2562                          * allocation with alignment turned on.
2563                          */
2564                         atype = args.type;
2565                         tryagain = 1;
2566                         args.type = XFS_ALLOCTYPE_THIS_BNO;
2567                         args.alignment = 1;
2568                         /*
2569                          * Compute the minlen+alignment for the
2570                          * next case.  Set slop so that the value
2571                          * of minlen+alignment+slop doesn't go up
2572                          * between the calls.
2573                          */
2574                         if (blen > mp->m_dalign && blen <= args.maxlen)
2575                                 nextminlen = blen - mp->m_dalign;
2576                         else
2577                                 nextminlen = args.minlen;
2578                         if (nextminlen + mp->m_dalign > args.minlen + 1)
2579                                 args.minalignslop =
2580                                         nextminlen + mp->m_dalign -
2581                                         args.minlen - 1;
2582                         else
2583                                 args.minalignslop = 0;
2584                 }
2585         } else {
2586                 args.alignment = 1;
2587                 args.minalignslop = 0;
2588         }
2589         args.minleft = ap->minleft;
2590         args.wasdel = ap->wasdel;
2591         args.isfl = 0;
2592         args.userdata = ap->userdata;
2593         if ((error = xfs_alloc_vextent(&args)))
2594                 return error;
2595         if (tryagain && args.fsbno == NULLFSBLOCK) {
2596                 /*
2597                  * Exact allocation failed. Now try with alignment
2598                  * turned on.
2599                  */
2600                 args.type = atype;
2601                 args.fsbno = ap->rval;
2602                 args.alignment = mp->m_dalign;
2603                 args.minlen = nextminlen;
2604                 args.minalignslop = 0;
2605                 isaligned = 1;
2606                 if ((error = xfs_alloc_vextent(&args)))
2607                         return error;
2608         }
2609         if (isaligned && args.fsbno == NULLFSBLOCK) {
2610                 /*
2611                  * allocation failed, so turn off alignment and
2612                  * try again.
2613                  */
2614                 args.type = atype;
2615                 args.fsbno = ap->rval;
2616                 args.alignment = 0;
2617                 if ((error = xfs_alloc_vextent(&args)))
2618                         return error;
2619         }
2620         if (args.fsbno == NULLFSBLOCK && nullfb &&
2621             args.minlen > ap->minlen) {
2622                 args.minlen = ap->minlen;
2623                 args.type = XFS_ALLOCTYPE_START_BNO;
2624                 args.fsbno = ap->rval;
2625                 if ((error = xfs_alloc_vextent(&args)))
2626                         return error;
2627         }
2628         if (args.fsbno == NULLFSBLOCK && nullfb) {
2629                 args.fsbno = 0;
2630                 args.type = XFS_ALLOCTYPE_FIRST_AG;
2631                 args.total = ap->minlen;
2632                 args.minleft = 0;
2633                 if ((error = xfs_alloc_vextent(&args)))
2634                         return error;
2635                 ap->low = 1;
2636         }
2637         if (args.fsbno != NULLFSBLOCK) {
2638                 ap->firstblock = ap->rval = args.fsbno;
2639                 ASSERT(nullfb || fb_agno == args.agno ||
2640                        (ap->low && fb_agno < args.agno));
2641                 ap->alen = args.len;
2642                 ap->ip->i_d.di_nblocks += args.len;
2643                 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
2644                 if (ap->wasdel)
2645                         ap->ip->i_delayed_blks -= args.len;
2646                 /*
2647                  * Adjust the disk quota also. This was reserved
2648                  * earlier.
2649                  */
2650                 xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
2651                         ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
2652                                         XFS_TRANS_DQ_BCOUNT,
2653                         (long) args.len);
2654         } else {
2655                 ap->rval = NULLFSBLOCK;
2656                 ap->alen = 0;
2657         }
2658         return 0;
2659 }
2660
2661 /*
2662  * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
2663  * It figures out where to ask the underlying allocator to put the new extent.
2664  */
2665 STATIC int
2666 xfs_bmap_alloc(
2667         xfs_bmalloca_t  *ap)            /* bmap alloc argument struct */
2668 {
2669         if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata)
2670                 return xfs_bmap_rtalloc(ap);
2671         return xfs_bmap_btalloc(ap);
2672 }
2673
2674 /*
2675  * Transform a btree format file with only one leaf node, where the
2676  * extents list will fit in the inode, into an extents format file.
2677  * Since the file extents are already in-core, all we have to do is
2678  * give up the space for the btree root and pitch the leaf block.
2679  */
2680 STATIC int                              /* error */
2681 xfs_bmap_btree_to_extents(
2682         xfs_trans_t             *tp,    /* transaction pointer */
2683         xfs_inode_t             *ip,    /* incore inode pointer */
2684         xfs_btree_cur_t         *cur,   /* btree cursor */
2685         int                     *logflagsp, /* inode logging flags */
2686         int                     whichfork)  /* data or attr fork */
2687 {
2688         /* REFERENCED */
2689         struct xfs_btree_block  *cblock;/* child btree block */
2690         xfs_fsblock_t           cbno;   /* child block number */
2691         xfs_buf_t               *cbp;   /* child block's buffer */
2692         int                     error;  /* error return value */
2693         xfs_ifork_t             *ifp;   /* inode fork data */
2694         xfs_mount_t             *mp;    /* mount point structure */
2695         __be64                  *pp;    /* ptr to block address */
2696         struct xfs_btree_block  *rblock;/* root btree block */
2697
2698         mp = ip->i_mount;
2699         ifp = XFS_IFORK_PTR(ip, whichfork);
2700         ASSERT(ifp->if_flags & XFS_IFEXTENTS);
2701         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
2702         rblock = ifp->if_broot;
2703         ASSERT(be16_to_cpu(rblock->bb_level) == 1);
2704         ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
2705         ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
2706         pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
2707         cbno = be64_to_cpu(*pp);
2708         *logflagsp = 0;
2709 #ifdef DEBUG
2710         if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
2711                 return error;
2712 #endif
2713         if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp,
2714                         XFS_BMAP_BTREE_REF)))
2715                 return error;
2716         cblock = XFS_BUF_TO_BLOCK(cbp);
2717         if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
2718                 return error;
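        /*
         * The single leaf block is no longer needed: queue it for freeing,
         * back out its block and quota accounting, and invalidate its buffer.
         */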
2719         xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
2720         ip->i_d.di_nblocks--;
2721         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
2722         xfs_trans_binval(tp, cbp);
2723         if (cur->bc_bufs[0] == cbp)
2724                 cur->bc_bufs[0] = NULL;
2725         xfs_iroot_realloc(ip, -1, whichfork);
2726         ASSERT(ifp->if_broot == NULL);
2727         ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
2728         XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
2729         *logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2730         return 0;
2731 }
2732
2733 /*
2734  * Called by xfs_bmapi to update file extent records and the btree
2735  * after removing space (or undoing a delayed allocation).
2736  */
2737 STATIC int                              /* error */
2738 xfs_bmap_del_extent(
2739         xfs_inode_t             *ip,    /* incore inode pointer */
2740         xfs_trans_t             *tp,    /* current transaction pointer */
2741         xfs_extnum_t            *idx,   /* extent number to update/delete */
2742         xfs_bmap_free_t         *flist, /* list of extents to be freed */
2743         xfs_btree_cur_t         *cur,   /* if null, not a btree */
2744         xfs_bmbt_irec_t         *del,   /* data to remove from extents */
2745         int                     *logflagsp, /* inode logging flags */
2746         int                     whichfork) /* data or attr fork */
2747 {
2748         xfs_filblks_t           da_new; /* new delay-alloc indirect blocks */
2749         xfs_filblks_t           da_old; /* old delay-alloc indirect blocks */
2750         xfs_fsblock_t           del_endblock=0; /* first block past del */
2751         xfs_fileoff_t           del_endoff;     /* first offset past del */
2752         int                     delay;  /* current block is delayed allocated */
2753         int                     do_fx;  /* free extent at end of routine */
2754         xfs_bmbt_rec_host_t     *ep;    /* current extent entry pointer */
2755         int                     error;  /* error return value */
2756         int                     flags;  /* inode logging flags */
2757         xfs_bmbt_irec_t         got;    /* current extent entry */
2758         xfs_fileoff_t           got_endoff;     /* first offset past got */
2759         int                     i;      /* temp state */
2760         xfs_ifork_t             *ifp;   /* inode fork pointer */
2761         xfs_mount_t             *mp;    /* mount structure */
2762         xfs_filblks_t           nblks;  /* quota/sb block count */
2763         xfs_bmbt_irec_t         new;    /* new record to be inserted */
2764         /* REFERENCED */
2765         uint                    qfield; /* quota field to update */
2766         xfs_filblks_t           temp;   /* for indirect length calculations */
2767         xfs_filblks_t           temp2;  /* for indirect length calculations */
2768         int                     state = 0;
2769
2770         XFS_STATS_INC(xs_del_exlist);
2771
2772         if (whichfork == XFS_ATTR_FORK)
2773                 state |= BMAP_ATTRFORK;
2774
2775         mp = ip->i_mount;
2776         ifp = XFS_IFORK_PTR(ip, whichfork);
2777         ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
2778                 (uint)sizeof(xfs_bmbt_rec_t)));
2779         ASSERT(del->br_blockcount > 0);
2780         ep = xfs_iext_get_ext(ifp, *idx);
2781         xfs_bmbt_get_all(ep, &got);
2782         ASSERT(got.br_startoff <= del->br_startoff);
2783         del_endoff = del->br_startoff + del->br_blockcount;
2784         got_endoff = got.br_startoff + got.br_blockcount;
2785         ASSERT(got_endoff >= del_endoff);
2786         delay = isnullstartblock(got.br_startblock);
2787         ASSERT(isnullstartblock(del->br_startblock) == delay);
2788         flags = 0;
2789         qfield = 0;
2790         error = 0;
2791         /*
2792          * If deleting a real allocation, must free up the disk space.
2793          */
2794         if (!delay) {
2795                 flags = XFS_ILOG_CORE;
2796                 /*
2797                  * Realtime allocation.  Free it and record di_nblocks update.
2798                  */
2799                 if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
2800                         xfs_fsblock_t   bno;
2801                         xfs_filblks_t   len;
2802
2803                         ASSERT(do_mod(del->br_blockcount,
2804                                       mp->m_sb.sb_rextsize) == 0);
2805                         ASSERT(do_mod(del->br_startblock,
2806                                       mp->m_sb.sb_rextsize) == 0);
2807                         bno = del->br_startblock;
2808                         len = del->br_blockcount;
2809                         do_div(bno, mp->m_sb.sb_rextsize);
2810                         do_div(len, mp->m_sb.sb_rextsize);
2811                         error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
2812                         if (error)
2813                                 goto done;
2814                         do_fx = 0;
2815                         nblks = len * mp->m_sb.sb_rextsize;
2816                         qfield = XFS_TRANS_DQ_RTBCOUNT;
2817                 }
2818                 /*
2819                  * Ordinary allocation.
2820                  */
2821                 else {
2822                         do_fx = 1;
2823                         nblks = del->br_blockcount;
2824                         qfield = XFS_TRANS_DQ_BCOUNT;
2825                 }
2826                 /*
2827                  * Set up del_endblock and cur for later.
2828                  */
2829                 del_endblock = del->br_startblock + del->br_blockcount;
2830                 if (cur) {
2831                         if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
2832                                         got.br_startblock, got.br_blockcount,
2833                                         &i)))
2834                                 goto done;
2835                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2836                 }
2837                 da_old = da_new = 0;
2838         } else {
2839                 da_old = startblockval(got.br_startblock);
2840                 da_new = 0;
2841                 nblks = 0;
2842                 do_fx = 0;
2843         }
2844         /*
2845          * Set flag value to use in switch statement.
2846          * Deleting from the extent's start adds 2, deleting to its end adds 1.
2847          */
2848         switch (((got.br_startoff == del->br_startoff) << 1) |
2849                 (got_endoff == del_endoff)) {
2850         case 3:
2851                 /*
2852                  * Matches the whole extent.  Delete the entry.
2853                  */
2854                 xfs_iext_remove(ip, *idx, 1,
2855                                 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
2856                 --*idx;
2857                 if (delay)
2858                         break;
2859
2860                 XFS_IFORK_NEXT_SET(ip, whichfork,
2861                         XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2862                 flags |= XFS_ILOG_CORE;
2863                 if (!cur) {
2864                         flags |= xfs_ilog_fext(whichfork);
2865                         break;
2866                 }
2867                 if ((error = xfs_btree_delete(cur, &i)))
2868                         goto done;
2869                 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2870                 break;
2871
2872         case 2:
2873                 /*
2874                  * Deleting the first part of the extent.
2875                  */
2876                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2877                 xfs_bmbt_set_startoff(ep, del_endoff);
2878                 temp = got.br_blockcount - del->br_blockcount;
2879                 xfs_bmbt_set_blockcount(ep, temp);
2880                 if (delay) {
2881                         temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2882                                 da_old);
2883                         xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
2884                         trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2885                         da_new = temp;
2886                         break;
2887                 }
2888                 xfs_bmbt_set_startblock(ep, del_endblock);
2889                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2890                 if (!cur) {
2891                         flags |= xfs_ilog_fext(whichfork);
2892                         break;
2893                 }
2894                 if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock,
2895                                 got.br_blockcount - del->br_blockcount,
2896                                 got.br_state)))
2897                         goto done;
2898                 break;
2899
2900         case 1:
2901                 /*
2902                  * Deleting the last part of the extent.
2903                  */
2904                 temp = got.br_blockcount - del->br_blockcount;
2905                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2906                 xfs_bmbt_set_blockcount(ep, temp);
2907                 if (delay) {
2908                         temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2909                                 da_old);
2910                         xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
2911                         trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2912                         da_new = temp;
2913                         break;
2914                 }
2915                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2916                 if (!cur) {
2917                         flags |= xfs_ilog_fext(whichfork);
2918                         break;
2919                 }
2920                 if ((error = xfs_bmbt_update(cur, got.br_startoff,
2921                                 got.br_startblock,
2922                                 got.br_blockcount - del->br_blockcount,
2923                                 got.br_state)))
2924                         goto done;
2925                 break;
2926
2927         case 0:
2928                 /*
2929                  * Deleting the middle of the extent.
2930                  */
2931                 temp = del->br_startoff - got.br_startoff;
2932                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2933                 xfs_bmbt_set_blockcount(ep, temp);
2934                 new.br_startoff = del_endoff;
2935                 temp2 = got_endoff - del_endoff;
2936                 new.br_blockcount = temp2;
2937                 new.br_state = got.br_state;
2938                 if (!delay) {
2939                         new.br_startblock = del_endblock;
2940                         flags |= XFS_ILOG_CORE;
2941                         if (cur) {
2942                                 if ((error = xfs_bmbt_update(cur,
2943                                                 got.br_startoff,
2944                                                 got.br_startblock, temp,
2945                                                 got.br_state)))
2946                                         goto done;
2947                                 if ((error = xfs_btree_increment(cur, 0, &i)))
2948                                         goto done;
2949                                 cur->bc_rec.b = new;
2950                                 error = xfs_btree_insert(cur, &i);
2951                                 if (error && error != ENOSPC)
2952                                         goto done;
2953                                 /*
2954                                  * If we get ENOSPC back from the btree
2955                                  * insert, it tried a split and we have a
2956                                  * zero block reservation.
2957                                  * Fix up our state and return the error.
2958                                  */
2959                                 if (error == ENOSPC) {
2960                                         /*
2961                                          * Reset the cursor, don't trust
2962                                          * it after any insert operation.
2963                                          */
2964                                         if ((error = xfs_bmbt_lookup_eq(cur,
2965                                                         got.br_startoff,
2966                                                         got.br_startblock,
2967                                                         temp, &i)))
2968                                                 goto done;
2969                                         XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2970                                         /*
2971                                          * Update the btree record back
2972                                          * to the original value.
2973                                          */
2974                                         if ((error = xfs_bmbt_update(cur,
2975                                                         got.br_startoff,
2976                                                         got.br_startblock,
2977                                                         got.br_blockcount,
2978                                                         got.br_state)))
2979                                                 goto done;
2980                                         /*
2981                                          * Reset the extent record back
2982                                          * to the original value.
2983                                          */
2984                                         xfs_bmbt_set_blockcount(ep,
2985                                                 got.br_blockcount);
2986                                         flags = 0;
2987                                         error = XFS_ERROR(ENOSPC);
2988                                         goto done;
2989                                 }
2990                                 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2991                         } else
2992                                 flags |= xfs_ilog_fext(whichfork);
2993                         XFS_IFORK_NEXT_SET(ip, whichfork,
2994                                 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2995                 } else {
2996                         ASSERT(whichfork == XFS_DATA_FORK);
2997                         temp = xfs_bmap_worst_indlen(ip, temp);
2998                         xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
2999                         temp2 = xfs_bmap_worst_indlen(ip, temp2);
3000                         new.br_startblock = nullstartblock((int)temp2);
3001                         da_new = temp + temp2;
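                        /*
                         * Never reserve more indirect blocks than the
                         * original delayed extent had: trim the worst-case
                         * estimates for the two pieces back down to da_old.
                         */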
3002                         while (da_new > da_old) {
3003                                 if (temp) {
3004                                         temp--;
3005                                         da_new--;
3006                                         xfs_bmbt_set_startblock(ep,
3007                                                 nullstartblock((int)temp));
3008                                 }
3009                                 if (da_new == da_old)
3010                                         break;
3011                                 if (temp2) {
3012                                         temp2--;
3013                                         da_new--;
3014                                         new.br_startblock =
3015                                                 nullstartblock((int)temp2);
3016                                 }
3017                         }
3018                 }
3019                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
3020                 xfs_iext_insert(ip, *idx + 1, 1, &new, state);
3021                 ++*idx;
3022                 break;
3023         }
3024         /*
3025          * If we need to, add to list of extents to delete.
3026          */
3027         if (do_fx)
3028                 xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist,
3029                         mp);
3030         /*
3031          * Adjust inode # blocks in the file.
3032          */
3033         if (nblks)
3034                 ip->i_d.di_nblocks -= nblks;
3035         /*
3036          * Adjust quota data.
3037          */
3038         if (qfield)
3039                 xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
3040
3041         /*
3042          * Account for change in delayed indirect blocks.
3043          * Nothing to do for disk quota accounting here.
3044          */
3045         ASSERT(da_old >= da_new);
3046         if (da_old > da_new) {
3047                 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
3048                         (int64_t)(da_old - da_new), 0);
3049         }
3050 done:
3051         *logflagsp = flags;
3052         return error;
3053 }
3054
3055 /*
3056  * Remove the entry "free" from the free item list.  Prev points to the
3057  * previous entry, unless "free" is the head of the list.
3058  */
3059 STATIC void
3060 xfs_bmap_del_free(
3061         xfs_bmap_free_t         *flist, /* free item list header */
3062         xfs_bmap_free_item_t    *prev,  /* previous item on list, if any */
3063         xfs_bmap_free_item_t    *free)  /* list item to be freed */
3064 {
3065         if (prev)
3066                 prev->xbfi_next = free->xbfi_next;
3067         else
3068                 flist->xbf_first = free->xbfi_next;
3069         flist->xbf_count--;
3070         kmem_zone_free(xfs_bmap_free_item_zone, free);
3071 }
3072
3073 /*
3074  * Convert an extents-format file into a btree-format file.
3075  * The new file will have a root block (in the inode) and a single child block.
3076  */
3077 STATIC int                                      /* error */
3078 xfs_bmap_extents_to_btree(
3079         xfs_trans_t             *tp,            /* transaction pointer */
3080         xfs_inode_t             *ip,            /* incore inode pointer */
3081         xfs_fsblock_t           *firstblock,    /* first-block-allocated */
3082         xfs_bmap_free_t         *flist,         /* blocks freed in xaction */
3083         xfs_btree_cur_t         **curp,         /* cursor returned to caller */
3084         int                     wasdel,         /* converting a delayed alloc */
3085         int                     *logflagsp,     /* inode logging flags */
3086         int                     whichfork)      /* data or attr fork */
3087 {
3088         struct xfs_btree_block  *ablock;        /* allocated (child) bt block */
3089         xfs_buf_t               *abp;           /* buffer for ablock */
3090         xfs_alloc_arg_t         args;           /* allocation arguments */
3091         xfs_bmbt_rec_t          *arp;           /* child record pointer */
3092         struct xfs_btree_block  *block;         /* btree root block */
3093         xfs_btree_cur_t         *cur;           /* bmap btree cursor */
3094         xfs_bmbt_rec_host_t     *ep;            /* extent record pointer */
3095         int                     error;          /* error return value */
3096         xfs_extnum_t            i, cnt;         /* extent record index */
3097         xfs_ifork_t             *ifp;           /* inode fork pointer */
3098         xfs_bmbt_key_t          *kp;            /* root block key pointer */
3099         xfs_mount_t             *mp;            /* mount structure */
3100         xfs_extnum_t            nextents;       /* number of file extents */
3101         xfs_bmbt_ptr_t          *pp;            /* root block address pointer */
3102
3103         ifp = XFS_IFORK_PTR(ip, whichfork);
3104         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
3105         ASSERT(ifp->if_ext_max ==
3106                XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
3107         /*
3108          * Make space in the inode incore.
3109          */
3110         xfs_iroot_realloc(ip, 1, whichfork);
3111         ifp->if_flags |= XFS_IFBROOT;
3112
3113         /*
3114          * Fill in the root.
3115          */
3116         block = ifp->if_broot;
3117         block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
3118         block->bb_level = cpu_to_be16(1);
3119         block->bb_numrecs = cpu_to_be16(1);
3120         block->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
3121         block->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
3122
3123         /*
3124          * Need a cursor.  Can't allocate until bb_level is filled in.
3125          */
3126         mp = ip->i_mount;
3127         cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
3128         cur->bc_private.b.firstblock = *firstblock;
3129         cur->bc_private.b.flist = flist;
3130         cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
3131         /*
3132          * Convert to a btree with two levels, one record in root.
3133          */
3134         XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
3135         args.tp = tp;
3136         args.mp = mp;
3137         args.firstblock = *firstblock;
3138         if (*firstblock == NULLFSBLOCK) {
3139                 args.type = XFS_ALLOCTYPE_START_BNO;
3140                 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
3141         } else if (flist->xbf_low) {
3142                 args.type = XFS_ALLOCTYPE_START_BNO;
3143                 args.fsbno = *firstblock;
3144         } else {
3145                 args.type = XFS_ALLOCTYPE_NEAR_BNO;
3146                 args.fsbno = *firstblock;
3147         }
3148         args.minlen = args.maxlen = args.prod = 1;
3149         args.total = args.minleft = args.alignment = args.mod = args.isfl =
3150                 args.minalignslop = 0;
3151         args.wasdel = wasdel;
3152         *logflagsp = 0;
3153         if ((error = xfs_alloc_vextent(&args))) {
3154                 xfs_iroot_realloc(ip, -1, whichfork);
3155                 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
3156                 return error;
3157         }
3158         /*
3159          * Allocation can't fail, the space was reserved.
3160          */
3161         ASSERT(args.fsbno != NULLFSBLOCK);
3162         ASSERT(*firstblock == NULLFSBLOCK ||
3163                args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
3164                (flist->xbf_low &&
3165                 args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
3166         *firstblock = cur->bc_private.b.firstblock = args.fsbno;
3167         cur->bc_private.b.allocated++;
3168         ip->i_d.di_nblocks++;
3169         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
3170         abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
3171         /*
3172          * Fill in the child block.
3173          */
3174         ablock = XFS_BUF_TO_BLOCK(abp);
3175         ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
3176         ablock->bb_level = 0;
3177         ablock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
3178         ablock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
3179         arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
3180         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
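        /*
         * Copy the in-core extent records into the new leaf block, skipping
         * delayed allocations (null startblock) since they have no on-disk
         * existence yet.
         */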
3181         for (cnt = i = 0; i < nextents; i++) {
3182                 ep = xfs_iext_get_ext(ifp, i);
3183                 if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
3184                         arp->l0 = cpu_to_be64(ep->l0);
3185                         arp->l1 = cpu_to_be64(ep->l1);
3186                         arp++; cnt++;
3187                 }
3188         }
3189         ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
3190         xfs_btree_set_numrecs(ablock, cnt);
3191
3192         /*
3193          * Fill in the root key and pointer.
3194          */
3195         kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
3196         arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
3197         kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
3198         pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
3199                                                 be16_to_cpu(block->bb_level)));
3200         *pp = cpu_to_be64(args.fsbno);
3201
3202         /*
3203          * Do all this logging at the end so that
3204          * the root is at the right level.
3205          */
3206         xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
3207         xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
3208         ASSERT(*curp == NULL);
3209         *curp = cur;
3210         *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
3211         return 0;
3212 }
3213
3214 /*
3215  * Calculate the default attribute fork offset for newly created inodes.
3216  */
3217 uint
3218 xfs_default_attroffset(
3219         struct xfs_inode        *ip)
3220 {
3221         struct xfs_mount        *mp = ip->i_mount;
3222         uint                    offset;
3223
3224         if (mp->m_sb.sb_inodesize == 256) {
3225                 offset = XFS_LITINO(mp) -
3226                                 XFS_BMDR_SPACE_CALC(MINABTPTRS);
3227         } else {
3228                 offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
3229         }
3230
3231         ASSERT(offset < XFS_LITINO(mp));
3232         return offset;
3233 }
3234
3235 /*
3236  * Helper routine to reset inode di_forkoff field when switching
3237  * attribute fork from local to extent format - we reset it where
3238  * possible to make space available for inline data fork extents.
3239  */
3240 STATIC void
3241 xfs_bmap_forkoff_reset(
3242         xfs_mount_t     *mp,
3243         xfs_inode_t     *ip,
3244         int             whichfork)
3245 {
3246         if (whichfork == XFS_ATTR_FORK &&
3247             ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
3248             ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
3249             ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
3250                 uint    dfl_forkoff = xfs_default_attroffset(ip) >> 3;
3251
3252                 if (dfl_forkoff > ip->i_d.di_forkoff) {
3253                         ip->i_d.di_forkoff = dfl_forkoff;
3254                         ip->i_df.if_ext_max =
3255                                 XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t);
3256                         ip->i_afp->if_ext_max =
3257                                 XFS_IFORK_ASIZE(ip) / sizeof(xfs_bmbt_rec_t);
3258                 }
3259         }
3260 }
3261
3262 /*
3263  * Convert a local file to an extents file.
3264  * This code is out of bounds for data forks of regular files,
3265  * since the file data needs to get logged so things will stay consistent.
3266  * (The bmap-level manipulations are ok, though).
3267  */
3268 STATIC int                              /* error */
3269 xfs_bmap_local_to_extents(
3270         xfs_trans_t     *tp,            /* transaction pointer */
3271         xfs_inode_t     *ip,            /* incore inode pointer */
3272         xfs_fsblock_t   *firstblock,    /* first block allocated in xaction */
3273         xfs_extlen_t    total,          /* total blocks needed by transaction */
3274         int             *logflagsp,     /* inode logging flags */
3275         int             whichfork)      /* data or attr fork */
3276 {
3277         int             error;          /* error return value */
3278         int             flags;          /* logging flags returned */
3279         xfs_ifork_t     *ifp;           /* inode fork pointer */
3280
3281         /*
3282          * We don't want to deal with the case of keeping inode data inline yet.
3283          * So sending the data fork of a regular inode is invalid.
3284          */
3285         ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
3286         ifp = XFS_IFORK_PTR(ip, whichfork);
3287         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
3288         flags = 0;
3289         error = 0;
3290         if (ifp->if_bytes) {
3291                 xfs_alloc_arg_t args;   /* allocation arguments */
3292                 xfs_buf_t       *bp;    /* buffer for extent block */
3293                 xfs_bmbt_rec_host_t *ep;/* extent record pointer */
3294
3295                 args.tp = tp;
3296                 args.mp = ip->i_mount;
3297                 args.firstblock = *firstblock;
3298                 ASSERT((ifp->if_flags &
3299                         (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
3300                 /*
3301                  * Allocate a block.  We know we need only one, since the
3302                  * file currently fits in an inode.
3303                  */
3304                 if (*firstblock == NULLFSBLOCK) {
3305                         args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
3306                         args.type = XFS_ALLOCTYPE_START_BNO;
3307                 } else {
3308                         args.fsbno = *firstblock;
3309                         args.type = XFS_ALLOCTYPE_NEAR_BNO;
3310                 }
3311                 args.total = total;
3312                 args.mod = args.minleft = args.alignment = args.wasdel =
3313                         args.isfl = args.minalignslop = 0;
3314                 args.minlen = args.maxlen = args.prod = 1;
3315                 if ((error = xfs_alloc_vextent(&args)))
3316                         goto done;
3317                 /*
3318                  * Can't fail, the space was reserved.
3319                  */
3320                 ASSERT(args.fsbno != NULLFSBLOCK);
3321                 ASSERT(args.len == 1);
3322                 *firstblock = args.fsbno;
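                /*
                 * Copy the inline data into the new block, log it, and
                 * replace the inline fork contents with a single extent
                 * record pointing at that block.
                 */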
3323                 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
3324                 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
3325                 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
3326                 xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
3327                 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
3328                 xfs_iext_add(ifp, 0, 1);
3329                 ep = xfs_iext_get_ext(ifp, 0);
3330                 xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
3331                 trace_xfs_bmap_post_update(ip, 0,
3332                                 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
3333                                 _THIS_IP_);
3334                 XFS_IFORK_NEXT_SET(ip, whichfork, 1);
3335                 ip->i_d.di_nblocks = 1;
3336                 xfs_trans_mod_dquot_byino(tp, ip,
3337                         XFS_TRANS_DQ_BCOUNT, 1L);
3338                 flags |= xfs_ilog_fext(whichfork);
3339         } else {
3340                 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
3341                 xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork);
3342         }
3343         ifp->if_flags &= ~XFS_IFINLINE;
3344         ifp->if_flags |= XFS_IFEXTENTS;
3345         XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
3346         flags |= XFS_ILOG_CORE;
3347 done:
3348         *logflagsp = flags;
3349         return error;
3350 }
3351
3352 /*
3353  * Search the extent records for the entry containing block bno.
3354  * If bno lies in a hole, point to the next entry.  If bno lies
3355  * past eof, *eofp will be set, and *prevp will contain the last
3356  * entry (null if none).  Else, *lastxp will be set to the index
3357  * of the found entry; *gotp will contain the entry.
3358  */
3359 STATIC xfs_bmbt_rec_host_t *            /* pointer to found extent entry */
3360 xfs_bmap_search_multi_extents(
3361         xfs_ifork_t     *ifp,           /* inode fork pointer */
3362         xfs_fileoff_t   bno,            /* block number searched for */
3363         int             *eofp,          /* out: end of file found */
3364         xfs_extnum_t    *lastxp,        /* out: last extent index */
3365         xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
3366         xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
3367 {
3368         xfs_bmbt_rec_host_t *ep;                /* extent record pointer */
3369         xfs_extnum_t    lastx;          /* last extent index */
3370
3371         /*
3372          * Initialize the extent entry structure to catch access to
3373          * uninitialized br_startblock field.
3374          */
3375         gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
3376         gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
3377         gotp->br_state = XFS_EXT_INVALID;
3378 #if XFS_BIG_BLKNOS
3379         gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
3380 #else
3381         gotp->br_startblock = 0xffffa5a5;
3382 #endif
3383         prevp->br_startoff = NULLFILEOFF;
3384
3385         ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
3386         if (lastx > 0) {
3387                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
3388         }
3389         if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
3390                 xfs_bmbt_get_all(ep, gotp);
3391                 *eofp = 0;
3392         } else {
3393                 if (lastx > 0) {
3394                         *gotp = *prevp;
3395                 }
3396                 *eofp = 1;
3397                 ep = NULL;
3398         }
3399         *lastxp = lastx;
3400         return ep;
3401 }
3402
3403 /*
3404  * Search the extents list for the inode, for the extent containing bno.
3405  * If bno lies in a hole, point to the next entry.  If bno lies past eof,
3406  * *eofp will be set, and *prevp will contain the last entry (null if none).
3407  * Else, *lastxp will be set to the index of the found
3408  * entry; *gotp will contain the entry.
3409  */
3410 STATIC xfs_bmbt_rec_host_t *                 /* pointer to found extent entry */
3411 xfs_bmap_search_extents(
3412         xfs_inode_t     *ip,            /* incore inode pointer */
3413         xfs_fileoff_t   bno,            /* block number searched for */
3414         int             fork,           /* data or attr fork */
3415         int             *eofp,          /* out: end of file found */
3416         xfs_extnum_t    *lastxp,        /* out: last extent index */
3417         xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
3418         xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
3419 {
3420         xfs_ifork_t     *ifp;           /* inode fork pointer */
3421         xfs_bmbt_rec_host_t  *ep;            /* extent record pointer */
3422
3423         XFS_STATS_INC(xs_look_exlist);
3424         ifp = XFS_IFORK_PTR(ip, fork);
3425
3426         ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);
3427
3428         if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
3429                      !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
3430                 xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
3431                                 "Access to block zero in inode %llu "
3432                                 "start_block: %llx start_off: %llx "
3433                                 "blkcnt: %llx extent-state: %x lastx: %x\n",
3434                         (unsigned long long)ip->i_ino,
3435                         (unsigned long long)gotp->br_startblock,
3436                         (unsigned long long)gotp->br_startoff,
3437                         (unsigned long long)gotp->br_blockcount,
3438                         gotp->br_state, *lastxp);
3439                 *lastxp = NULLEXTNUM;
3440                 *eofp = 1;
3441                 return NULL;
3442         }
3443         return ep;
3444 }
3445
3446 /*
3447  * Compute the worst-case number of indirect blocks that will be used
3448  * for ip's delayed extent of length "len".
3449  */
3450 STATIC xfs_filblks_t
3451 xfs_bmap_worst_indlen(
3452         xfs_inode_t     *ip,            /* incore inode pointer */
3453         xfs_filblks_t   len)            /* delayed extent length */
3454 {
3455         int             level;          /* btree level number */
3456         int             maxrecs;        /* maximum record count at this level */
3457         xfs_mount_t     *mp;            /* mount structure */
3458         xfs_filblks_t   rval;           /* return value */
3459
3460         mp = ip->i_mount;
3461         maxrecs = mp->m_bmap_dmxr[0];
3462         for (level = 0, rval = 0;
3463              level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
3464              level++) {
3465                 len += maxrecs - 1;
3466                 do_div(len, maxrecs);
3467                 rval += len;
3468                 if (len == 1)
3469                         return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
3470                                 level - 1;
3471                 if (level == 0)
3472                         maxrecs = mp->m_bmap_dmxr[1];
3473         }
3474         return rval;
3475 }
3476
3477 /*
3478  * Convert inode from non-attributed to attributed.
3479  * Must not be in a transaction, ip must not be locked.
3480  */
3481 int                                             /* error code */
3482 xfs_bmap_add_attrfork(
3483         xfs_inode_t             *ip,            /* incore inode pointer */
3484         int                     size,           /* space new attribute needs */
3485         int                     rsvd)           /* xact may use reserved blks */
3486 {
3487         xfs_fsblock_t           firstblock;     /* 1st block/ag allocated */
3488         xfs_bmap_free_t         flist;          /* freed extent records */
3489         xfs_mount_t             *mp;            /* mount structure */
3490         xfs_trans_t             *tp;            /* transaction pointer */
3491         int                     blks;           /* space reservation */
3492         int                     version = 1;    /* superblock attr version */
3493         int                     committed;      /* xaction was committed */
3494         int                     logflags;       /* logging flags */
3495         int                     error;          /* error return value */
3496
3497         ASSERT(XFS_IFORK_Q(ip) == 0);
3498         ASSERT(ip->i_df.if_ext_max ==
3499                XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
3500
3501         mp = ip->i_mount;
3502         ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
3503         tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK);
3504         blks = XFS_ADDAFORK_SPACE_RES(mp);
3505         if (rsvd)
3506                 tp->t_flags |= XFS_TRANS_RESERVE;
3507         if ((error = xfs_trans_reserve(tp, blks, XFS_ADDAFORK_LOG_RES(mp), 0,
3508                         XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT)))
3509                 goto error0;
3510         xfs_ilock(ip, XFS_ILOCK_EXCL);
3511         error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
3512                         XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
3513                         XFS_QMOPT_RES_REGBLKS);
3514         if (error) {
3515                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
3516                 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
3517                 return error;
3518         }
3519         if (XFS_IFORK_Q(ip))
3520                 goto error1;
3521         if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
3522                 /*
3523                  * For inodes coming from pre-6.2 filesystems.
3524                  */
3525                 ASSERT(ip->i_d.di_aformat == 0);
3526                 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
3527         }
3528         ASSERT(ip->i_d.di_anextents == 0);
3529
3530         xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
3531         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3532
3533         switch (ip->i_d.di_format) {
3534         case XFS_DINODE_FMT_DEV:
3535                 ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
3536                 break;
3537         case XFS_DINODE_FMT_UUID:
3538                 ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
3539                 break;
3540         case XFS_DINODE_FMT_LOCAL:
3541         case XFS_DINODE_FMT_EXTENTS:
3542         case XFS_DINODE_FMT_BTREE:
3543                 ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
3544                 if (!ip->i_d.di_forkoff)
3545                         ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
3546                 else if (mp->m_flags & XFS_MOUNT_ATTR2)
3547                         version = 2;
3548                 break;
3549         default:
3550                 ASSERT(0);
3551                 error = XFS_ERROR(EINVAL);
3552                 goto error1;
3553         }
3554         ip->i_df.if_ext_max =
3555                 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
3556         ASSERT(ip->i_afp == NULL);
3557         ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
3558         ip->i_afp->if_ext_max =
3559                 XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
3560         ip->i_afp->if_flags = XFS_IFEXTENTS;
3561         logflags = 0;
3562         xfs_bmap_init(&flist, &firstblock);
3563         switch (ip->i_d.di_format) {
3564         case XFS_DINODE_FMT_LOCAL:
3565                 error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist,
3566                         &logflags);
3567                 break;
3568         case XFS_DINODE_FMT_EXTENTS:
3569                 error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
3570                         &flist, &logflags);
3571                 break;
3572         case XFS_DINODE_FMT_BTREE:
3573                 error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist,
3574                         &logflags);
3575                 break;
3576         default:
3577                 error = 0;
3578                 break;
3579         }
3580         if (logflags)
3581                 xfs_trans_log_inode(tp, ip, logflags);
3582         if (error)
3583                 goto error2;
3584         if (!xfs_sb_version_hasattr(&mp->m_sb) ||
3585            (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
3586                 __int64_t sbfields = 0;
3587
3588                 spin_lock(&mp->m_sb_lock);
3589                 if (!xfs_sb_version_hasattr(&mp->m_sb)) {
3590                         xfs_sb_version_addattr(&mp->m_sb);
3591                         sbfields |= XFS_SB_VERSIONNUM;
3592                 }
3593                 if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
3594                         xfs_sb_version_addattr2(&mp->m_sb);
3595                         sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
3596                 }
3597                 if (sbfields) {
3598                         spin_unlock(&mp->m_sb_lock);
3599                         xfs_mod_sb(tp, sbfields);
3600                 } else
3601                         spin_unlock(&mp->m_sb_lock);
3602         }
3603         if ((error = xfs_bmap_finish(&tp, &flist, &committed)))
3604                 goto error2;
3605         error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
3606         ASSERT(ip->i_df.if_ext_max ==
3607                XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
3608         return error;
3609 error2:
3610         xfs_bmap_cancel(&flist);
3611 error1:
3612         xfs_iunlock(ip, XFS_ILOCK_EXCL);
3613 error0:
3614         xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
3615         ASSERT(ip->i_df.if_ext_max ==
3616                XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
3617         return error;
3618 }
3619
3620 /*
3621  * Add the extent to the list of extents to be free at transaction end.
3622  * The list is maintained sorted (by block number).
3623  */
3624 /* ARGSUSED */
3625 void
3626 xfs_bmap_add_free(
3627         xfs_fsblock_t           bno,            /* fs block number of extent */
3628         xfs_filblks_t           len,            /* length of extent */
3629         xfs_bmap_free_t         *flist,         /* list of extents */
3630         xfs_mount_t             *mp)            /* mount point structure */
3631 {
3632         xfs_bmap_free_item_t    *cur;           /* current (next) element */
3633         xfs_bmap_free_item_t    *new;           /* new element */
3634         xfs_bmap_free_item_t    *prev;          /* previous element */
3635 #ifdef DEBUG
3636         xfs_agnumber_t          agno;
3637         xfs_agblock_t           agbno;
3638
3639         ASSERT(bno != NULLFSBLOCK);
3640         ASSERT(len > 0);
3641         ASSERT(len <= MAXEXTLEN);
3642         ASSERT(!isnullstartblock(bno));
3643         agno = XFS_FSB_TO_AGNO(mp, bno);
3644         agbno = XFS_FSB_TO_AGBNO(mp, bno);
3645         ASSERT(agno < mp->m_sb.sb_agcount);
3646         ASSERT(agbno < mp->m_sb.sb_agblocks);
3647         ASSERT(len < mp->m_sb.sb_agblocks);
3648         ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
3649 #endif
3650         ASSERT(xfs_bmap_free_item_zone != NULL);
3651         new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
3652         new->xbfi_startblock = bno;
3653         new->xbfi_blockcount = (xfs_extlen_t)len;
3654         for (prev = NULL, cur = flist->xbf_first;
3655              cur != NULL;
3656              prev = cur, cur = cur->xbfi_next) {
3657                 if (cur->xbfi_startblock >= bno)
3658                         break;
3659         }
3660         if (prev)
3661                 prev->xbfi_next = new;
3662         else
3663                 flist->xbf_first = new;
3664         new->xbfi_next = cur;
3665         flist->xbf_count++;
3666 }
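
/*
 * A minimal sketch of the expected calling pattern (purely illustrative;
 * bno, len, mp, tp and error are assumed to exist in the caller):
 *
 *	xfs_bmap_free_t		flist;
 *	xfs_fsblock_t		firstblock;
 *	int			committed;
 *
 *	xfs_bmap_init(&flist, &firstblock);
 *	xfs_bmap_add_free(bno, len, &flist, mp);
 *	error = xfs_bmap_finish(&tp, &flist, &committed);
 *	if (error)
 *		xfs_bmap_cancel(&flist);
 *
 * Keeping the list sorted by block number means xfs_bmap_finish() frees
 * the queued extents in ascending block order.
 */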
3667
3668 /*
3669  * Compute and fill in the value of the maximum depth of a bmap btree
3670  * in this filesystem.  Done once, during mount.
3671  */
3672 void
3673 xfs_bmap_compute_maxlevels(
3674         xfs_mount_t     *mp,            /* file system mount structure */
3675         int             whichfork)      /* data or attr fork */
3676 {
3677         int             level;          /* btree level */
3678         uint            maxblocks;      /* max blocks at this level */
3679         uint            maxleafents;    /* max leaf entries possible */
3680         int             maxrootrecs;    /* max records in root block */
3681         int             minleafrecs;    /* min records in leaf block */
3682         int             minnoderecs;    /* min records in node block */
3683         int             sz;             /* root block size */
3684
3685         /*
3686          * The maximum number of extents in a file, hence the maximum
3687          * number of leaf entries, is controlled by the type of di_nextents
3688          * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
3689          * (a signed 16-bit number, xfs_aextnum_t).
3690          *
3691          * Note that we can no longer assume that if we are in ATTR1 that
3692          * the fork offset of all the inodes will be
3693          * (xfs_default_attroffset(ip) >> 3) because we could have mounted
3694          * with ATTR2 and then mounted back with ATTR1, keeping the
3695          * di_forkoff's fixed but probably at various positions. Therefore,
3696          * for both ATTR1 and ATTR2 we have to assume the worst case scenario
3697          * of a minimum size available.
3698          */
3699         if (whichfork == XFS_DATA_FORK) {
3700                 maxleafents = MAXEXTNUM;
3701                 sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
3702         } else {
3703                 maxleafents = MAXAEXTNUM;
3704                 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
3705         }
3706         maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0);
3707         minleafrecs = mp->m_bmap_dmnr[0];
3708         minnoderecs = mp->m_bmap_dmnr[1];
3709         maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
3710         for (level = 1; maxblocks > 1; level++) {
3711                 if (maxblocks <= maxrootrecs)
3712                         maxblocks = 1;
3713                 else
3714                         maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
3715         }
3716         mp->m_bm_maxlevels[whichfork] = level;
3717 }
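
/*
 * A rough worked example of the computation above (the record counts are
 * made up for illustration): with maxleafents = 2^31 - 1 and minleafrecs
 * and minnoderecs both 125, the first level needs ceil(maxleafents / 125)
 * blocks; each subsequent level divides that block count by 125 again, and
 * as soon as the count would fit in the inode root (maxrootrecs) the loop
 * terminates.  The loop counts one level for the leaves plus one for each
 * further division, and that depth is what ends up in
 * m_bm_maxlevels[whichfork].
 */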
3718
3719 /*
3720  * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
3721  * caller.  Frees all the extents that need freeing, which must be done
3722  * last due to locking considerations.  We never free any extents in
3723  * the first transaction.
3724  *
3725  * Returns 1 in the committed parameter if the given transaction was
3726  * committed and a new one started, and 0 otherwise.
3727  */
3728 int                                             /* error */
3729 xfs_bmap_finish(
3730         xfs_trans_t             **tp,           /* transaction pointer addr */
3731         xfs_bmap_free_t         *flist,         /* i/o: list extents to free */
3732         int                     *committed)     /* xact committed or not */
3733 {
3734         xfs_efd_log_item_t      *efd;           /* extent free data */
3735         xfs_efi_log_item_t      *efi;           /* extent free intention */
3736         int                     error;          /* error return value */
3737         xfs_bmap_free_item_t    *free;          /* free extent item */
3738         unsigned int            logres;         /* new log reservation */
3739         unsigned int            logcount;       /* new log count */
3740         xfs_mount_t             *mp;            /* filesystem mount structure */
3741         xfs_bmap_free_item_t    *next;          /* next item on free list */
3742         xfs_trans_t             *ntp;           /* new transaction pointer */
3743
3744         ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
3745         if (flist->xbf_count == 0) {
3746                 *committed = 0;
3747                 return 0;
3748         }
3749         ntp = *tp;
3750         efi = xfs_trans_get_efi(ntp, flist->xbf_count);
3751         for (free = flist->xbf_first; free; free = free->xbfi_next)
3752                 xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock,
3753                         free->xbfi_blockcount);
3754         logres = ntp->t_log_res;
3755         logcount = ntp->t_log_count;
3756         ntp = xfs_trans_dup(*tp);
3757         error = xfs_trans_commit(*tp, 0);
3758         *tp = ntp;
3759         *committed = 1;
3760         /*
3761          * We have a new transaction, so we should return committed=1,
3762          * even though we're returning an error.
3763          */
3764         if (error)
3765                 return error;
3766
3767         /*
3768          * transaction commit worked ok so we can drop the extra ticket
3769          * reference that we gained in xfs_trans_dup()
3770          */
3771         xfs_log_ticket_put(ntp->t_ticket);
3772
3773         if ((error = xfs_trans_reserve(ntp, 0, logres, 0, XFS_TRANS_PERM_LOG_RES,
3774                         logcount)))
3775                 return error;
3776         efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count);
3777         for (free = flist->xbf_first; free != NULL; free = next) {
3778                 next = free->xbfi_next;
3779                 if ((error = xfs_free_extent(ntp, free->xbfi_startblock,
3780                                 free->xbfi_blockcount))) {
3781                         /*
3782                          * The bmap free list will be cleaned up at a
3783                          * higher level.  The EFI will be canceled when
3784                          * this transaction is aborted.
3785                          * Need to force shutdown here to make sure it
3786                          * happens, since this transaction may not be
3787                          * dirty yet.
3788                          */
3789                         mp = ntp->t_mountp;
3790                         if (!XFS_FORCED_SHUTDOWN(mp))
3791                                 xfs_force_shutdown(mp,
3792                                                    (error == EFSCORRUPTED) ?
3793                                                    SHUTDOWN_CORRUPT_INCORE :
3794                                                    SHUTDOWN_META_IO_ERROR);
3795                         return error;
3796                 }
3797                 xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock,
3798                         free->xbfi_blockcount);
3799                 xfs_bmap_del_free(flist, NULL, free);
3800         }
3801         return 0;
3802 }
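
/*
 * Note on the flow above: the EFI is logged in the caller's transaction,
 * that transaction is then duplicated and committed, and the actual extent
 * frees plus the matching EFD are logged in the fresh transaction.  This is
 * why "committed" is reported back even when an error is returned -- once
 * the xfs_trans_dup()/xfs_trans_commit() pair has run, the transaction the
 * caller passed in no longer exists and *tp already points at the new one.
 */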
3803
3804 /*
3805  * Free up any items left in the list.
3806  */
3807 void
3808 xfs_bmap_cancel(
3809         xfs_bmap_free_t         *flist) /* list of bmap_free_items */
3810 {
3811         xfs_bmap_free_item_t    *free;  /* free list item */
3812         xfs_bmap_free_item_t    *next;
3813
3814         if (flist->xbf_count == 0)
3815                 return;
3816         ASSERT(flist->xbf_first != NULL);
3817         for (free = flist->xbf_first; free; free = next) {
3818                 next = free->xbfi_next;
3819                 xfs_bmap_del_free(flist, NULL, free);
3820         }
3821         ASSERT(flist->xbf_count == 0);
3822 }
3823
3824 /*
3825  * Returns the file-relative block number of the first unused block(s)
3826  * in the file with at least "len" logically contiguous blocks free.
3827  * This is the lowest-address hole if the file has holes, else the first block
3828  * past the end of file.
3829  * Return 0 if the file is currently local (in-inode).
3830  */
3831 int                                             /* error */
3832 xfs_bmap_first_unused(
3833         xfs_trans_t     *tp,                    /* transaction pointer */
3834         xfs_inode_t     *ip,                    /* incore inode */
3835         xfs_extlen_t    len,                    /* size of hole to find */
3836         xfs_fileoff_t   *first_unused,          /* unused block */
3837         int             whichfork)              /* data or attr fork */
3838 {
3839         int             error;                  /* error return value */
3840         int             idx;                    /* extent record index */
3841         xfs_ifork_t     *ifp;                   /* inode fork pointer */
3842         xfs_fileoff_t   lastaddr;               /* last block number seen */
3843         xfs_fileoff_t   lowest;                 /* lowest useful block */
3844         xfs_fileoff_t   max;                    /* starting useful block */
3845         xfs_fileoff_t   off;                    /* offset for this block */
3846         xfs_extnum_t    nextents;               /* number of extent entries */
3847
3848         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
3849                XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
3850                XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
3851         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
3852                 *first_unused = 0;
3853                 return 0;
3854         }
3855         ifp = XFS_IFORK_PTR(ip, whichfork);
3856         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
3857             (error = xfs_iread_extents(tp, ip, whichfork)))
3858                 return error;
3859         lowest = *first_unused;
3860         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3861         for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
3862                 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
3863                 off = xfs_bmbt_get_startoff(ep);
3864                 /*
3865                  * See if the hole before this extent will work.
3866                  */
3867                 if (off >= lowest + len && off - max >= len) {
3868                         *first_unused = max;
3869                         return 0;
3870                 }
3871                 lastaddr = off + xfs_bmbt_get_blockcount(ep);
3872                 max = XFS_FILEOFF_MAX(lastaddr, lowest);
3873         }
3874         *first_unused = max;
3875         return 0;
3876 }
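
/*
 * Example of the semantics above (hypothetical layout, with the caller
 * passing in 0 as the starting offset): if the fork maps blocks 0-9 and
 * 20-29, a request for len = 5 returns 10, because the ten-block hole
 * between the extents is large enough, while a request for len = 15 skips
 * that hole and returns 30, the first block past the last extent.
 */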
3877
3878 /*
3879  * Returns the file-relative block number of the last block + 1 before
3880  * last_block (input value) in the file.
3881  * This is not based on i_size, it is based on the extent records.
3882  * Returns 0 for local files, as they do not have extent records.
3883  */
3884 int                                             /* error */
3885 xfs_bmap_last_before(
3886         xfs_trans_t     *tp,                    /* transaction pointer */
3887         xfs_inode_t     *ip,                    /* incore inode */
3888         xfs_fileoff_t   *last_block,            /* last block */
3889         int             whichfork)              /* data or attr fork */
3890 {
3891         xfs_fileoff_t   bno;                    /* input file offset */
3892         int             eof;                    /* hit end of file */
3893         xfs_bmbt_rec_host_t *ep;                /* pointer to last extent */
3894         int             error;                  /* error return value */
3895         xfs_bmbt_irec_t got;                    /* current extent value */
3896         xfs_ifork_t     *ifp;                   /* inode fork pointer */
3897         xfs_extnum_t    lastx;                  /* last extent used */
3898         xfs_bmbt_irec_t prev;                   /* previous extent value */
3899
3900         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
3901             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
3902             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
3903                 return XFS_ERROR(EIO);
3904         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
3905                 *last_block = 0;
3906                 return 0;
3907         }
3908         ifp = XFS_IFORK_PTR(ip, whichfork);
3909         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
3910             (error = xfs_iread_extents(tp, ip, whichfork)))
3911                 return error;
3912         bno = *last_block - 1;
3913         ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
3914                 &prev);
3915         if (eof || xfs_bmbt_get_startoff(ep) > bno) {
3916                 if (prev.br_startoff == NULLFILEOFF)
3917                         *last_block = 0;
3918                 else
3919                         *last_block = prev.br_startoff + prev.br_blockcount;
3920         }
3921         /*
3922          * Otherwise *last_block is already the right answer.
3923          */
3924         return 0;
3925 }
3926
3927 /*
3928  * Returns the file-relative block number of the first block past eof in
3929  * the file.  This is not based on i_size, it is based on the extent records.
3930  * Returns 0 for local files, as they do not have extent records.
3931  */
3932 int                                             /* error */
3933 xfs_bmap_last_offset(
3934         xfs_trans_t     *tp,                    /* transaction pointer */
3935         xfs_inode_t     *ip,                    /* incore inode */
3936         xfs_fileoff_t   *last_block,            /* last block */
3937         int             whichfork)              /* data or attr fork */
3938 {
3939         xfs_bmbt_rec_host_t *ep;                /* pointer to last extent */
3940         int             error;                  /* error return value */
3941         xfs_ifork_t     *ifp;                   /* inode fork pointer */
3942         xfs_extnum_t    nextents;               /* number of extent entries */
3943
3944         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
3945             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
3946             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
3947                 return XFS_ERROR(EIO);
3948         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
3949                 *last_block = 0;
3950                 return 0;
3951         }
3952         ifp = XFS_IFORK_PTR(ip, whichfork);
3953         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
3954             (error = xfs_iread_extents(tp, ip, whichfork)))
3955                 return error;
3956         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3957         if (!nextents) {
3958                 *last_block = 0;
3959                 return 0;
3960         }
3961         ep = xfs_iext_get_ext(ifp, nextents - 1);
3962         *last_block = xfs_bmbt_get_startoff(ep) + xfs_bmbt_get_blockcount(ep);
3963         return 0;
3964 }
3965
3966 /*
3967  * Returns whether the selected fork of the inode has exactly one
3968  * block or not.  For the data fork we check this matches di_size,
3969  * implying the file's range is 0..bsize-1.
3970  */
3971 int                                     /* 1=>1 block, 0=>otherwise */
3972 xfs_bmap_one_block(
3973         xfs_inode_t     *ip,            /* incore inode */
3974         int             whichfork)      /* data or attr fork */
3975 {
3976         xfs_bmbt_rec_host_t *ep;        /* ptr to fork's extent */
3977         xfs_ifork_t     *ifp;           /* inode fork pointer */
3978         int             rval;           /* return value */
3979         xfs_bmbt_irec_t s;              /* internal version of extent */
3980
3981 #ifndef DEBUG
3982         if (whichfork == XFS_DATA_FORK) {
3983                 return S_ISREG(ip->i_d.di_mode) ?
3984                         (ip->i_size == ip->i_mount->m_sb.sb_blocksize) :
3985                         (ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
3986         }
3987 #endif  /* !DEBUG */
3988         if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
3989                 return 0;
3990         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
3991                 return 0;
3992         ifp = XFS_IFORK_PTR(ip, whichfork);
3993         ASSERT(ifp->if_flags & XFS_IFEXTENTS);
3994         ep = xfs_iext_get_ext(ifp, 0);
3995         xfs_bmbt_get_all(ep, &s);
3996         rval = s.br_startoff == 0 && s.br_blockcount == 1;
3997         if (rval && whichfork == XFS_DATA_FORK)
3998                 ASSERT(ip->i_size == ip->i_mount->m_sb.sb_blocksize);
3999         return rval;
4000 }
4001
4002 STATIC int
4003 xfs_bmap_sanity_check(
4004         struct xfs_mount        *mp,
4005         struct xfs_buf          *bp,
4006         int                     level)
4007 {
4008         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
4009
4010         if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) ||
4011             be16_to_cpu(block->bb_level) != level ||
4012             be16_to_cpu(block->bb_numrecs) == 0 ||
4013             be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
4014                 return 0;
4015         return 1;
4016 }
4017
4018 /*
4019  * Read in the extents to if_extents.
4020  * All inode fields are set up by caller, we just traverse the btree
4021  * and copy the records in. If the file system cannot contain unwritten
4022  * extents, the records are checked for no "state" flags.
4023  */
4024 int                                     /* error */
4025 xfs_bmap_read_extents(
4026         xfs_trans_t             *tp,    /* transaction pointer */
4027         xfs_inode_t             *ip,    /* incore inode */
4028         int                     whichfork) /* data or attr fork */
4029 {
4030         struct xfs_btree_block  *block; /* current btree block */
4031         xfs_fsblock_t           bno;    /* block # of "block" */
4032         xfs_buf_t               *bp;    /* buffer for "block" */
4033         int                     error;  /* error return value */
4034         xfs_exntfmt_t           exntf;  /* XFS_EXTFMT_NOSTATE, if checking */
4035         xfs_extnum_t            i, j;   /* index into the extents list */
4036         xfs_ifork_t             *ifp;   /* fork structure */
4037         int                     level;  /* btree level, for checking */
4038         xfs_mount_t             *mp;    /* file system mount structure */
4039         __be64                  *pp;    /* pointer to block address */
4040         /* REFERENCED */
4041         xfs_extnum_t            room;   /* number of entries there's room for */
4042
4043         bno = NULLFSBLOCK;
4044         mp = ip->i_mount;
4045         ifp = XFS_IFORK_PTR(ip, whichfork);
4046         exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
4047                                         XFS_EXTFMT_INODE(ip);
4048         block = ifp->if_broot;
4049         /*
4050          * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
4051          */
4052         level = be16_to_cpu(block->bb_level);
4053         ASSERT(level > 0);
4054         pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
4055         bno = be64_to_cpu(*pp);
4056         ASSERT(bno != NULLDFSBNO);
4057         ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
4058         ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
4059         /*
4060          * Go down the tree until leaf level is reached, following the first
4061          * pointer (leftmost) at each level.
4062          */
4063         while (level-- > 0) {
4064                 if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
4065                                 XFS_BMAP_BTREE_REF)))
4066                         return error;
4067                 block = XFS_BUF_TO_BLOCK(bp);
4068                 XFS_WANT_CORRUPTED_GOTO(
4069                         xfs_bmap_sanity_check(mp, bp, level),
4070                         error0);
4071                 if (level == 0)
4072                         break;
4073                 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
4074                 bno = be64_to_cpu(*pp);
4075                 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
4076                 xfs_trans_brelse(tp, bp);
4077         }
4078         /*
4079          * Here with bp and block set to the leftmost leaf node in the tree.
4080          */
4081         room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
4082         i = 0;
4083         /*
4084          * Loop over all leaf nodes.  Copy information to the extent records.
4085          */
4086         for (;;) {
4087                 xfs_bmbt_rec_t  *frp;
4088                 xfs_fsblock_t   nextbno;
4089                 xfs_extnum_t    num_recs;
4090                 xfs_extnum_t    start;
4091
4092                 num_recs = xfs_btree_get_numrecs(block);
4093                 if (unlikely(i + num_recs > room)) {
4094                         ASSERT(i + num_recs <= room);
4095                         xfs_warn(ip->i_mount,
4096                                 "corrupt dinode %Lu, (btree extents).",
4097                                 (unsigned long long) ip->i_ino);
4098                         XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
4099                                 XFS_ERRLEVEL_LOW, ip->i_mount, block);
4100                         goto error0;
4101                 }
4102                 XFS_WANT_CORRUPTED_GOTO(
4103                         xfs_bmap_sanity_check(mp, bp, 0),
4104                         error0);
4105                 /*
4106                  * Read-ahead the next leaf block, if any.
4107                  */
4108                 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
4109                 if (nextbno != NULLFSBLOCK)
4110                         xfs_btree_reada_bufl(mp, nextbno, 1);
4111                 /*
4112                  * Copy records into the extent records.
4113                  */
4114                 frp = XFS_BMBT_REC_ADDR(mp, block, 1);
4115                 start = i;
4116                 for (j = 0; j < num_recs; j++, i++, frp++) {
4117                         xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
4118                         trp->l0 = be64_to_cpu(frp->l0);
4119                         trp->l1 = be64_to_cpu(frp->l1);
4120                 }
4121                 if (exntf == XFS_EXTFMT_NOSTATE) {
4122                         /*
4123                          * Check all attribute bmap btree records and
4124                          * any "older" data bmap btree records for a
4125                          * set bit in the "extent flag" position.
4126                          */
4127                         if (unlikely(xfs_check_nostate_extents(ifp,
4128                                         start, num_recs))) {
4129                                 XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
4130                                                  XFS_ERRLEVEL_LOW,
4131                                                  ip->i_mount);
4132                                 goto error0;
4133                         }
4134                 }
4135                 xfs_trans_brelse(tp, bp);
4136                 bno = nextbno;
4137                 /*
4138                  * If we've reached the end, stop.
4139                  */
4140                 if (bno == NULLFSBLOCK)
4141                         break;
4142                 if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
4143                                 XFS_BMAP_BTREE_REF)))
4144                         return error;
4145                 block = XFS_BUF_TO_BLOCK(bp);
4146         }
4147         ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
4148         ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
4149         XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
4150         return 0;
4151 error0:
4152         xfs_trans_brelse(tp, bp);
4153         return XFS_ERROR(EFSCORRUPTED);
4154 }
4155
4156 #ifdef DEBUG
4157 /*
4158  * Add bmap trace insert entries for all the contents of the extent records.
4159  */
4160 void
4161 xfs_bmap_trace_exlist(
4162         xfs_inode_t     *ip,            /* incore inode pointer */
4163         xfs_extnum_t    cnt,            /* count of entries in the list */
4164         int             whichfork,      /* data or attr fork */
4165         unsigned long   caller_ip)
4166 {
4167         xfs_extnum_t    idx;            /* extent record index */
4168         xfs_ifork_t     *ifp;           /* inode fork pointer */
4169         int             state = 0;
4170
4171         if (whichfork == XFS_ATTR_FORK)
4172                 state |= BMAP_ATTRFORK;
4173
4174         ifp = XFS_IFORK_PTR(ip, whichfork);
4175         ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
4176         for (idx = 0; idx < cnt; idx++)
4177                 trace_xfs_extlist(ip, idx, whichfork, caller_ip);
4178 }
4179
4180 /*
4181  * Validate that the bmbt_irecs being returned from bmapi are valid
4182  * given the caller's original parameters.  Specifically check the
4183  * ranges of the returned irecs to ensure that they only extend beyond
4184  * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
4185  */
4186 STATIC void
4187 xfs_bmap_validate_ret(
4188         xfs_fileoff_t           bno,
4189         xfs_filblks_t           len,
4190         int                     flags,
4191         xfs_bmbt_irec_t         *mval,
4192         int                     nmap,
4193         int                     ret_nmap)
4194 {
4195         int                     i;              /* index to map values */
4196
4197         ASSERT(ret_nmap <= nmap);
4198
4199         for (i = 0; i < ret_nmap; i++) {
4200                 ASSERT(mval[i].br_blockcount > 0);
4201                 if (!(flags & XFS_BMAPI_ENTIRE)) {
4202                         ASSERT(mval[i].br_startoff >= bno);
4203                         ASSERT(mval[i].br_blockcount <= len);
4204                         ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
4205                                bno + len);
4206                 } else {
4207                         ASSERT(mval[i].br_startoff < bno + len);
4208                         ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
4209                                bno);
4210                 }
4211                 ASSERT(i == 0 ||
4212                        mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
4213                        mval[i].br_startoff);
4214                 ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
4215                        mval[i].br_startblock != HOLESTARTBLOCK);
4216                 ASSERT(mval[i].br_state == XFS_EXT_NORM ||
4217                        mval[i].br_state == XFS_EXT_UNWRITTEN);
4218         }
4219 }
4220 #endif /* DEBUG */
4221
4222
4223 /*
4224  * Trim the returned map to the required bounds
4225  */
4226 STATIC void
4227 xfs_bmapi_trim_map(
4228         struct xfs_bmbt_irec    *mval,
4229         struct xfs_bmbt_irec    *got,
4230         xfs_fileoff_t           *bno,
4231         xfs_filblks_t           len,
4232         xfs_fileoff_t           obno,
4233         xfs_fileoff_t           end,
4234         int                     n,
4235         int                     flags)
4236 {
4237         if ((flags & XFS_BMAPI_ENTIRE) ||
4238             got->br_startoff + got->br_blockcount <= obno) {
4239                 *mval = *got;
4240                 if (isnullstartblock(got->br_startblock))
4241                         mval->br_startblock = DELAYSTARTBLOCK;
4242                 return;
4243         }
4244
4245         if (obno > *bno)
4246                 *bno = obno;
4247         ASSERT((*bno >= obno) || (n == 0));
4248         ASSERT(*bno < end);
4249         mval->br_startoff = *bno;
4250         if (isnullstartblock(got->br_startblock))
4251                 mval->br_startblock = DELAYSTARTBLOCK;
4252         else
4253                 mval->br_startblock = got->br_startblock +
4254                                         (*bno - got->br_startoff);
4255         /*
4256          * Return the minimum of what we got and what we asked for, for
4257          * the length.  We can use the len variable here because it is
4258          * modified below and we could have been there before coming
4259          * here if the first part of the allocation didn't overlap what
4260          * was asked for.
4261          */
4262         mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
4263                         got->br_blockcount - (*bno - got->br_startoff));
4264         mval->br_state = got->br_state;
4265         ASSERT(mval->br_blockcount <= len);
4266         return;
4267 }
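
/*
 * Example of the trimming above (hypothetical numbers): if the extent found
 * ("got") covers file blocks 100-199 but the caller asked for blocks
 * 150-159, the returned mapping starts at block 150, its start block is
 * shifted forward by the same 50 blocks, and its length is capped at the 10
 * blocks requested.  With XFS_BMAPI_ENTIRE set the whole 100-block extent
 * would be copied out instead.
 */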
4268
4269 /*
4270  * Update and validate the extent map to return
4271  */
4272 STATIC void
4273 xfs_bmapi_update_map(
4274         struct xfs_bmbt_irec    **map,
4275         xfs_fileoff_t           *bno,
4276         xfs_filblks_t           *len,
4277         xfs_fileoff_t           obno,
4278         xfs_fileoff_t           end,
4279         int                     *n,
4280         int                     flags)
4281 {
4282         xfs_bmbt_irec_t *mval = *map;
4283
4284         ASSERT((flags & XFS_BMAPI_ENTIRE) ||
4285                ((mval->br_startoff + mval->br_blockcount) <= end));
4286         ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
4287                (mval->br_startoff < obno));
4288
4289         *bno = mval->br_startoff + mval->br_blockcount;
4290         *len = end - *bno;
4291         if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
4292                 /* update previous map with new information */
4293                 ASSERT(mval->br_startblock == mval[-1].br_startblock);
4294                 ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
4295                 ASSERT(mval->br_state == mval[-1].br_state);
4296                 mval[-1].br_blockcount = mval->br_blockcount;
4297                 mval[-1].br_state = mval->br_state;
4298         } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
4299                    mval[-1].br_startblock != DELAYSTARTBLOCK &&
4300                    mval[-1].br_startblock != HOLESTARTBLOCK &&
4301                    mval->br_startblock == mval[-1].br_startblock +
4302                                           mval[-1].br_blockcount &&
4303                    ((flags & XFS_BMAPI_IGSTATE) ||
4304                         mval[-1].br_state == mval->br_state)) {
4305                 ASSERT(mval->br_startoff ==
4306                        mval[-1].br_startoff + mval[-1].br_blockcount);
4307                 mval[-1].br_blockcount += mval->br_blockcount;
4308         } else if (*n > 0 &&
4309                    mval->br_startblock == DELAYSTARTBLOCK &&
4310                    mval[-1].br_startblock == DELAYSTARTBLOCK &&
4311                    mval->br_startoff ==
4312                    mval[-1].br_startoff + mval[-1].br_blockcount) {
4313                 mval[-1].br_blockcount += mval->br_blockcount;
4314                 mval[-1].br_state = mval->br_state;
4315         } else if (!((*n == 0) &&
4316                      ((mval->br_startoff + mval->br_blockcount) <=
4317                       obno))) {
4318                 mval++;
4319                 (*n)++;
4320         }
4321         *map = mval;
4322 }
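
/*
 * Put briefly, the cases handled above are: (1) the new mapping starts at
 * the same offset as the previous one and has simply grown, so the previous
 * entry is updated in place; (2) the new mapping is physically and logically
 * contiguous with the previous real extent and has the same state (or
 * XFS_BMAPI_IGSTATE is set), so the two are merged; (3) two logically
 * contiguous delayed-allocation mappings are merged; otherwise (4), unless
 * this is a first mapping that lies entirely before the requested range,
 * the map pointer and the returned count are advanced to start a new entry.
 */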
4323
4324 /*
4325  * Map file blocks to filesystem blocks without allocation.
4326  */
4327 int
4328 xfs_bmapi_read(
4329         struct xfs_inode        *ip,
4330         xfs_fileoff_t           bno,
4331         xfs_filblks_t           len,
4332         struct xfs_bmbt_irec    *mval,
4333         int                     *nmap,
4334         int                     flags)
4335 {
4336         struct xfs_mount        *mp = ip->i_mount;
4337         struct xfs_ifork        *ifp;
4338         struct xfs_bmbt_irec    got;
4339         struct xfs_bmbt_irec    prev;
4340         xfs_fileoff_t           obno;
4341         xfs_fileoff_t           end;
4342         xfs_extnum_t            lastx;
4343         int                     error;
4344         int                     eof;
4345         int                     n = 0;
4346         int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
4347                                                 XFS_ATTR_FORK : XFS_DATA_FORK;
4348
4349         ASSERT(*nmap >= 1);
4350         ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
4351                            XFS_BMAPI_IGSTATE)));
4352
4353         if (unlikely(XFS_TEST_ERROR(
4354             (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
4355              XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
4356              mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
4357                 XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
4358                 return XFS_ERROR(EFSCORRUPTED);
4359         }
4360
4361         if (XFS_FORCED_SHUTDOWN(mp))
4362                 return XFS_ERROR(EIO);
4363
4364         XFS_STATS_INC(xs_blk_mapr);
4365
4366         ifp = XFS_IFORK_PTR(ip, whichfork);
4367         ASSERT(ifp->if_ext_max ==
4368                XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
4369
4370         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4371                 error = xfs_iread_extents(NULL, ip, whichfork);
4372                 if (error)
4373                         return error;
4374         }
4375
4376         xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
4377         end = bno + len;
4378         obno = bno;
4379
4380         while (bno < end && n < *nmap) {
4381                 /* Reading past eof, act as though there's a hole up to end. */
4382                 if (eof)
4383                         got.br_startoff = end;
4384                 if (got.br_startoff > bno) {
4385                         /* Reading in a hole.  */
4386                         mval->br_startoff = bno;
4387                         mval->br_startblock = HOLESTARTBLOCK;
4388                         mval->br_blockcount =
4389                                 XFS_FILBLKS_MIN(len, got.br_startoff - bno);
4390                         mval->br_state = XFS_EXT_NORM;
4391                         bno += mval->br_blockcount;
4392                         len -= mval->br_blockcount;
4393                         mval++;
4394                         n++;
4395                         continue;
4396                 }
4397
4398                 /* set up the extent map to return. */
4399                 xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
4400                 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4401
4402                 /* If we're done, stop now. */
4403                 if (bno >= end || n >= *nmap)
4404                         break;
4405
4406                 /* Else go on to the next record. */
4407                 if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
4408                         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
4409                 else
4410                         eof = 1;
4411         }
4412         *nmap = n;
4413         return 0;
4414 }
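
/*
 * Purely illustrative calling sketch (ip and the error handling are assumed
 * to exist in the caller): map the first 16 blocks of the data fork without
 * allocating anything:
 *
 *	struct xfs_bmbt_irec	map[4];
 *	int			nmap = 4;
 *
 *	error = xfs_bmapi_read(ip, 0, 16, map, &nmap, 0);
 *
 * On return nmap holds how many entries of map[] were filled in; holes are
 * reported with br_startblock == HOLESTARTBLOCK and delayed allocations
 * with br_startblock == DELAYSTARTBLOCK.
 */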
4415
4416 STATIC int
4417 xfs_bmapi_reserve_delalloc(
4418         struct xfs_inode        *ip,
4419         xfs_fileoff_t           aoff,
4420         xfs_filblks_t           len,
4421         struct xfs_bmbt_irec    *got,
4422         struct xfs_bmbt_irec    *prev,
4423         xfs_extnum_t            *lastx,
4424         int                     eof)
4425 {
4426         struct xfs_mount        *mp = ip->i_mount;
4427         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
4428         xfs_extlen_t            alen;
4429         xfs_extlen_t            indlen;
4430         char                    rt = XFS_IS_REALTIME_INODE(ip);
4431         xfs_extlen_t            extsz;
4432         int                     error;
4433
4434         alen = XFS_FILBLKS_MIN(len, MAXEXTLEN);
4435         if (!eof)
4436                 alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
4437
4438         /* Figure out the extent size, adjust alen */
4439         extsz = xfs_get_extsz_hint(ip);
4440         if (extsz) {
4441                 /*
4442                  * Make sure we don't exceed a single extent length when we
4443                  * align the extent by reducing length we are going to
4444                  * align the extent by reducing the length we are going to
4445                  * allocate by the maximum amount extent size alignment may
4446                  */
4447                 alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1));
4448                 error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
4449                                                1, 0, &aoff, &alen);
4450                 ASSERT(!error);
4451         }
4452
4453         if (rt)
4454                 extsz = alen / mp->m_sb.sb_rextsize;
4455
4456         /*
4457          * Make a transaction-less quota reservation for delayed allocation
4458          * blocks.  This number gets adjusted later.  On failure we simply
4459          * return the error; the caller deals with any blocks already mapped.
4460          */
4461         error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
4462                         rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4463         if (error)
4464                 return error;
4465
4466         /*
4467          * Split changing sb for alen and indlen since they could be coming
4468          * from different places.
4469          */
4470         indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
4471         ASSERT(indlen > 0);
4472
4473         if (rt) {
4474                 error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
4475                                           -((int64_t)extsz), 0);
4476         } else {
4477                 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
4478                                                  -((int64_t)alen), 0);
4479         }
4480
4481         if (error)
4482                 goto out_unreserve_quota;
4483
4484         error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
4485                                          -((int64_t)indlen), 0);
4486         if (error)
4487                 goto out_unreserve_blocks;
4488
4489
4490         ip->i_delayed_blks += alen;
4491
4492         got->br_startoff = aoff;
4493         got->br_startblock = nullstartblock(indlen);
4494         got->br_blockcount = alen;
4495         got->br_state = XFS_EXT_NORM;
4496         xfs_bmap_add_extent_hole_delay(ip, lastx, got);
4497
4498         /*
4499          * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
4500          * might have merged it into one of the neighbouring ones.
4501          */
4502         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
4503
4504         ASSERT(got->br_startoff <= aoff);
4505         ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
4506         ASSERT(isnullstartblock(got->br_startblock));
4507         ASSERT(got->br_state == XFS_EXT_NORM);
4508         return 0;
4509
4510 out_unreserve_blocks:
4511         if (rt)
4512                 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0);
4513         else
4514                 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0);
4515 out_unreserve_quota:
4516         if (XFS_IS_QUOTA_ON(mp))
4517                 xfs_trans_unreserve_quota_nblks(NULL, ip, alen, 0, rt ?
4518                                 XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4519         return error;
4520 }
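
/*
 * Summary of the reservations taken above: quota is reserved first, without
 * a transaction; the data blocks are then taken from the free realtime
 * extent counter or the free block counter as appropriate; finally the
 * worst-case indirect blocks computed by xfs_bmap_worst_indlen() are taken
 * from the free block counter as well.  If any step fails, the unwind path
 * gives back whatever had already been reserved, in reverse order.
 */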
4521
4522 /*
4523  * Map file blocks to filesystem blocks, adding delayed allocations as needed.
4524  */
4525 int
4526 xfs_bmapi_delay(
4527         struct xfs_inode        *ip,    /* incore inode */
4528         xfs_fileoff_t           bno,    /* starting file offs. mapped */
4529         xfs_filblks_t           len,    /* length to map in file */
4530         struct xfs_bmbt_irec    *mval,  /* output: map values */
4531         int                     *nmap,  /* i/o: mval size/count */
4532         int                     flags)  /* XFS_BMAPI_... */
4533 {
4534         struct xfs_mount        *mp = ip->i_mount;
4535         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
4536         struct xfs_bmbt_irec    got;    /* current file extent record */
4537         struct xfs_bmbt_irec    prev;   /* previous file extent record */
4538         xfs_fileoff_t           obno;   /* old block number (offset) */
4539         xfs_fileoff_t           end;    /* end of mapped file region */
4540         xfs_extnum_t            lastx;  /* last useful extent number */
4541         int                     eof;    /* we've hit the end of extents */
4542         int                     n = 0;  /* current extent index */
4543         int                     error = 0;
4544
4545         ASSERT(*nmap >= 1);
4546         ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4547         ASSERT(!(flags & ~XFS_BMAPI_ENTIRE));
4548
4549         if (unlikely(XFS_TEST_ERROR(
4550             (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
4551              XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
4552              mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
4553                 XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp);
4554                 return XFS_ERROR(EFSCORRUPTED);
4555         }
4556
4557         if (XFS_FORCED_SHUTDOWN(mp))
4558                 return XFS_ERROR(EIO);
4559
4560         XFS_STATS_INC(xs_blk_mapw);
4561
4562         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4563                 error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
4564                 if (error)
4565                         return error;
4566         }
4567
4568         xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev);
4569         end = bno + len;
4570         obno = bno;
4571
4572         while (bno < end && n < *nmap) {
4573                 if (eof || got.br_startoff > bno) {
4574                         error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got,
4575                                                            &prev, &lastx, eof);
4576                         if (error) {
4577                                 if (n == 0) {
4578                                         *nmap = 0;
4579                                         return error;
4580                                 }
4581                                 break;
4582                         }
4583                 }
4584
4585                 /* set up the extent map to return. */
4586                 xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
4587                 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4588
4589                 /* If we're done, stop now. */
4590                 if (bno >= end || n >= *nmap)
4591                         break;
4592
4593                 /* Else go on to the next record. */
4594                 prev = got;
4595                 if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
4596                         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
4597                 else
4598                         eof = 1;
4599         }
4600
4601         *nmap = n;
4602         return 0;
4603 }
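
/*
 * Purely illustrative calling sketch for the delayed allocation path above
 * (offset_fsb, count_fsb and the locking around the call are assumptions of
 * the example):
 *
 *	struct xfs_bmbt_irec	map;
 *	int			nmap = 1;
 *
 *	error = xfs_bmapi_delay(ip, offset_fsb, count_fsb, &map, &nmap, 0);
 *
 * Any part of the range that was a hole comes back as a delayed allocation
 * with br_startblock == DELAYSTARTBLOCK, while parts that were already
 * mapped are returned as-is.
 */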
4604
4605
4606 STATIC int
4607 xfs_bmapi_allocate(
4608         struct xfs_bmalloca     *bma,
4609         xfs_extnum_t            *lastx,
4610         struct xfs_btree_cur    **cur,
4611         xfs_fsblock_t           *firstblock,
4612         struct xfs_bmap_free    *flist,
4613         int                     flags,
4614         int                     *nallocs,
4615         int                     *logflags)
4616 {
4617         struct xfs_mount        *mp = bma->ip->i_mount;
4618         int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
4619                                                 XFS_ATTR_FORK : XFS_DATA_FORK;
4620         struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4621         xfs_fsblock_t           abno;
4622         xfs_extlen_t            alen;
4623         xfs_fileoff_t           aoff;
4624         int                     error;
4625         int                     rt;
4626
4627         rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(bma->ip);
4628
4629         /*
4630          * For the wasdelay case, we could also just allocate the stuff asked
4631          * for in this bmap call but that wouldn't be as good.
4632          */
4633         if (bma->wasdel) {
4634                 alen = (xfs_extlen_t)bma->gotp->br_blockcount;
4635                 aoff = bma->gotp->br_startoff;
4636                 if (*lastx != NULLEXTNUM && *lastx) {
4637                         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx - 1),
4638                                          bma->prevp);
4639                 }
4640         } else {
4641                 alen = (xfs_extlen_t)XFS_FILBLKS_MIN(bma->alen, MAXEXTLEN);
4642                 if (!bma->eof)
4643                         alen = (xfs_extlen_t)XFS_FILBLKS_MIN(alen,
4644                                         bma->gotp->br_startoff - bma->off);
4645                 aoff = bma->off;
4646         }
4647
4648         /*
4649          * Indicate if this is the first user data in the file, or just any
4650          * user data.
4651          */
4652         if (!(flags & XFS_BMAPI_METADATA)) {
4653                 bma->userdata = (aoff == 0) ?
4654                         XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
4655         }
4656
4657         /*
4658          * Fill in changeable bma fields.
4659          */
4660         bma->alen = alen;
4661         bma->off = aoff;
4662         bma->firstblock = *firstblock;
4663         bma->minlen = (flags & XFS_BMAPI_CONTIG) ? alen : 1;
4664         bma->low = flist->xbf_low;
4665         bma->aeof = 0;
4666
4667         /*
4668          * Only want to do the alignment at the eof if it is userdata and
4669          * allocation length is larger than a stripe unit.
4670          */
4671         if (mp->m_dalign && alen >= mp->m_dalign &&
4672             !(flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
4673                 error = xfs_bmap_isaeof(bma->ip, aoff, whichfork, &bma->aeof);
4674                 if (error)
4675                         return error;
4676         }
4677
4678         error = xfs_bmap_alloc(bma);
4679         if (error)
4680                 return error;
4681
4682         /*
4683          * Copy out result fields.
4684          */
4685         abno = bma->rval;
4686         flist->xbf_low = bma->low;
4687         alen = bma->alen;
4688         aoff = bma->off;
4689         ASSERT(*firstblock == NULLFSBLOCK ||
4690                XFS_FSB_TO_AGNO(mp, *firstblock) ==
4691                XFS_FSB_TO_AGNO(mp, bma->firstblock) ||
4692                (flist->xbf_low &&
4693                 XFS_FSB_TO_AGNO(mp, *firstblock) <
4694                         XFS_FSB_TO_AGNO(mp, bma->firstblock)));
4695         *firstblock = bma->firstblock;
4696         if (*cur)
4697                 (*cur)->bc_private.b.firstblock = *firstblock;
4698         if (abno == NULLFSBLOCK)
4699                 return 0;
4700         if ((ifp->if_flags & XFS_IFBROOT) && !*cur) {
4701                 (*cur) = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4702                 (*cur)->bc_private.b.firstblock = *firstblock;
4703                 (*cur)->bc_private.b.flist = flist;
4704         }
4705         /*
4706          * Bump the number of extents we've allocated
4707          * in this call.
4708          */
4709         (*nallocs)++;
4710
4711         if (*cur)
4712                 (*cur)->bc_private.b.flags =
4713                         bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
4714
4715         bma->gotp->br_startoff = aoff;
4716         bma->gotp->br_startblock = abno;
4717         bma->gotp->br_blockcount = alen;
4718         bma->gotp->br_state = XFS_EXT_NORM;
4719
4720         /*
4721          * A wasdelay extent has been initialized, so shouldn't be flagged
4722          * as unwritten.
4723          */
4724         if (!bma->wasdel && (flags & XFS_BMAPI_PREALLOC) &&
4725             xfs_sb_version_hasextflgbit(&mp->m_sb))
4726                 bma->gotp->br_state = XFS_EXT_UNWRITTEN;
4727
4728         error = xfs_bmap_add_extent(bma->tp, bma->ip, lastx, cur, bma->gotp,
4729                                     firstblock, flist, logflags, whichfork);
4730         if (error)
4731                 return error;
4732
4733         /*
4734          * Update our extent pointer, given that xfs_bmap_add_extent  might
4735          * have merged it into one of the neighbouring ones.
4736          */
4737         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), bma->gotp);
4738
4739         ASSERT(bma->gotp->br_startoff <= aoff);
4740         ASSERT(bma->gotp->br_startoff + bma->gotp->br_blockcount >=
4741                 aoff + alen);
4742         ASSERT(bma->gotp->br_state == XFS_EXT_NORM ||
4743                bma->gotp->br_state == XFS_EXT_UNWRITTEN);
4744         return 0;
4745 }
4746
4747 STATIC int
4748 xfs_bmapi_convert_unwritten(
4749         struct xfs_bmalloca     *bma,
4750         struct xfs_bmbt_irec    *mval,
4751         xfs_filblks_t           len,
4752         xfs_extnum_t            *lastx,
4753         struct xfs_btree_cur    **cur,
4754         xfs_fsblock_t           *firstblock,
4755         struct xfs_bmap_free    *flist,
4756         int                     flags,
4757         int                     *logflags)
4758 {
4759         int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
4760                                                 XFS_ATTR_FORK : XFS_DATA_FORK;
4761         struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4762         int                     error;
4763
4764         *logflags = 0;
4765
4766         /* check if we need to do unwritten->real conversion */
4767         if (mval->br_state == XFS_EXT_UNWRITTEN &&
4768             (flags & XFS_BMAPI_PREALLOC))
4769                 return 0;
4770
4771         /* check if we need to do real->unwritten conversion */
4772         if (mval->br_state == XFS_EXT_NORM &&
4773             (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4774                         (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4775                 return 0;
4776
4777         /*
4778          * Modify (by adding) the state flag, if writing.
4779          */
4780         ASSERT(mval->br_blockcount <= len);
4781         if ((ifp->if_flags & XFS_IFBROOT) && !*cur) {
4782                 *cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4783                                         bma->ip, whichfork);
4784                 (*cur)->bc_private.b.firstblock = *firstblock;
4785                 (*cur)->bc_private.b.flist = flist;
4786         }
4787         mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4788                                 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4789
4790         error = xfs_bmap_add_extent(bma->tp, bma->ip, lastx, cur, mval,
4791                                 firstblock, flist, logflags, whichfork);
4792         if (error)
4793                 return error;
4794
4795         /*
4796          * Update our extent pointer, given that xfs_bmap_add_extent might
4797          * have merged it into one of the neighbouring ones.
4798          */
4799         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), bma->gotp);
4800
4801         /*
4802          * We may have combined previously unwritten space with written space,
4803          * so generate another request.
4804          */
4805         if (mval->br_blockcount < len)
4806                 return EAGAIN;
4807         return 0;
4808 }
4809
4810 /*
4811  * Map file blocks to filesystem blocks, and allocate blocks or convert the
4812  * extent state if necessary.  Details behaviour is controlled by the flags
4813  * extent state if necessary.  Detailed behaviour is controlled by the flags
4814  * locking problems.
4815  *
4816  * The returned value in "firstblock" from the first call in a transaction
4817  * must be remembered and presented to subsequent calls in "firstblock".
4818  * An upper bound for the number of blocks to be allocated is supplied to
4819  * the first call in "total"; if no allocation group has that many free
4820  * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
4821  */
4822 int
4823 xfs_bmapi_write(
4824         struct xfs_trans        *tp,            /* transaction pointer */
4825         struct xfs_inode        *ip,            /* incore inode */
4826         xfs_fileoff_t           bno,            /* starting file offs. mapped */
4827         xfs_filblks_t           len,            /* length to map in file */
4828         int                     flags,          /* XFS_BMAPI_... */
4829         xfs_fsblock_t           *firstblock,    /* first allocated block
4830                                                    controls a.g. for allocs */
4831         xfs_extlen_t            total,          /* total blocks needed */
4832         struct xfs_bmbt_irec    *mval,          /* output: map values */
4833         int                     *nmap,          /* i/o: mval size/count */
4834         struct xfs_bmap_free    *flist)         /* i/o: list extents to free */
4835 {
4836         struct xfs_mount        *mp = ip->i_mount;
4837         struct xfs_ifork        *ifp;
4838         struct xfs_bmalloca     bma = { 0 };    /* args for xfs_bmap_alloc */
4839         struct xfs_btree_cur    *cur;           /* bmap btree cursor */
4840         xfs_fileoff_t           end;            /* end of mapped file region */
4841         int                     eof;            /* after the end of extents */
4842         int                     error;          /* error return */
4843         struct xfs_bmbt_irec    got;            /* current file extent record */
4844         xfs_extnum_t            lastx;          /* last useful extent number */
4845         int                     logflags;       /* flags for transaction logging */
4846         xfs_extlen_t            minleft;        /* min blocks left after allocation */
4847         int                     n;              /* current extent index */
4848         int                     nallocs;        /* number of extents alloc'd */
4849         xfs_fileoff_t           obno;           /* old block number (offset) */
4850         struct xfs_bmbt_irec    prev;           /* previous file extent record */
4851         int                     tmp_logflags;   /* temp flags holder */
4852         int                     whichfork;      /* data or attr fork */
4853         char                    inhole;         /* current location is hole in file */
4854         char                    wasdelay;       /* old extent was delayed */
4855
4856 #ifdef DEBUG
4857         xfs_fileoff_t           orig_bno;       /* original block number value */
4858         int                     orig_flags;     /* original flags arg value */
4859         xfs_filblks_t           orig_len;       /* original value of len arg */
4860         struct xfs_bmbt_irec    *orig_mval;     /* original value of mval */
4861         int                     orig_nmap;      /* original value of *nmap */
4862
4863         orig_bno = bno;
4864         orig_len = len;
4865         orig_flags = flags;
4866         orig_mval = mval;
4867         orig_nmap = *nmap;
4868 #endif
4869
4870         ASSERT(*nmap >= 1);
4871         ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4872         ASSERT(!(flags & XFS_BMAPI_IGSTATE));
4873         ASSERT(tp != NULL);
4874
4875         whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
4876                 XFS_ATTR_FORK : XFS_DATA_FORK;
4877
4878         if (unlikely(XFS_TEST_ERROR(
4879             (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
4880              XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
4881              XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL),
4882              mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
4883                 XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
4884                 return XFS_ERROR(EFSCORRUPTED);
4885         }
4886
4887         if (XFS_FORCED_SHUTDOWN(mp))
4888                 return XFS_ERROR(EIO);
4889
4890         ifp = XFS_IFORK_PTR(ip, whichfork);
4891         ASSERT(ifp->if_ext_max ==
4892                XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
4893
4894         XFS_STATS_INC(xs_blk_mapw);
4895
4896         logflags = 0;
4897         nallocs = 0;
4898         cur = NULL;
4899
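        /*
         * A local format fork cannot hold extent mappings, so convert it
         * to extents format before allocating into it.
         */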
4900         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
4901                 error = xfs_bmap_local_to_extents(tp, ip, firstblock, total,
4902                                                   &logflags, whichfork);
4903                 if (error)
4904                         goto error0;
4905         }
4906
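        /*
         * Until the first allocation in this transaction commits us to an
         * AG (*firstblock != NULLFSBLOCK), make sure the allocation leaves
         * enough free blocks in the AG for the bmap btree to grow.
         */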
4907         if (*firstblock == NULLFSBLOCK) {
4908                 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
4909                         minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
4910                 else
4911                         minleft = 1;
4912         } else {
4913                 minleft = 0;
4914         }
4915
4916         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4917                 error = xfs_iread_extents(tp, ip, whichfork);
4918                 if (error)
4919                         goto error0;
4920         }
4921
4922         xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
4923         n = 0;
4924         end = bno + len;
4925         obno = bno;
4926
4927         bma.tp = tp;
4928         bma.ip = ip;
4929         bma.prevp = &prev;
4930         bma.gotp = &got;
4931         bma.total = total;
4932         bma.userdata = 0;
4933
4934         while (bno < end && n < *nmap) {
4935                 inhole = eof || got.br_startoff > bno;
4936                 wasdelay = !inhole && isnullstartblock(got.br_startblock);
4937
4938                 /*
4939                  * First, deal with the hole before the allocated space
4940                  * that we found, if any.
4941                  */
4942                 if (inhole || wasdelay) {
4943                         bma.eof = eof;
4944                         bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4945                         bma.wasdel = wasdelay;
4946                         bma.alen = len;
4947                         bma.off = bno;
4948                         bma.minleft = minleft;
4949
4950                         error = xfs_bmapi_allocate(&bma, &lastx, &cur,
4951                                         firstblock, flist, flags, &nallocs,
4952                                         &tmp_logflags);
4953                         logflags |= tmp_logflags;
4954                         if (error)
4955                                 goto error0;
4956                         if (flist && flist->xbf_low)
4957                                 minleft = 0;
4958                         if (bma.rval == NULLFSBLOCK)
4959                                 break;
4960                 }
4961
4962                 /* Deal with the allocated space we found.  */
4963                 xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
4964
4965                 /* Execute unwritten extent conversion if necessary */
4966                 error = xfs_bmapi_convert_unwritten(&bma, mval, len, &lastx,
4967                                                     &cur, firstblock, flist,
4968                                                     flags, &tmp_logflags);
4969                 logflags |= tmp_logflags;
4970                 if (error == EAGAIN)
4971                         continue;
4972                 if (error)
4973                         goto error0;
4974
4975                 /* update the extent map to return */
4976                 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4977
4978                 /*
4979                  * If we're done, stop now.  Stop when we've allocated
4980                  * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
4981                  * the transaction may get too big.
4982                  */
4983                 if (bno >= end || n >= *nmap || nallocs >= *nmap)
4984                         break;
4985
4986                 /* Else go on to the next record. */
4987                 prev = got;
4988                 if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
4989                         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
4990                 else
4991                         eof = 1;
4992         }
4993         *nmap = n;
4994
4995         /*
4996          * Transform from btree to extents, give it cur.
4997          */
4998         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
4999             XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
5000                 ASSERT(cur);
5001                 error = xfs_bmap_btree_to_extents(tp, ip, cur,
5002                         &tmp_logflags, whichfork);
5003                 logflags |= tmp_logflags;
5004                 if (error)
5005                         goto error0;
5006         }
5007         ASSERT(ifp->if_ext_max ==
5008                XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
5009         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
5010                XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max);
5011         error = 0;
5012 error0:
5013         /*
5014          * Log everything.  Do this after conversion, there's no point in
5015          * logging the extent records if we've converted to btree format.
5016          */
5017         if ((logflags & xfs_ilog_fext(whichfork)) &&
5018             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
5019                 logflags &= ~xfs_ilog_fext(whichfork);
5020         else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5021                  XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
5022                 logflags &= ~xfs_ilog_fbroot(whichfork);
5023         /*
5024          * Log whatever the flags say, even if error.  Otherwise we might miss
5025          * detecting a case where the data is changed, there's an error,
5026          * and it's not logged so we don't shutdown when we should.
5027          * and it's not logged so we don't shut down when we should.
5028         if (logflags)
5029                 xfs_trans_log_inode(tp, ip, logflags);
5030
5031         if (cur) {
5032                 if (!error) {
5033                         ASSERT(*firstblock == NULLFSBLOCK ||
5034                                XFS_FSB_TO_AGNO(mp, *firstblock) ==
5035                                XFS_FSB_TO_AGNO(mp,
5036                                        cur->bc_private.b.firstblock) ||
5037                                (flist->xbf_low &&
5038                                 XFS_FSB_TO_AGNO(mp, *firstblock) <
5039                                 XFS_FSB_TO_AGNO(mp,
5040                                         cur->bc_private.b.firstblock)));
5041                         *firstblock = cur->bc_private.b.firstblock;
5042                 }
5043                 xfs_btree_del_cursor(cur,
5044                         error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5045         }
5046         if (!error)
5047                 xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
5048                         orig_nmap, *nmap);
5049         return error;
5050 }
5051
5052 /*
5053  * Unmap (remove) blocks from a file.
5054  * If nexts is nonzero then the number of extents to remove is limited to
5055  * that value.  If not all extents in the block range can be removed then
5056  * *done is set.
5057  */
5058 int                                             /* error */
5059 xfs_bunmapi(
5060         xfs_trans_t             *tp,            /* transaction pointer */
5061         struct xfs_inode        *ip,            /* incore inode */
5062         xfs_fileoff_t           bno,            /* starting offset to unmap */
5063         xfs_filblks_t           len,            /* length to unmap in file */
5064         int                     flags,          /* misc flags */
5065         xfs_extnum_t            nexts,          /* number of extents max */
5066         xfs_fsblock_t           *firstblock,    /* first allocated block
5067                                                    controls a.g. for allocs */
5068         xfs_bmap_free_t         *flist,         /* i/o: list extents to free */
5069         int                     *done)          /* set if not done yet */
5070 {
5071         xfs_btree_cur_t         *cur;           /* bmap btree cursor */
5072         xfs_bmbt_irec_t         del;            /* extent being deleted */
5073         int                     eof;            /* is deleting at eof */
5074         xfs_bmbt_rec_host_t     *ep;            /* extent record pointer */
5075         int                     error;          /* error return value */
5076         xfs_extnum_t            extno;          /* extent number in list */
5077         xfs_bmbt_irec_t         got;            /* current extent record */
5078         xfs_ifork_t             *ifp;           /* inode fork pointer */
5079         int                     isrt;           /* freeing in rt area */
5080         xfs_extnum_t            lastx;          /* last extent index used */
5081         int                     logflags;       /* transaction logging flags */
5082         xfs_extlen_t            mod;            /* rt extent offset */
5083         xfs_mount_t             *mp;            /* mount structure */
5084         xfs_extnum_t            nextents;       /* number of file extents */
5085         xfs_bmbt_irec_t         prev;           /* previous extent record */
5086         xfs_fileoff_t           start;          /* first file offset deleted */
5087         int                     tmp_logflags;   /* partial logging flags */
5088         int                     wasdel;         /* was a delayed alloc extent */
5089         int                     whichfork;      /* data or attribute fork */
5090         xfs_fsblock_t           sum;
5091
5092         trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
5093
5094         whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
5095                 XFS_ATTR_FORK : XFS_DATA_FORK;
5096         ifp = XFS_IFORK_PTR(ip, whichfork);
5097         if (unlikely(
5098             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5099             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
5100                 XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
5101                                  ip->i_mount);
5102                 return XFS_ERROR(EFSCORRUPTED);
5103         }
5104         mp = ip->i_mount;
5105         if (XFS_FORCED_SHUTDOWN(mp))
5106                 return XFS_ERROR(EIO);
5107
5108         ASSERT(len > 0);
5109         ASSERT(nexts >= 0);
5110         ASSERT(ifp->if_ext_max ==
5111                XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
5112         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5113             (error = xfs_iread_extents(tp, ip, whichfork)))
5114                 return error;
5115         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
5116         if (nextents == 0) {
5117                 *done = 1;
5118                 return 0;
5119         }
5120         XFS_STATS_INC(xs_blk_unmap);
5121         isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
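        /*
         * Unmapping proceeds backwards through the range, so start the
         * extent search at the last block to be removed.
         */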
5122         start = bno;
5123         bno = start + len - 1;
5124         ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
5125                 &prev);
5126
5127         /*
5128          * Check to see if the given block number is past the end of the
5129          * file; if so, back up to the last block.
5130          */
5131         if (eof) {
5132                 ep = xfs_iext_get_ext(ifp, --lastx);
5133                 xfs_bmbt_get_all(ep, &got);
5134                 bno = got.br_startoff + got.br_blockcount - 1;
5135         }
5136         logflags = 0;
5137         if (ifp->if_flags & XFS_IFBROOT) {
5138                 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
5139                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5140                 cur->bc_private.b.firstblock = *firstblock;
5141                 cur->bc_private.b.flist = flist;
5142                 cur->bc_private.b.flags = 0;
5143         } else
5144                 cur = NULL;
5145         extno = 0;
5146         while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
5147                (nexts == 0 || extno < nexts)) {
5148                 /*
5149                  * Is the found extent after a hole in which bno lives?
5150                  * Just back up to the previous extent, if so.
5151                  */
5152                 if (got.br_startoff > bno) {
5153                         if (--lastx < 0)
5154                                 break;
5155                         ep = xfs_iext_get_ext(ifp, lastx);
5156                         xfs_bmbt_get_all(ep, &got);
5157                 }
5158                 /*
5159                  * Is the last block of this extent before the range
5160                  * we're supposed to delete?  If so, we're done.
5161                  */
5162                 bno = XFS_FILEOFF_MIN(bno,
5163                         got.br_startoff + got.br_blockcount - 1);
5164                 if (bno < start)
5165                         break;
5166                 /*
5167                  * Then deal with the (possibly delayed) allocated space
5168                  * we found.
5169                  */
5170                 ASSERT(ep != NULL);
5171                 del = got;
5172                 wasdel = isnullstartblock(del.br_startblock);
5173                 if (got.br_startoff < start) {
5174                         del.br_startoff = start;
5175                         del.br_blockcount -= start - got.br_startoff;
5176                         if (!wasdel)
5177                                 del.br_startblock += start - got.br_startoff;
5178                 }
5179                 if (del.br_startoff + del.br_blockcount > bno + 1)
5180                         del.br_blockcount = bno + 1 - del.br_startoff;
5181                 sum = del.br_startblock + del.br_blockcount;
5182                 if (isrt &&
5183                     (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
5184                         /*
5185                          * Realtime extent not lined up at the end.
5186                          * The extent could have been split into written
5187                          * and unwritten pieces, or we could just be
5188                          * unmapping part of it.  But we can't really
5189                          * get rid of part of a realtime extent.
5190                          */
5191                         if (del.br_state == XFS_EXT_UNWRITTEN ||
5192                             !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5193                                 /*
5194                                  * This piece is unwritten, or we're not
5195                                  * using unwritten extents.  Skip over it.
5196                                  */
5197                                 ASSERT(bno >= mod);
5198                                 bno -= mod > del.br_blockcount ?
5199                                         del.br_blockcount : mod;
5200                                 if (bno < got.br_startoff) {
5201                                         if (--lastx >= 0)
5202                                                 xfs_bmbt_get_all(xfs_iext_get_ext(
5203                                                         ifp, lastx), &got);
5204                                 }
5205                                 continue;
5206                         }
5207                         /*
5208                          * It's written, turn it unwritten.
5209                          * This is better than zeroing it.
5210                          */
5211                         ASSERT(del.br_state == XFS_EXT_NORM);
5212                         ASSERT(xfs_trans_get_block_res(tp) > 0);
5213                         /*
5214                          * If this spans a realtime extent boundary,
5215                          * chop it back to the start of the one we end at.
5216                          */
5217                         if (del.br_blockcount > mod) {
5218                                 del.br_startoff += del.br_blockcount - mod;
5219                                 del.br_startblock += del.br_blockcount - mod;
5220                                 del.br_blockcount = mod;
5221                         }
5222                         del.br_state = XFS_EXT_UNWRITTEN;
5223                         error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, &del,
5224                                 firstblock, flist, &logflags,
5225                                 XFS_DATA_FORK);
5226                         if (error)
5227                                 goto error0;
5228                         goto nodelete;
5229                 }
5230                 if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) {
5231                         /*
5232                          * Realtime extent is lined up at the end but not
5233                          * at the front.  We'll get rid of full extents if
5234                          * we can.
5235                          */
5236                         mod = mp->m_sb.sb_rextsize - mod;
5237                         if (del.br_blockcount > mod) {
5238                                 del.br_blockcount -= mod;
5239                                 del.br_startoff += mod;
5240                                 del.br_startblock += mod;
5241                         } else if ((del.br_startoff == start &&
5242                                     (del.br_state == XFS_EXT_UNWRITTEN ||
5243                                      xfs_trans_get_block_res(tp) == 0)) ||
5244                                    !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5245                                 /*
5246                                  * Can't make it unwritten.  There isn't
5247                                  * a full extent here so just skip it.
5248                                  */
5249                                 ASSERT(bno >= del.br_blockcount);
5250                                 bno -= del.br_blockcount;
5251                                 if (got.br_startoff > bno) {
5252                                         if (--lastx >= 0) {
5253                                                 ep = xfs_iext_get_ext(ifp,
5254                                                                       lastx);
5255                                                 xfs_bmbt_get_all(ep, &got);
5256                                         }
5257                                 }
5258                                 continue;
5259                         } else if (del.br_state == XFS_EXT_UNWRITTEN) {
5260                                 /*
5261                                  * This one is already unwritten.
5262                                  * It must have a written left neighbor.
5263                                  * Unwrite the killed part of that one and
5264                                  * try again.
5265                                  */
5266                                 ASSERT(lastx > 0);
5267                                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
5268                                                 lastx - 1), &prev);
5269                                 ASSERT(prev.br_state == XFS_EXT_NORM);
5270                                 ASSERT(!isnullstartblock(prev.br_startblock));
5271                                 ASSERT(del.br_startblock ==
5272                                        prev.br_startblock + prev.br_blockcount);
5273                                 if (prev.br_startoff < start) {
5274                                         mod = start - prev.br_startoff;
5275                                         prev.br_blockcount -= mod;
5276                                         prev.br_startblock += mod;
5277                                         prev.br_startoff = start;
5278                                 }
5279                                 prev.br_state = XFS_EXT_UNWRITTEN;
5280                                 lastx--;
5281                                 error = xfs_bmap_add_extent(tp, ip, &lastx,
5282                                                 &cur, &prev, firstblock, flist,
5283                                                 &logflags, XFS_DATA_FORK);
5284                                 if (error)
5285                                         goto error0;
5286                                 goto nodelete;
5287                         } else {
5288                                 ASSERT(del.br_state == XFS_EXT_NORM);
5289                                 del.br_state = XFS_EXT_UNWRITTEN;
5290                                 error = xfs_bmap_add_extent(tp, ip, &lastx,
5291                                                 &cur, &del, firstblock, flist,
5292                                                 &logflags, XFS_DATA_FORK);
5293                                 if (error)
5294                                         goto error0;
5295                                 goto nodelete;
5296                         }
5297                 }
5298                 if (wasdel) {
5299                         ASSERT(startblockval(del.br_startblock) > 0);
5300                         /* Update realtime/data freespace, unreserve quota */
5301                         if (isrt) {
5302                                 xfs_filblks_t rtexts;
5303
5304                                 rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
5305                                 do_div(rtexts, mp->m_sb.sb_rextsize);
5306                                 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
5307                                                 (int64_t)rtexts, 0);
5308                                 (void)xfs_trans_reserve_quota_nblks(NULL,
5309                                         ip, -((long)del.br_blockcount), 0,
5310                                         XFS_QMOPT_RES_RTBLKS);
5311                         } else {
5312                                 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
5313                                                 (int64_t)del.br_blockcount, 0);
5314                                 (void)xfs_trans_reserve_quota_nblks(NULL,
5315                                         ip, -((long)del.br_blockcount), 0,
5316                                         XFS_QMOPT_RES_REGBLKS);
5317                         }
5318                         ip->i_delayed_blks -= del.br_blockcount;
5319                         if (cur)
5320                                 cur->bc_private.b.flags |=
5321                                         XFS_BTCUR_BPRV_WASDEL;
5322                 } else if (cur)
5323                         cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
5324                 /*
5325                  * If it's the case where the directory code is running
5326                  * with no block reservation, and the deleted block is in
5327                  * the middle of its extent, and the resulting insert
5328                  * of an extent would cause transformation to btree format,
5329                  * then reject it.  The calling code will then swap
5330                  * blocks around instead.
5331                  * We have to do this now, rather than waiting for the
5332                  * conversion to btree format, since the transaction
5333                  * will be dirty.
5334                  */
5335                 if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
5336                     XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
5337                     XFS_IFORK_NEXTENTS(ip, whichfork) >= ifp->if_ext_max &&
5338                     del.br_startoff > got.br_startoff &&
5339                     del.br_startoff + del.br_blockcount <
5340                     got.br_startoff + got.br_blockcount) {
5341                         error = XFS_ERROR(ENOSPC);
5342                         goto error0;
5343                 }
5344                 error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
5345                                 &tmp_logflags, whichfork);
5346                 logflags |= tmp_logflags;
5347                 if (error)
5348                         goto error0;
5349                 bno = del.br_startoff - 1;
5350 nodelete:
5351                 /*
5352                  * If not done go on to the next (previous) record.
5353                  */
5354                 if (bno != (xfs_fileoff_t)-1 && bno >= start) {
5355                         if (lastx >= 0) {
5356                                 ep = xfs_iext_get_ext(ifp, lastx);
5357                                 if (xfs_bmbt_get_startoff(ep) > bno) {
5358                                         if (--lastx >= 0)
5359                                                 ep = xfs_iext_get_ext(ifp,
5360                                                                       lastx);
5361                                 }
5362                                 xfs_bmbt_get_all(ep, &got);
5363                         }
5364                         extno++;
5365                 }
5366         }
5367         *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0;
5368         ASSERT(ifp->if_ext_max ==
5369                XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
5370         /*
5371          * Convert to a btree if necessary.
5372          */
5373         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
5374             XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) {
5375                 ASSERT(cur == NULL);
5376                 error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist,
5377                         &cur, 0, &tmp_logflags, whichfork);
5378                 logflags |= tmp_logflags;
5379                 if (error)
5380                         goto error0;
5381         }
5382         /*
5383          * transform from btree to extents, give it cur
5384          */
5385         else if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
5386                  XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
5387                 ASSERT(cur != NULL);
5388                 error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
5389                         whichfork);
5390                 logflags |= tmp_logflags;
5391                 if (error)
5392                         goto error0;
5393         }
5394         /*
5395          * transform from extents to local?
5396          */
5397         ASSERT(ifp->if_ext_max ==
5398                XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
5399         error = 0;
5400 error0:
5401         /*
5402          * Log everything.  Do this after conversion, there's no point in
5403          * logging the extent records if we've converted to btree format.
5404          */
5405         if ((logflags & xfs_ilog_fext(whichfork)) &&
5406             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
5407                 logflags &= ~xfs_ilog_fext(whichfork);
5408         else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5409                  XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
5410                 logflags &= ~xfs_ilog_fbroot(whichfork);
5411         /*
5412          * Log the inode even in the error case; if the transaction
5413          * is dirty we'll need to shut down the filesystem.
5414          */
5415         if (logflags)
5416                 xfs_trans_log_inode(tp, ip, logflags);
5417         if (cur) {
5418                 if (!error) {
5419                         *firstblock = cur->bc_private.b.firstblock;
5420                         cur->bc_private.b.allocated = 0;
5421                 }
5422                 xfs_btree_del_cursor(cur,
5423                         error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5424         }
5425         return error;
5426 }
5427
5428 /*
5429  * returns 1 for success, 0 if we failed to map the extent.
5430  */
5431 STATIC int
5432 xfs_getbmapx_fix_eof_hole(
5433         xfs_inode_t             *ip,            /* xfs incore inode pointer */
5434         struct getbmapx         *out,           /* output structure */
5435         int                     prealloced,     /* this is a file with
5436                                                  * preallocated data space */
5437         __int64_t               end,            /* last block requested */
5438         xfs_fsblock_t           startblock)
5439 {
5440         __int64_t               fixlen;
5441         xfs_mount_t             *mp;            /* file system mount point */
5442         xfs_ifork_t             *ifp;           /* inode fork pointer */
5443         xfs_extnum_t            lastx;          /* last extent pointer */
5444         xfs_fileoff_t           fileblock;
5445
5446         if (startblock == HOLESTARTBLOCK) {
5447                 mp = ip->i_mount;
5448                 out->bmv_block = -1;
5449                 fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, ip->i_size));
5450                 fixlen -= out->bmv_offset;
5451                 if (prealloced && out->bmv_offset + out->bmv_length == end) {
5452                         /* Came to hole at EOF. Trim it. */
5453                         if (fixlen <= 0)
5454                                 return 0;
5455                         out->bmv_length = fixlen;
5456                 }
5457         } else {
5458                 if (startblock == DELAYSTARTBLOCK)
5459                         out->bmv_block = -2;
5460                 else
5461                         out->bmv_block = xfs_fsb_to_db(ip, startblock);
5462                 fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
5463                 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
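                /* Flag the mapping if it covers the last extent in the fork. */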
5464                 if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
5465                    (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
5466                         out->bmv_oflags |= BMV_OF_LAST;
5467         }
5468
5469         return 1;
5470 }
5471
5472 /*
5473  * Get inode's extents as described in bmv, and format for output.
5474  * Calls formatter to fill the user's buffer until all extents
5475  * are mapped, until the passed-in bmv->bmv_count slots have
5476  * been filled, or until the formatter short-circuits the loop,
5477  * if it is tracking filled-in extents on its own.
5478  */
5479 int                                             /* error code */
5480 xfs_getbmap(
5481         xfs_inode_t             *ip,
5482         struct getbmapx         *bmv,           /* user bmap structure */
5483         xfs_bmap_format_t       formatter,      /* format to user */
5484         void                    *arg)           /* formatter arg */
5485 {
5486         __int64_t               bmvend;         /* last block requested */
5487         int                     error = 0;      /* return value */
5488         __int64_t               fixlen;         /* length for -1 case */
5489         int                     i;              /* extent number */
5490         int                     lock;           /* lock state */
5491         xfs_bmbt_irec_t         *map;           /* buffer for user's data */
5492         xfs_mount_t             *mp;            /* file system mount point */
5493         int                     nex;            /* # of user extents can do */
5494         int                     nexleft;        /* # of user extents left */
5495         int                     subnex;         /* # of bmapi's can do */
5496         int                     nmap;           /* number of map entries */
5497         struct getbmapx         *out;           /* output structure */
5498         int                     whichfork;      /* data or attr fork */
5499         int                     prealloced;     /* this is a file with
5500                                                  * preallocated data space */
5501         int                     iflags;         /* interface flags */
5502         int                     bmapi_flags;    /* flags for xfs_bmapi */
5503         int                     cur_ext = 0;
5504
5505         mp = ip->i_mount;
5506         iflags = bmv->bmv_iflags;
5507         whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
5508
5509         if (whichfork == XFS_ATTR_FORK) {
5510                 if (XFS_IFORK_Q(ip)) {
5511                         if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
5512                             ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
5513                             ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
5514                                 return XFS_ERROR(EINVAL);
5515                 } else if (unlikely(
5516                            ip->i_d.di_aformat != 0 &&
5517                            ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
5518                         XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
5519                                          ip->i_mount);
5520                         return XFS_ERROR(EFSCORRUPTED);
5521                 }
5522
5523                 prealloced = 0;
5524                 fixlen = 1LL << 32;
5525         } else {
5526                 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
5527                     ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
5528                     ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
5529                         return XFS_ERROR(EINVAL);
5530
5531                 if (xfs_get_extsz_hint(ip) ||
5532                     ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
5533                         prealloced = 1;
5534                         fixlen = XFS_MAXIOFFSET(mp);
5535                 } else {
5536                         prealloced = 0;
5537                         fixlen = ip->i_size;
5538                 }
5539         }
5540
5541         if (bmv->bmv_length == -1) {
5542                 fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
5543                 bmv->bmv_length =
5544                         max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
5545         } else if (bmv->bmv_length == 0) {
5546                 bmv->bmv_entries = 0;
5547                 return 0;
5548         } else if (bmv->bmv_length < 0) {
5549                 return XFS_ERROR(EINVAL);
5550         }
5551
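        /*
         * bmv_count includes the header slot in the caller's getbmapx
         * array, so at most bmv_count - 1 extent records can be returned.
         */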
5552         nex = bmv->bmv_count - 1;
5553         if (nex <= 0)
5554                 return XFS_ERROR(EINVAL);
5555         bmvend = bmv->bmv_offset + bmv->bmv_length;
5556
5557
5558         if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
5559                 return XFS_ERROR(ENOMEM);
5560         out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
5561         if (!out)
5562                 return XFS_ERROR(ENOMEM);
5563
5564         xfs_ilock(ip, XFS_IOLOCK_SHARED);
5565         if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
5566                 if (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size) {
5567                         error = xfs_flush_pages(ip, 0, -1, 0, FI_REMAPF);
5568                         if (error)
5569                                 goto out_unlock_iolock;
5570                 }
5571                 /*
5572                  * even after flushing the inode, there can still be delalloc
5573                  * blocks on the inode beyond EOF due to speculative
5574                  * preallocation. These are not removed until the release
5575                  * function is called or the inode is inactivated. Hence we
5576                  * cannot assert here that ip->i_delayed_blks == 0.
5577                  */
5578         }
5579
5580         lock = xfs_ilock_map_shared(ip);
5581
5582         /*
5583          * Don't let nex be bigger than the number of extents
5584          * we can have assuming alternating holes and real extents.
5585          */
5586         if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
5587                 nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
5588
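        /*
         * Unless the caller wants preallocated (unwritten) extents reported
         * separately, let xfs_bmapi_read() ignore the extent state and merge
         * contiguous written and unwritten mappings.
         */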
5589         bmapi_flags = xfs_bmapi_aflag(whichfork);
5590         if (!(iflags & BMV_IF_PREALLOC))
5591                 bmapi_flags |= XFS_BMAPI_IGSTATE;
5592
5593         /*
5594          * Allocate enough space to handle "subnex" maps at a time.
5595          */
5596         error = ENOMEM;
5597         subnex = 16;
5598         map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
5599         if (!map)
5600                 goto out_unlock_ilock;
5601
5602         bmv->bmv_entries = 0;
5603
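        /*
         * Bail out early if the fork has no extents to report (ignoring
         * delalloc blocks unless the caller asked for them).
         */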
5604         if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
5605             (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
5606                 error = 0;
5607                 goto out_free_map;
5608         }
5609
5610         nexleft = nex;
5611
5612         do {
5613                 nmap = (nexleft > subnex) ? subnex : nexleft;
5614                 error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
5615                                        XFS_BB_TO_FSB(mp, bmv->bmv_length),
5616                                        map, &nmap, bmapi_flags);
5617                 if (error)
5618                         goto out_free_map;
5619                 ASSERT(nmap <= subnex);
5620
5621                 for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
5622                         out[cur_ext].bmv_oflags = 0;
5623                         if (map[i].br_state == XFS_EXT_UNWRITTEN)
5624                                 out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
5625                         else if (map[i].br_startblock == DELAYSTARTBLOCK)
5626                                 out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
5627                         out[cur_ext].bmv_offset =
5628                                 XFS_FSB_TO_BB(mp, map[i].br_startoff);
5629                         out[cur_ext].bmv_length =
5630                                 XFS_FSB_TO_BB(mp, map[i].br_blockcount);
5631                         out[cur_ext].bmv_unused1 = 0;
5632                         out[cur_ext].bmv_unused2 = 0;
5633                         ASSERT(((iflags & BMV_IF_DELALLOC) != 0) ||
5634                               (map[i].br_startblock != DELAYSTARTBLOCK));
5635                         if (map[i].br_startblock == HOLESTARTBLOCK &&
5636                             whichfork == XFS_ATTR_FORK) {
5637                                 /* came to the end of attribute fork */
5638                                 out[cur_ext].bmv_oflags |= BMV_OF_LAST;
5639                                 goto out_free_map;
5640                         }
5641
5642                         if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext],
5643                                         prealloced, bmvend,
5644                                         map[i].br_startblock))
5645                                 goto out_free_map;
5646
5647                         bmv->bmv_offset =
5648                                 out[cur_ext].bmv_offset +
5649                                 out[cur_ext].bmv_length;
5650                         bmv->bmv_length =
5651                                 max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
5652
5653                         /*
5654                          * In case we don't want to return the hole,
5655                          * don't increase cur_ext so that we can reuse
5656                          * it in the next loop.
5657                          */
5658                         if ((iflags & BMV_IF_NO_HOLES) &&
5659                             map[i].br_startblock == HOLESTARTBLOCK) {
5660                                 memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
5661                                 continue;
5662                         }
5663
5664                         nexleft--;
5665                         bmv->bmv_entries++;
5666                         cur_ext++;
5667                 }
5668         } while (nmap && nexleft && bmv->bmv_length);
5669
5670  out_free_map:
5671         kmem_free(map);
5672  out_unlock_ilock:
5673         xfs_iunlock_map_shared(ip, lock);
5674  out_unlock_iolock:
5675         xfs_iunlock(ip, XFS_IOLOCK_SHARED);
5676
5677         for (i = 0; i < cur_ext; i++) {
5678                 int full = 0;   /* user array is full */
5679
5680                 /* format results & advance arg */
5681                 error = formatter(&arg, &out[i], &full);
5682                 if (error || full)
5683                         break;
5684         }
5685
5686         kmem_free(out);
5687         return error;
5688 }
5689
5690 /*
5691  * Check the last inode extent to determine whether this allocation will result
5692  * in blocks being allocated at the end of the file. When we allocate new data
5693  * blocks at the end of the file which do not start at the previous data block,
5694  * we will try to align the new blocks at stripe unit boundaries.
5695  */
5696 STATIC int                              /* error */
5697 xfs_bmap_isaeof(
5698         xfs_inode_t     *ip,            /* incore inode pointer */
5699         xfs_fileoff_t   off,            /* file offset in fsblocks */
5700         int             whichfork,      /* data or attribute fork */
5701         char            *aeof)          /* return value */
5702 {
5703         int             error;          /* error return value */
5704         xfs_ifork_t     *ifp;           /* inode fork pointer */
5705         xfs_bmbt_rec_host_t *lastrec;   /* extent record pointer */
5706         xfs_extnum_t    nextents;       /* number of file extents */
5707         xfs_bmbt_irec_t s;              /* expanded extent record */
5708
5709         ASSERT(whichfork == XFS_DATA_FORK);
5710         ifp = XFS_IFORK_PTR(ip, whichfork);
5711         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5712             (error = xfs_iread_extents(NULL, ip, whichfork)))
5713                 return error;
5714         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
5715         if (nextents == 0) {
5716                 *aeof = 1;
5717                 return 0;
5718         }
5719         /*
5720          * Go to the last extent
5721          */
5722         lastrec = xfs_iext_get_ext(ifp, nextents - 1);
5723         xfs_bmbt_get_all(lastrec, &s);
5724         /*
5725          * Check we are allocating in the last extent (for delayed allocations)
5726          * or past the last extent for non-delayed allocations.
5727          */
5728         *aeof = (off >= s.br_startoff &&
5729                  off < s.br_startoff + s.br_blockcount &&
5730                  isnullstartblock(s.br_startblock)) ||
5731                 off >= s.br_startoff + s.br_blockcount;
5732         return 0;
5733 }
5734
5735 /*
5736  * Check if the endoff is outside the last extent. If so the caller will grow
5737  * the allocation to a stripe unit boundary.
5738  */
5739 int                                     /* error */
5740 xfs_bmap_eof(
5741         xfs_inode_t     *ip,            /* incore inode pointer */
5742         xfs_fileoff_t   endoff,         /* file offset in fsblocks */
5743         int             whichfork,      /* data or attribute fork */
5744         int             *eof)           /* result value */
5745 {
5746         xfs_fsblock_t   blockcount;     /* extent block count */
5747         int             error;          /* error return value */
5748         xfs_ifork_t     *ifp;           /* inode fork pointer */
5749         xfs_bmbt_rec_host_t *lastrec;   /* extent record pointer */
5750         xfs_extnum_t    nextents;       /* number of file extents */
5751         xfs_fileoff_t   startoff;       /* extent starting file offset */
5752
5753         ASSERT(whichfork == XFS_DATA_FORK);
5754         ifp = XFS_IFORK_PTR(ip, whichfork);
5755         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5756             (error = xfs_iread_extents(NULL, ip, whichfork)))
5757                 return error;
5758         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
5759         if (nextents == 0) {
5760                 *eof = 1;
5761                 return 0;
5762         }
5763         /*
5764          * Go to the last extent
5765          */
5766         lastrec = xfs_iext_get_ext(ifp, nextents - 1);
5767         startoff = xfs_bmbt_get_startoff(lastrec);
5768         blockcount = xfs_bmbt_get_blockcount(lastrec);
5769         *eof = endoff >= startoff + blockcount;
5770         return 0;
5771 }
5772
5773 #ifdef DEBUG
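/*
 * Find the buffer for the given block if it is held by the btree cursor or
 * attached to the cursor's transaction as a buf log item; return NULL if the
 * buffer is not in memory.
 */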
5774 STATIC struct xfs_buf *
5775 xfs_bmap_get_bp(
5776         struct xfs_btree_cur    *cur,
5777         xfs_fsblock_t           bno)
5778 {
5779         struct xfs_log_item_desc *lidp;
5780         int                     i;
5781
5782         if (!cur)
5783                 return NULL;
5784
5785         for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
5786                 if (!cur->bc_bufs[i])
5787                         break;
5788                 if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
5789                         return cur->bc_bufs[i];
5790         }
5791
5792         /* Chase down all the log items to see if the bp is there */
5793         list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
5794                 struct xfs_buf_log_item *bip;
5795                 bip = (struct xfs_buf_log_item *)lidp->lid_item;
5796                 if (bip->bli_item.li_type == XFS_LI_BUF &&
5797                     XFS_BUF_ADDR(bip->bli_buf) == bno)
5798                         return bip->bli_buf;
5799         }
5800
5801         return NULL;
5802 }
5803
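/*
 * Sanity check a bmap btree node block: the keys must be in ascending
 * startoff order and no two child pointers in the block may be equal.
 */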
5804 STATIC void
5805 xfs_check_block(
5806         struct xfs_btree_block  *block,
5807         xfs_mount_t             *mp,
5808         int                     root,
5809         short                   sz)
5810 {
5811         int                     i, j, dmxr;
5812         __be64                  *pp, *thispa;   /* pointer to block address */
5813         xfs_bmbt_key_t          *prevp, *keyp;
5814
5815         ASSERT(be16_to_cpu(block->bb_level) > 0);
5816
5817         prevp = NULL;
5818         for (i = 1; i <= xfs_btree_get_numrecs(block); i++) {
5819                 dmxr = mp->m_bmap_dmxr[0];
5820                 keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
5821
5822                 if (prevp) {
5823                         ASSERT(be64_to_cpu(prevp->br_startoff) <
5824                                be64_to_cpu(keyp->br_startoff));
5825                 }
5826                 prevp = keyp;
5827
5828                 /*
5829                  * Compare the block numbers to see if there are dups.
5830                  */
5831                 if (root)
5832                         pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
5833                 else
5834                         pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
5835
5836                 for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
5837                         if (root)
5838                                 thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
5839                         else
5840                                 thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
5841                         if (*thispa == *pp) {
5842                                 xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
5843                                         __func__, j, i,
5844                                         (unsigned long long)be64_to_cpu(*thispa));
5845                                 panic("%s: ptrs are equal in node\n",
5846                                         __func__);
5847                         }
5848                 }
5849         }
5850 }
5851
5852 /*
5853  * Check that the extents for the inode ip are in the right order in all
5854  * btree leaves.
5855  */
5856
5857 STATIC void
5858 xfs_bmap_check_leaf_extents(
5859         xfs_btree_cur_t         *cur,   /* btree cursor or null */
5860         xfs_inode_t             *ip,            /* incore inode pointer */
5861         int                     whichfork)      /* data or attr fork */
5862 {
5863         struct xfs_btree_block  *block; /* current btree block */
5864         xfs_fsblock_t           bno;    /* block # of "block" */
5865         xfs_buf_t               *bp;    /* buffer for "block" */
5866         int                     error;  /* error return value */
5867         xfs_extnum_t            i=0, j; /* index into the extents list */
5868         xfs_ifork_t             *ifp;   /* fork structure */
5869         int                     level;  /* btree level, for checking */
5870         xfs_mount_t             *mp;    /* file system mount structure */
5871         __be64                  *pp;    /* pointer to block address */
5872         xfs_bmbt_rec_t          *ep;    /* pointer to current extent */
5873         xfs_bmbt_rec_t          last = {0, 0}; /* last extent in prev block */
5874         xfs_bmbt_rec_t          *nextp; /* pointer to next extent */
5875         int                     bp_release = 0;
5876
5877         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
5878                 return;
5879         }
5880
5881         bno = NULLFSBLOCK;
5882         mp = ip->i_mount;
5883         ifp = XFS_IFORK_PTR(ip, whichfork);
5884         block = ifp->if_broot;
5885         /*
5886          * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
5887          */
5888         level = be16_to_cpu(block->bb_level);
5889         ASSERT(level > 0);
5890         xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
5891         pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
5892         bno = be64_to_cpu(*pp);
5893
5894         ASSERT(bno != NULLDFSBNO);
5895         ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
5896         ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
5897
5898         /*
5899          * Go down the tree until leaf level is reached, following the first
5900          * pointer (leftmost) at each level.
5901          */
5902         while (level-- > 0) {
5903                 /* See if buf is in cur first */
5904                 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
5905                 if (bp) {
5906                         bp_release = 0;
5907                 } else {
5908                         bp_release = 1;
5909                 }
5910                 if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
5911                                 XFS_BMAP_BTREE_REF)))
5912                         goto error_norelse;
5913                 block = XFS_BUF_TO_BLOCK(bp);
5914                 XFS_WANT_CORRUPTED_GOTO(
5915                         xfs_bmap_sanity_check(mp, bp, level),
5916                         error0);
5917                 if (level == 0)
5918                         break;
5919
5920                 /*
5921                  * Check this block for basic sanity (increasing keys and
5922                  * no duplicate blocks).
5923                  */
5924
5925                 xfs_check_block(block, mp, 0, 0);
5926                 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
5927                 bno = be64_to_cpu(*pp);
5928                 XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
5929                 if (bp_release) {
5930                         bp_release = 0;
5931                         xfs_trans_brelse(NULL, bp);
5932                 }
5933         }
5934
5935         /*
5936          * Here with bp and block set to the leftmost leaf node in the tree.
5937          */
5938         i = 0;
5939
5940         /*
5941          * Loop over all leaf nodes checking that all extents are in the right order.
5942          */
5943         for (;;) {
5944                 xfs_fsblock_t   nextbno;
5945                 xfs_extnum_t    num_recs;
5946
5947
5948                 num_recs = xfs_btree_get_numrecs(block);
5949
5950                 /*
5951                  * Find the next leaf block, if any, so we can advance to it below.
5952                  */
5953
5954                 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
5955
5956                 /*
5957                  * Check all the extents to make sure they are OK.
5958                  * If we had a previous block, the last entry should
5959                  * conform with the first entry in this one.
5960                  */
5961
5962                 ep = XFS_BMBT_REC_ADDR(mp, block, 1);
5963                 if (i) {
5964                         ASSERT(xfs_bmbt_disk_get_startoff(&last) +
5965                                xfs_bmbt_disk_get_blockcount(&last) <=
5966                                xfs_bmbt_disk_get_startoff(ep));
5967                 }
5968                 for (j = 1; j < num_recs; j++) {
5969                         nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
5970                         ASSERT(xfs_bmbt_disk_get_startoff(ep) +
5971                                xfs_bmbt_disk_get_blockcount(ep) <=
5972                                xfs_bmbt_disk_get_startoff(nextp));
5973                         ep = nextp;
5974                 }
5975
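                     /* remember the last extent so ordering can be checked against the next block */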
5976                 last = *ep;
5977                 i += num_recs;
5978                 if (bp_release) {
5979                         bp_release = 0;
5980                         xfs_trans_brelse(NULL, bp);
5981                 }
5982                 bno = nextbno;
5983                 /*
5984                  * If we've reached the end, stop.
5985                  */
5986                 if (bno == NULLFSBLOCK)
5987                         break;
5988
5989                 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
5990                 if (bp) {
5991                         bp_release = 0;
5992                 } else {
5993                         bp_release = 1;
5994                 }
5995                 if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
5996                                 XFS_BMAP_BTREE_REF)))
5997                         goto error_norelse;
5998                 block = XFS_BUF_TO_BLOCK(bp);
5999         }
6000         if (bp_release) {
6001                 bp_release = 0;
6002                 xfs_trans_brelse(NULL, bp);
6003         }
6004         return;
6005
6006 error0:
6007         xfs_warn(mp, "%s: at error0", __func__);
6008         if (bp_release)
6009                 xfs_trans_brelse(NULL, bp);
6010 error_norelse:
6011         xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
6012                 __func__, i);
6013         panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
6014         return;
6015 }
6016 #endif
6017
6018 /*
6019  * Count fsblocks of the given fork.
6020  */
6021 int                                             /* error */
6022 xfs_bmap_count_blocks(
6023         xfs_trans_t             *tp,            /* transaction pointer */
6024         xfs_inode_t             *ip,            /* incore inode */
6025         int                     whichfork,      /* data or attr fork */
6026         int                     *count)         /* out: count of blocks */
6027 {
6028         struct xfs_btree_block  *block; /* current btree block */
6029         xfs_fsblock_t           bno;    /* block # of "block" */
6030         xfs_ifork_t             *ifp;   /* fork structure */
6031         int                     level;  /* btree level, for checking */
6032         xfs_mount_t             *mp;    /* file system mount structure */
6033         __be64                  *pp;    /* pointer to block address */
6034
6035         bno = NULLFSBLOCK;
6036         mp = ip->i_mount;
6037         ifp = XFS_IFORK_PTR(ip, whichfork);
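             /* extent-format fork: no btree, just sum the in-core extent records */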
6038         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS) {
6039                 xfs_bmap_count_leaves(ifp, 0,
6040                         ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
6041                         count);
6042                 return 0;
6043         }
6044
6045         /*
6046          * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
6047          */
6048         block = ifp->if_broot;
6049         level = be16_to_cpu(block->bb_level);
6050         ASSERT(level > 0);
6051         pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
6052         bno = be64_to_cpu(*pp);
6053         ASSERT(bno != NULLDFSBNO);
6054         ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
6055         ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
6056
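             /* descend from the root's first child, counting btree and extent blocks */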
6057         if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) != 0)) {
6058                 XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
6059                                  mp);
6060                 return XFS_ERROR(EFSCORRUPTED);
6061         }
6062
6063         return 0;
6064 }
6065
6066 /*
6067  * Recursively walks each level of a btree
6068  * to count total fsblocks in use.
6069  */
6070 STATIC int                                     /* error */
6071 xfs_bmap_count_tree(
6072         xfs_mount_t     *mp,            /* file system mount point */
6073         xfs_trans_t     *tp,            /* transaction pointer */
6074         xfs_ifork_t     *ifp,           /* inode fork pointer */
6075         xfs_fsblock_t   blockno,        /* file system block number */
6076         int             levelin,        /* level in btree */
6077         int             *count)         /* Count of blocks */
6078 {
6079         int                     error;
6080         xfs_buf_t               *bp, *nbp;
6081         int                     level = levelin;
6082         __be64                  *pp;
6083         xfs_fsblock_t           bno = blockno;
6084         xfs_fsblock_t           nextbno;
6085         struct xfs_btree_block  *block, *nextblock;
6086         int                     numrecs;
6087
6088         if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF)))
6089                 return error;
6090         *count += 1;
6091         block = XFS_BUF_TO_BLOCK(bp);
6092
6093         if (--level) {
6094                 /* interior node level: count all the blocks at this level */
6095                 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
6096                 while (nextbno != NULLFSBLOCK) {
6097                         if ((error = xfs_btree_read_bufl(mp, tp, nextbno,
6098                                 0, &nbp, XFS_BMAP_BTREE_REF)))
6099                                 return error;
6100                         *count += 1;
6101                         nextblock = XFS_BUF_TO_BLOCK(nbp);
6102                         nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
6103                         xfs_trans_brelse(tp, nbp);
6104                 }
6105
6106                 /* Dive to the next level */
6107                 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
6108                 bno = be64_to_cpu(*pp);
6109                 if (unlikely((error =
6110                      xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) != 0)) {
6111                         xfs_trans_brelse(tp, bp);
6112                         XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
6113                                          XFS_ERRLEVEL_LOW, mp);
6114                         return XFS_ERROR(EFSCORRUPTED);
6115                 }
6116                 xfs_trans_brelse(tp, bp);
6117         } else {
6118                 /* leaf level: count each leaf block and the blocks its extents map */
6119                 for (;;) {
6120                         nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
6121                         numrecs = be16_to_cpu(block->bb_numrecs);
6122                         xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
6123                         xfs_trans_brelse(tp, bp);
6124                         if (nextbno == NULLFSBLOCK)
6125                                 break;
6126                         bno = nextbno;
6127                         if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
6128                                 XFS_BMAP_BTREE_REF)))
6129                                 return error;
6130                         *count += 1;
6131                         block = XFS_BUF_TO_BLOCK(bp);
6132                 }
6133         }
6134         return 0;
6135 }
6136
6137 /*
6138  * Count leaf blocks given a range of extent records.
6139  */
6140 STATIC void
6141 xfs_bmap_count_leaves(
6142         xfs_ifork_t             *ifp,
6143         xfs_extnum_t            idx,
6144         int                     numrecs,
6145         int                     *count)
6146 {
6147         int             b;
6148
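             /* sum the block count of each extent record in [idx, idx + numrecs) */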
6149         for (b = 0; b < numrecs; b++) {
6150                 xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
6151                 *count += xfs_bmbt_get_blockcount(frp);
6152         }
6153 }
6154
6155 /*
6156  * Count leaf blocks given a range of extent records originally
6157  * in btree format.
6158  */
6159 STATIC void
6160 xfs_bmap_disk_count_leaves(
6161         struct xfs_mount        *mp,
6162         struct xfs_btree_block  *block,
6163         int                     numrecs,
6164         int                     *count)
6165 {
6166         int             b;
6167         xfs_bmbt_rec_t  *frp;
6168
6169         for (b = 1; b <= numrecs; b++) {
6170                 frp = XFS_BMBT_REC_ADDR(mp, block, b);
6171                 *count += xfs_bmbt_disk_get_blockcount(frp);
6172         }
6173 }
6174
6175 /*
6176  * Dead simple method of punching delayed allocation blocks from a range in
6177  * the inode. Walks a block at a time, so it will be slow, but it is only
6178  * executed in rare error cases, so the overhead is not critical. This will
6179  * always punch out both the start and end blocks, even if the ranges only
6180  * partially overlap them, so it is up to the caller to ensure that partial
6181  * blocks are not passed in.
6182  */
6183 int
6184 xfs_bmap_punch_delalloc_range(
6185         struct xfs_inode        *ip,
6186         xfs_fileoff_t           start_fsb,
6187         xfs_fileoff_t           length)
6188 {
6189         xfs_fileoff_t           remaining = length;
6190         int                     error = 0;
6191
6192         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
6193
6194         do {
6195                 int             done;
6196                 xfs_bmbt_irec_t imap;
6197                 int             nimaps = 1;
6198                 xfs_fsblock_t   firstblock;
6199                 xfs_bmap_free_t flist;
6200
6201                 /*
6202                  * Map the range first and check that it is a delalloc extent
6203                  * before trying to unmap the range. Otherwise we will be
6204                  * trying to remove a real extent (which requires a
6205                  * transaction) or a hole, which is probably a bad idea...
6206                  */
6207                 error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps,
6208                                        XFS_BMAPI_ENTIRE);
6209
6210                 if (error) {
6211                         /* something screwed, just bail */
6212                         if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
6213                                 xfs_alert(ip->i_mount,
6214                         "Failed delalloc mapping lookup ino %lld fsb %lld.",
6215                                                 ip->i_ino, start_fsb);
6216                         }
6217                         break;
6218                 }
6219                 if (!nimaps) {
6220                         /* nothing there */
6221                         goto next_block;
6222                 }
6223                 if (imap.br_startblock != DELAYSTARTBLOCK) {
6224                         /* been converted, ignore */
6225                         goto next_block;
6226                 }
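                     /* a mapped delalloc extent should never have zero length */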
6227                 WARN_ON(imap.br_blockcount == 0);
6228
6229                 /*
6230                  * Note: while we initialise the firstblock/flist pair, they
6231                  * should never be used because blocks should never be
6232                  * allocated or freed for a delalloc extent and hence we don't
6233                  * need to cancel or finish them after the xfs_bunmapi() call.
6234                  */
6235                 xfs_bmap_init(&flist, &firstblock);
6236                 error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
6237                                         &flist, &done);
6238                 if (error)
6239                         break;
6240
6241                 ASSERT(!flist.xbf_count && !flist.xbf_first);
6242 next_block:
6243                 start_fsb++;
6244                 remaining--;
6245         } while (remaining > 0);
6246
6247         return error;
6248 }