Merge tag 'xfs-for-linus-3.15-rc1' of git://oss.sgi.com/xfs/xfs
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 4 Apr 2014 22:50:08 +0000 (15:50 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 4 Apr 2014 22:50:08 +0000 (15:50 -0700)
Pull xfs update from Dave Chinner:
 "There are a couple of new fallocate features in this request - it was
  decided that it was easiest to push them through the XFS tree using
  topic branches and have the ext4 support be based on those branches.
  Hence you may see some overlap with the ext4 tree merge depending on
  how they include those topic branches into their tree.  Other than
  that, there is O_TMPFILE support, some cleanups and bug fixes.

  The main changes in the XFS tree for 3.15-rc1 are:

   - O_TMPFILE support
   - allowing AIO+DIO writes beyond EOF
   - FALLOC_FL_COLLAPSE_RANGE support for fallocate syscall and XFS
     implementation
   - FALLOC_FL_ZERO_RANGE support for fallocate syscall and XFS
     implementation
   - IO verifier cleanup and rework
   - stack usage reduction changes
   - vm_map_ram NOIO context fixes to remove lockdep warnings
   - various bug fixes and cleanups"
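
As a quick illustration of the two new fallocate modes listed above, here is a minimal userspace sketch (editorial, not part of the series; "testfile", the offsets and the lengths are made-up values, and collapse-range offset/len typically must be aligned to the filesystem block size):

  #define _GNU_SOURCE
  #include <fcntl.h>
  #include <linux/falloc.h>
  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
          int fd = open("testfile", O_RDWR);

          if (fd < 0)
                  return 1;

          /* Zero (and allocate) 1MB of the file starting at offset 4k */
          if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 4096, 1024 * 1024) < 0)
                  perror("FALLOC_FL_ZERO_RANGE");

          /* Remove 1MB starting at offset 4k and shift the rest of the file down */
          if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 4096, 1024 * 1024) < 0)
                  perror("FALLOC_FL_COLLAPSE_RANGE");

          close(fd);
          return 0;
  }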

* tag 'xfs-for-linus-3.15-rc1' of git://oss.sgi.com/xfs/xfs: (34 commits)
  xfs: fix directory hash ordering bug
  xfs: extra semi-colon breaks a condition
  xfs: Add support for FALLOC_FL_ZERO_RANGE
  fs: Introduce FALLOC_FL_ZERO_RANGE flag for fallocate
  xfs: inode log reservations are still too small
  xfs: xfs_check_page_type buffer checks need help
  xfs: avoid AGI/AGF deadlock scenario for inode chunk allocation
  xfs: use NOIO contexts for vm_map_ram
  xfs: don't leak EFSBADCRC to userspace
  xfs: fix directory inode iolock lockdep false positive
  xfs: allocate xfs_da_args to reduce stack footprint
  xfs: always do log forces via the workqueue
  xfs: modify verifiers to differentiate CRC from other errors
  xfs: print useful caller information in xfs_error_report
  xfs: add xfs_verifier_error()
  xfs: add helper for updating checksums on xfs_bufs
  xfs: add helper for verifying checksums on xfs_bufs
  xfs: Use defines for CRC offsets in all cases
  xfs: skip pointless CRC updates after verifier failures
  xfs: Add support FALLOC_FL_COLLAPSE_RANGE for fallocate
  ...

56 files changed:
fs/direct-io.c
fs/open.c
fs/xfs/kmem.c
fs/xfs/xfs_acl.c
fs/xfs/xfs_ag.h
fs/xfs/xfs_alloc.c
fs/xfs/xfs_alloc_btree.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_attr_leaf.c
fs/xfs/xfs_attr_remote.c
fs/xfs/xfs_bmap.c
fs/xfs/xfs_bmap.h
fs/xfs/xfs_bmap_btree.c
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_bmap_util.h
fs/xfs/xfs_btree.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_buf.h
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_da_btree.c
fs/xfs/xfs_dinode.h
fs/xfs/xfs_dir2.c
fs/xfs/xfs_dir2_block.c
fs/xfs/xfs_dir2_data.c
fs/xfs/xfs_dir2_leaf.c
fs/xfs/xfs_dir2_node.c
fs/xfs/xfs_dquot.c
fs/xfs/xfs_dquot_buf.c
fs/xfs/xfs_error.c
fs/xfs/xfs_error.h
fs/xfs/xfs_file.c
fs/xfs/xfs_format.h
fs/xfs/xfs_ialloc.c
fs/xfs/xfs_ialloc_btree.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_inode_buf.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_linux.h
fs/xfs/xfs_log.h
fs/xfs/xfs_log_cil.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_rtalloc.c
fs/xfs/xfs_sb.c
fs/xfs/xfs_sb.h
fs/xfs/xfs_shared.h
fs/xfs/xfs_symlink.c
fs/xfs/xfs_symlink_remote.c
fs/xfs/xfs_trace.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans_buf.c
fs/xfs/xfs_trans_resv.c
fs/xfs/xfs_trans_resv.h
include/linux/fs.h
include/uapi/linux/falloc.h

index 6e6bff3..31ba093 100644 (file)
@@ -1193,13 +1193,19 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
        }
 
        /*
-        * For file extending writes updating i_size before data
-        * writeouts complete can expose uninitialized blocks. So
-        * even for AIO, we need to wait for i/o to complete before
-        * returning in this case.
+        * For file extending writes updating i_size before data writeouts
+        * complete can expose uninitialized blocks in dumb filesystems.
+        * In that case we need to wait for I/O completion even if asked
+        * for an asynchronous write.
         */
-       dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) &&
-               (end > i_size_read(inode)));
+       if (is_sync_kiocb(iocb))
+               dio->is_async = false;
+       else if (!(dio->flags & DIO_ASYNC_EXTEND) &&
+            (rw & WRITE) && end > i_size_read(inode))
+               dio->is_async = false;
+       else
+               dio->is_async = true;
+
        dio->inode = inode;
        dio->rw = rw;
 
index b9ed8b2..631aea8 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -231,7 +231,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                return -EINVAL;
 
        /* Return error if mode is not supported */
-       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+                    FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+               return -EOPNOTSUPP;
+
+       /* Punch hole and zero range are mutually exclusive */
+       if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
+           (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
                return -EOPNOTSUPP;
 
        /* Punch hole must have keep size set */
@@ -239,11 +245,20 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
            !(mode & FALLOC_FL_KEEP_SIZE))
                return -EOPNOTSUPP;
 
+       /* Collapse range must be used on its own; no other flags allowed. */
+       if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
+           (mode & ~FALLOC_FL_COLLAPSE_RANGE))
+               return -EINVAL;
+
        if (!(file->f_mode & FMODE_WRITE))
                return -EBADF;
 
-       /* It's not possible punch hole on append only file */
-       if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
+       /*
+        * It's not possible to punch a hole or collapse a range on an
+        * append-only file.
+        */
+       if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)
+           && IS_APPEND(inode))
                return -EPERM;
 
        if (IS_IMMUTABLE(inode))
@@ -271,6 +286,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
                return -EFBIG;
 
+       /*
+        * A collapse range must not reach or extend beyond EOF; that would
+        * effectively be a truncate operation.
+        */
+       if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
+           (offset + len >= i_size_read(inode)))
+               return -EINVAL;
+
        if (!file->f_op->fallocate)
                return -EOPNOTSUPP;
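
For reference, a hedged userspace sketch (editorial, not from the patch) of mode combinations the checks above reject before the filesystem's ->fallocate method is ever called; fd is assumed to be an open, writable, non-empty regular file:

  #define _GNU_SOURCE
  #include <fcntl.h>
  #include <linux/falloc.h>
  #include <sys/stat.h>

  static void probe_rejected_modes(int fd)
  {
          struct stat st;

          if (fstat(fd, &st) < 0)
                  return;

          /* punch hole and zero range are mutually exclusive: EOPNOTSUPP */
          fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
                        FALLOC_FL_KEEP_SIZE, 0, 4096);

          /* collapse range may not be combined with any other flag: EINVAL */
          fallocate(fd, FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_KEEP_SIZE, 0, 4096);

          /* collapse range must end strictly before EOF: EINVAL */
          fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 0, st.st_size);
  }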
 
index 66a36be..844e288 100644 (file)
@@ -65,12 +65,31 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
 void *
 kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
 {
+       unsigned noio_flag = 0;
        void    *ptr;
+       gfp_t   lflags;
 
        ptr = kmem_zalloc(size, flags | KM_MAYFAIL);
        if (ptr)
                return ptr;
-       return vzalloc(size);
+
+       /*
+        * __vmalloc() will allocate data pages and auxiliary structures (e.g.
+        * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context
+        * here. Hence we need to tell memory reclaim that we are in such a
+        * context via PF_MEMALLOC_NOIO to prevent memory reclaim re-entering
+        * the filesystem here and potentially deadlocking.
+        */
+       if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
+               noio_flag = memalloc_noio_save();
+
+       lflags = kmem_flags_convert(flags);
+       ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
+
+       if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
+               memalloc_noio_restore(noio_flag);
+
+       return ptr;
 }
 
 void
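
The hunk above, and the matching vm_map_ram() change in xfs_buf.c further down, both rely on the same bracketing pattern. A distilled sketch of that pattern (editorial, not the actual XFS code):

  #include <linux/sched.h>
  #include <linux/vmalloc.h>

  /* Bracket allocations that may recurse into reclaim with PF_MEMALLOC_NOIO */
  static void *noio_vzalloc(size_t size)
  {
          unsigned int noio_flag;
          void *ptr;

          noio_flag = memalloc_noio_save();       /* set PF_MEMALLOC_NOIO */
          ptr = vzalloc(size);                    /* reclaim may run, but cannot issue IO */
          memalloc_noio_restore(noio_flag);       /* restore the previous task flags */

          return ptr;
  }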
index 0ecec18..6888ad8 100644 (file)
@@ -281,7 +281,7 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
        if (!acl)
                goto set_acl;
 
-       error = -EINVAL;
+       error = -E2BIG;
        if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb)))
                return error;
 
index 3fc1098..0fdd410 100644 (file)
@@ -89,6 +89,8 @@ typedef struct xfs_agf {
        /* structure must be padded to 64 bit alignment */
 } xfs_agf_t;
 
+#define XFS_AGF_CRC_OFF                offsetof(struct xfs_agf, agf_crc)
+
 #define        XFS_AGF_MAGICNUM        0x00000001
 #define        XFS_AGF_VERSIONNUM      0x00000002
 #define        XFS_AGF_SEQNO           0x00000004
@@ -167,6 +169,8 @@ typedef struct xfs_agi {
        /* structure must be padded to 64 bit alignment */
 } xfs_agi_t;
 
+#define XFS_AGI_CRC_OFF                offsetof(struct xfs_agi, agi_crc)
+
 #define        XFS_AGI_MAGICNUM        0x00000001
 #define        XFS_AGI_VERSIONNUM      0x00000002
 #define        XFS_AGI_SEQNO           0x00000004
@@ -222,6 +226,8 @@ typedef struct xfs_agfl {
        __be32          agfl_bno[];     /* actually XFS_AGFL_SIZE(mp) */
 } xfs_agfl_t;
 
+#define XFS_AGFL_CRC_OFF       offsetof(struct xfs_agfl, agfl_crc)
+
 /*
  * tags for inode radix tree
  */
index 9eab2df..c1cf6a3 100644 (file)
@@ -474,7 +474,6 @@ xfs_agfl_read_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
-       int             agfl_ok = 1;
 
        /*
         * There is no verification of non-crc AGFLs because mkfs does not
@@ -485,15 +484,13 @@ xfs_agfl_read_verify(
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return;
 
-       agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                  offsetof(struct xfs_agfl, agfl_crc));
-
-       agfl_ok = agfl_ok && xfs_agfl_verify(bp);
-
-       if (!agfl_ok) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_agfl_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 static void
@@ -508,16 +505,15 @@ xfs_agfl_write_verify(
                return;
 
        if (!xfs_agfl_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
        if (bip)
                XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
-                        offsetof(struct xfs_agfl, agfl_crc));
+       xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
 }
 
 const struct xfs_buf_ops xfs_agfl_buf_ops = {
@@ -2238,19 +2234,17 @@ xfs_agf_read_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
-       int             agf_ok = 1;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                         offsetof(struct xfs_agf, agf_crc));
 
-       agf_ok = agf_ok && xfs_agf_verify(mp, bp);
-
-       if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
-                       XFS_RANDOM_ALLOC_READ_AGF))) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+           !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
+                               XFS_ERRTAG_ALLOC_READ_AGF,
+                               XFS_RANDOM_ALLOC_READ_AGF))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 static void
@@ -2261,8 +2255,8 @@ xfs_agf_write_verify(
        struct xfs_buf_log_item *bip = bp->b_fspriv;
 
        if (!xfs_agf_verify(mp, bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -2272,8 +2266,7 @@ xfs_agf_write_verify(
        if (bip)
                XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
-                        offsetof(struct xfs_agf, agf_crc));
+       xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
 }
 
 const struct xfs_buf_ops xfs_agf_buf_ops = {
index 1308542..cc1eadc 100644 (file)
@@ -355,12 +355,14 @@ static void
 xfs_allocbt_read_verify(
        struct xfs_buf  *bp)
 {
-       if (!(xfs_btree_sblock_verify_crc(bp) &&
-             xfs_allocbt_verify(bp))) {
-               trace_xfs_btree_corrupt(bp, _RET_IP_);
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
-                                    bp->b_target->bt_mount, bp->b_addr);
+       if (!xfs_btree_sblock_verify_crc(bp))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_allocbt_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error) {
+               trace_xfs_btree_corrupt(bp, _RET_IP_);
+               xfs_verifier_error(bp);
        }
 }
 
@@ -370,9 +372,9 @@ xfs_allocbt_write_verify(
 {
        if (!xfs_allocbt_verify(bp)) {
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
-                                    bp->b_target->bt_mount, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
        }
        xfs_btree_sblock_calc_crc(bp);
 
index db2cfb0..75df77d 100644 (file)
@@ -632,38 +632,46 @@ xfs_map_at_offset(
 }
 
 /*
- * Test if a given page is suitable for writing as part of an unwritten
- * or delayed allocate extent.
+ * Test if a given page contains at least one buffer of a given @type.
+ * If @check_all_buffers is true, then we walk all the buffers in the page to
+ * try to find one of the type passed in. If it is not set, then the caller only
+ * needs to check the first buffer on the page for a match.
  */
-STATIC int
+STATIC bool
 xfs_check_page_type(
        struct page             *page,
-       unsigned int            type)
+       unsigned int            type,
+       bool                    check_all_buffers)
 {
-       if (PageWriteback(page))
-               return 0;
+       struct buffer_head      *bh;
+       struct buffer_head      *head;
 
-       if (page->mapping && page_has_buffers(page)) {
-               struct buffer_head      *bh, *head;
-               int                     acceptable = 0;
+       if (PageWriteback(page))
+               return false;
+       if (!page->mapping)
+               return false;
+       if (!page_has_buffers(page))
+               return false;
 
-               bh = head = page_buffers(page);
-               do {
-                       if (buffer_unwritten(bh))
-                               acceptable += (type == XFS_IO_UNWRITTEN);
-                       else if (buffer_delay(bh))
-                               acceptable += (type == XFS_IO_DELALLOC);
-                       else if (buffer_dirty(bh) && buffer_mapped(bh))
-                               acceptable += (type == XFS_IO_OVERWRITE);
-                       else
-                               break;
-               } while ((bh = bh->b_this_page) != head);
+       bh = head = page_buffers(page);
+       do {
+               if (buffer_unwritten(bh)) {
+                       if (type == XFS_IO_UNWRITTEN)
+                               return true;
+               } else if (buffer_delay(bh)) {
+                       if (type == XFS_IO_DELALLOC)
+                               return true;
+               } else if (buffer_dirty(bh) && buffer_mapped(bh)) {
+                       if (type == XFS_IO_OVERWRITE)
+                               return true;
+               }
 
-               if (acceptable)
-                       return 1;
-       }
+               /* If we are only checking the first buffer, we are done now. */
+               if (!check_all_buffers)
+                       break;
+       } while ((bh = bh->b_this_page) != head);
 
-       return 0;
+       return false;
 }
 
 /*
@@ -697,7 +705,7 @@ xfs_convert_page(
                goto fail_unlock_page;
        if (page->mapping != inode->i_mapping)
                goto fail_unlock_page;
-       if (!xfs_check_page_type(page, (*ioendp)->io_type))
+       if (!xfs_check_page_type(page, (*ioendp)->io_type, false))
                goto fail_unlock_page;
 
        /*
@@ -742,6 +750,15 @@ xfs_convert_page(
        p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
        page_dirty = p_offset / len;
 
+       /*
+        * The moment we find a buffer that doesn't match our current type
+        * specification or can't be written, abort the loop and start
+        * writeback. As per the above xfs_imap_valid() check, only
+        * xfs_vm_writepage() can handle partial page writeback fully - we are
+        * limited here to the buffers that are contiguous with the current
+        * ioend, and hence a buffer we can't write breaks that contiguity and
+        * we have to defer the rest of the IO to xfs_vm_writepage().
+        */
        bh = head = page_buffers(page);
        do {
                if (offset >= end_offset)
@@ -750,7 +767,7 @@ xfs_convert_page(
                        uptodate = 0;
                if (!(PageUptodate(page) || buffer_uptodate(bh))) {
                        done = 1;
-                       continue;
+                       break;
                }
 
                if (buffer_unwritten(bh) || buffer_delay(bh) ||
@@ -762,10 +779,11 @@ xfs_convert_page(
                        else
                                type = XFS_IO_OVERWRITE;
 
-                       if (!xfs_imap_valid(inode, imap, offset)) {
-                               done = 1;
-                               continue;
-                       }
+                       /*
+                        * imap should always be valid because of the above
+                        * partial page end_offset check on the imap.
+                        */
+                       ASSERT(xfs_imap_valid(inode, imap, offset));
 
                        lock_buffer(bh);
                        if (type != XFS_IO_OVERWRITE)
@@ -777,6 +795,7 @@ xfs_convert_page(
                        count++;
                } else {
                        done = 1;
+                       break;
                }
        } while (offset += len, (bh = bh->b_this_page) != head);
 
@@ -868,7 +887,7 @@ xfs_aops_discard_page(
        struct buffer_head      *bh, *head;
        loff_t                  offset = page_offset(page);
 
-       if (!xfs_check_page_type(page, XFS_IO_DELALLOC))
+       if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true))
                goto out_invalidate;
 
        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -1441,7 +1460,8 @@ xfs_vm_direct_IO(
                ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
                                            offset, nr_segs,
                                            xfs_get_blocks_direct,
-                                           xfs_end_io_direct_write, NULL, 0);
+                                           xfs_end_io_direct_write, NULL,
+                                           DIO_ASYNC_EXTEND);
                if (ret != -EIOCBQUEUED && iocb->private)
                        goto out_destroy_ioend;
        } else {
index 7b126f4..fe9587f 100644 (file)
@@ -213,8 +213,8 @@ xfs_attr3_leaf_write_verify(
        struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
 
        if (!xfs_attr3_leaf_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -224,7 +224,7 @@ xfs_attr3_leaf_write_verify(
        if (bip)
                hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_ATTR3_LEAF_CRC_OFF);
+       xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF);
 }
 
 /*
@@ -239,13 +239,14 @@ xfs_attr3_leaf_read_verify(
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
 
-       if ((xfs_sb_version_hascrc(&mp->m_sb) &&
-            !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                         XFS_ATTR3_LEAF_CRC_OFF)) ||
-           !xfs_attr3_leaf_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+            !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_attr3_leaf_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
index 5549d69..6e37823 100644 (file)
@@ -125,7 +125,6 @@ xfs_attr3_rmt_read_verify(
        struct xfs_mount *mp = bp->b_target->bt_mount;
        char            *ptr;
        int             len;
-       bool            corrupt = false;
        xfs_daddr_t     bno;
 
        /* no verification of non-crc buffers */
@@ -140,11 +139,11 @@ xfs_attr3_rmt_read_verify(
        while (len > 0) {
                if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp),
                                      XFS_ATTR3_RMT_CRC_OFF)) {
-                       corrupt = true;
+                       xfs_buf_ioerror(bp, EFSBADCRC);
                        break;
                }
                if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
-                       corrupt = true;
+                       xfs_buf_ioerror(bp, EFSCORRUPTED);
                        break;
                }
                len -= XFS_LBSIZE(mp);
@@ -152,10 +151,9 @@ xfs_attr3_rmt_read_verify(
                bno += mp->m_bsize;
        }
 
-       if (corrupt) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-       } else
+       if (bp->b_error)
+               xfs_verifier_error(bp);
+       else
                ASSERT(len == 0);
 }
 
@@ -180,9 +178,8 @@ xfs_attr3_rmt_write_verify(
 
        while (len > 0) {
                if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
-                       XFS_CORRUPTION_ERROR(__func__,
-                                           XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                        xfs_buf_ioerror(bp, EFSCORRUPTED);
+                       xfs_verifier_error(bp);
                        return;
                }
                if (bip) {
index 152543c..5b6092e 100644 (file)
@@ -5378,3 +5378,196 @@ error0:
        }
        return error;
 }
+
+/*
+ * Shift extent records to the left to cover a hole.
+ *
+ * The maximum number of extents to be shifted in a single operation
+ * is @num_exts, and @current_ext keeps track of the current extent
+ * index we have shifted. @offset_shift_fsb is the length by which each
+ * extent is shifted. If there is no hole to shift the extents
+ * into, this is considered an invalid operation and we abort immediately.
+ */
+int
+xfs_bmap_shift_extents(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       int                     *done,
+       xfs_fileoff_t           start_fsb,
+       xfs_fileoff_t           offset_shift_fsb,
+       xfs_extnum_t            *current_ext,
+       xfs_fsblock_t           *firstblock,
+       struct xfs_bmap_free    *flist,
+       int                     num_exts)
+{
+       struct xfs_btree_cur            *cur;
+       struct xfs_bmbt_rec_host        *gotp;
+       struct xfs_bmbt_irec            got;
+       struct xfs_bmbt_irec            left;
+       struct xfs_mount                *mp = ip->i_mount;
+       struct xfs_ifork                *ifp;
+       xfs_extnum_t                    nexts = 0;
+       xfs_fileoff_t                   startoff;
+       int                             error = 0;
+       int                             i;
+       int                             whichfork = XFS_DATA_FORK;
+       int                             logflags;
+       xfs_filblks_t                   blockcount = 0;
+
+       if (unlikely(XFS_TEST_ERROR(
+           (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+            mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+               XFS_ERROR_REPORT("xfs_bmap_shift_extents",
+                                XFS_ERRLEVEL_LOW, mp);
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       ASSERT(current_ext != NULL);
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               /* Read in all the extents */
+               error = xfs_iread_extents(tp, ip, whichfork);
+               if (error)
+                       return error;
+       }
+
+       /*
+        * If *current_ext is 0, we need to look up the extent from which
+        * we start shifting and store it in gotp.
+        */
+       if (!*current_ext) {
+               gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext);
+               /*
+                * gotp can be null in 2 cases: 1) if there are no extents
+                * or 2) start_fsb lies in a hole beyond which there are
+                * no extents. Either way, we are done.
+                */
+               if (!gotp) {
+                       *done = 1;
+                       return 0;
+               }
+       }
+
+       /* We are going to change core inode */
+       logflags = XFS_ILOG_CORE;
+
+       if (ifp->if_flags & XFS_IFBROOT) {
+               cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
+               cur->bc_private.b.firstblock = *firstblock;
+               cur->bc_private.b.flist = flist;
+               cur->bc_private.b.flags = 0;
+       } else {
+               cur = NULL;
+               logflags |= XFS_ILOG_DEXT;
+       }
+
+       while (nexts++ < num_exts &&
+              *current_ext <  XFS_IFORK_NEXTENTS(ip, whichfork)) {
+
+               gotp = xfs_iext_get_ext(ifp, *current_ext);
+               xfs_bmbt_get_all(gotp, &got);
+               startoff = got.br_startoff - offset_shift_fsb;
+
+               /*
+                * Before shifting extent into hole, make sure that the hole
+                * is large enough to accommodate the shift.
+                */
+               if (*current_ext) {
+                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
+                                               *current_ext - 1), &left);
+
+                       if (startoff < left.br_startoff + left.br_blockcount)
+                               error = XFS_ERROR(EINVAL);
+               } else if (offset_shift_fsb > got.br_startoff) {
+                       /*
+                        * When the first extent is shifted, offset_shift_fsb
+                        * must not exceed the starting offset of the first
+                        * extent.
+                        */
+                       error = XFS_ERROR(EINVAL);
+               }
+
+               if (error)
+                       goto del_cursor;
+
+               if (cur) {
+                       error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+                                                  got.br_startblock,
+                                                  got.br_blockcount,
+                                                  &i);
+                       if (error)
+                               goto del_cursor;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+               }
+
+               /* Check if we can merge 2 adjacent extents */
+               if (*current_ext &&
+                   left.br_startoff + left.br_blockcount == startoff &&
+                   left.br_startblock + left.br_blockcount ==
+                               got.br_startblock &&
+                   left.br_state == got.br_state &&
+                   left.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
+                       blockcount = left.br_blockcount +
+                               got.br_blockcount;
+                       xfs_iext_remove(ip, *current_ext, 1, 0);
+                       if (cur) {
+                               error = xfs_btree_delete(cur, &i);
+                               if (error)
+                                       goto del_cursor;
+                               XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+                       }
+                       XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+                       gotp = xfs_iext_get_ext(ifp, --*current_ext);
+                       xfs_bmbt_get_all(gotp, &got);
+
+                       /* Make cursor point to the extent we will update */
+                       if (cur) {
+                               error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+                                                          got.br_startblock,
+                                                          got.br_blockcount,
+                                                          &i);
+                               if (error)
+                                       goto del_cursor;
+                               XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+                       }
+
+                       xfs_bmbt_set_blockcount(gotp, blockcount);
+                       got.br_blockcount = blockcount;
+               } else {
+                       /* We have to update the startoff */
+                       xfs_bmbt_set_startoff(gotp, startoff);
+                       got.br_startoff = startoff;
+               }
+
+               if (cur) {
+                       error = xfs_bmbt_update(cur, got.br_startoff,
+                                               got.br_startblock,
+                                               got.br_blockcount,
+                                               got.br_state);
+                       if (error)
+                               goto del_cursor;
+               }
+
+               (*current_ext)++;
+       }
+
+       /* Check if we are done */
+       if (*current_ext ==  XFS_IFORK_NEXTENTS(ip, whichfork))
+               *done = 1;
+
+del_cursor:
+       if (cur)
+               xfs_btree_del_cursor(cur,
+                       error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+
+       xfs_trans_log_inode(tp, ip, logflags);
+
+       return error;
+}
index 33b41f3..f84bd7a 100644 (file)
@@ -127,6 +127,16 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
        { BMAP_RIGHT_FILLING,   "RF" }, \
        { BMAP_ATTRFORK,        "ATTR" }
 
+
+/*
+ * This macro determines how many extents will be shifted in one write
+ * transaction. A single shift may require two bmbt updates: an extent
+ * move and an extent merge. Since the write transaction allows at most
+ * two such updates, only one extent is shifted at a time.
+ */
+#define XFS_BMAP_MAX_SHIFT_EXTENTS     1
+
 #ifdef DEBUG
 void   xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
                int whichfork, unsigned long caller_ip);
@@ -169,5 +179,10 @@ int        xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
 int    xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
                xfs_extnum_t num);
 uint   xfs_default_attroffset(struct xfs_inode *ip);
+int    xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
+               int *done, xfs_fileoff_t start_fsb,
+               xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext,
+               xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
+               int num_exts);
 
 #endif /* __XFS_BMAP_H__ */
index 706bc3f..818d546 100644 (file)
@@ -780,12 +780,14 @@ static void
 xfs_bmbt_read_verify(
        struct xfs_buf  *bp)
 {
-       if (!(xfs_btree_lblock_verify_crc(bp) &&
-             xfs_bmbt_verify(bp))) {
-               trace_xfs_btree_corrupt(bp, _RET_IP_);
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
-                                    bp->b_target->bt_mount, bp->b_addr);
+       if (!xfs_btree_lblock_verify_crc(bp))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_bmbt_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error) {
+               trace_xfs_btree_corrupt(bp, _RET_IP_);
+               xfs_verifier_error(bp);
        }
 }
 
@@ -794,11 +796,9 @@ xfs_bmbt_write_verify(
        struct xfs_buf  *bp)
 {
        if (!xfs_bmbt_verify(bp)) {
-               xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn);
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
-                                    bp->b_target->bt_mount, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
        xfs_btree_lblock_calc_crc(bp);
index f264616..01f6a64 100644 (file)
@@ -1349,7 +1349,6 @@ xfs_free_file_space(
                 * the freeing of the space succeeds at ENOSPC.
                 */
                tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-               tp->t_flags |= XFS_TRANS_RESERVE;
                error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
 
                /*
@@ -1467,6 +1466,102 @@ out:
 
 }
 
+/*
+ * xfs_collapse_file_space()
+ *     This routine frees disk space and shifts extents for the given file.
+ *     It first frees the data blocks in the specified range by calling
+ *     xfs_free_file_space(), which also syncs dirty data and invalidates
+ *     the page cache over the region being collapsed. It then shifts the
+ *     extent records to the left to cover the resulting hole.
+ * RETURNS:
+ *     0 on success
+ *     errno on error
+ *
+ */
+int
+xfs_collapse_file_space(
+       struct xfs_inode        *ip,
+       xfs_off_t               offset,
+       xfs_off_t               len)
+{
+       int                     done = 0;
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       int                     error;
+       xfs_extnum_t            current_ext = 0;
+       struct xfs_bmap_free    free_list;
+       xfs_fsblock_t           first_block;
+       int                     committed;
+       xfs_fileoff_t           start_fsb;
+       xfs_fileoff_t           shift_fsb;
+
+       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+
+       trace_xfs_collapse_file_space(ip);
+
+       start_fsb = XFS_B_TO_FSB(mp, offset + len);
+       shift_fsb = XFS_B_TO_FSB(mp, len);
+
+       error = xfs_free_file_space(ip, offset, len);
+       if (error)
+               return error;
+
+       while (!error && !done) {
+               tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+               tp->t_flags |= XFS_TRANS_RESERVE;
+               /*
+                * We need to reserve a permanent block for the transaction:
+                * after shifting an extent into the hole we may find that
+                * adjacent extents can be merged, which can free a block
+                * during the record update.
+                */
+               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
+                               XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
+               if (error) {
+                       ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+                       xfs_trans_cancel(tp, 0);
+                       break;
+               }
+
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+               error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
+                               ip->i_gdquot, ip->i_pdquot,
+                               XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
+                               XFS_QMOPT_RES_REGBLKS);
+               if (error)
+                       goto out;
+
+               xfs_trans_ijoin(tp, ip, 0);
+
+               xfs_bmap_init(&free_list, &first_block);
+
+               /*
+                * We are using the write transaction in which max 2 bmbt
+                * updates are allowed
+                */
+               error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
+                                              shift_fsb, &current_ext,
+                                              &first_block, &free_list,
+                                              XFS_BMAP_MAX_SHIFT_EXTENTS);
+               if (error)
+                       goto out;
+
+               error = xfs_bmap_finish(&tp, &free_list, &committed);
+               if (error)
+                       goto out;
+
+               error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       }
+
+       return error;
+
+out:
+       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return error;
+}
+
 /*
  * We need to check that the format of the data fork in the temporary inode is
  * valid for the target inode before doing the swap. This is not a problem with
index 900747b..935ed2b 100644 (file)
@@ -99,6 +99,8 @@ int   xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
                            xfs_off_t len);
 int    xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
                            xfs_off_t len);
+int    xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
+                               xfs_off_t len);
 
 /* EOF block manipulation functions */
 bool   xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
index 9adaae4..e80d59f 100644 (file)
@@ -234,8 +234,7 @@ xfs_btree_lblock_calc_crc(
                return;
        if (bip)
                block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
-                        XFS_BTREE_LBLOCK_CRC_OFF);
+       xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
 }
 
 bool
@@ -243,8 +242,8 @@ xfs_btree_lblock_verify_crc(
        struct xfs_buf          *bp)
 {
        if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
-               return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                       XFS_BTREE_LBLOCK_CRC_OFF);
+               return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
+
        return true;
 }
 
@@ -267,8 +266,7 @@ xfs_btree_sblock_calc_crc(
                return;
        if (bip)
                block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
-                        XFS_BTREE_SBLOCK_CRC_OFF);
+       xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
 }
 
 bool
@@ -276,8 +274,8 @@ xfs_btree_sblock_verify_crc(
        struct xfs_buf          *bp)
 {
        if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
-               return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                       XFS_BTREE_SBLOCK_CRC_OFF);
+               return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
+
        return true;
 }
 
index 9c061ef..107f2fd 100644 (file)
@@ -396,7 +396,17 @@ _xfs_buf_map_pages(
                bp->b_addr = NULL;
        } else {
                int retried = 0;
+               unsigned noio_flag;
 
+               /*
+                * vm_map_ram() will allocate auxiliary structures (e.g.
+                * pagetables) with GFP_KERNEL, yet we are likely to be under
+                * GFP_NOFS context here. Hence we need to tell memory reclaim
+                * that we are in such a context via PF_MEMALLOC_NOIO to prevent
+                * memory reclaim re-entering the filesystem here and
+                * potentially deadlocking.
+                */
+               noio_flag = memalloc_noio_save();
                do {
                        bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
                                                -1, PAGE_KERNEL);
@@ -404,6 +414,7 @@ _xfs_buf_map_pages(
                                break;
                        vm_unmap_aliases();
                } while (retried++ <= 1);
+               memalloc_noio_restore(noio_flag);
 
                if (!bp->b_addr)
                        return -ENOMEM;
index 9953395..b8a3abf 100644 (file)
@@ -369,6 +369,20 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
        xfs_buf_rele(bp);
 }
 
+static inline int
+xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
+{
+       return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
+                               cksum_offset);
+}
+
+static inline void
+xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
+{
+       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
+                        cksum_offset);
+}
+
 /*
  *     Handling of buftargs.
  */
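
The verifier hunks throughout this merge converge on the same shape built from these helpers: CRC failures become EFSBADCRC, structural failures become EFSCORRUPTED, and xfs_verifier_error() reports either in one place. A composite sketch (the xfs_foo_* names and XFS_FOO_CRC_OFF are hypothetical stand-ins, not functions from the patch):

  static void
  xfs_foo_read_verify(
          struct xfs_buf  *bp)
  {
          /* hypothetical CRC offset and structural check for this block type */
          if (!xfs_buf_verify_cksum(bp, XFS_FOO_CRC_OFF))
                  xfs_buf_ioerror(bp, EFSBADCRC);
          else if (!xfs_foo_verify(bp))
                  xfs_buf_ioerror(bp, EFSCORRUPTED);

          if (bp->b_error)
                  xfs_verifier_error(bp);
  }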
index 3314911..8752821 100644 (file)
@@ -796,20 +796,6 @@ xfs_buf_item_init(
                bip->bli_formats[i].blf_map_size = map_size;
        }
 
-#ifdef XFS_TRANS_DEBUG
-       /*
-        * Allocate the arrays for tracking what needs to be logged
-        * and what our callers request to be logged.  bli_orig
-        * holds a copy of the original, clean buffer for comparison
-        * against, and bli_logged keeps a 1 bit flag per byte in
-        * the buffer to indicate which bytes the callers have asked
-        * to have logged.
-        */
-       bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP);
-       memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length));
-       bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP);
-#endif
-
        /*
         * Put the buf item into the list of items attached to the
         * buffer at the front.
@@ -957,11 +943,6 @@ STATIC void
 xfs_buf_item_free(
        xfs_buf_log_item_t      *bip)
 {
-#ifdef XFS_TRANS_DEBUG
-       kmem_free(bip->bli_orig);
-       kmem_free(bip->bli_logged);
-#endif /* XFS_TRANS_DEBUG */
-
        xfs_buf_item_free_format(bip);
        kmem_zone_free(xfs_buf_item_zone, bip);
 }
index 796272a..6cc5f67 100644 (file)
@@ -185,8 +185,8 @@ xfs_da3_node_write_verify(
        struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
 
        if (!xfs_da3_node_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -196,7 +196,7 @@ xfs_da3_node_write_verify(
        if (bip)
                hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DA3_NODE_CRC_OFF);
+       xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
 }
 
 /*
@@ -209,18 +209,20 @@ static void
 xfs_da3_node_read_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
        struct xfs_da_blkinfo   *info = bp->b_addr;
 
        switch (be16_to_cpu(info->magic)) {
                case XFS_DA3_NODE_MAGIC:
-                       if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                             XFS_DA3_NODE_CRC_OFF))
+                       if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
+                               xfs_buf_ioerror(bp, EFSBADCRC);
                                break;
+                       }
                        /* fall through */
                case XFS_DA_NODE_MAGIC:
-                       if (!xfs_da3_node_verify(bp))
+                       if (!xfs_da3_node_verify(bp)) {
+                               xfs_buf_ioerror(bp, EFSCORRUPTED);
                                break;
+                       }
                        return;
                case XFS_ATTR_LEAF_MAGIC:
                case XFS_ATTR3_LEAF_MAGIC:
@@ -237,8 +239,7 @@ xfs_da3_node_read_verify(
        }
 
        /* corrupt block */
-       XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
-       xfs_buf_ioerror(bp, EFSCORRUPTED);
+       xfs_verifier_error(bp);
 }
 
 const struct xfs_buf_ops xfs_da3_node_buf_ops = {
@@ -1295,7 +1296,7 @@ xfs_da3_fixhashpath(
                node = blk->bp->b_addr;
                dp->d_ops->node_hdr_from_disk(&nodehdr, node);
                btree = dp->d_ops->node_tree_p(node);
-               if (be32_to_cpu(btree->hashval) == lasthash)
+               if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
                        break;
                blk->hashval = lasthash;
                btree[blk->index].hashval = cpu_to_be32(lasthash);
index e5869b5..623bbe8 100644 (file)
@@ -89,6 +89,8 @@ typedef struct xfs_dinode {
        /* structure must be padded to 64 bit alignment */
 } xfs_dinode_t;
 
+#define XFS_DINODE_CRC_OFF     offsetof(struct xfs_dinode, di_crc)
+
 #define DI_MAX_FLUSH 0xffff
 
 /*
index ce16ef0..fda4625 100644 (file)
@@ -180,16 +180,23 @@ xfs_dir_init(
        xfs_inode_t     *dp,
        xfs_inode_t     *pdp)
 {
-       xfs_da_args_t   args;
+       struct xfs_da_args *args;
        int             error;
 
-       memset((char *)&args, 0, sizeof(args));
-       args.dp = dp;
-       args.trans = tp;
        ASSERT(S_ISDIR(dp->i_d.di_mode));
-       if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)))
+       error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino);
+       if (error)
                return error;
-       return xfs_dir2_sf_create(&args, pdp->i_ino);
+
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       if (!args)
+               return ENOMEM;
+
+       args->dp = dp;
+       args->trans = tp;
+       error = xfs_dir2_sf_create(args, pdp->i_ino);
+       kmem_free(args);
+       return error;
 }
 
 /*
@@ -205,41 +212,56 @@ xfs_dir_createname(
        xfs_bmap_free_t         *flist,         /* bmap's freeblock list */
        xfs_extlen_t            total)          /* bmap's total block count */
 {
-       xfs_da_args_t           args;
+       struct xfs_da_args      *args;
        int                     rval;
        int                     v;              /* type-checking value */
 
        ASSERT(S_ISDIR(dp->i_d.di_mode));
-       if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
+       rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+       if (rval)
                return rval;
        XFS_STATS_INC(xs_dir_create);
 
-       memset(&args, 0, sizeof(xfs_da_args_t));
-       args.name = name->name;
-       args.namelen = name->len;
-       args.filetype = name->type;
-       args.hashval = dp->i_mount->m_dirnameops->hashname(name);
-       args.inumber = inum;
-       args.dp = dp;
-       args.firstblock = first;
-       args.flist = flist;
-       args.total = total;
-       args.whichfork = XFS_DATA_FORK;
-       args.trans = tp;
-       args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
-
-       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-               rval = xfs_dir2_sf_addname(&args);
-       else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_block_addname(&args);
-       else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_leaf_addname(&args);
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       if (!args)
+               return ENOMEM;
+
+       args->name = name->name;
+       args->namelen = name->len;
+       args->filetype = name->type;
+       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+       args->inumber = inum;
+       args->dp = dp;
+       args->firstblock = first;
+       args->flist = flist;
+       args->total = total;
+       args->whichfork = XFS_DATA_FORK;
+       args->trans = tp;
+       args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
+
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+               rval = xfs_dir2_sf_addname(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isblock(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v) {
+               rval = xfs_dir2_block_addname(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isleaf(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v)
+               rval = xfs_dir2_leaf_addname(args);
        else
-               rval = xfs_dir2_node_addname(&args);
+               rval = xfs_dir2_node_addname(args);
+
+out_free:
+       kmem_free(args);
        return rval;
 }
 
@@ -282,46 +304,66 @@ xfs_dir_lookup(
        xfs_ino_t       *inum,          /* out: inode number */
        struct xfs_name *ci_name)       /* out: actual name if CI match */
 {
-       xfs_da_args_t   args;
+       struct xfs_da_args *args;
        int             rval;
        int             v;              /* type-checking value */
 
        ASSERT(S_ISDIR(dp->i_d.di_mode));
        XFS_STATS_INC(xs_dir_lookup);
 
-       memset(&args, 0, sizeof(xfs_da_args_t));
-       args.name = name->name;
-       args.namelen = name->len;
-       args.filetype = name->type;
-       args.hashval = dp->i_mount->m_dirnameops->hashname(name);
-       args.dp = dp;
-       args.whichfork = XFS_DATA_FORK;
-       args.trans = tp;
-       args.op_flags = XFS_DA_OP_OKNOENT;
+       /*
+        * We need to use KM_NOFS here so that lockdep will not throw false
+        * positive deadlock warnings on a non-transactional lookup path. It is
+        * safe to recurse into inode reclaim in that case, but lockdep can't
+        * easily be taught about it. Hence using KM_NOFS avoids having to add
+        * a bunch of lockdep class annotations into the reclaim path for the
+        * ilock.
+        */
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       args->name = name->name;
+       args->namelen = name->len;
+       args->filetype = name->type;
+       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+       args->dp = dp;
+       args->whichfork = XFS_DATA_FORK;
+       args->trans = tp;
+       args->op_flags = XFS_DA_OP_OKNOENT;
        if (ci_name)
-               args.op_flags |= XFS_DA_OP_CILOOKUP;
+               args->op_flags |= XFS_DA_OP_CILOOKUP;
 
-       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-               rval = xfs_dir2_sf_lookup(&args);
-       else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_block_lookup(&args);
-       else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_leaf_lookup(&args);
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+               rval = xfs_dir2_sf_lookup(args);
+               goto out_check_rval;
+       }
+
+       rval = xfs_dir2_isblock(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v) {
+               rval = xfs_dir2_block_lookup(args);
+               goto out_check_rval;
+       }
+
+       rval = xfs_dir2_isleaf(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v)
+               rval = xfs_dir2_leaf_lookup(args);
        else
-               rval = xfs_dir2_node_lookup(&args);
+               rval = xfs_dir2_node_lookup(args);
+
+out_check_rval:
        if (rval == EEXIST)
                rval = 0;
        if (!rval) {
-               *inum = args.inumber;
+               *inum = args->inumber;
                if (ci_name) {
-                       ci_name->name = args.value;
-                       ci_name->len = args.valuelen;
+                       ci_name->name = args->value;
+                       ci_name->len = args->valuelen;
                }
        }
+out_free:
+       kmem_free(args);
        return rval;
 }
 
@@ -338,38 +380,51 @@ xfs_dir_removename(
        xfs_bmap_free_t *flist,         /* bmap's freeblock list */
        xfs_extlen_t    total)          /* bmap's total block count */
 {
-       xfs_da_args_t   args;
+       struct xfs_da_args *args;
        int             rval;
        int             v;              /* type-checking value */
 
        ASSERT(S_ISDIR(dp->i_d.di_mode));
        XFS_STATS_INC(xs_dir_remove);
 
-       memset(&args, 0, sizeof(xfs_da_args_t));
-       args.name = name->name;
-       args.namelen = name->len;
-       args.filetype = name->type;
-       args.hashval = dp->i_mount->m_dirnameops->hashname(name);
-       args.inumber = ino;
-       args.dp = dp;
-       args.firstblock = first;
-       args.flist = flist;
-       args.total = total;
-       args.whichfork = XFS_DATA_FORK;
-       args.trans = tp;
-
-       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-               rval = xfs_dir2_sf_removename(&args);
-       else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_block_removename(&args);
-       else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_leaf_removename(&args);
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       if (!args)
+               return ENOMEM;
+
+       args->name = name->name;
+       args->namelen = name->len;
+       args->filetype = name->type;
+       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+       args->inumber = ino;
+       args->dp = dp;
+       args->firstblock = first;
+       args->flist = flist;
+       args->total = total;
+       args->whichfork = XFS_DATA_FORK;
+       args->trans = tp;
+
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+               rval = xfs_dir2_sf_removename(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isblock(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v) {
+               rval = xfs_dir2_block_removename(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isleaf(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v)
+               rval = xfs_dir2_leaf_removename(args);
        else
-               rval = xfs_dir2_node_removename(&args);
+               rval = xfs_dir2_node_removename(args);
+out_free:
+       kmem_free(args);
        return rval;
 }
 
@@ -386,40 +441,54 @@ xfs_dir_replace(
        xfs_bmap_free_t *flist,         /* bmap's freeblock list */
        xfs_extlen_t    total)          /* bmap's total block count */
 {
-       xfs_da_args_t   args;
+       struct xfs_da_args *args;
        int             rval;
        int             v;              /* type-checking value */
 
        ASSERT(S_ISDIR(dp->i_d.di_mode));
 
-       if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
+       rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+       if (rval)
                return rval;
 
-       memset(&args, 0, sizeof(xfs_da_args_t));
-       args.name = name->name;
-       args.namelen = name->len;
-       args.filetype = name->type;
-       args.hashval = dp->i_mount->m_dirnameops->hashname(name);
-       args.inumber = inum;
-       args.dp = dp;
-       args.firstblock = first;
-       args.flist = flist;
-       args.total = total;
-       args.whichfork = XFS_DATA_FORK;
-       args.trans = tp;
-
-       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-               rval = xfs_dir2_sf_replace(&args);
-       else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_block_replace(&args);
-       else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_leaf_replace(&args);
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       if (!args)
+               return ENOMEM;
+
+       args->name = name->name;
+       args->namelen = name->len;
+       args->filetype = name->type;
+       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+       args->inumber = inum;
+       args->dp = dp;
+       args->firstblock = first;
+       args->flist = flist;
+       args->total = total;
+       args->whichfork = XFS_DATA_FORK;
+       args->trans = tp;
+
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+               rval = xfs_dir2_sf_replace(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isblock(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v) {
+               rval = xfs_dir2_block_replace(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isleaf(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v)
+               rval = xfs_dir2_leaf_replace(args);
        else
-               rval = xfs_dir2_node_replace(&args);
+               rval = xfs_dir2_node_replace(args);
+out_free:
+       kmem_free(args);
        return rval;
 }
 
@@ -434,7 +503,7 @@ xfs_dir_canenter(
        struct xfs_name *name,          /* name of entry to add */
        uint            resblks)
 {
-       xfs_da_args_t   args;
+       struct xfs_da_args *args;
        int             rval;
        int             v;              /* type-checking value */
 
@@ -443,29 +512,42 @@ xfs_dir_canenter(
 
        ASSERT(S_ISDIR(dp->i_d.di_mode));
 
-       memset(&args, 0, sizeof(xfs_da_args_t));
-       args.name = name->name;
-       args.namelen = name->len;
-       args.filetype = name->type;
-       args.hashval = dp->i_mount->m_dirnameops->hashname(name);
-       args.dp = dp;
-       args.whichfork = XFS_DATA_FORK;
-       args.trans = tp;
-       args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       if (!args)
+               return ENOMEM;
+
+       args->name = name->name;
+       args->namelen = name->len;
+       args->filetype = name->type;
+       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+       args->dp = dp;
+       args->whichfork = XFS_DATA_FORK;
+       args->trans = tp;
+       args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
                                                        XFS_DA_OP_OKNOENT;
 
-       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-               rval = xfs_dir2_sf_addname(&args);
-       else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_block_addname(&args);
-       else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
-               return rval;
-       else if (v)
-               rval = xfs_dir2_leaf_addname(&args);
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+               rval = xfs_dir2_sf_addname(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isblock(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v) {
+               rval = xfs_dir2_block_addname(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isleaf(tp, dp, &v);
+       if (rval)
+               goto out_free;
+       if (v)
+               rval = xfs_dir2_leaf_addname(args);
        else
-               rval = xfs_dir2_node_addname(&args);
+               rval = xfs_dir2_node_addname(args);
+out_free:
+       kmem_free(args);
        return rval;
 }
 
index 90cdbf4..4f6a38c 100644 (file)
@@ -89,13 +89,14 @@ xfs_dir3_block_read_verify(
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
 
-       if ((xfs_sb_version_hascrc(&mp->m_sb) &&
-            !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                         XFS_DIR3_DATA_CRC_OFF)) ||
-           !xfs_dir3_block_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+            !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_dir3_block_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 static void
@@ -107,8 +108,8 @@ xfs_dir3_block_write_verify(
        struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
 
        if (!xfs_dir3_block_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -118,7 +119,7 @@ xfs_dir3_block_write_verify(
        if (bip)
                hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF);
+       xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
 }
 
 const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
index 70acff4..afa4ad5 100644 (file)
@@ -241,7 +241,6 @@ static void
 xfs_dir3_data_reada_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
        struct xfs_dir2_data_hdr *hdr = bp->b_addr;
 
        switch (hdr->magic) {
@@ -255,8 +254,8 @@ xfs_dir3_data_reada_verify(
                xfs_dir3_data_verify(bp);
                return;
        default:
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                break;
        }
 }
@@ -267,13 +266,14 @@ xfs_dir3_data_read_verify(
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
 
-       if ((xfs_sb_version_hascrc(&mp->m_sb) &&
-            !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                         XFS_DIR3_DATA_CRC_OFF)) ||
-           !xfs_dir3_data_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+            !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
+                xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_dir3_data_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 static void
@@ -285,8 +285,8 @@ xfs_dir3_data_write_verify(
        struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
 
        if (!xfs_dir3_data_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -296,7 +296,7 @@ xfs_dir3_data_write_verify(
        if (bip)
                hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF);
+       xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
 }
 
 const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
index ae47ec6..d36e97d 100644 (file)
@@ -179,13 +179,14 @@ __read_verify(
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
 
-       if ((xfs_sb_version_hascrc(&mp->m_sb) &&
-            !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                         XFS_DIR3_LEAF_CRC_OFF)) ||
-           !xfs_dir3_leaf_verify(bp, magic)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+            !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_dir3_leaf_verify(bp, magic))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 static void
@@ -198,8 +199,8 @@ __write_verify(
        struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
 
        if (!xfs_dir3_leaf_verify(bp, magic)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -209,7 +210,7 @@ __write_verify(
        if (bip)
                hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_LEAF_CRC_OFF);
+       xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF);
 }
 
 static void
index 48c7d18..cb434d7 100644 (file)
@@ -115,13 +115,14 @@ xfs_dir3_free_read_verify(
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
 
-       if ((xfs_sb_version_hascrc(&mp->m_sb) &&
-            !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                         XFS_DIR3_FREE_CRC_OFF)) ||
-           !xfs_dir3_free_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+           !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_dir3_free_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 static void
@@ -133,8 +134,8 @@ xfs_dir3_free_write_verify(
        struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
 
        if (!xfs_dir3_free_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -144,7 +145,7 @@ xfs_dir3_free_write_verify(
        if (bip)
                hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_FREE_CRC_OFF);
+       xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF);
 }
 
 const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
index 7aeb4c8..868b19f 100644 (file)
@@ -615,7 +615,7 @@ xfs_qm_dqread(
 
        if (flags & XFS_QMOPT_DQALLOC) {
                tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_attrsetm,
+               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc,
                                          XFS_QM_DQALLOC_SPACE_RES(mp), 0);
                if (error)
                        goto error1;
index d401457..610da81 100644 (file)
@@ -257,10 +257,13 @@ xfs_dquot_buf_read_verify(
 {
        struct xfs_mount        *mp = bp->b_target->bt_mount;
 
-       if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (!xfs_dquot_buf_verify_crc(mp, bp))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_dquot_buf_verify(mp, bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 /*
@@ -275,8 +278,8 @@ xfs_dquot_buf_write_verify(
        struct xfs_mount        *mp = bp->b_target->bt_mount;
 
        if (!xfs_dquot_buf_verify(mp, bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 }
index 9995b80..edac5b0 100644 (file)
@@ -156,7 +156,7 @@ xfs_error_report(
 {
        if (level <= xfs_error_level) {
                xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
-               "Internal error %s at line %d of file %s.  Caller 0x%p",
+               "Internal error %s at line %d of file %s.  Caller %pF",
                            tag, linenum, filename, ra);
 
                xfs_stack_trace();
@@ -178,3 +178,28 @@ xfs_corruption_error(
        xfs_error_report(tag, level, mp, filename, linenum, ra);
        xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
 }
+
+/*
+ * Warnings specifically for verifier errors.  Differentiate CRC vs. invalid
+ * values, and omit the stack trace unless the error level is tuned high.
+ */
+void
+xfs_verifier_error(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+
+       xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx",
+                 bp->b_error == EFSBADCRC ? "CRC error" : "corruption",
+                 __return_address, bp->b_bn);
+
+       xfs_alert(mp, "Unmount and run xfs_repair");
+
+       if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
+               xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:");
+               xfs_hex_dump(xfs_buf_offset(bp, 0), 64);
+       }
+
+       if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
+               xfs_stack_trace();
+}
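
The remaining verifier hunks in this pull all convert to the same shape. A minimal sketch of that shape, assuming a hypothetical metadata type with its CRC at XFS_FOO_CRC_OFF and a structural check xfs_foo_verify() (both names are stand-ins, not symbols from this tree):

static void
xfs_foo_read_verify(
        struct xfs_buf          *bp)
{
        struct xfs_mount        *mp = bp->b_target->bt_mount;

        /* CRC failure and structural corruption get distinct error codes */
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
            !xfs_buf_verify_cksum(bp, XFS_FOO_CRC_OFF))
                xfs_buf_ioerror(bp, EFSBADCRC);
        else if (!xfs_foo_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);

        /* one common routine reports the error and dumps the buffer */
        if (bp->b_error)
                xfs_verifier_error(bp);
}

Write verifiers run only the structural check and then recompute the CRC with xfs_buf_update_cksum(), as the hunks below show.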
index 079a367..c1c57d4 100644 (file)
@@ -34,6 +34,7 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
 extern void xfs_corruption_error(const char *tag, int level,
                        struct xfs_mount *mp, void *p, const char *filename,
                        int linenum, inst_t *ra);
+extern void xfs_verifier_error(struct xfs_buf *bp);
 
 #define        XFS_ERROR_REPORT(e, lvl, mp)    \
        xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
index 64b48ea..f7abff8 100644 (file)
@@ -823,7 +823,8 @@ xfs_file_fallocate(
 
        if (!S_ISREG(inode->i_mode))
                return -EINVAL;
-       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+                    FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
                return -EOPNOTSUPP;
 
        xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -831,6 +832,20 @@ xfs_file_fallocate(
                error = xfs_free_file_space(ip, offset, len);
                if (error)
                        goto out_unlock;
+       } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
+               unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+
+               if (offset & blksize_mask || len & blksize_mask) {
+                       error = -EINVAL;
+                       goto out_unlock;
+               }
+
+               ASSERT(offset + len < i_size_read(inode));
+               new_size = i_size_read(inode) - len;
+
+               error = xfs_collapse_file_space(ip, offset, len);
+               if (error)
+                       goto out_unlock;
        } else {
                if (!(mode & FALLOC_FL_KEEP_SIZE) &&
                    offset + len > i_size_read(inode)) {
@@ -840,8 +855,11 @@ xfs_file_fallocate(
                                goto out_unlock;
                }
 
-               error = xfs_alloc_file_space(ip, offset, len,
-                                            XFS_BMAPI_PREALLOC);
+               if (mode & FALLOC_FL_ZERO_RANGE)
+                       error = xfs_zero_file_space(ip, offset, len);
+               else
+                       error = xfs_alloc_file_space(ip, offset, len,
+                                                    XFS_BMAPI_PREALLOC);
                if (error)
                        goto out_unlock;
        }
@@ -859,7 +877,7 @@ xfs_file_fallocate(
        if (ip->i_d.di_mode & S_IXGRP)
                ip->i_d.di_mode &= ~S_ISGID;
 
-       if (!(mode & FALLOC_FL_PUNCH_HOLE))
+       if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)))
                ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
 
        xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
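
To put numbers on the collapse-range checks above (the block size is assumed purely for illustration): with 4096-byte blocks, blksize_mask is 0xfff, so

        offset = 8192, len = 4096  ->  both (x & 0xfff) == 0, accepted
        offset = 8192, len = 6000  ->  6000 & 0xfff != 0, fails with EINVAL

and on success the file shrinks by exactly len bytes (new_size = i_size_read(inode) - len).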
index b6ab5a3..9898f31 100644 (file)
@@ -145,6 +145,8 @@ struct xfs_dsymlink_hdr {
        __be64  sl_lsn;
 };
 
+#define XFS_SYMLINK_CRC_OFF    offsetof(struct xfs_dsymlink_hdr, sl_crc)
+
 /*
  * The maximum pathlen is 1024 bytes. Since the minimum file system
  * blocksize is 512 bytes, we can get a max of 3 extents back from
index 5d7f105..8f711db 100644 (file)
@@ -363,6 +363,18 @@ xfs_ialloc_ag_alloc(
                args.minleft = args.mp->m_in_maxlevels - 1;
                if ((error = xfs_alloc_vextent(&args)))
                        return error;
+
+               /*
+                * This request might have dirtied the transaction if the AG could
+                * satisfy the request but the exact block was not available.
+                * If the allocation did fail, subsequent requests will relax
+                * the exact agbno requirement and increase the alignment
+                * instead. It is critical that the total size of the request
+                * (len + alignment + slop) does not increase from this point
+                * on, so reset minalignslop to ensure it is not included in
+                * subsequent requests.
+                */
+               args.minalignslop = 0;
        } else
                args.fsbno = NULLFSBLOCK;
 
@@ -1568,18 +1580,17 @@ xfs_agi_read_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
-       int             agi_ok = 1;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                         offsetof(struct xfs_agi, agi_crc));
-       agi_ok = agi_ok && xfs_agi_verify(bp);
 
-       if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
-                       XFS_RANDOM_IALLOC_READ_AGI))) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+           !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
+                               XFS_ERRTAG_IALLOC_READ_AGI,
+                               XFS_RANDOM_IALLOC_READ_AGI))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 static void
@@ -1590,8 +1601,8 @@ xfs_agi_write_verify(
        struct xfs_buf_log_item *bip = bp->b_fspriv;
 
        if (!xfs_agi_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -1600,8 +1611,7 @@ xfs_agi_write_verify(
 
        if (bip)
                XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn);
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
-                        offsetof(struct xfs_agi, agi_crc));
+       xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF);
 }
 
 const struct xfs_buf_ops xfs_agi_buf_ops = {
index c8fa5bb..7e309b1 100644 (file)
@@ -243,12 +243,14 @@ static void
 xfs_inobt_read_verify(
        struct xfs_buf  *bp)
 {
-       if (!(xfs_btree_sblock_verify_crc(bp) &&
-             xfs_inobt_verify(bp))) {
-               trace_xfs_btree_corrupt(bp, _RET_IP_);
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
-                                    bp->b_target->bt_mount, bp->b_addr);
+       if (!xfs_btree_sblock_verify_crc(bp))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_inobt_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error) {
+               trace_xfs_btree_corrupt(bp, _RET_IP_);
+               xfs_verifier_error(bp);
        }
 }
 
@@ -258,9 +260,9 @@ xfs_inobt_write_verify(
 {
        if (!xfs_inobt_verify(bp)) {
                trace_xfs_btree_corrupt(bp, _RET_IP_);
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
-                                    bp->b_target->bt_mount, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
        }
        xfs_btree_sblock_calc_crc(bp);
 
index 3a137e9..5e7a38f 100644 (file)
@@ -42,7 +42,6 @@
 #include "xfs_bmap_util.h"
 #include "xfs_error.h"
 #include "xfs_quota.h"
-#include "xfs_dinode.h"
 #include "xfs_filestream.h"
 #include "xfs_cksum.h"
 #include "xfs_trace.h"
@@ -62,6 +61,8 @@ kmem_zone_t *xfs_inode_zone;
 
 STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
 
+STATIC int xfs_iunlink_remove(xfs_trans_t *, xfs_inode_t *);
+
 /*
  * helper function to extract extent size hint from inode
  */
@@ -1115,7 +1116,7 @@ xfs_bumplink(
 {
        xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
 
-       ASSERT(ip->i_d.di_nlink > 0);
+       ASSERT(ip->i_d.di_nlink > 0 || (VFS_I(ip)->i_state & I_LINKABLE));
        ip->i_d.di_nlink++;
        inc_nlink(VFS_I(ip));
        if ((ip->i_d.di_version == 1) &&
@@ -1165,10 +1166,7 @@ xfs_create(
        if (XFS_FORCED_SHUTDOWN(mp))
                return XFS_ERROR(EIO);
 
-       if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
-               prid = xfs_get_projid(dp);
-       else
-               prid = XFS_PROJID_DEFAULT;
+       prid = xfs_get_initial_prid(dp);
 
        /*
         * Make sure that we have allocated dquot(s) on disk.
@@ -1332,6 +1330,113 @@ xfs_create(
        return error;
 }
 
+int
+xfs_create_tmpfile(
+       struct xfs_inode        *dp,
+       struct dentry           *dentry,
+       umode_t                 mode)
+{
+       struct xfs_mount        *mp = dp->i_mount;
+       struct xfs_inode        *ip = NULL;
+       struct xfs_trans        *tp = NULL;
+       int                     error;
+       uint                    cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
+       prid_t                  prid;
+       struct xfs_dquot        *udqp = NULL;
+       struct xfs_dquot        *gdqp = NULL;
+       struct xfs_dquot        *pdqp = NULL;
+       struct xfs_trans_res    *tres;
+       uint                    resblks;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       prid = xfs_get_initial_prid(dp);
+
+       /*
+        * Make sure that we have allocated dquot(s) on disk.
+        */
+       error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()),
+                               xfs_kgid_to_gid(current_fsgid()), prid,
+                               XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
+                               &udqp, &gdqp, &pdqp);
+       if (error)
+               return error;
+
+       resblks = XFS_IALLOC_SPACE_RES(mp);
+       tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE);
+
+       tres = &M_RES(mp)->tr_create_tmpfile;
+       error = xfs_trans_reserve(tp, tres, resblks, 0);
+       if (error == ENOSPC) {
+               /* No space at all so try a "no-allocation" reservation */
+               resblks = 0;
+               error = xfs_trans_reserve(tp, tres, 0, 0);
+       }
+       if (error) {
+               cancel_flags = 0;
+               goto out_trans_cancel;
+       }
+
+       error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
+                                               pdqp, resblks, 1, 0);
+       if (error)
+               goto out_trans_cancel;
+
+       error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
+                               prid, resblks > 0, &ip, NULL);
+       if (error) {
+               if (error == ENOSPC)
+                       goto out_trans_cancel;
+               goto out_trans_abort;
+       }
+
+       if (mp->m_flags & XFS_MOUNT_WSYNC)
+               xfs_trans_set_sync(tp);
+
+       /*
+        * Attach the dquot(s) to the inodes and modify them incore.
+        * The ids of the inode cannot have changed, since the new
+        * inode has been locked ever since it was created.
+        */
+       xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
+
+       ip->i_d.di_nlink--;
+       d_tmpfile(dentry, VFS_I(ip));
+       error = xfs_iunlink(tp, ip);
+       if (error)
+               goto out_trans_abort;
+
+       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       if (error)
+               goto out_release_inode;
+
+       xfs_qm_dqrele(udqp);
+       xfs_qm_dqrele(gdqp);
+       xfs_qm_dqrele(pdqp);
+
+       return 0;
+
+ out_trans_abort:
+       cancel_flags |= XFS_TRANS_ABORT;
+ out_trans_cancel:
+       xfs_trans_cancel(tp, cancel_flags);
+ out_release_inode:
+       /*
+        * Wait until after the current transaction is aborted to
+        * release the inode.  This prevents recursive transactions
+        * and deadlocks from xfs_inactive.
+        */
+       if (ip)
+               IRELE(ip);
+
+       xfs_qm_dqrele(udqp);
+       xfs_qm_dqrele(gdqp);
+       xfs_qm_dqrele(pdqp);
+
+       return error;
+}
+
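
For context, a hedged userspace sketch of how this path is exercised (the mount point and file name are made up, and error handling is trimmed). The temporary inode sits only on the AGI unlinked list until it is either dropped on the final close or linked into the namespace, which is why xfs_link() below learns to call xfs_iunlink_remove() for an inode with a zero link count:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char proc[64];
        /* anonymous, unlinked regular file on an XFS mount (path assumed) */
        int fd = open("/mnt/xfs", O_TMPFILE | O_WRONLY, 0600);

        if (fd < 0)
                return 1;
        if (write(fd, "scratch", 7) != 7)
                return 1;

        /* optionally give it a name; otherwise it vanishes on close() */
        snprintf(proc, sizeof(proc), "/proc/self/fd/%d", fd);
        linkat(AT_FDCWD, proc, AT_FDCWD, "/mnt/xfs/kept", AT_SYMLINK_FOLLOW);

        close(fd);
        return 0;
}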
 int
 xfs_link(
        xfs_inode_t             *tdp,
@@ -1397,6 +1502,12 @@ xfs_link(
 
        xfs_bmap_init(&free_list, &first_block);
 
+       if (sip->i_d.di_nlink == 0) {
+               error = xfs_iunlink_remove(tp, sip);
+               if (error)
+                       goto abort_return;
+       }
+
        error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
                                        &first_block, &free_list, resblks);
        if (error)
index 65e2350..396cc1f 100644 (file)
@@ -20,6 +20,7 @@
 
 #include "xfs_inode_buf.h"
 #include "xfs_inode_fork.h"
+#include "xfs_dinode.h"
 
 /*
  * Kernel only inode definitions
@@ -192,6 +193,15 @@ xfs_set_projid(struct xfs_inode *ip,
        ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
 }
 
+static inline prid_t
+xfs_get_initial_prid(struct xfs_inode *dp)
+{
+       if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+               return xfs_get_projid(dp);
+
+       return XFS_PROJID_DEFAULT;
+}
+
 /*
  * In-core inode flags.
  */
@@ -323,6 +333,8 @@ int         xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
                           struct xfs_inode **ipp, struct xfs_name *ci_name);
 int            xfs_create(struct xfs_inode *dp, struct xfs_name *name,
                           umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp);
+int            xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry,
+                          umode_t mode);
 int            xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
                           struct xfs_inode *ip);
 int            xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
index 4fc9f39..24e9939 100644 (file)
@@ -102,8 +102,7 @@ xfs_inode_buf_verify(
                        }
 
                        xfs_buf_ioerror(bp, EFSCORRUPTED);
-                       XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
-                                            mp, dip);
+                       xfs_verifier_error(bp);
 #ifdef DEBUG
                        xfs_alert(mp,
                                "bad inode magic/vsn daddr %lld #%d (magic=%x)",
@@ -306,7 +305,7 @@ xfs_dinode_verify(
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return false;
        if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
-                             offsetof(struct xfs_dinode, di_crc)))
+                             XFS_DINODE_CRC_OFF))
                return false;
        if (be64_to_cpu(dip->di_ino) != ip->i_ino)
                return false;
@@ -327,7 +326,7 @@ xfs_dinode_calc_crc(
 
        ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
        crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
-                             offsetof(struct xfs_dinode, di_crc));
+                             XFS_DINODE_CRC_OFF);
        dip->di_crc = xfs_end_cksum(crc);
 }
 
index 22d1cbe..3b80eba 100644 (file)
@@ -128,7 +128,6 @@ xfs_iomap_write_direct(
        xfs_fsblock_t   firstfsb;
        xfs_extlen_t    extsz, temp;
        int             nimaps;
-       int             bmapi_flag;
        int             quota_flag;
        int             rt;
        xfs_trans_t     *tp;
@@ -200,18 +199,15 @@ xfs_iomap_write_direct(
 
        xfs_trans_ijoin(tp, ip, 0);
 
-       bmapi_flag = 0;
-       if (offset < XFS_ISIZE(ip) || extsz)
-               bmapi_flag |= XFS_BMAPI_PREALLOC;
-
        /*
         * From this point onwards we overwrite the imap pointer that the
         * caller gave to us.
         */
        xfs_bmap_init(&free_list, &firstfsb);
        nimaps = 1;
-       error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag,
-                               &firstfsb, 0, imap, &nimaps, &free_list);
+       error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
+                               XFS_BMAPI_PREALLOC, &firstfsb, 0,
+                               imap, &nimaps, &free_list);
        if (error)
                goto out_bmap_cancel;
 
index 9ddfb81..89b07e4 100644 (file)
@@ -39,6 +39,7 @@
 #include "xfs_da_btree.h"
 #include "xfs_dir2_priv.h"
 #include "xfs_dinode.h"
+#include "xfs_trans_space.h"
 
 #include <linux/capability.h>
 #include <linux/xattr.h>
 #include <linux/fiemap.h>
 #include <linux/slab.h>
 
+/*
+ * Directories have different lock order w.r.t. mmap_sem compared to regular
+ * files. This is due to readdir potentially triggering page faults on a user
+ * buffer inside filldir(), and this happens with the ilock on the directory
+ * held. For regular files, the lock order is the other way around - the
+ * mmap_sem is taken during the page fault, and then we lock the ilock to do
+ * block mapping. Hence we need a different class for the directory ilock so
+ * that lockdep can tell them apart.
+ */
+static struct lock_class_key xfs_nondir_ilock_class;
+static struct lock_class_key xfs_dir_ilock_class;
+
 static int
 xfs_initxattrs(
        struct inode            *inode,
@@ -1034,6 +1047,19 @@ xfs_vn_fiemap(
        return 0;
 }
 
+STATIC int
+xfs_vn_tmpfile(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       umode_t         mode)
+{
+       int             error;
+
+       error = xfs_create_tmpfile(XFS_I(dir), dentry, mode);
+
+       return -error;
+}
+
 static const struct inode_operations xfs_inode_operations = {
        .get_acl                = xfs_get_acl,
        .set_acl                = xfs_set_acl,
@@ -1072,6 +1098,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
        .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
        .update_time            = xfs_vn_update_time,
+       .tmpfile                = xfs_vn_tmpfile,
 };
 
 static const struct inode_operations xfs_dir_ci_inode_operations = {
@@ -1099,6 +1126,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
        .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
        .update_time            = xfs_vn_update_time,
+       .tmpfile                = xfs_vn_tmpfile,
 };
 
 static const struct inode_operations xfs_symlink_inode_operations = {
@@ -1191,6 +1219,7 @@ xfs_setup_inode(
        xfs_diflags_to_iflags(inode, ip);
 
        ip->d_ops = ip->i_mount->m_nondir_inode_ops;
+       lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
        switch (inode->i_mode & S_IFMT) {
        case S_IFREG:
                inode->i_op = &xfs_inode_operations;
@@ -1198,6 +1227,7 @@ xfs_setup_inode(
                inode->i_mapping->a_ops = &xfs_address_space_operations;
                break;
        case S_IFDIR:
+               lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
                if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
                        inode->i_op = &xfs_dir_ci_inode_operations;
                else
index f9bb590..825249d 100644 (file)
@@ -119,6 +119,7 @@ typedef __uint64_t __psunsigned_t;
 #include "xfs_iops.h"
 #include "xfs_aops.h"
 #include "xfs_super.h"
+#include "xfs_cksum.h"
 #include "xfs_buf.h"
 #include "xfs_message.h"
 
@@ -178,6 +179,7 @@ typedef __uint64_t __psunsigned_t;
 #define ENOATTR                ENODATA         /* Attribute not found */
 #define EWRONGFS       EINVAL          /* Mount with wrong filesystem type */
 #define EFSCORRUPTED   EUCLEAN         /* Filesystem is corrupted */
+#define EFSBADCRC      EBADMSG         /* Bad CRC detected */
 
 #define SYNCHRONIZE()  barrier()
 #define __return_address __builtin_return_address(0)
index b0f4ef7..2c40044 100644 (file)
@@ -175,7 +175,7 @@ void          xlog_iodone(struct xfs_buf *);
 struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
 void     xfs_log_ticket_put(struct xlog_ticket *ticket);
 
-int    xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
+void   xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
                                xfs_lsn_t *commit_lsn, int flags);
 bool   xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
 
index 4ef6fdb..7e54553 100644 (file)
@@ -498,13 +498,6 @@ xlog_cil_push(
        new_ctx->cil = cil;
        cil->xc_ctx = new_ctx;
 
-       /*
-        * mirror the new sequence into the cil structure so that we can do
-        * unlocked checks against the current sequence in log forces without
-        * risking deferencing a freed context pointer.
-        */
-       cil->xc_current_sequence = new_ctx->sequence;
-
        /*
         * The switch is now done, so we can drop the context lock and move out
         * of a shared context. We can't just go straight to the commit record,
@@ -523,8 +516,15 @@ xlog_cil_push(
         * Hence we need to add this context to the committing context list so
         * that higher sequences will wait for us to write out a commit record
         * before they do.
+        *
+        * xfs_log_force_lsn requires us to mirror the new sequence into the cil
+        * structure atomically with the addition of this sequence to the
+        * committing list. This also ensures that we can do unlocked checks
+        * against the current sequence in log forces without risking
+        * dereferencing a freed context pointer.
         */
        spin_lock(&cil->xc_push_lock);
+       cil->xc_current_sequence = new_ctx->sequence;
        list_add(&ctx->committing, &cil->xc_committing);
        spin_unlock(&cil->xc_push_lock);
        up_write(&cil->xc_ctx_lock);
@@ -662,8 +662,14 @@ xlog_cil_push_background(
 
 }
 
+/*
+ * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence
+ * number that is passed. When it returns, the work will be queued for
+ * @push_seq, but it won't be completed. The caller is expected to do any
+ * waiting for push_seq to complete if it is required.
+ */
 static void
-xlog_cil_push_foreground(
+xlog_cil_push_now(
        struct xlog     *log,
        xfs_lsn_t       push_seq)
 {
@@ -688,10 +694,8 @@ xlog_cil_push_foreground(
        }
 
        cil->xc_push_seq = push_seq;
+       queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
        spin_unlock(&cil->xc_push_lock);
-
-       /* do the push now */
-       xlog_cil_push(log);
 }
 
 bool
@@ -721,7 +725,7 @@ xlog_cil_empty(
  * background commit, returns without it held once background commits are
  * allowed again.
  */
-int
+void
 xfs_log_commit_cil(
        struct xfs_mount        *mp,
        struct xfs_trans        *tp,
@@ -767,7 +771,6 @@ xfs_log_commit_cil(
        xlog_cil_push_background(log);
 
        up_read(&cil->xc_ctx_lock);
-       return 0;
 }
 
 /*
@@ -796,7 +799,8 @@ xlog_cil_force_lsn(
         * xlog_cil_push() handles racing pushes for the same sequence,
         * so no need to deal with it here.
         */
-       xlog_cil_push_foreground(log, sequence);
+restart:
+       xlog_cil_push_now(log, sequence);
 
        /*
         * See if we can find a previous sequence still committing.
@@ -804,7 +808,6 @@ xlog_cil_force_lsn(
         * before allowing the force of push_seq to go ahead. Hence block
         * on commits for those as well.
         */
-restart:
        spin_lock(&cil->xc_push_lock);
        list_for_each_entry(ctx, &cil->xc_committing, committing) {
                if (ctx->sequence > sequence)
@@ -822,6 +825,28 @@ restart:
                /* found it! */
                commit_lsn = ctx->commit_lsn;
        }
+
+       /*
+        * The call to xlog_cil_push_now() executes the push in the background.
+        * Hence by the time we have got here our sequence may not have been
+        * pushed yet. This is true if the current sequence still matches the
+        * push sequence after the above wait loop and the CIL still contains
+        * dirty objects.
+        *
+        * When the push occurs, it will empty the CIL and
+        * atomically increment the current sequence past the push sequence and
+        * move it into the committing list. Of course, if the CIL is clean at
+        * the time of the push, it won't have pushed the CIL at all, so in that
+        * case we should try the push for this sequence again from the start
+        * just in case.
+        */
+
+       if (sequence == cil->xc_current_sequence &&
+           !list_empty(&cil->xc_cil)) {
+               spin_unlock(&cil->xc_push_lock);
+               goto restart;
+       }
+
        spin_unlock(&cil->xc_push_lock);
        return commit_lsn;
 }
index f96c056..993cb19 100644 (file)
@@ -314,6 +314,9 @@ reread:
                error = bp->b_error;
                if (loud)
                        xfs_warn(mp, "SB validate failed with error %d.", error);
+               /* bad CRC means corrupted metadata */
+               if (error == EFSBADCRC)
+                       error = EFSCORRUPTED;
                goto release_buf;
        }
 
index a6a76b2..ec5ca65 100644 (file)
@@ -842,7 +842,7 @@ xfs_growfs_rt_alloc(
                /*
                 * Reserve space & log for one extent added to the file.
                 */
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata,
+               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc,
                                          resblks, 0);
                if (error)
                        goto error_cancel;
index 1e11679..0c0e41b 100644 (file)
@@ -288,6 +288,7 @@ xfs_mount_validate_sb(
            sbp->sb_inodelog < XFS_DINODE_MIN_LOG                       ||
            sbp->sb_inodelog > XFS_DINODE_MAX_LOG                       ||
            sbp->sb_inodesize != (1 << sbp->sb_inodelog)                ||
+           sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
            (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)   ||
            (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)  ||
            (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)  ||
@@ -610,12 +611,11 @@ xfs_sb_read_verify(
                                                XFS_SB_VERSION_5) ||
             dsb->sb_crc != 0)) {
 
-               if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                     offsetof(struct xfs_sb, sb_crc))) {
+               if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) {
                        /* Only fail bad secondaries on a known V5 filesystem */
                        if (bp->b_bn == XFS_SB_DADDR ||
                            xfs_sb_version_hascrc(&mp->m_sb)) {
-                               error = EFSCORRUPTED;
+                               error = EFSBADCRC;
                                goto out_error;
                        }
                }
@@ -624,10 +624,9 @@ xfs_sb_read_verify(
 
 out_error:
        if (error) {
-               if (error == EFSCORRUPTED)
-                       XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
-                                            mp, bp->b_addr);
                xfs_buf_ioerror(bp, error);
+               if (error == EFSCORRUPTED || error == EFSBADCRC)
+                       xfs_verifier_error(bp);
        }
 }
 
@@ -662,9 +661,8 @@ xfs_sb_write_verify(
 
        error = xfs_sb_verify(bp, false);
        if (error) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
-                                    mp, bp->b_addr);
                xfs_buf_ioerror(bp, error);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -674,8 +672,7 @@ xfs_sb_write_verify(
        if (bip)
                XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
-                        offsetof(struct xfs_sb, sb_crc));
+       xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
 }
 
 const struct xfs_buf_ops xfs_sb_buf_ops = {
index 35061d4..f7b2fe7 100644 (file)
@@ -182,6 +182,8 @@ typedef struct xfs_sb {
        /* must be padded to 64 bit alignment */
 } xfs_sb_t;
 
+#define XFS_SB_CRC_OFF         offsetof(struct xfs_sb, sb_crc)
+
 /*
  * Superblock - on disk version.  Must match the in core version above.
  * Must be padded to 64 bit alignment.
index 8c5035a..4484e51 100644 (file)
@@ -104,7 +104,8 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
 #define        XFS_TRANS_SB_COUNT              41
 #define        XFS_TRANS_CHECKPOINT            42
 #define        XFS_TRANS_ICREATE               43
-#define        XFS_TRANS_TYPE_MAX              43
+#define        XFS_TRANS_CREATE_TMPFILE        44
+#define        XFS_TRANS_TYPE_MAX              44
 /* new transaction types need to be reflected in xfs_logprint(8) */
 
 #define XFS_TRANS_TYPES \
@@ -112,6 +113,7 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
        { XFS_TRANS_SETATTR_SIZE,       "SETATTR_SIZE" }, \
        { XFS_TRANS_INACTIVE,           "INACTIVE" }, \
        { XFS_TRANS_CREATE,             "CREATE" }, \
+       { XFS_TRANS_CREATE_TMPFILE,     "CREATE_TMPFILE" }, \
        { XFS_TRANS_CREATE_TRUNC,       "CREATE_TRUNC" }, \
        { XFS_TRANS_TRUNCATE_FILE,      "TRUNCATE_FILE" }, \
        { XFS_TRANS_REMOVE,             "REMOVE" }, \
index 14e58f2..52979aa 100644 (file)
@@ -80,6 +80,10 @@ xfs_readlink_bmap(
                if (error) {
                        xfs_buf_ioerror_alert(bp, __func__);
                        xfs_buf_relse(bp);
+
+                       /* bad CRC means corrupted metadata */
+                       if (error == EFSBADCRC)
+                               error = EFSCORRUPTED;
                        goto out;
                }
                byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
@@ -208,10 +212,7 @@ xfs_symlink(
                return XFS_ERROR(ENAMETOOLONG);
 
        udqp = gdqp = NULL;
-       if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
-               prid = xfs_get_projid(dp);
-       else
-               prid = XFS_PROJID_DEFAULT;
+       prid = xfs_get_initial_prid(dp);
 
        /*
         * Make sure that we have allocated dquot(s) on disk.
index bf59a2b..9b32052 100644 (file)
@@ -133,12 +133,13 @@ xfs_symlink_read_verify(
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return;
 
-       if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-                                 offsetof(struct xfs_dsymlink_hdr, sl_crc)) ||
-           !xfs_symlink_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+       if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_symlink_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
 }
 
 static void
@@ -153,8 +154,8 @@ xfs_symlink_write_verify(
                return;
 
        if (!xfs_symlink_verify(bp)) {
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
                return;
        }
 
@@ -162,8 +163,7 @@ xfs_symlink_write_verify(
                struct xfs_dsymlink_hdr *dsl = bp->b_addr;
                dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
        }
-       xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
-                        offsetof(struct xfs_dsymlink_hdr, sl_crc));
+       xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF);
 }
 
 const struct xfs_buf_ops xfs_symlink_buf_ops = {
index 425dfa4..a4ae41c 100644 (file)
@@ -603,6 +603,7 @@ DEFINE_INODE_EVENT(xfs_readlink);
 DEFINE_INODE_EVENT(xfs_inactive_symlink);
 DEFINE_INODE_EVENT(xfs_alloc_file_space);
 DEFINE_INODE_EVENT(xfs_free_file_space);
+DEFINE_INODE_EVENT(xfs_collapse_file_space);
 DEFINE_INODE_EVENT(xfs_readdir);
 #ifdef CONFIG_XFS_POSIX_ACL
 DEFINE_INODE_EVENT(xfs_get_acl);
index c812c5c..54a5732 100644 (file)
@@ -887,12 +887,7 @@ xfs_trans_commit(
                xfs_trans_apply_sb_deltas(tp);
        xfs_trans_apply_dquot_deltas(tp);
 
-       error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
-       if (error == ENOMEM) {
-               xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
-               error = XFS_ERROR(EIO);
-               goto out_unreserve;
-       }
+       xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
 
        current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
        xfs_trans_free(tp);
@@ -902,10 +897,7 @@ xfs_trans_commit(
         * log out now and wait for it.
         */
        if (sync) {
-               if (!error) {
-                       error = _xfs_log_force_lsn(mp, commit_lsn,
-                                     XFS_LOG_SYNC, NULL);
-               }
+               error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
                XFS_STATS_INC(xs_trans_sync);
        } else {
                XFS_STATS_INC(xs_trans_async);
index 647b6f1..b8eef05 100644 (file)
@@ -275,6 +275,10 @@ xfs_trans_read_buf_map(
                        XFS_BUF_UNDONE(bp);
                        xfs_buf_stale(bp);
                        xfs_buf_relse(bp);
+
+                       /* bad CRC means corrupted metadata */
+                       if (error == EFSBADCRC)
+                               error = EFSCORRUPTED;
                        return error;
                }
 #ifdef DEBUG
@@ -338,6 +342,9 @@ xfs_trans_read_buf_map(
                                if (tp->t_flags & XFS_TRANS_DIRTY)
                                        xfs_force_shutdown(tp->t_mountp,
                                                        SHUTDOWN_META_IO_ERROR);
+                               /* bad CRC means corrupted metadata */
+                               if (error == EFSBADCRC)
+                                       error = EFSCORRUPTED;
                                return error;
                        }
                }
@@ -375,6 +382,10 @@ xfs_trans_read_buf_map(
                if (tp->t_flags & XFS_TRANS_DIRTY)
                        xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);
                xfs_buf_relse(bp);
+
+               /* bad CRC means corrupted metadata */
+               if (error == EFSBADCRC)
+                       error = EFSCORRUPTED;
                return error;
        }
 #ifdef DEBUG
index 2ffd3e3..ae36816 100644 (file)
@@ -81,20 +81,28 @@ xfs_calc_buf_res(
  * on disk. Hence we need an inode reservation function that calculates all this
  * correctly. So, we log:
  *
- * - log op headers for object
+ * - 4 log op headers for object
+ *     - for the ilf, the inode core and 2 forks
  * - inode log format object
- * - the entire inode contents (core + 2 forks)
- * - two bmap btree block headers
+ * - the inode core
+ * - two inode forks containing bmap btree root blocks.
+ *     - the btree data contained by both forks will fit into the inode size,
+ *       hence when combined with the inode core above, we have a total of the
+ *       actual inode size.
+ *     - the BMBT headers need to be accounted separately, as they are
+ *       additional to the records and pointers that fit inside the inode
+ *       forks.
  */
 STATIC uint
 xfs_calc_inode_res(
        struct xfs_mount        *mp,
        uint                    ninodes)
 {
-       return ninodes * (sizeof(struct xlog_op_header) +
-                         sizeof(struct xfs_inode_log_format) +
-                         mp->m_sb.sb_inodesize +
-                         2 * XFS_BMBT_BLOCK_LEN(mp));
+       return ninodes *
+               (4 * sizeof(struct xlog_op_header) +
+                sizeof(struct xfs_inode_log_format) +
+                mp->m_sb.sb_inodesize +
+                2 * XFS_BMBT_BLOCK_LEN(mp));
 }
 
 /*
@@ -203,6 +211,19 @@ xfs_calc_rename_reservation(
                                      XFS_FSB_TO_B(mp, 1))));
 }
 
+/*
+ * For removing an inode from the unlinked list, we can modify:
+ *    the agi hash list and counters: sector size
+ *    the on disk inode before ours in the agi hash list: inode cluster size
+ */
+STATIC uint
+xfs_calc_iunlink_remove_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+              max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+}
+
 /*
  * For creating a link to an inode:
  *    the parent directory inode: inode size
@@ -220,6 +241,7 @@ xfs_calc_link_reservation(
        struct xfs_mount        *mp)
 {
        return XFS_DQUOT_LOGRES(mp) +
+               xfs_calc_iunlink_remove_reservation(mp) +
                MAX((xfs_calc_inode_res(mp, 2) +
                     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
                                      XFS_FSB_TO_B(mp, 1))),
@@ -228,6 +250,18 @@ xfs_calc_link_reservation(
                                      XFS_FSB_TO_B(mp, 1))));
 }
 
+/*
+ * For adding an inode to the unlinked list we can modify:
+ *    the agi hash list: sector size
+ *    the unlinked inode: inode size
+ */
+STATIC uint
+xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
+{
+       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+               xfs_calc_inode_res(mp, 1);
+}
+
 /*
  * For removing a directory entry we can modify:
  *    the parent directory inode: inode size
@@ -245,10 +279,11 @@ xfs_calc_remove_reservation(
        struct xfs_mount        *mp)
 {
        return XFS_DQUOT_LOGRES(mp) +
-               MAX((xfs_calc_inode_res(mp, 2) +
+               xfs_calc_iunlink_add_reservation(mp) +
+               MAX((xfs_calc_inode_res(mp, 1) +
                     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
                                      XFS_FSB_TO_B(mp, 1))),
-                   (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+                   (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
                     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
                                      XFS_FSB_TO_B(mp, 1))));
 }
@@ -343,6 +378,20 @@ xfs_calc_create_reservation(
 
 }
 
+STATIC uint
+xfs_calc_create_tmpfile_reservation(
+       struct xfs_mount        *mp)
+{
+       uint    res = XFS_DQUOT_LOGRES(mp);
+
+       if (xfs_sb_version_hascrc(&mp->m_sb))
+               res += xfs_calc_icreate_resv_alloc(mp);
+       else
+               res += xfs_calc_create_resv_alloc(mp);
+
+       return res + xfs_calc_iunlink_add_reservation(mp);
+}
+
 /*
  * Making a new directory is the same as creating a new file.
  */
@@ -383,9 +432,9 @@ xfs_calc_ifree_reservation(
 {
        return XFS_DQUOT_LOGRES(mp) +
                xfs_calc_inode_res(mp, 1) +
-               xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+               xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
                xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
-               max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size) +
+               xfs_calc_iunlink_remove_reservation(mp) +
                xfs_calc_buf_res(1, 0) +
                xfs_calc_buf_res(2 + mp->m_ialloc_blks +
                                 mp->m_in_maxlevels, 0) +
@@ -644,15 +693,14 @@ xfs_calc_qm_setqlim_reservation(
 
 /*
  * Allocating quota on disk if needed.
- *     the write transaction log space: M_RES(mp)->tr_write.tr_logres
+ *     the write transaction log space for quota file extent allocation
  *     the unit of quota allocation: one system block size
  */
 STATIC uint
 xfs_calc_qm_dqalloc_reservation(
        struct xfs_mount        *mp)
 {
-       ASSERT(M_RES(mp)->tr_write.tr_logres);
-       return M_RES(mp)->tr_write.tr_logres +
+       return xfs_calc_write_reservation(mp) +
                xfs_calc_buf_res(1,
                        XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
 }
@@ -729,6 +777,11 @@ xfs_trans_resv_calc(
        resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
        resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 
+       resp->tr_create_tmpfile.tr_logres =
+                       xfs_calc_create_tmpfile_reservation(mp);
+       resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
+       resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
        resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
        resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
        resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
@@ -784,7 +837,6 @@ xfs_trans_resv_calc(
        /* The following transaction are logged in logical format */
        resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
        resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
-       resp->tr_swrite.tr_logres = xfs_calc_swrite_reservation(mp);
        resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
        resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
        resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
index de7de9a..1097d14 100644 (file)
@@ -38,11 +38,11 @@ struct xfs_trans_resv {
        struct xfs_trans_res    tr_remove;      /* unlink trans */
        struct xfs_trans_res    tr_symlink;     /* symlink trans */
        struct xfs_trans_res    tr_create;      /* create trans */
+       struct xfs_trans_res    tr_create_tmpfile; /* create O_TMPFILE trans */
        struct xfs_trans_res    tr_mkdir;       /* mkdir trans */
        struct xfs_trans_res    tr_ifree;       /* inode free trans */
        struct xfs_trans_res    tr_ichange;     /* inode update trans */
        struct xfs_trans_res    tr_growdata;    /* fs data section grow trans */
-       struct xfs_trans_res    tr_swrite;      /* sync write inode trans */
        struct xfs_trans_res    tr_addafork;    /* add inode attr fork trans */
        struct xfs_trans_res    tr_writeid;     /* write setuid/setgid file */
        struct xfs_trans_res    tr_attrinval;   /* attr fork buffer
@@ -100,6 +100,7 @@ struct xfs_trans_resv {
 #define        XFS_ITRUNCATE_LOG_COUNT         2
 #define XFS_INACTIVE_LOG_COUNT         2
 #define        XFS_CREATE_LOG_COUNT            2
+#define        XFS_CREATE_TMPFILE_LOG_COUNT    2
 #define        XFS_MKDIR_LOG_COUNT             3
 #define        XFS_SYMLINK_LOG_COUNT           3
 #define        XFS_REMOVE_LOG_COUNT            2
index ea80f1c..81048f9 100644 (file)
@@ -2550,6 +2550,9 @@ enum {
 
        /* filesystem does not support filling holes */
        DIO_SKIP_HOLES  = 0x02,
+
+       /* filesystem can handle aio writes beyond i_size */
+       DIO_ASYNC_EXTEND = 0x04,
 };
 
 void dio_end_io(struct bio *bio, int error);
index 990c4cc..d1197ae 100644 (file)
@@ -5,5 +5,40 @@
 #define FALLOC_FL_PUNCH_HOLE   0x02 /* de-allocates range */
 #define FALLOC_FL_NO_HIDE_STALE        0x04 /* reserved codepoint */
 
+/*
+ * FALLOC_FL_COLLAPSE_RANGE is used to remove a range of a file
+ * without leaving a hole in the file. The contents of the file beyond
+ * the range being removed are appended to the start offset of the range
+ * being removed (i.e. the hole that was punched is "collapsed"),
+ * resulting in a file layout that looks like the range that was
+ * removed never existed. As such, collapsing a range of a file changes
+ * the size of the file, reducing it by the length of the range that
+ * has been removed by the operation.
+ *
+ * Different filesystems may implement different limitations on the
+ * granularity of the operation. Most will limit operations to
+ * filesystem block size boundaries, but this boundary may be larger or
+ * smaller depending on the filesystem and/or the configuration of the
+ * filesystem or file.
+ *
+ * Attempting to collapse a range that crosses the end of the file is
+ * considered an illegal operation - just use ftruncate(2) if you need
+ * to collapse a range that crosses EOF.
+ */
+#define FALLOC_FL_COLLAPSE_RANGE       0x08
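
A hedged userspace sketch of the new flag (descriptor, offset and size are assumptions; the fallocate() wrapper needs a reasonably recent glibc, otherwise fall back to syscall(2)). Both offset and len must be multiples of the filesystem block size, and offset + len must stay below EOF:

#define _GNU_SOURCE
#include <fcntl.h>                      /* fallocate() */

#ifndef FALLOC_FL_COLLAPSE_RANGE
#define FALLOC_FL_COLLAPSE_RANGE 0x08   /* value from the hunk above */
#endif

/* drop one block-aligned megabyte from the middle of the file;
 * everything past offset + len slides down and i_size shrinks by len */
int collapse_1m(int fd, off_t offset)
{
        return fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, offset, 1 << 20);
}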
+
+/*
+ * FALLOC_FL_ZERO_RANGE is used to convert a range of a file to zeros,
+ * preferably without issuing data IO. Blocks should be preallocated for the
+ * regions that span holes in the file, and the entire range is preferably
+ * converted to unwritten extents - even though the file system may choose to
+ * zero out the extents or do whatever else results in reading zeros from the
+ * range while the range remains allocated for the file.
+ *
+ * This can also be used to preallocate blocks past EOF in the same way as
+ * with fallocate. Flag FALLOC_FL_KEEP_SIZE should cause the inode
+ * size to remain the same.
+ */
+#define FALLOC_FL_ZERO_RANGE           0x10
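
And the matching sketch for zeroing (arguments assumed): adding FALLOC_FL_KEEP_SIZE keeps i_size unchanged even when the range runs past EOF, which is the preallocation case described above.

        fallocate(fd, FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE, offset, len);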
 
 #endif /* _UAPI_FALLOC_H_ */