Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 27 May 2010 17:26:37 +0000 (10:26 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 27 May 2010 17:26:37 +0000 (10:26 -0700)
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (40 commits)
  ext4: Make fsync sync new parent directories in no-journal mode
  ext4: Drop whitespace at end of lines
  ext4: Fix compat EXT4_IOC_ADD_GROUP
  ext4: Conditionally define compat ioctl numbers
  tracing: Convert more ext4 events to DEFINE_EVENT
  ext4: Add new tracepoints to track mballoc's buddy bitmap loads
  ext4: Add a missing trace hook
  ext4: restart ext4_ext_remove_space() after transaction restart
  ext4: Clear the EXT4_EOFBLOCKS_FL flag only when warranted
  ext4: Avoid crashing on NULL ptr dereference on a filesystem error
  ext4: Use bitops to read/modify i_flags in struct ext4_inode_info
  ext4: Convert calls of ext4_error() to EXT4_ERROR_INODE()
  ext4: Convert callers of ext4_get_blocks() to use ext4_map_blocks()
  ext4: Add new abstraction ext4_map_blocks() underneath ext4_get_blocks()
  ext4: Use our own write_cache_pages()
  ext4: Show journal_checksum option
  ext4: Fix for ext4_mb_collect_stats()
  ext4: check for a good block group before loading buddy pages
  ext4: Prevent creation of files larger than RLIMIT_FSIZE using fallocate
  ext4: Remove extraneous newlines in ext4_msg() calls
  ...

Fixed up trivial conflict in fs/ext4/fsync.c

23 files changed:
fs/ext4/balloc.c
fs/ext4/block_validity.c
fs/ext4/dir.c
fs/ext4/ext4.h
fs/ext4/ext4_jbd2.h
fs/ext4/extents.c
fs/ext4/file.c
fs/ext4/fsync.c
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/migrate.c
fs/ext4/move_extent.c
fs/ext4/namei.c
fs/ext4/resize.c
fs/ext4/super.c
fs/ext4/symlink.c
fs/ext4/xattr.c
fs/jbd2/transaction.c
fs/quota/dquot.c
include/linux/quotaops.h
include/trace/events/ext4.h

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d2f37a5..95b7594 100644
@@ -591,14 +591,15 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
        ret = ext4_mb_new_blocks(handle, &ar, errp);
        if (count)
                *count = ar.len;
-
        /*
-        * Account for the allocated meta blocks
+        * Account for the allocated meta blocks.  We will never
+        * fail EDQUOT for metadata, but we do account for it.
         */
        if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
                spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
                EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
                spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+               dquot_alloc_block_nofail(inode, ar.len);
        }
        return ret;
 }
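
The hunk above pairs the in-memory i_allocated_meta_blocks bookkeeping with dquot_alloc_block_nofail(), which, per the comment, charges the metadata blocks to quota without ever failing with EDQUOT. A minimal userspace sketch of that charge-but-never-fail pattern (the types and helpers below are hypothetical stand-ins, not the kernel's quota API):

    /*
     * Illustrative model of the charge-but-never-fail quota
     * pattern; hypothetical stand-ins, not the kernel quota API.
     */
    #include <stdio.h>

    struct quota { long long used, limit; };

    /* Normal data allocation: refuse to exceed the limit. */
    static int quota_alloc(struct quota *q, long long nr)
    {
        if (q->used + nr > q->limit)
            return -1;                /* would be -EDQUOT */
        q->used += nr;
        return 0;
    }

    /* nofail variant: always account the blocks, even past the limit. */
    static void quota_alloc_nofail(struct quota *q, long long nr)
    {
        q->used += nr;                /* metadata must not fail here */
    }

    int main(void)
    {
        struct quota q = { .used = 99, .limit = 100 };

        printf("data alloc: %d\n", quota_alloc(&q, 5));   /* -1, over quota */
        quota_alloc_nofail(&q, 5);                        /* charged anyway */
        printf("used=%lld limit=%lld\n", q.used, q.limit);
        return 0;
    }

The apparent rationale: failing a metadata allocation mid-transaction is worse than letting an inode slightly overshoot its quota.
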
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 538c486..5b6973f 100644
@@ -72,9 +72,9 @@ static int add_system_zone(struct ext4_sb_info *sbi,
                else if (start_blk >= (entry->start_blk + entry->count))
                        n = &(*n)->rb_right;
                else {
-                       if (start_blk + count > (entry->start_blk + 
+                       if (start_blk + count > (entry->start_blk +
                                                 entry->count))
-                               entry->count = (start_blk + count - 
+                               entry->count = (start_blk + count -
                                                entry->start_blk);
                        new_node = *n;
                        new_entry = rb_entry(new_node, struct ext4_system_zone,
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 86cb6d8..ea5e6cb 100644
@@ -83,11 +83,10 @@ int ext4_check_dir_entry(const char *function, struct inode *dir,
                error_msg = "inode out of bounds";
 
        if (error_msg != NULL)
-               __ext4_error(dir->i_sb, function,
-                       "bad entry in directory #%lu: %s - block=%llu"
+               ext4_error_inode(function, dir,
+                       "bad entry in directory: %s - block=%llu"
                        "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d",
-                       dir->i_ino, error_msg, 
-                       (unsigned long long) bh->b_blocknr,     
+                       error_msg, (unsigned long long) bh->b_blocknr,
                        (unsigned) (offset%bh->b_size), offset,
                        le32_to_cpu(de->inode),
                        rlen, de->name_len);
@@ -111,7 +110,7 @@ static int ext4_readdir(struct file *filp,
 
        if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
                                    EXT4_FEATURE_COMPAT_DIR_INDEX) &&
-           ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) ||
+           ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
             ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
                err = ext4_dx_readdir(filp, dirent, filldir);
                if (err != ERR_BAD_DX_DIR) {
@@ -122,20 +121,20 @@ static int ext4_readdir(struct file *filp,
                 * We don't set the inode dirty flag since it's not
                 * critical that it get flushed back to the disk.
                 */
-               EXT4_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL;
+               ext4_clear_inode_flag(filp->f_path.dentry->d_inode, EXT4_INODE_INDEX);
        }
        stored = 0;
        offset = filp->f_pos & (sb->s_blocksize - 1);
 
        while (!error && !stored && filp->f_pos < inode->i_size) {
-               ext4_lblk_t blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb);
-               struct buffer_head map_bh;
+               struct ext4_map_blocks map;
                struct buffer_head *bh = NULL;
 
-               map_bh.b_state = 0;
-               err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0);
+               map.m_lblk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb);
+               map.m_len = 1;
+               err = ext4_map_blocks(NULL, inode, &map, 0);
                if (err > 0) {
-                       pgoff_t index = map_bh.b_blocknr >>
+                       pgoff_t index = map.m_pblk >>
                                        (PAGE_CACHE_SHIFT - inode->i_blkbits);
                        if (!ra_has_index(&filp->f_ra, index))
                                page_cache_sync_readahead(
@@ -143,7 +142,7 @@ static int ext4_readdir(struct file *filp,
                                        &filp->f_ra, filp,
                                        index, 1);
                        filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
-                       bh = ext4_bread(NULL, inode, blk, 0, &err);
+                       bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err);
                }
 
                /*
@@ -152,9 +151,8 @@ static int ext4_readdir(struct file *filp,
                 */
                if (!bh) {
                        if (!dir_has_error) {
-                               ext4_error(sb, "directory #%lu "
+                               EXT4_ERROR_INODE(inode, "directory "
                                           "contains a hole at offset %Lu",
-                                          inode->i_ino,
                                           (unsigned long long) filp->f_pos);
                                dir_has_error = 1;
                        }
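
This readdir conversion shows the shape of the new API: callers fill in m_lblk/m_len and read back m_pblk, instead of abusing a scratch buffer_head. A small userspace model of the calling convention (the struct mirrors the ext4_map_blocks definition added to ext4.h below; lookup_extent() is a made-up stand-in for the real mapping logic):

    #include <stdio.h>

    typedef unsigned long long ext4_fsblk_t;
    typedef unsigned int ext4_lblk_t;

    /* Mirrors the struct added to ext4.h in this series. */
    struct ext4_map_blocks {
        ext4_fsblk_t    m_pblk;     /* first physical block (out) */
        ext4_lblk_t     m_lblk;     /* first logical block (in) */
        unsigned int    m_len;      /* blocks requested/mapped */
        unsigned int    m_flags;    /* EXT4_MAP_* result flags */
    };

    /* Hypothetical: pretend logical block N maps to physical N + 1000. */
    static int lookup_extent(struct ext4_map_blocks *map)
    {
        map->m_pblk = map->m_lblk + 1000;
        return map->m_len;          /* number of blocks mapped */
    }

    int main(void)
    {
        struct ext4_map_blocks map = { .m_lblk = 42, .m_len = 1 };

        if (lookup_extent(&map) > 0)
            printf("lblk %u -> pblk %llu\n", map.m_lblk, map.m_pblk);
        return 0;
    }

As the ext4.h comment notes, the struct also takes far less stack space than a struct buffer_head.
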
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index bf938cf..60bd310 100644
@@ -29,6 +29,9 @@
 #include <linux/wait.h>
 #include <linux/blockgroup_lock.h>
 #include <linux/percpu_counter.h>
+#ifdef __KERNEL__
+#include <linux/compat.h>
+#endif
 
 /*
  * The fourth extended filesystem constants/structures
 #endif
 
 #define EXT4_ERROR_INODE(inode, fmt, a...) \
-       ext4_error_inode(__func__, (inode), (fmt), ## a);
+       ext4_error_inode(__func__, (inode), (fmt), ## a)
 
 #define EXT4_ERROR_FILE(file, fmt, a...)       \
-       ext4_error_file(__func__, (file), (fmt), ## a);
+       ext4_error_file(__func__, (file), (fmt), ## a)
 
 /* data type for block offset of block group */
 typedef int ext4_grpblk_t;
@@ -72,7 +75,7 @@ typedef __u32 ext4_lblk_t;
 typedef unsigned int ext4_group_t;
 
 /*
- * Flags used in mballoc's allocation_context flags field.  
+ * Flags used in mballoc's allocation_context flags field.
  *
  * Also used to show what's going on for debugging purposes when the
 * flag field is exported via the tracepoint interface
@@ -125,6 +128,29 @@ struct ext4_allocation_request {
        unsigned int flags;
 };
 
+/*
+ * Logical to physical block mapping, used by ext4_map_blocks()
+ *
+ * This structure is used to pass requests into ext4_map_blocks() as
+ * well as to store the information returned by ext4_map_blocks().  It
+ * takes less room on the stack than a struct buffer_head.
+ */
+#define EXT4_MAP_NEW           (1 << BH_New)
+#define EXT4_MAP_MAPPED                (1 << BH_Mapped)
+#define EXT4_MAP_UNWRITTEN     (1 << BH_Unwritten)
+#define EXT4_MAP_BOUNDARY      (1 << BH_Boundary)
+#define EXT4_MAP_UNINIT                (1 << BH_Uninit)
+#define EXT4_MAP_FLAGS         (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
+                                EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
+                                EXT4_MAP_UNINIT)
+
+struct ext4_map_blocks {
+       ext4_fsblk_t m_pblk;
+       ext4_lblk_t m_lblk;
+       unsigned int m_len;
+       unsigned int m_flags;
+};
+
 /*
  * For delayed allocation tracking
  */
@@ -321,6 +347,83 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
                return flags & EXT4_OTHER_FLMASK;
 }
 
+/*
+ * Inode flags used for atomic set/get
+ */
+enum {
+       EXT4_INODE_SECRM        = 0,    /* Secure deletion */
+       EXT4_INODE_UNRM         = 1,    /* Undelete */
+       EXT4_INODE_COMPR        = 2,    /* Compress file */
+       EXT4_INODE_SYNC         = 3,    /* Synchronous updates */
+       EXT4_INODE_IMMUTABLE    = 4,    /* Immutable file */
+       EXT4_INODE_APPEND       = 5,    /* writes to file may only append */
+       EXT4_INODE_NODUMP       = 6,    /* do not dump file */
+       EXT4_INODE_NOATIME      = 7,    /* do not update atime */
+/* Reserved for compression usage... */
+       EXT4_INODE_DIRTY        = 8,
+       EXT4_INODE_COMPRBLK     = 9,    /* One or more compressed clusters */
+       EXT4_INODE_NOCOMPR      = 10,   /* Don't compress */
+       EXT4_INODE_ECOMPR       = 11,   /* Compression error */
+/* End compression flags --- maybe not all used */
+       EXT4_INODE_INDEX        = 12,   /* hash-indexed directory */
+       EXT4_INODE_IMAGIC       = 13,   /* AFS directory */
+       EXT4_INODE_JOURNAL_DATA = 14,   /* file data should be journaled */
+       EXT4_INODE_NOTAIL       = 15,   /* file tail should not be merged */
+       EXT4_INODE_DIRSYNC      = 16,   /* dirsync behaviour (directories only) */
+       EXT4_INODE_TOPDIR       = 17,   /* Top of directory hierarchies */
+       EXT4_INODE_HUGE_FILE    = 18,   /* Set to each huge file */
+       EXT4_INODE_EXTENTS      = 19,   /* Inode uses extents */
+       EXT4_INODE_EA_INODE     = 21,   /* Inode used for large EA */
+       EXT4_INODE_EOFBLOCKS    = 22,   /* Blocks allocated beyond EOF */
+       EXT4_INODE_RESERVED     = 31,   /* reserved for ext4 lib */
+};
+
+#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG))
+#define CHECK_FLAG_VALUE(FLAG) if (!TEST_FLAG_VALUE(FLAG)) { \
+       printk(KERN_EMERG "EXT4 flag fail: " #FLAG ": %d %d\n", \
+               EXT4_##FLAG##_FL, EXT4_INODE_##FLAG); BUG_ON(1); }
+
+/*
+ * Since it's pretty easy to mix up bit numbers and hex values, and we
+ * can't do a compile-time test for ENUM values, we use a run-time
+ * test to make sure that EXT4_XXX_FL is consistent with respect to
+ * EXT4_INODE_XXX.  If all is well the printk and BUG_ON will all drop
+ * out so it won't cost any extra space in the compiled kernel image.
+ * But it's important that these values are the same, since we are
+ * using EXT4_INODE_XXX to test for the flag values, but EXT4_XXX_FL
+ * must be consistent with the values of FS_XXX_FL defined in
+ * include/linux/fs.h and the on-disk values found in ext2, ext3, and
+ * ext4 filesystems, and of course the values defined in e2fsprogs.
+ *
+ * It's not paranoia if Murphy's Law really *is* out to get you.  :-)
+ */
+static inline void ext4_check_flag_values(void)
+{
+       CHECK_FLAG_VALUE(SECRM);
+       CHECK_FLAG_VALUE(UNRM);
+       CHECK_FLAG_VALUE(COMPR);
+       CHECK_FLAG_VALUE(SYNC);
+       CHECK_FLAG_VALUE(IMMUTABLE);
+       CHECK_FLAG_VALUE(APPEND);
+       CHECK_FLAG_VALUE(NODUMP);
+       CHECK_FLAG_VALUE(NOATIME);
+       CHECK_FLAG_VALUE(DIRTY);
+       CHECK_FLAG_VALUE(COMPRBLK);
+       CHECK_FLAG_VALUE(NOCOMPR);
+       CHECK_FLAG_VALUE(ECOMPR);
+       CHECK_FLAG_VALUE(INDEX);
+       CHECK_FLAG_VALUE(IMAGIC);
+       CHECK_FLAG_VALUE(JOURNAL_DATA);
+       CHECK_FLAG_VALUE(NOTAIL);
+       CHECK_FLAG_VALUE(DIRSYNC);
+       CHECK_FLAG_VALUE(TOPDIR);
+       CHECK_FLAG_VALUE(HUGE_FILE);
+       CHECK_FLAG_VALUE(EXTENTS);
+       CHECK_FLAG_VALUE(EA_INODE);
+       CHECK_FLAG_VALUE(EOFBLOCKS);
+       CHECK_FLAG_VALUE(RESERVED);
+}
+
 /* Used to pass group descriptor data when online resize is done */
 struct ext4_new_group_input {
        __u32 group;            /* Group number for this data */
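
For contrast with the run-time CHECK_FLAG_VALUE test above: in C11 the same bit-number/flag-value consistency can be expressed at compile time, an option the kernel did not have here. A userspace illustration with a single flag (the values match the EXT4_SYNC_FL definition in ext4.h):

    #define EXT4_SYNC_FL    0x00000008  /* on-disk flag value */
    #define EXT4_INODE_SYNC 3           /* bit number from the enum above */

    _Static_assert(EXT4_SYNC_FL == (1 << EXT4_INODE_SYNC),
                   "EXT4_SYNC_FL does not match EXT4_INODE_SYNC");

    int main(void) { return 0; }
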
@@ -332,6 +435,18 @@ struct ext4_new_group_input {
        __u16 unused;
 };
 
+#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
+struct compat_ext4_new_group_input {
+       u32 group;
+       compat_u64 block_bitmap;
+       compat_u64 inode_bitmap;
+       compat_u64 inode_table;
+       u32 blocks_count;
+       u16 reserved_blocks;
+       u16 unused;
+};
+#endif
+
 /* The struct ext4_new_group_input in kernel space, with free_blocks_count */
 struct ext4_new_group_data {
        __u32 group;
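
The compat struct above exists because the same C struct can have two layouts: 32-bit x86 aligns a u64 member to 4 bytes, while a 64-bit kernel aligns it to 8, so an ioctl argument copied from 32-bit userspace will not match the native definition. A userspace sketch of the mismatch, using a 4-byte-aligned 64-bit type the way compat_u64 does:

    #include <stdio.h>
    #include <stddef.h>
    #include <stdint.h>

    typedef uint64_t u64;
    typedef u64 __attribute__((aligned(4))) compat_u64;

    struct native { uint32_t group; u64        block_bitmap; };
    struct compat { uint32_t group; compat_u64 block_bitmap; };

    int main(void)
    {
        /* On x86-64: native offset 8/size 16, compat offset 4/size 12. */
        printf("native: offset %zu, size %zu\n",
               offsetof(struct native, block_bitmap), sizeof(struct native));
        printf("compat: offset %zu, size %zu\n",
               offsetof(struct compat, block_bitmap), sizeof(struct compat));
        return 0;
    }
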
@@ -355,7 +470,7 @@ struct ext4_new_group_data {
 #define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT      (EXT4_GET_BLOCKS_UNINIT_EXT|\
                                                 EXT4_GET_BLOCKS_CREATE)
        /* Caller is from the delayed allocation writeout path,
-          so set the magic i_delalloc_reserve_flag after taking the 
+          so set the magic i_delalloc_reserve_flag after taking the
           inode allocation semaphore for */
 #define EXT4_GET_BLOCKS_DELALLOC_RESERVE       0x0004
        /* caller is from the direct IO path, request to creation of an
@@ -398,6 +513,7 @@ struct ext4_new_group_data {
 #define EXT4_IOC_ALLOC_DA_BLKS         _IO('f', 12)
 #define EXT4_IOC_MOVE_EXT              _IOWR('f', 15, struct move_extent)
 
+#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
  * ioctl commands in 32 bit emulation
  */
@@ -408,11 +524,13 @@ struct ext4_new_group_data {
 #define EXT4_IOC32_GETRSVSZ            _IOR('f', 5, int)
 #define EXT4_IOC32_SETRSVSZ            _IOW('f', 6, int)
 #define EXT4_IOC32_GROUP_EXTEND                _IOW('f', 7, unsigned int)
+#define EXT4_IOC32_GROUP_ADD           _IOW('f', 8, struct compat_ext4_new_group_input)
 #ifdef CONFIG_JBD2_DEBUG
 #define EXT4_IOC32_WAIT_FOR_READONLY   _IOR('f', 99, int)
 #endif
 #define EXT4_IOC32_GETVERSION_OLD      FS_IOC32_GETVERSION
 #define EXT4_IOC32_SETVERSION_OLD      FS_IOC32_SETVERSION
+#endif
 
 
 /*
@@ -616,9 +734,8 @@ struct ext4_ext_cache {
  */
 struct ext4_inode_info {
        __le32  i_data[15];     /* unconverted */
-       __u32   i_flags;
-       ext4_fsblk_t    i_file_acl;
        __u32   i_dtime;
+       ext4_fsblk_t    i_file_acl;
 
        /*
         * i_block_group is the number of the block group which contains
@@ -629,6 +746,7 @@ struct ext4_inode_info {
         */
        ext4_group_t    i_block_group;
        unsigned long   i_state_flags;          /* Dynamic state flags */
+       unsigned long   i_flags;
 
        ext4_lblk_t             i_dir_start_lookup;
 #ifdef CONFIG_EXT4_FS_XATTR
@@ -1062,22 +1180,25 @@ enum {
        EXT4_STATE_DA_ALLOC_CLOSE,      /* Alloc DA blks on close */
        EXT4_STATE_EXT_MIGRATE,         /* Inode is migrating */
        EXT4_STATE_DIO_UNWRITTEN,       /* need convert on dio done*/
+       EXT4_STATE_NEWENTRY,            /* File just added to dir */
 };
 
-static inline int ext4_test_inode_state(struct inode *inode, int bit)
-{
-       return test_bit(bit, &EXT4_I(inode)->i_state_flags);
-}
-
-static inline void ext4_set_inode_state(struct inode *inode, int bit)
-{
-       set_bit(bit, &EXT4_I(inode)->i_state_flags);
+#define EXT4_INODE_BIT_FNS(name, field)                                        \
+static inline int ext4_test_inode_##name(struct inode *inode, int bit) \
+{                                                                      \
+       return test_bit(bit, &EXT4_I(inode)->i_##field);                \
+}                                                                      \
+static inline void ext4_set_inode_##name(struct inode *inode, int bit) \
+{                                                                      \
+       set_bit(bit, &EXT4_I(inode)->i_##field);                        \
+}                                                                      \
+static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \
+{                                                                      \
+       clear_bit(bit, &EXT4_I(inode)->i_##field);                      \
 }
 
-static inline void ext4_clear_inode_state(struct inode *inode, int bit)
-{
-       clear_bit(bit, &EXT4_I(inode)->i_state_flags);
-}
+EXT4_INODE_BIT_FNS(flag, flags)
+EXT4_INODE_BIT_FNS(state, state_flags)
 #else
 /* Assume that user mode programs are passing in an ext4fs superblock, not
  * a kernel struct super_block.  This will allow us to call the feature-test
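
The EXT4_INODE_BIT_FNS macro above stamps out test/set/clear helpers twice, once for i_flags and once for i_state_flags, so both words are handled through the same atomic bitops. A userspace model of the macro (plain bit arithmetic stands in for the kernel's atomic test_bit/set_bit/clear_bit):

    #include <stdio.h>

    struct inode_info { unsigned long flags, state_flags; };

    #define INODE_BIT_FNS(name, field)                                \
    static int test_inode_##name(struct inode_info *i, int bit)       \
    { return (i->field >> bit) & 1; }                                 \
    static void set_inode_##name(struct inode_info *i, int bit)       \
    { i->field |= 1UL << bit; }                                       \
    static void clear_inode_##name(struct inode_info *i, int bit)     \
    { i->field &= ~(1UL << bit); }

    INODE_BIT_FNS(flag, flags)
    INODE_BIT_FNS(state, state_flags)

    int main(void)
    {
        struct inode_info i = { 0, 0 };

        set_inode_flag(&i, 12);            /* e.g. EXT4_INODE_INDEX */
        set_inode_state(&i, 3);            /* some state bit */
        printf("flag 12: %d, state 3: %d\n",
               test_inode_flag(&i, 12), test_inode_state(&i, 3));
        clear_inode_flag(&i, 12);
        printf("flag 12 after clear: %d\n", test_inode_flag(&i, 12));
        return 0;
    }
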
@@ -1264,7 +1385,7 @@ struct ext4_dir_entry_2 {
 
 #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \
                                      EXT4_FEATURE_COMPAT_DIR_INDEX) && \
-                     (EXT4_I(dir)->i_flags & EXT4_INDEX_FL))
+                   ext4_test_inode_flag((dir), EXT4_INODE_INDEX))
 #define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX)
 #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
 
@@ -1678,6 +1799,7 @@ struct ext4_group_info {
        ext4_grpblk_t   bb_first_free;  /* first free block */
        ext4_grpblk_t   bb_free;        /* total free blocks */
        ext4_grpblk_t   bb_fragments;   /* nr of freespace fragments */
+       ext4_grpblk_t   bb_largest_free_order;  /* order of largest frag in BG */
        struct          list_head bb_prealloc_list;
 #ifdef DOUBLE_CHECK
        void            *bb_bitmap;
@@ -1772,9 +1894,8 @@ extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
 extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
 extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
                                       int chunk);
-extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
-                              ext4_lblk_t iblock, unsigned int max_blocks,
-                              struct buffer_head *bh_result, int flags);
+extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
+                              struct ext4_map_blocks *map, int flags);
 extern void ext4_ext_truncate(struct inode *);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
@@ -1782,6 +1903,8 @@ extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
                          loff_t len);
 extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
                          ssize_t len);
+extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
+                          struct ext4_map_blocks *map, int flags);
 extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
                           sector_t block, unsigned int max_blocks,
                           struct buffer_head *bh, int flags);
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index b79ad51..dade0c0 100644
@@ -273,7 +273,7 @@ static inline int ext4_should_journal_data(struct inode *inode)
                return 1;
        if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
                return 1;
-       if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+       if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
                return 1;
        return 0;
 }
@@ -284,7 +284,7 @@ static inline int ext4_should_order_data(struct inode *inode)
                return 0;
        if (!S_ISREG(inode->i_mode))
                return 0;
-       if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+       if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
                return 0;
        if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
                return 1;
@@ -297,7 +297,7 @@ static inline int ext4_should_writeback_data(struct inode *inode)
                return 0;
        if (EXT4_JOURNAL(inode) == NULL)
                return 1;
-       if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+       if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
                return 0;
        if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
                return 1;
@@ -321,7 +321,7 @@ static inline int ext4_should_dioread_nolock(struct inode *inode)
                return 0;
        if (!S_ISREG(inode->i_mode))
                return 0;
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return 0;
        if (ext4_should_journal_data(inode))
                return 0;
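
Taken together, the predicates above resolve each inode to exactly one data mode: the per-inode JOURNAL_DATA flag wins, otherwise the mount-wide data= option decides. A simplified userspace model of that precedence (the journal-less and non-regular-file special cases handled by the real predicates are omitted):

    #include <stdio.h>

    enum data_mode { JOURNAL_DATA, ORDERED_DATA, WRITEBACK_DATA };

    static enum data_mode pick_mode(enum data_mode mount_opt, int journal_data_flag)
    {
        /* per-inode flag overrides the mount-wide option */
        return journal_data_flag ? JOURNAL_DATA : mount_opt;
    }

    int main(void)
    {
        printf("%d\n", pick_mode(ORDERED_DATA, 0));   /* 1: ordered */
        printf("%d\n", pick_mode(WRITEBACK_DATA, 1)); /* 0: journal */
        return 0;
    }
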
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 236b834..377309c 100644
@@ -107,11 +107,8 @@ static int ext4_ext_truncate_extend_restart(handle_t *handle,
        if (err <= 0)
                return err;
        err = ext4_truncate_restart_trans(handle, inode, needed);
-       /*
-        * We have dropped i_data_sem so someone might have cached again
-        * an extent we are going to truncate.
-        */
-       ext4_ext_invalidate_cache(inode);
+       if (err == 0)
+               err = -EAGAIN;
 
        return err;
 }
@@ -185,10 +182,10 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
        if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
                /*
                 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
-                * block groups per flexgroup, reserve the first block 
-                * group for directories and special files.  Regular 
+                * block groups per flexgroup, reserve the first block
+                * group for directories and special files.  Regular
                 * files will start at the second block group.  This
-                * tends to speed up directory access and improves 
+                * tends to speed up directory access and improves
                 * fsck times.
                 */
                block_group &= ~(flex_size-1);
@@ -439,10 +436,10 @@ static int __ext4_ext_check(const char *function, struct inode *inode,
        return 0;
 
 corrupted:
-       __ext4_error(inode->i_sb, function,
-                       "bad header/extent in inode #%lu: %s - magic %x, "
+       ext4_error_inode(function, inode,
+                       "bad header/extent: %s - magic %x, "
                        "entries %u, max %u(%u), depth %u(%u)",
-                       inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
+                       error_msg, le16_to_cpu(eh->eh_magic),
                        le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
                        max, le16_to_cpu(eh->eh_depth), depth);
 
@@ -1622,9 +1619,7 @@ int ext4_ext_try_to_merge(struct inode *inode,
                merge_done = 1;
                WARN_ON(eh->eh_entries == 0);
                if (!eh->eh_entries)
-                       ext4_error(inode->i_sb,
-                                  "inode#%lu, eh->eh_entries = 0!",
-                                  inode->i_ino);
+                       EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
        }
 
        return merge_done;
@@ -2039,7 +2034,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
        struct ext4_ext_cache *cex;
        int ret = EXT4_EXT_CACHE_NO;
 
-       /* 
+       /*
         * We borrow i_block_reservation_lock to protect i_cached_extent
         */
        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
@@ -2361,7 +2356,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
        int depth = ext_depth(inode);
        struct ext4_ext_path *path;
        handle_t *handle;
-       int i = 0, err = 0;
+       int i, err;
 
        ext_debug("truncate since %u\n", start);
 
@@ -2370,23 +2365,26 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
        if (IS_ERR(handle))
                return PTR_ERR(handle);
 
+again:
        ext4_ext_invalidate_cache(inode);
 
        /*
         * We start scanning from right side, freeing all the blocks
         * after i_size and walking into the tree depth-wise.
         */
+       depth = ext_depth(inode);
        path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
        if (path == NULL) {
                ext4_journal_stop(handle);
                return -ENOMEM;
        }
+       path[0].p_depth = depth;
        path[0].p_hdr = ext_inode_hdr(inode);
        if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
                err = -EIO;
                goto out;
        }
-       path[0].p_depth = depth;
+       i = err = 0;
 
        while (i >= 0 && err == 0) {
                if (i == depth) {
@@ -2480,6 +2478,8 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
 out:
        ext4_ext_drop_refs(path);
        kfree(path);
+       if (err == -EAGAIN)
+               goto again;
        ext4_journal_stop(handle);
 
        return err;
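
The restart logic in this and the preceding extents.c hunks works as a unit: ext4_ext_truncate_extend_restart() now maps a successful transaction restart to -EAGAIN, and ext4_ext_remove_space() reacts by jumping back to again:, re-reading the tree depth and invalidating the extent cache before rescanning, since the tree may have changed while i_data_sem was dropped. A minimal userspace sketch of that retry shape (do_work() is a hypothetical stand-in for the extent-tree scan):

    #include <stdio.h>
    #include <errno.h>

    static int attempts;

    static int do_work(void)
    {
        if (++attempts < 3)
            return -EAGAIN;    /* state changed under us; start over */
        return 0;
    }

    int main(void)
    {
        int err;
    again:
        /* re-read any state that may have changed, then rescan */
        err = do_work();
        if (err == -EAGAIN)
            goto again;
        printf("done after %d attempts, err=%d\n", attempts, err);
        return 0;
    }
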
@@ -2544,7 +2544,7 @@ static void bi_complete(struct bio *bio, int error)
 /* FIXME!! we need to try to merge to left or right after zero-out  */
 static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
 {
-       int ret = -EIO;
+       int ret;
        struct bio *bio;
        int blkbits, blocksize;
        sector_t ee_pblock;
@@ -2568,6 +2568,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
                        len = ee_len;
 
                bio = bio_alloc(GFP_NOIO, len);
+               if (!bio)
+                       return -ENOMEM;
+
                bio->bi_sector = ee_pblock;
                bio->bi_bdev   = inode->i_sb->s_bdev;
 
@@ -2595,22 +2598,20 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
                submit_bio(WRITE, bio);
                wait_for_completion(&event);
 
-               if (test_bit(BIO_UPTODATE, &bio->bi_flags))
-                       ret = 0;
-               else {
-                       ret = -EIO;
-                       break;
+               if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+                       bio_put(bio);
+                       return -EIO;
                }
                bio_put(bio);
                ee_len    -= done;
                ee_pblock += done  << (blkbits - 9);
        }
-       return ret;
+       return 0;
 }
 
 #define EXT4_EXT_ZERO_LEN 7
 /*
- * This function is called by ext4_ext_get_blocks() if someone tries to write
+ * This function is called by ext4_ext_map_blocks() if someone tries to write
  * to an uninitialized extent. It may result in splitting the uninitialized
 * extent into multiple extents (up to three - one initialized and two
  * uninitialized).
@@ -2620,39 +2621,55 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
 *   c> Splits in three extents: Someone is writing in the middle of the extent
  */
 static int ext4_ext_convert_to_initialized(handle_t *handle,
-                                               struct inode *inode,
-                                               struct ext4_ext_path *path,
-                                               ext4_lblk_t iblock,
-                                               unsigned int max_blocks)
+                                          struct inode *inode,
+                                          struct ext4_map_blocks *map,
+                                          struct ext4_ext_path *path)
 {
        struct ext4_extent *ex, newex, orig_ex;
        struct ext4_extent *ex1 = NULL;
        struct ext4_extent *ex2 = NULL;
        struct ext4_extent *ex3 = NULL;
        struct ext4_extent_header *eh;
-       ext4_lblk_t ee_block;
+       ext4_lblk_t ee_block, eof_block;
        unsigned int allocated, ee_len, depth;
        ext4_fsblk_t newblock;
        int err = 0;
        int ret = 0;
+       int may_zeroout;
+
+       ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
+               "block %llu, max_blocks %u\n", inode->i_ino,
+               (unsigned long long)map->m_lblk, map->m_len);
+
+       eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+               inode->i_sb->s_blocksize_bits;
+       if (eof_block < map->m_lblk + map->m_len)
+               eof_block = map->m_lblk + map->m_len;
 
        depth = ext_depth(inode);
        eh = path[depth].p_hdr;
        ex = path[depth].p_ext;
        ee_block = le32_to_cpu(ex->ee_block);
        ee_len = ext4_ext_get_actual_len(ex);
-       allocated = ee_len - (iblock - ee_block);
-       newblock = iblock - ee_block + ext_pblock(ex);
+       allocated = ee_len - (map->m_lblk - ee_block);
+       newblock = map->m_lblk - ee_block + ext_pblock(ex);
+
        ex2 = ex;
        orig_ex.ee_block = ex->ee_block;
        orig_ex.ee_len   = cpu_to_le16(ee_len);
        ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
 
+       /*
+        * It is safe to convert extent to initialized via explicit
+        * zeroout only if extent is fully inside i_size or new_size.
+        */
+       may_zeroout = ee_block + ee_len <= eof_block;
+
        err = ext4_ext_get_access(handle, inode, path + depth);
        if (err)
                goto out;
        /* If extent has less than 2*EXT4_EXT_ZERO_LEN, zero out directly */
-       if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
+       if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
                err =  ext4_ext_zeroout(inode, &orig_ex);
                if (err)
                        goto fix_extent_len;
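
The new may_zeroout guard encodes the rule stated in the comment: zeroing out converts the whole extent to initialized data, so it is only safe when the extent ends at or before eof_block, i.e. inside i_size rounded up to a block, extended to cover the write itself. A userspace sketch of that computation with two worked cases:

    #include <stdio.h>

    static int may_zeroout(unsigned long long i_size, unsigned blkbits,
                           unsigned ee_block, unsigned ee_len,
                           unsigned m_lblk, unsigned m_len)
    {
        unsigned long long bs = 1ULL << blkbits;
        unsigned long long eof_block = (i_size + bs - 1) >> blkbits;

        if (eof_block < m_lblk + m_len)
            eof_block = m_lblk + m_len;    /* the write itself extends EOF */

        return ee_block + ee_len <= eof_block;
    }

    int main(void)
    {
        /* 4 KiB blocks, 1 MiB file = 256 blocks */
        printf("%d\n", may_zeroout(1 << 20, 12, 200, 50, 220, 10));
        /* 1: extent [200,250) ends inside i_size */
        printf("%d\n", may_zeroout(1 << 20, 12, 300, 100, 320, 10));
        /* 0: extent [300,400) ends past i_size and past the write */
        return 0;
    }
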
@@ -2665,10 +2682,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                return allocated;
        }
 
-       /* ex1: ee_block to iblock - 1 : uninitialized */
-       if (iblock > ee_block) {
+       /* ex1: ee_block to map->m_lblk - 1 : uninitialized */
+       if (map->m_lblk > ee_block) {
                ex1 = ex;
-               ex1->ee_len = cpu_to_le16(iblock - ee_block);
+               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
                ext4_ext_mark_uninitialized(ex1);
                ex2 = &newex;
        }
@@ -2677,15 +2694,15 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
         * we insert ex3, if ex1 is NULL. This is to avoid temporary
         * overlap of blocks.
         */
-       if (!ex1 && allocated > max_blocks)
-               ex2->ee_len = cpu_to_le16(max_blocks);
+       if (!ex1 && allocated > map->m_len)
+               ex2->ee_len = cpu_to_le16(map->m_len);
        /* ex3: to ee_block + ee_len : uninitialised */
-       if (allocated > max_blocks) {
+       if (allocated > map->m_len) {
                unsigned int newdepth;
                /* If extent has less than EXT4_EXT_ZERO_LEN, zero out directly */
-               if (allocated <= EXT4_EXT_ZERO_LEN) {
+               if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
                        /*
-                        * iblock == ee_block is handled by the zerouout
+                        * map->m_lblk == ee_block is handled by the zeroout
                         * at the beginning.
                         * Mark first half uninitialized.
                         * Mark second half initialized and zero out the
@@ -2698,7 +2715,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                        ext4_ext_dirty(handle, inode, path + depth);
 
                        ex3 = &newex;
-                       ex3->ee_block = cpu_to_le32(iblock);
+                       ex3->ee_block = cpu_to_le32(map->m_lblk);
                        ext4_ext_store_pblock(ex3, newblock);
                        ex3->ee_len = cpu_to_le16(allocated);
                        err = ext4_ext_insert_extent(handle, inode, path,
@@ -2711,7 +2728,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                ex->ee_len   = orig_ex.ee_len;
                                ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                                ext4_ext_dirty(handle, inode, path + depth);
-                               /* blocks available from iblock */
+                               /* blocks available from map->m_lblk */
                                return allocated;
 
                        } else if (err)
@@ -2733,8 +2750,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                 */
                                depth = ext_depth(inode);
                                ext4_ext_drop_refs(path);
-                               path = ext4_ext_find_extent(inode,
-                                                               iblock, path);
+                               path = ext4_ext_find_extent(inode, map->m_lblk,
+                                                           path);
                                if (IS_ERR(path)) {
                                        err = PTR_ERR(path);
                                        return err;
@@ -2754,12 +2771,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                        return allocated;
                }
                ex3 = &newex;
-               ex3->ee_block = cpu_to_le32(iblock + max_blocks);
-               ext4_ext_store_pblock(ex3, newblock + max_blocks);
-               ex3->ee_len = cpu_to_le16(allocated - max_blocks);
+               ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len);
+               ext4_ext_store_pblock(ex3, newblock + map->m_len);
+               ex3->ee_len = cpu_to_le16(allocated - map->m_len);
                ext4_ext_mark_uninitialized(ex3);
                err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
-               if (err == -ENOSPC) {
+               if (err == -ENOSPC && may_zeroout) {
                        err =  ext4_ext_zeroout(inode, &orig_ex);
                        if (err)
                                goto fix_extent_len;
@@ -2769,7 +2786,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                        ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                        ext4_ext_dirty(handle, inode, path + depth);
                        /* zeroed the full extent */
-                       /* blocks available from iblock */
+                       /* blocks available from map->m_lblk */
                        return allocated;
 
                } else if (err)
@@ -2783,11 +2800,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                 * update the extent length after successful insert of the
                 * split extent
                 */
-               orig_ex.ee_len = cpu_to_le16(ee_len -
-                                               ext4_ext_get_actual_len(ex3));
+               ee_len -= ext4_ext_get_actual_len(ex3);
+               orig_ex.ee_len = cpu_to_le16(ee_len);
+               may_zeroout = ee_block + ee_len <= eof_block;
+
                depth = newdepth;
                ext4_ext_drop_refs(path);
-               path = ext4_ext_find_extent(inode, iblock, path);
+               path = ext4_ext_find_extent(inode, map->m_lblk, path);
                if (IS_ERR(path)) {
                        err = PTR_ERR(path);
                        goto out;
@@ -2801,14 +2820,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                if (err)
                        goto out;
 
-               allocated = max_blocks;
+               allocated = map->m_len;
 
                /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
                 * to insert an extent in the middle, zero out directly
                 * otherwise give the extent a chance to merge to left
                 */
                if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
-                                                       iblock != ee_block) {
+                       map->m_lblk != ee_block && may_zeroout) {
                        err =  ext4_ext_zeroout(inode, &orig_ex);
                        if (err)
                                goto fix_extent_len;
@@ -2818,7 +2837,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                        ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                        ext4_ext_dirty(handle, inode, path + depth);
                        /* zero out the first half */
-                       /* blocks available from iblock */
+                       /* blocks available from map->m_lblk */
                        return allocated;
                }
        }
@@ -2829,12 +2848,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
         */
        if (ex1 && ex1 != ex) {
                ex1 = ex;
-               ex1->ee_len = cpu_to_le16(iblock - ee_block);
+               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
                ext4_ext_mark_uninitialized(ex1);
                ex2 = &newex;
        }
-       /* ex2: iblock to iblock + maxblocks-1 : initialised */
-       ex2->ee_block = cpu_to_le32(iblock);
+       /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */
+       ex2->ee_block = cpu_to_le32(map->m_lblk);
        ext4_ext_store_pblock(ex2, newblock);
        ex2->ee_len = cpu_to_le16(allocated);
        if (ex2 != ex)
@@ -2877,7 +2896,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
        goto out;
 insert:
        err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
-       if (err == -ENOSPC) {
+       if (err == -ENOSPC && may_zeroout) {
                err =  ext4_ext_zeroout(inode, &orig_ex);
                if (err)
                        goto fix_extent_len;
@@ -2904,7 +2923,7 @@ fix_extent_len:
 }
 
 /*
- * This function is called by ext4_ext_get_blocks() from
+ * This function is called by ext4_ext_map_blocks() from
 * ext4_get_blocks_dio_write() when DIO is used to write
  * to an uninitialized extent.
  *
@@ -2927,9 +2946,8 @@ fix_extent_len:
  */
 static int ext4_split_unwritten_extents(handle_t *handle,
                                        struct inode *inode,
+                                       struct ext4_map_blocks *map,
                                        struct ext4_ext_path *path,
-                                       ext4_lblk_t iblock,
-                                       unsigned int max_blocks,
                                        int flags)
 {
        struct ext4_extent *ex, newex, orig_ex;
@@ -2937,41 +2955,55 @@ static int ext4_split_unwritten_extents(handle_t *handle,
        struct ext4_extent *ex2 = NULL;
        struct ext4_extent *ex3 = NULL;
        struct ext4_extent_header *eh;
-       ext4_lblk_t ee_block;
+       ext4_lblk_t ee_block, eof_block;
        unsigned int allocated, ee_len, depth;
        ext4_fsblk_t newblock;
        int err = 0;
+       int may_zeroout;
+
+       ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
+               "block %llu, max_blocks %u\n", inode->i_ino,
+               (unsigned long long)map->m_lblk, map->m_len);
+
+       eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+               inode->i_sb->s_blocksize_bits;
+       if (eof_block < map->m_lblk + map->m_len)
+               eof_block = map->m_lblk + map->m_len;
 
-       ext_debug("ext4_split_unwritten_extents: inode %lu,"
-                 "iblock %llu, max_blocks %u\n", inode->i_ino,
-                 (unsigned long long)iblock, max_blocks);
        depth = ext_depth(inode);
        eh = path[depth].p_hdr;
        ex = path[depth].p_ext;
        ee_block = le32_to_cpu(ex->ee_block);
        ee_len = ext4_ext_get_actual_len(ex);
-       allocated = ee_len - (iblock - ee_block);
-       newblock = iblock - ee_block + ext_pblock(ex);
+       allocated = ee_len - (map->m_lblk - ee_block);
+       newblock = map->m_lblk - ee_block + ext_pblock(ex);
+
        ex2 = ex;
        orig_ex.ee_block = ex->ee_block;
        orig_ex.ee_len   = cpu_to_le16(ee_len);
        ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
 
+       /*
+        * It is safe to convert extent to initialized via explicit
+        * zeroout only if extent is fully inside i_size or new_size.
+        */
+       may_zeroout = ee_block + ee_len <= eof_block;
+
        /*
         * If the uninitialized extent begins at the same logical
         * block where the write begins, and the write completely
         * covers the extent, then we don't need to split it.
         */
-       if ((iblock == ee_block) && (allocated <= max_blocks))
+       if ((map->m_lblk == ee_block) && (allocated <= map->m_len))
                return allocated;
 
        err = ext4_ext_get_access(handle, inode, path + depth);
        if (err)
                goto out;
-       /* ex1: ee_block to iblock - 1 : uninitialized */
-       if (iblock > ee_block) {
+       /* ex1: ee_block to map->m_lblk - 1 : uninitialized */
+       if (map->m_lblk > ee_block) {
                ex1 = ex;
-               ex1->ee_len = cpu_to_le16(iblock - ee_block);
+               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
                ext4_ext_mark_uninitialized(ex1);
                ex2 = &newex;
        }
@@ -2980,18 +3012,18 @@ static int ext4_split_unwritten_extents(handle_t *handle,
         * we insert ex3, if ex1 is NULL. This is to avoid temporary
         * overlap of blocks.
         */
-       if (!ex1 && allocated > max_blocks)
-               ex2->ee_len = cpu_to_le16(max_blocks);
+       if (!ex1 && allocated > map->m_len)
+               ex2->ee_len = cpu_to_le16(map->m_len);
        /* ex3: to ee_block + ee_len : uninitialised */
-       if (allocated > max_blocks) {
+       if (allocated > map->m_len) {
                unsigned int newdepth;
                ex3 = &newex;
-               ex3->ee_block = cpu_to_le32(iblock + max_blocks);
-               ext4_ext_store_pblock(ex3, newblock + max_blocks);
-               ex3->ee_len = cpu_to_le16(allocated - max_blocks);
+               ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len);
+               ext4_ext_store_pblock(ex3, newblock + map->m_len);
+               ex3->ee_len = cpu_to_le16(allocated - map->m_len);
                ext4_ext_mark_uninitialized(ex3);
                err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
-               if (err == -ENOSPC) {
+               if (err == -ENOSPC && may_zeroout) {
                        err =  ext4_ext_zeroout(inode, &orig_ex);
                        if (err)
                                goto fix_extent_len;
@@ -3001,7 +3033,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
                        ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                        ext4_ext_dirty(handle, inode, path + depth);
                        /* zeroed the full extent */
-                       /* blocks available from iblock */
+                       /* blocks available from map->m_lblk */
                        return allocated;
 
                } else if (err)
@@ -3015,11 +3047,13 @@ static int ext4_split_unwritten_extents(handle_t *handle,
                 * update the extent length after successful insert of the
                 * split extent
                 */
-               orig_ex.ee_len = cpu_to_le16(ee_len -
-                                               ext4_ext_get_actual_len(ex3));
+               ee_len -= ext4_ext_get_actual_len(ex3);
+               orig_ex.ee_len = cpu_to_le16(ee_len);
+               may_zeroout = ee_block + ee_len <= eof_block;
+
                depth = newdepth;
                ext4_ext_drop_refs(path);
-               path = ext4_ext_find_extent(inode, iblock, path);
+               path = ext4_ext_find_extent(inode, map->m_lblk, path);
                if (IS_ERR(path)) {
                        err = PTR_ERR(path);
                        goto out;
@@ -3033,7 +3067,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
                if (err)
                        goto out;
 
-               allocated = max_blocks;
+               allocated = map->m_len;
        }
        /*
         * If there was a change of depth as part of the
@@ -3042,15 +3076,15 @@ static int ext4_split_unwritten_extents(handle_t *handle,
         */
        if (ex1 && ex1 != ex) {
                ex1 = ex;
-               ex1->ee_len = cpu_to_le16(iblock - ee_block);
+               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
                ext4_ext_mark_uninitialized(ex1);
                ex2 = &newex;
        }
        /*
-        * ex2: iblock to iblock + maxblocks-1 : to be direct IO written,
-        * uninitialised still.
+        * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written
+        * using direct I/O, uninitialised still.
         */
-       ex2->ee_block = cpu_to_le32(iblock);
+       ex2->ee_block = cpu_to_le32(map->m_lblk);
        ext4_ext_store_pblock(ex2, newblock);
        ex2->ee_len = cpu_to_le16(allocated);
        ext4_ext_mark_uninitialized(ex2);
@@ -3062,7 +3096,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
        goto out;
 insert:
        err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
-       if (err == -ENOSPC) {
+       if (err == -ENOSPC && may_zeroout) {
                err =  ext4_ext_zeroout(inode, &orig_ex);
                if (err)
                        goto fix_extent_len;
@@ -3152,10 +3186,9 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,
 
 static int
 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
-                       ext4_lblk_t iblock, unsigned int max_blocks,
+                       struct ext4_map_blocks *map,
                        struct ext4_ext_path *path, int flags,
-                       unsigned int allocated, struct buffer_head *bh_result,
-                       ext4_fsblk_t newblock)
+                       unsigned int allocated, ext4_fsblk_t newblock)
 {
        int ret = 0;
        int err = 0;
@@ -3163,15 +3196,14 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 
        ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical"
                  "block %llu, max_blocks %u, flags %d, allocated %u",
-                 inode->i_ino, (unsigned long long)iblock, max_blocks,
+                 inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,
                  flags, allocated);
        ext4_ext_show_leaf(inode, path);
 
        /* get_block() before submit the IO, split the extent */
        if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
-               ret = ext4_split_unwritten_extents(handle,
-                                               inode, path, iblock,
-                                               max_blocks, flags);
+               ret = ext4_split_unwritten_extents(handle, inode, map,
+                                                  path, flags);
                /*
                 * Flag the inode (non-aio case) or end_io struct (aio case)
                 * that this IO needs conversion to written when IO is
@@ -3182,7 +3214,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                else
                        ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
                if (ext4_should_dioread_nolock(inode))
-                       set_buffer_uninit(bh_result);
+                       map->m_flags |= EXT4_MAP_UNINIT;
                goto out;
        }
        /* IO end_io complete, convert the filled extent to written */
@@ -3210,14 +3242,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                 * the buffer head will be unmapped so that
                 * a read from the block returns 0s.
                 */
-               set_buffer_unwritten(bh_result);
+               map->m_flags |= EXT4_MAP_UNWRITTEN;
                goto out1;
        }
 
        /* buffered write, writepage time, convert*/
-       ret = ext4_ext_convert_to_initialized(handle, inode,
-                                               path, iblock,
-                                               max_blocks);
+       ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
        if (ret >= 0)
                ext4_update_inode_fsync_trans(handle, inode, 1);
 out:
@@ -3226,7 +3256,7 @@ out:
                goto out2;
        } else
                allocated = ret;
-       set_buffer_new(bh_result);
+       map->m_flags |= EXT4_MAP_NEW;
        /*
         * if we allocated more blocks than requested
         * we need to make sure we unmap the extra block
@@ -3234,11 +3264,11 @@ out:
         * unmapped later when we find the buffer_head marked
         * new.
         */
-       if (allocated > max_blocks) {
+       if (allocated > map->m_len) {
                unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
-                                       newblock + max_blocks,
-                                       allocated - max_blocks);
-               allocated = max_blocks;
+                                       newblock + map->m_len,
+                                       allocated - map->m_len);
+               allocated = map->m_len;
        }
 
        /*
@@ -3252,13 +3282,13 @@ out:
                ext4_da_update_reserve_space(inode, allocated, 0);
 
 map_out:
-       set_buffer_mapped(bh_result);
+       map->m_flags |= EXT4_MAP_MAPPED;
 out1:
-       if (allocated > max_blocks)
-               allocated = max_blocks;
+       if (allocated > map->m_len)
+               allocated = map->m_len;
        ext4_ext_show_leaf(inode, path);
-       bh_result->b_bdev = inode->i_sb->s_bdev;
-       bh_result->b_blocknr = newblock;
+       map->m_pblk = newblock;
+       map->m_len = allocated;
 out2:
        if (path) {
                ext4_ext_drop_refs(path);
@@ -3284,26 +3314,23 @@ out2:
  *
  * return < 0, error case.
  */
-int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
-                       ext4_lblk_t iblock,
-                       unsigned int max_blocks, struct buffer_head *bh_result,
-                       int flags)
+int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
+                       struct ext4_map_blocks *map, int flags)
 {
        struct ext4_ext_path *path = NULL;
        struct ext4_extent_header *eh;
        struct ext4_extent newex, *ex, *last_ex;
        ext4_fsblk_t newblock;
-       int err = 0, depth, ret, cache_type;
+       int i, err = 0, depth, ret, cache_type;
        unsigned int allocated = 0;
        struct ext4_allocation_request ar;
        ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
 
-       __clear_bit(BH_New, &bh_result->b_state);
        ext_debug("blocks %u/%u requested for inode %lu\n",
-                       iblock, max_blocks, inode->i_ino);
+                 map->m_lblk, map->m_len, inode->i_ino);
 
        /* check in cache */
-       cache_type = ext4_ext_in_cache(inode, iblock, &newex);
+       cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex);
        if (cache_type) {
                if (cache_type == EXT4_EXT_CACHE_GAP) {
                        if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
@@ -3316,12 +3343,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                        /* we should allocate requested block */
                } else if (cache_type == EXT4_EXT_CACHE_EXTENT) {
                        /* block is already allocated */
-                       newblock = iblock
+                       newblock = map->m_lblk
                                   - le32_to_cpu(newex.ee_block)
                                   + ext_pblock(&newex);
                        /* number of remaining blocks in the extent */
                        allocated = ext4_ext_get_actual_len(&newex) -
-                                       (iblock - le32_to_cpu(newex.ee_block));
+                               (map->m_lblk - le32_to_cpu(newex.ee_block));
                        goto out;
                } else {
                        BUG();
@@ -3329,7 +3356,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        }
 
        /* find extent for this block */
-       path = ext4_ext_find_extent(inode, iblock, NULL);
+       path = ext4_ext_find_extent(inode, map->m_lblk, NULL);
        if (IS_ERR(path)) {
                err = PTR_ERR(path);
                path = NULL;
@@ -3345,8 +3372,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
         */
        if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
                EXT4_ERROR_INODE(inode, "bad extent address "
-                                "iblock: %d, depth: %d pblock %lld",
-                                iblock, depth, path[depth].p_block);
+                                "lblock: %lu, depth: %d pblock %lld",
+                                (unsigned long) map->m_lblk, depth,
+                                path[depth].p_block);
                err = -EIO;
                goto out2;
        }
@@ -3364,12 +3392,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                 */
                ee_len = ext4_ext_get_actual_len(ex);
                /* if found extent covers block, simply return it */
-               if (in_range(iblock, ee_block, ee_len)) {
-                       newblock = iblock - ee_block + ee_start;
+               if (in_range(map->m_lblk, ee_block, ee_len)) {
+                       newblock = map->m_lblk - ee_block + ee_start;
                        /* number of remaining blocks in the extent */
-                       allocated = ee_len - (iblock - ee_block);
-                       ext_debug("%u fit into %u:%d -> %llu\n", iblock,
-                                       ee_block, ee_len, newblock);
+                       allocated = ee_len - (map->m_lblk - ee_block);
+                       ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
+                                 ee_block, ee_len, newblock);
 
                        /* Do not put uninitialized extent in the cache */
                        if (!ext4_ext_is_uninitialized(ex)) {
@@ -3379,8 +3407,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                                goto out;
                        }
                        ret = ext4_ext_handle_uninitialized_extents(handle,
-                                       inode, iblock, max_blocks, path,
-                                       flags, allocated, bh_result, newblock);
+                                       inode, map, path, flags, allocated,
+                                       newblock);
                        return ret;
                }
        }
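
When the lookup lands inside an existing extent, the mapping above is pure arithmetic: offset the extent's physical start by how far into the extent the logical block sits, and report the blocks remaining to the extent's end. A small standalone check of those two formulas:

    #include <stdio.h>
    #include <assert.h>

    int main(void)
    {
        unsigned ee_block = 100;              /* extent's first logical block */
        unsigned long long ee_start = 5000;   /* extent's first physical block */
        unsigned ee_len = 16;                 /* extent length in blocks */
        unsigned lblk = 107;                  /* block being looked up */

        assert(lblk >= ee_block && lblk < ee_block + ee_len);

        unsigned long long newblock = lblk - ee_block + ee_start;
        unsigned allocated = ee_len - (lblk - ee_block);

        printf("lblk %u -> pblk %llu, %u blocks remain\n",
               lblk, newblock, allocated);    /* 107 -> 5007, 9 blocks */
        return 0;
    }
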
@@ -3394,7 +3422,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                 * put just found gap into cache to speed up
                 * subsequent requests
                 */
-               ext4_ext_put_gap_in_cache(inode, path, iblock);
+               ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
                goto out2;
        }
        /*
@@ -3402,11 +3430,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
         */
 
        /* find neighbour allocated blocks */
-       ar.lleft = iblock;
+       ar.lleft = map->m_lblk;
        err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
        if (err)
                goto out2;
-       ar.lright = iblock;
+       ar.lright = map->m_lblk;
        err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);
        if (err)
                goto out2;
@@ -3417,26 +3445,26 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
         * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is
         * EXT_UNINIT_MAX_LEN.
         */
-       if (max_blocks > EXT_INIT_MAX_LEN &&
+       if (map->m_len > EXT_INIT_MAX_LEN &&
            !(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
-               max_blocks = EXT_INIT_MAX_LEN;
-       else if (max_blocks > EXT_UNINIT_MAX_LEN &&
+               map->m_len = EXT_INIT_MAX_LEN;
+       else if (map->m_len > EXT_UNINIT_MAX_LEN &&
                 (flags & EXT4_GET_BLOCKS_UNINIT_EXT))
-               max_blocks = EXT_UNINIT_MAX_LEN;
+               map->m_len = EXT_UNINIT_MAX_LEN;
 
-       /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
-       newex.ee_block = cpu_to_le32(iblock);
-       newex.ee_len = cpu_to_le16(max_blocks);
+       /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
+       newex.ee_block = cpu_to_le32(map->m_lblk);
+       newex.ee_len = cpu_to_le16(map->m_len);
        err = ext4_ext_check_overlap(inode, &newex, path);
        if (err)
                allocated = ext4_ext_get_actual_len(&newex);
        else
-               allocated = max_blocks;
+               allocated = map->m_len;
 
        /* allocate new block */
        ar.inode = inode;
-       ar.goal = ext4_ext_find_goal(inode, path, iblock);
-       ar.logical = iblock;
+       ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
+       ar.logical = map->m_lblk;
        ar.len = allocated;
        if (S_ISREG(inode->i_mode))
                ar.flags = EXT4_MB_HINT_DATA;
@@ -3470,21 +3498,33 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                                                     EXT4_STATE_DIO_UNWRITTEN);
                }
                if (ext4_should_dioread_nolock(inode))
-                       set_buffer_uninit(bh_result);
+                       map->m_flags |= EXT4_MAP_UNINIT;
        }
 
-       if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) {
+       if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) {
                if (unlikely(!eh->eh_entries)) {
                        EXT4_ERROR_INODE(inode,
-                                        "eh->eh_entries == 0 ee_block %d",
-                                        ex->ee_block);
+                                        "eh->eh_entries == 0 and "
+                                        "EOFBLOCKS_FL set");
                        err = -EIO;
                        goto out2;
                }
                last_ex = EXT_LAST_EXTENT(eh);
-               if (iblock + ar.len > le32_to_cpu(last_ex->ee_block)
-                   + ext4_ext_get_actual_len(last_ex))
-                       EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
+               /*
+                * If the current leaf block was reached by looking at
+                * the last index block all the way down the tree, and
+                * we are extending the inode beyond the last extent
+                * in the current leaf block, then clear the
+                * EOFBLOCKS_FL flag.
+                */
+               for (i = depth-1; i >= 0; i--) {
+                       if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
+                               break;
+               }
+               if ((i < 0) &&
+                   (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) +
+                    ext4_ext_get_actual_len(last_ex)))
+                       ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
        }
        err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
        if (err) {
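
The new loop above replaces a check against only the current leaf: EOFBLOCKS may be cleared only if the lookup descended through the last index at every level, i.e. the leaf really is the rightmost one in the tree. A minimal sketch of that test, with hypothetical types standing in for ext4's path structure:

    struct level {
            int p_idx;      /* index we descended through at this level */
            int last_idx;   /* EXT_LAST_INDEX() equivalent for the block */
    };

    /* Returns 1 only if every level followed its last index downward. */
    static int path_is_rightmost(const struct level *path, int depth)
    {
            int i;

            for (i = depth - 1; i >= 0; i--)
                    if (path[i].p_idx != path[i].last_idx)
                            return 0;       /* tree continues to the right */
            return 1;
    }
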
@@ -3500,9 +3540,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        /* previous routine could use block we allocated */
        newblock = ext_pblock(&newex);
        allocated = ext4_ext_get_actual_len(&newex);
-       if (allocated > max_blocks)
-               allocated = max_blocks;
-       set_buffer_new(bh_result);
+       if (allocated > map->m_len)
+               allocated = map->m_len;
+       map->m_flags |= EXT4_MAP_NEW;
 
        /*
         * Update reserved blocks/metadata blocks after successful
@@ -3516,18 +3556,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
         * when it is _not_ an uninitialized extent.
         */
        if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
-               ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
+               ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock,
                                                EXT4_EXT_CACHE_EXTENT);
                ext4_update_inode_fsync_trans(handle, inode, 1);
        } else
                ext4_update_inode_fsync_trans(handle, inode, 0);
 out:
-       if (allocated > max_blocks)
-               allocated = max_blocks;
+       if (allocated > map->m_len)
+               allocated = map->m_len;
        ext4_ext_show_leaf(inode, path);
-       set_buffer_mapped(bh_result);
-       bh_result->b_bdev = inode->i_sb->s_bdev;
-       bh_result->b_blocknr = newblock;
+       map->m_flags |= EXT4_MAP_MAPPED;
+       map->m_pblk = newblock;
+       map->m_len = allocated;
 out2:
        if (path) {
                ext4_ext_drop_refs(path);
@@ -3625,7 +3665,7 @@ static void ext4_falloc_update_inode(struct inode *inode,
                 * can proceed even if the new size is the same as i_size.
                 */
                if (new_size > i_size_read(inode))
-                       EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL;
+                       ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
        }
 
 }
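
For context, EOFBLOCKS marks inodes that have blocks preallocated past i_size, which is exactly what fallocate(2) with FALLOC_FL_KEEP_SIZE produces. A small userspace illustration of that case (assuming a Linux system with fallocate support):

    #define _GNU_SOURCE
    #include <fcntl.h>

    /* Preallocate 1 MiB without growing st_size; on ext4 this is the
     * situation in which EXT4_INODE_EOFBLOCKS gets set on the inode. */
    static int prealloc_past_eof(int fd)
    {
            return fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20);
    }
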
@@ -3640,55 +3680,57 @@ static void ext4_falloc_update_inode(struct inode *inode,
 long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 {
        handle_t *handle;
-       ext4_lblk_t block;
        loff_t new_size;
        unsigned int max_blocks;
        int ret = 0;
        int ret2 = 0;
        int retries = 0;
-       struct buffer_head map_bh;
+       struct ext4_map_blocks map;
        unsigned int credits, blkbits = inode->i_blkbits;
 
        /*
         * currently supporting (pre)allocate mode for extent-based
         * files _only_
         */
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return -EOPNOTSUPP;
 
        /* preallocation to directories is currently not supported */
        if (S_ISDIR(inode->i_mode))
                return -ENODEV;
 
-       block = offset >> blkbits;
+       map.m_lblk = offset >> blkbits;
        /*
         * We can't just convert len to max_blocks; e.g. if blocksize =
         * 4096, offset = 3072 and len = 2048, the range spans two blocks
         */
        max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
-                                                       - block;
+               - map.m_lblk;
        /*
         * credits to insert 1 extent into extent tree
         */
        credits = ext4_chunk_trans_blocks(inode, max_blocks);
        mutex_lock(&inode->i_mutex);
+       ret = inode_newsize_ok(inode, (len + offset));
+       if (ret) {
+               mutex_unlock(&inode->i_mutex);
+               return ret;
+       }
 retry:
        while (ret >= 0 && ret < max_blocks) {
-               block = block + ret;
-               max_blocks = max_blocks - ret;
+               map.m_lblk = map.m_lblk + ret;
+               map.m_len = max_blocks = max_blocks - ret;
                handle = ext4_journal_start(inode, credits);
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
                        break;
                }
-               map_bh.b_state = 0;
-               ret = ext4_get_blocks(handle, inode, block,
-                                     max_blocks, &map_bh,
+               ret = ext4_map_blocks(handle, inode, &map,
                                      EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);
                if (ret <= 0) {
 #ifdef EXT4FS_DEBUG
                        WARN_ON(ret <= 0);
-                       printk(KERN_ERR "%s: ext4_ext_get_blocks "
+                       printk(KERN_ERR "%s: ext4_ext_map_blocks "
                                    "returned error inode#%lu, block=%u, "
                                    "max_blocks=%u", __func__,
-                                   inode->i_ino, block, max_blocks);
+                                   inode->i_ino, map.m_lblk, max_blocks);
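
The comment about not converting len to max_blocks directly is worth working through. With 4096-byte blocks, offset 3072 and len 2048 cover bytes 3072..5119, which touch two blocks even though len >> blkbits is zero. The sketch below open-codes the align-up computation (it is not the kernel's EXT4_BLOCK_ALIGN macro itself):

    #include <stdio.h>

    int main(void)
    {
            unsigned int blkbits = 12;                  /* 4096-byte blocks */
            unsigned long long offset = 3072, len = 2048;
            unsigned long long first = offset >> blkbits;
            unsigned long long end = (offset + len + (1ULL << blkbits) - 1)
                                     >> blkbits;        /* align up */

            /* prints "max_blocks = 2, len >> blkbits = 0" */
            printf("max_blocks = %llu, len >> blkbits = %llu\n",
                   end - first, len >> blkbits);
            return 0;
    }
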
@@ -3697,14 +3739,14 @@ retry:
                        ret2 = ext4_journal_stop(handle);
                        break;
                }
-               if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
+               if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
                                                blkbits) >> blkbits))
                        new_size = offset + len;
                else
-                       new_size = (block + ret) << blkbits;
+                       new_size = (map.m_lblk + ret) << blkbits;
 
                ext4_falloc_update_inode(inode, mode, new_size,
-                                               buffer_new(&map_bh));
+                                        (map.m_flags & EXT4_MAP_NEW));
                ext4_mark_inode_dirty(handle, inode);
                ret2 = ext4_journal_stop(handle);
                if (ret2)
@@ -3733,42 +3775,39 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
                                    ssize_t len)
 {
        handle_t *handle;
-       ext4_lblk_t block;
        unsigned int max_blocks;
        int ret = 0;
        int ret2 = 0;
-       struct buffer_head map_bh;
+       struct ext4_map_blocks map;
        unsigned int credits, blkbits = inode->i_blkbits;
 
-       block = offset >> blkbits;
+       map.m_lblk = offset >> blkbits;
        /*
         * We can't just convert len to max_blocks; e.g. if blocksize =
         * 4096, offset = 3072 and len = 2048, the range spans two blocks
         */
-       max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
-                                                       - block;
+       max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -
+                     map.m_lblk);
        /*
         * credits to insert 1 extent into extent tree
         */
        credits = ext4_chunk_trans_blocks(inode, max_blocks);
        while (ret >= 0 && ret < max_blocks) {
-               block = block + ret;
-               max_blocks = max_blocks - ret;
+               map.m_lblk += ret;
+               map.m_len = (max_blocks -= ret);
                handle = ext4_journal_start(inode, credits);
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
                        break;
                }
-               map_bh.b_state = 0;
-               ret = ext4_get_blocks(handle, inode, block,
-                                     max_blocks, &map_bh,
+               ret = ext4_map_blocks(handle, inode, &map,
                                      EXT4_GET_BLOCKS_IO_CONVERT_EXT);
                if (ret <= 0) {
                        WARN_ON(ret <= 0);
-                       printk(KERN_ERR "%s: ext4_ext_get_blocks "
+                       printk(KERN_ERR "%s: ext4_ext_map_blocks "
                                    "returned error inode#%lu, block=%u, "
                                    "max_blocks=%u", __func__,
-                                   inode->i_ino, block, max_blocks);
+                                   inode->i_ino, map.m_lblk, map.m_len);
                }
                ext4_mark_inode_dirty(handle, inode);
                ret2 = ext4_journal_stop(handle);
@@ -3898,7 +3937,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        int error = 0;
 
        /* fallback to generic here if not in extents fmt */
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return generic_block_fiemap(inode, fieinfo, start, len,
                        ext4_get_block);
 
index d0776e4..5313ae4 100644 (file)
@@ -66,7 +66,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
         * is smaller than s_maxbytes, which is for extent-mapped files.
         */
 
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) {
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
                struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
                size_t length = iov_length(iov, nr_segs);
 
index ef3d980..b6a74f9 100644 (file)
 
 #include <trace/events/ext4.h>
 
+/*
+ * If we're not journaling and this is a just-created file, we have to
+ * sync our parent directory (if it was freshly created) since
+ * otherwise it will only be written by writeback, leaving a huge
+ * window during which a crash may lose the file.  This may apply for
+ * the parent directory's parent as well, and so on recursively, if
+ * they are also freshly created.
+ */
+static void ext4_sync_parent(struct inode *inode)
+{
+       struct dentry *dentry = NULL;
+
+       while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
+               ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
+               dentry = list_entry(inode->i_dentry.next,
+                                   struct dentry, d_alias);
+               if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
+                       break;
+               inode = dentry->d_parent->d_inode;
+               sync_mapping_buffers(inode->i_mapping);
+       }
+}
+
 /*
  * akpm: A new design for ext4_sync_file().
  *
@@ -66,9 +89,13 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
        ret = flush_completed_IO(inode);
        if (ret < 0)
                return ret;
-       
-       if (!journal)
-               return simple_fsync(file, dentry, datasync);
+
+       if (!journal) {
+               ret = simple_fsync(file, dentry, datasync);
+               if (!ret && !list_empty(&inode->i_dentry))
+                       ext4_sync_parent(inode);
+               return ret;
+       }
 
        /*
         * data=writeback,ordered:
@@ -102,7 +129,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
                    (journal->j_flags & JBD2_BARRIER))
                        blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
                                        NULL, BLKDEV_IFL_WAIT);
-               jbd2_log_wait_commit(journal, commit_tid);
+               ret = jbd2_log_wait_commit(journal, commit_tid);
        } else if (journal->j_flags & JBD2_BARRIER)
                blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
                        BLKDEV_IFL_WAIT);
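
The ext4_sync_parent() comment above describes the no-journal crash window; careful applications defend against the same window themselves by fsyncing the directory after creating a file. A sketch of that userspace pattern (error handling abbreviated):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <unistd.h>

    static int create_durably(const char *dir, const char *path)
    {
            int fd, dfd, ret = -1;

            fd = open(path, O_CREAT | O_WRONLY, 0644);
            if (fd < 0)
                    return -1;
            dfd = open(dir, O_RDONLY | O_DIRECTORY);
            if (dfd < 0)
                    goto out;
            if (fsync(fd) == 0 && fsync(dfd) == 0)  /* file, then its dentry */
                    ret = 0;
            close(dfd);
    out:
            close(fd);
            return ret;
    }
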
index 1a0e183..25c4b31 100644 (file)
@@ -240,56 +240,49 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
        if (fatal)
                goto error_return;
 
-       /* Ok, now we can actually update the inode bitmaps.. */
-       cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
-                                       bit, bitmap_bh->b_data);
-       if (!cleared)
-               ext4_error(sb, "bit already cleared for inode %lu", ino);
-       else {
-               gdp = ext4_get_group_desc(sb, block_group, &bh2);
-
+       fatal = -ESRCH;
+       gdp = ext4_get_group_desc(sb, block_group, &bh2);
+       if (gdp) {
                BUFFER_TRACE(bh2, "get_write_access");
                fatal = ext4_journal_get_write_access(handle, bh2);
-               if (fatal) goto error_return;
-
-               if (gdp) {
-                       ext4_lock_group(sb, block_group);
-                       count = ext4_free_inodes_count(sb, gdp) + 1;
-                       ext4_free_inodes_set(sb, gdp, count);
-                       if (is_directory) {
-                               count = ext4_used_dirs_count(sb, gdp) - 1;
-                               ext4_used_dirs_set(sb, gdp, count);
-                               if (sbi->s_log_groups_per_flex) {
-                                       ext4_group_t f;
-
-                                       f = ext4_flex_group(sbi, block_group);
-                                       atomic_dec(&sbi->s_flex_groups[f].used_dirs);
-                               }
+       }
+       ext4_lock_group(sb, block_group);
+       cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
+       if (fatal || !cleared) {
+               ext4_unlock_group(sb, block_group);
+               goto out;
+       }
 
-                       }
-                       gdp->bg_checksum = ext4_group_desc_csum(sbi,
-                                                       block_group, gdp);
-                       ext4_unlock_group(sb, block_group);
-                       percpu_counter_inc(&sbi->s_freeinodes_counter);
-                       if (is_directory)
-                               percpu_counter_dec(&sbi->s_dirs_counter);
-
-                       if (sbi->s_log_groups_per_flex) {
-                               ext4_group_t f;
-
-                               f = ext4_flex_group(sbi, block_group);
-                               atomic_inc(&sbi->s_flex_groups[f].free_inodes);
-                       }
-               }
-               BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
-               err = ext4_handle_dirty_metadata(handle, NULL, bh2);
-               if (!fatal) fatal = err;
+       count = ext4_free_inodes_count(sb, gdp) + 1;
+       ext4_free_inodes_set(sb, gdp, count);
+       if (is_directory) {
+               count = ext4_used_dirs_count(sb, gdp) - 1;
+               ext4_used_dirs_set(sb, gdp, count);
+               percpu_counter_dec(&sbi->s_dirs_counter);
        }
-       BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
-       err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
-       if (!fatal)
-               fatal = err;
-       sb->s_dirt = 1;
+       gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
+       ext4_unlock_group(sb, block_group);
+
+       percpu_counter_inc(&sbi->s_freeinodes_counter);
+       if (sbi->s_log_groups_per_flex) {
+               ext4_group_t f = ext4_flex_group(sbi, block_group);
+
+               atomic_inc(&sbi->s_flex_groups[f].free_inodes);
+               if (is_directory)
+                       atomic_dec(&sbi->s_flex_groups[f].used_dirs);
+       }
+       BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
+       fatal = ext4_handle_dirty_metadata(handle, NULL, bh2);
+out:
+       if (cleared) {
+               BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
+               err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
+               if (!fatal)
+                       fatal = err;
+               sb->s_dirt = 1;
+       } else
+               ext4_error(sb, "bit already cleared for inode %lu", ino);
+
 error_return:
        brelse(bitmap_bh);
        ext4_std_error(sb, fatal);
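
The rework above pulls the counter updates under the same group lock as the bitmap clear, so the test-and-clear and the bookkeeping can no longer race with a concurrent allocation in the same group. A toy model of that locking discipline, using a pthreads mutex in place of ext4's per-group lock:

    #include <pthread.h>

    struct group {
            pthread_mutex_t lock;
            unsigned long inode_bitmap;
            unsigned int free_inodes;
    };

    /* Returns 1 if the bit was actually set; 0 signals a double free,
     * which the caller reports (cf. "bit already cleared" above). */
    static int free_inode_bit(struct group *g, int bit)
    {
            int was_set;

            pthread_mutex_lock(&g->lock);
            was_set = (g->inode_bitmap >> bit) & 1;
            g->inode_bitmap &= ~(1UL << bit);
            if (was_set)
                    g->free_inodes++;       /* updated under the same lock */
            pthread_mutex_unlock(&g->lock);
            return was_set;
    }
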
@@ -499,7 +492,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 
        if (S_ISDIR(mode) &&
            ((parent == sb->s_root->d_inode) ||
-            (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) {
+            (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) {
                int best_ndir = inodes_per_group;
                int ret = -1;
 
@@ -1041,7 +1034,7 @@ got:
        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
                /* set extent flag only for directory, file and normal symlink*/
                if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
-                       EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
+                       ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
                        ext4_ext_tree_init(handle, inode);
                }
        }
index 3e0f6af..19df61c 100644 (file)
@@ -149,7 +149,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
        int ret;
 
        /*
-        * Drop i_data_sem to avoid deadlock with ext4_get_blocks At this
+        * Drop i_data_sem to avoid deadlock with ext4_map_blocks.  At this
         * moment, get_block can be called only for blocks inside i_size since
         * page cache has been already dropped and writes are blocked by
         * i_mutex. So we can safely drop the i_data_sem here.
@@ -348,9 +348,8 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
                if (blk &&
                    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
                                                    blk, 1))) {
-                       __ext4_error(inode->i_sb, function,
-                                  "invalid block reference %u "
-                                  "in inode #%lu", blk, inode->i_ino);
+                       ext4_error_inode(function, inode,
+                                        "invalid block reference %u", blk);
                        return -EIO;
                }
        }
@@ -785,7 +784,7 @@ failed:
        /* Allocation failed, free what we already allocated */
        ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
        for (i = 1; i <= n ; i++) {
-               /* 
+               /*
                 * branch[i].bh is newly allocated, so there is no
                 * need to revoke the block, which is why we don't
                 * need to set EXT4_FREE_BLOCKS_METADATA.
@@ -875,7 +874,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 
 err_out:
        for (i = 1; i <= num; i++) {
-               /* 
+               /*
                 * branch[i].bh is newly allocated, so there is no
                 * need to revoke the block, which is why we don't
                 * need to set EXT4_FREE_BLOCKS_METADATA.
@@ -890,9 +889,9 @@ err_out:
 }
 
 /*
- * The ext4_ind_get_blocks() function handles non-extents inodes
+ * The ext4_ind_map_blocks() function handles non-extents inodes
  * (i.e., using the traditional indirect/double-indirect i_blocks
- * scheme) for ext4_get_blocks().
+ * scheme) for ext4_map_blocks().
  *
  * Allocation strategy is simple: if we have to allocate something, we will
  * have to go the whole way to leaf. So let's do it before attaching anything
@@ -917,9 +916,8 @@ err_out:
  * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
  * blocks.
  */
-static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
-                              ext4_lblk_t iblock, unsigned int maxblocks,
-                              struct buffer_head *bh_result,
+static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+                              struct ext4_map_blocks *map,
                               int flags)
 {
        int err = -EIO;
@@ -933,9 +931,9 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
        int count = 0;
        ext4_fsblk_t first_block = 0;
 
-       J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
+       J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
        J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
-       depth = ext4_block_to_path(inode, iblock, offsets,
+       depth = ext4_block_to_path(inode, map->m_lblk, offsets,
                                   &blocks_to_boundary);
 
        if (depth == 0)
@@ -946,10 +944,9 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
        /* Simplest case - block found, no allocation needed */
        if (!partial) {
                first_block = le32_to_cpu(chain[depth - 1].key);
-               clear_buffer_new(bh_result);
                count++;
                /* map more blocks */
-               while (count < maxblocks && count <= blocks_to_boundary) {
+               while (count < map->m_len && count <= blocks_to_boundary) {
                        ext4_fsblk_t blk;
 
                        blk = le32_to_cpu(*(chain[depth-1].p + count));
@@ -969,7 +966,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
        /*
         * Okay, we need to do block allocation.
         */
-       goal = ext4_find_goal(inode, iblock, partial);
+       goal = ext4_find_goal(inode, map->m_lblk, partial);
 
        /* the number of blocks need to allocate for [d,t]indirect blocks */
        indirect_blks = (chain + depth) - partial - 1;
@@ -979,11 +976,11 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
         * direct blocks to allocate for this branch.
         */
        count = ext4_blks_to_allocate(partial, indirect_blks,
-                                       maxblocks, blocks_to_boundary);
+                                     map->m_len, blocks_to_boundary);
        /*
         * Block out ext4_truncate while we alter the tree
         */
-       err = ext4_alloc_branch(handle, inode, iblock, indirect_blks,
+       err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
                                &count, goal,
                                offsets + (partial - chain), partial);
 
@@ -995,18 +992,20 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
         * may need to return -EAGAIN upwards in the worst case.  --sct
         */
        if (!err)
-               err = ext4_splice_branch(handle, inode, iblock,
+               err = ext4_splice_branch(handle, inode, map->m_lblk,
                                         partial, indirect_blks, count);
        if (err)
                goto cleanup;
 
-       set_buffer_new(bh_result);
+       map->m_flags |= EXT4_MAP_NEW;
 
        ext4_update_inode_fsync_trans(handle, inode, 1);
 got_it:
-       map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+       map->m_flags |= EXT4_MAP_MAPPED;
+       map->m_pblk = le32_to_cpu(chain[depth-1].key);
+       map->m_len = count;
        if (count > blocks_to_boundary)
-               set_buffer_boundary(bh_result);
+               map->m_flags |= EXT4_MAP_BOUNDARY;
        err = count;
        /* Clean up and exit */
        partial = chain + depth - 1;    /* the whole chain */
@@ -1016,7 +1015,6 @@ cleanup:
                brelse(partial->bh);
                partial--;
        }
-       BUFFER_TRACE(bh_result, "returned");
 out:
        return err;
 }
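
For orientation, the indirect scheme that ext4_ind_map_blocks() walks fans out by the number of 32-bit block pointers that fit in one block. A back-of-the-envelope sketch for 4K blocks and the traditional 12 direct pointers (illustrative arithmetic only):

    #include <stdio.h>

    int main(void)
    {
            unsigned long ptrs = 4096 / 4;  /* u32 entries per indirect block */
            unsigned long direct = 12;      /* direct pointers in i_data */

            printf("direct:   blocks 0..%lu\n", direct - 1);
            printf("indirect: through block %lu\n", direct + ptrs - 1);
            printf("double:   through block %lu\n",
                   direct + ptrs + ptrs * ptrs - 1);
            return 0;
    }
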
@@ -1061,7 +1059,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode,
  */
 static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
 {
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                return ext4_ext_calc_metadata_amount(inode, lblock);
 
        return ext4_indirect_calc_metadata_amount(inode, lblock);
@@ -1076,7 +1074,6 @@ void ext4_da_update_reserve_space(struct inode *inode,
 {
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct ext4_inode_info *ei = EXT4_I(inode);
-       int mdb_free = 0, allocated_meta_blocks = 0;
 
        spin_lock(&ei->i_block_reservation_lock);
        trace_ext4_da_update_reserve_space(inode, used);
@@ -1091,11 +1088,10 @@ void ext4_da_update_reserve_space(struct inode *inode,
 
        /* Update per-inode reservations */
        ei->i_reserved_data_blocks -= used;
-       used += ei->i_allocated_meta_blocks;
        ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
-       allocated_meta_blocks = ei->i_allocated_meta_blocks;
+       percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+                          used + ei->i_allocated_meta_blocks);
        ei->i_allocated_meta_blocks = 0;
-       percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
 
        if (ei->i_reserved_data_blocks == 0) {
                /*
@@ -1103,30 +1099,23 @@ void ext4_da_update_reserve_space(struct inode *inode,
                 * only when we have written all of the delayed
                 * allocation blocks.
                 */
-               mdb_free = ei->i_reserved_meta_blocks;
+               percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+                                  ei->i_reserved_meta_blocks);
                ei->i_reserved_meta_blocks = 0;
                ei->i_da_metadata_calc_len = 0;
-               percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
        }
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-       /* Update quota subsystem */
-       if (quota_claim) {
+       /* Update quota subsystem for data blocks */
+       if (quota_claim)
                dquot_claim_block(inode, used);
-               if (mdb_free)
-                       dquot_release_reservation_block(inode, mdb_free);
-       } else {
+       else {
                /*
                 * We did fallocate with an offset that is already delayed
                 * allocated. So on delayed allocated writeback we should
-                * not update the quota for allocated blocks. But then
-                * converting an fallocate region to initialized region would
-                * have caused a metadata allocation. So claim quota for
-                * that
+                * not re-claim the quota for fallocated blocks.
                 */
-               if (allocated_meta_blocks)
-                       dquot_claim_block(inode, allocated_meta_blocks);
-               dquot_release_reservation_block(inode, mdb_free + used);
+               dquot_release_reservation_block(inode, used);
        }
 
        /*
@@ -1139,15 +1128,15 @@ void ext4_da_update_reserve_space(struct inode *inode,
                ext4_discard_preallocations(inode);
 }
 
-static int check_block_validity(struct inode *inode, const char *msg,
-                               sector_t logical, sector_t phys, int len)
+static int check_block_validity(struct inode *inode, const char *func,
+                               struct ext4_map_blocks *map)
 {
-       if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
-               __ext4_error(inode->i_sb, msg,
-                          "inode #%lu logical block %llu mapped to %llu "
-                          "(size %d)", inode->i_ino,
-                          (unsigned long long) logical,
-                          (unsigned long long) phys, len);
+       if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
+                                  map->m_len)) {
+               ext4_error_inode(func, inode,
+                          "lblock %lu mapped to illegal pblock %llu "
+                          "(length %d)", (unsigned long) map->m_lblk,
+                                map->m_pblk, map->m_len);
                return -EIO;
        }
        return 0;
@@ -1212,15 +1201,15 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
 }
 
 /*
- * The ext4_get_blocks() function tries to look up the requested blocks,
+ * The ext4_map_blocks() function tries to look up the requested blocks,
  * and returns if the blocks are already mapped.
  *
  * Otherwise it takes the write lock of the i_data_sem and allocates
  * blocks, stores the allocated blocks in the result buffer head, and
  * marks it mapped.
  *
- * If file type is extents based, it will call ext4_ext_get_blocks(),
- * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping
+ * If the file is extent-based, it will call ext4_ext_map_blocks();
+ * otherwise, it calls ext4_ind_map_blocks() to handle indirect-mapping
  * based files
  *
  * On success, it returns the number of blocks being mapped or allocated.
@@ -1233,35 +1222,29 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
  *
  * It returns the error in case of allocation failure.
  */
-int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
-                   unsigned int max_blocks, struct buffer_head *bh,
-                   int flags)
+int ext4_map_blocks(handle_t *handle, struct inode *inode,
+                   struct ext4_map_blocks *map, int flags)
 {
        int retval;
 
-       clear_buffer_mapped(bh);
-       clear_buffer_unwritten(bh);
-
-       ext_debug("ext4_get_blocks(): inode %lu, flag %d, max_blocks %u,"
-                 "logical block %lu\n", inode->i_ino, flags, max_blocks,
-                 (unsigned long)block);
+       map->m_flags = 0;
+       ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
+                 "logical block %lu\n", inode->i_ino, flags, map->m_len,
+                 (unsigned long) map->m_lblk);
        /*
         * Try to see if we can get the block without requesting a new
         * file system block.
         */
        down_read((&EXT4_I(inode)->i_data_sem));
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
-               retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
-                               bh, 0);
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+               retval = ext4_ext_map_blocks(handle, inode, map, 0);
        } else {
-               retval = ext4_ind_get_blocks(handle, inode, block, max_blocks,
-                                            bh, 0);
+               retval = ext4_ind_map_blocks(handle, inode, map, 0);
        }
        up_read((&EXT4_I(inode)->i_data_sem));
 
-       if (retval > 0 && buffer_mapped(bh)) {
-               int ret = check_block_validity(inode, "file system corruption",
-                                              block, bh->b_blocknr, retval);
+       if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
+               int ret = check_block_validity(inode, __func__, map);
                if (ret != 0)
                        return ret;
        }
@@ -1277,7 +1260,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
         * ext4_ext_get_block() returns as if create = 0,
         * with the buffer head unmapped.
         */
-       if (retval > 0 && buffer_mapped(bh))
+       if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
                return retval;
 
        /*
@@ -1290,7 +1273,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
         * of BH_Unwritten and BH_Mapped flags being simultaneously
         * set on the buffer_head.
         */
-       clear_buffer_unwritten(bh);
+       map->m_flags &= ~EXT4_MAP_UNWRITTEN;
 
        /*
         * New blocks allocate and/or writing to uninitialized extent
@@ -1312,14 +1295,12 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
         * We need to check for EXT4 here because migrate
         * could have changed the inode type in between
         */
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
-               retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
-                                             bh, flags);
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+               retval = ext4_ext_map_blocks(handle, inode, map, flags);
        } else {
-               retval = ext4_ind_get_blocks(handle, inode, block,
-                                            max_blocks, bh, flags);
+               retval = ext4_ind_map_blocks(handle, inode, map, flags);
 
-               if (retval > 0 && buffer_new(bh)) {
+               if (retval > 0 && map->m_flags & EXT4_MAP_NEW) {
                        /*
                         * We allocated new blocks which will result in
                         * i_data's format changing.  Force the migrate
@@ -1342,10 +1323,10 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
                EXT4_I(inode)->i_delalloc_reserved_flag = 0;
 
        up_write((&EXT4_I(inode)->i_data_sem));
-       if (retval > 0 && buffer_mapped(bh)) {
-               int ret = check_block_validity(inode, "file system "
-                                              "corruption after allocation",
-                                              block, bh->b_blocknr, retval);
+       if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
+               int ret = check_block_validity(inode,
+                                              "ext4_map_blocks_after_alloc",
+                                              map);
                if (ret != 0)
                        return ret;
        }
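
ext4_map_blocks() above does a cheap lookup under the read side of i_data_sem and only retries under the write lock when it actually has to allocate. A condensed sketch of that two-phase pattern, with a pthreads rwlock standing in for i_data_sem and stubbed-out lookup/allocate steps:

    #include <pthread.h>

    static pthread_rwlock_t data_sem = PTHREAD_RWLOCK_INITIALIZER;

    static int lookup(void)   { return 0; }  /* stub: 1 if already mapped */
    static int allocate(void) { return 1; }  /* stub: allocate and map */

    static int map_blocks(int create)
    {
            int mapped;

            pthread_rwlock_rdlock(&data_sem);       /* fast path: read lock */
            mapped = lookup();
            pthread_rwlock_unlock(&data_sem);

            if (mapped || !create)
                    return mapped;

            pthread_rwlock_wrlock(&data_sem);       /* slow path: allocate */
            mapped = allocate();
            pthread_rwlock_unlock(&data_sem);
            return mapped;
    }
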
@@ -1355,109 +1336,109 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 /* Maximum number of blocks we map for direct IO at once. */
 #define DIO_MAX_BLOCKS 4096
 
-int ext4_get_block(struct inode *inode, sector_t iblock,
-                  struct buffer_head *bh_result, int create)
+static int _ext4_get_block(struct inode *inode, sector_t iblock,
+                          struct buffer_head *bh, int flags)
 {
        handle_t *handle = ext4_journal_current_handle();
+       struct ext4_map_blocks map;
        int ret = 0, started = 0;
-       unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
        int dio_credits;
 
-       if (create && !handle) {
+       map.m_lblk = iblock;
+       map.m_len = bh->b_size >> inode->i_blkbits;
+
+       if (flags && !handle) {
                /* Direct IO write... */
-               if (max_blocks > DIO_MAX_BLOCKS)
-                       max_blocks = DIO_MAX_BLOCKS;
-               dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
+               if (map.m_len > DIO_MAX_BLOCKS)
+                       map.m_len = DIO_MAX_BLOCKS;
+               dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
                handle = ext4_journal_start(inode, dio_credits);
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
-                       goto out;
+                       return ret;
                }
                started = 1;
        }
 
-       ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
-                             create ? EXT4_GET_BLOCKS_CREATE : 0);
+       ret = ext4_map_blocks(handle, inode, &map, flags);
        if (ret > 0) {
-               bh_result->b_size = (ret << inode->i_blkbits);
+               map_bh(bh, inode->i_sb, map.m_pblk);
+               bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+               bh->b_size = inode->i_sb->s_blocksize * map.m_len;
                ret = 0;
        }
        if (started)
                ext4_journal_stop(handle);
-out:
        return ret;
 }
 
+int ext4_get_block(struct inode *inode, sector_t iblock,
+                  struct buffer_head *bh, int create)
+{
+       return _ext4_get_block(inode, iblock, bh,
+                              create ? EXT4_GET_BLOCKS_CREATE : 0);
+}
+
 /*
  * `handle' can be NULL if create is zero
  */
 struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
                                ext4_lblk_t block, int create, int *errp)
 {
-       struct buffer_head dummy;
+       struct ext4_map_blocks map;
+       struct buffer_head *bh;
        int fatal = 0, err;
-       int flags = 0;
 
        J_ASSERT(handle != NULL || create == 0);
 
-       dummy.b_state = 0;
-       dummy.b_blocknr = -1000;
-       buffer_trace_init(&dummy.b_history);
-       if (create)
-               flags |= EXT4_GET_BLOCKS_CREATE;
-       err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags);
-       /*
-        * ext4_get_blocks() returns number of blocks mapped. 0 in
-        * case of a HOLE.
-        */
-       if (err > 0) {
-               if (err > 1)
-                       WARN_ON(1);
-               err = 0;
+       map.m_lblk = block;
+       map.m_len = 1;
+       err = ext4_map_blocks(handle, inode, &map,
+                             create ? EXT4_GET_BLOCKS_CREATE : 0);
+
+       if (err < 0)
+               *errp = err;
+       if (err <= 0)
+               return NULL;
+       *errp = 0;
+
+       bh = sb_getblk(inode->i_sb, map.m_pblk);
+       if (!bh) {
+               *errp = -EIO;
+               return NULL;
        }
-       *errp = err;
-       if (!err && buffer_mapped(&dummy)) {
-               struct buffer_head *bh;
-               bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
-               if (!bh) {
-                       *errp = -EIO;
-                       goto err;
-               }
-               if (buffer_new(&dummy)) {
-                       J_ASSERT(create != 0);
-                       J_ASSERT(handle != NULL);
+       if (map.m_flags & EXT4_MAP_NEW) {
+               J_ASSERT(create != 0);
+               J_ASSERT(handle != NULL);
 
-                       /*
-                        * Now that we do not always journal data, we should
-                        * keep in mind whether this should always journal the
-                        * new buffer as metadata.  For now, regular file
-                        * writes use ext4_get_block instead, so it's not a
-                        * problem.
-                        */
-                       lock_buffer(bh);
-                       BUFFER_TRACE(bh, "call get_create_access");
-                       fatal = ext4_journal_get_create_access(handle, bh);
-                       if (!fatal && !buffer_uptodate(bh)) {
-                               memset(bh->b_data, 0, inode->i_sb->s_blocksize);
-                               set_buffer_uptodate(bh);
-                       }
-                       unlock_buffer(bh);
-                       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-                       err = ext4_handle_dirty_metadata(handle, inode, bh);
-                       if (!fatal)
-                               fatal = err;
-               } else {
-                       BUFFER_TRACE(bh, "not a new buffer");
-               }
-               if (fatal) {
-                       *errp = fatal;
-                       brelse(bh);
-                       bh = NULL;
+               /*
+                * Now that we do not always journal data, we should
+                * keep in mind whether this should always journal the
+                * new buffer as metadata.  For now, regular file
+                * writes use ext4_get_block instead, so it's not a
+                * problem.
+                */
+               lock_buffer(bh);
+               BUFFER_TRACE(bh, "call get_create_access");
+               fatal = ext4_journal_get_create_access(handle, bh);
+               if (!fatal && !buffer_uptodate(bh)) {
+                       memset(bh->b_data, 0, inode->i_sb->s_blocksize);
+                       set_buffer_uptodate(bh);
                }
-               return bh;
+               unlock_buffer(bh);
+               BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+               err = ext4_handle_dirty_metadata(handle, inode, bh);
+               if (!fatal)
+                       fatal = err;
+       } else {
+               BUFFER_TRACE(bh, "not a new buffer");
        }
-err:
-       return NULL;
+       if (fatal) {
+               *errp = fatal;
+               brelse(bh);
+               bh = NULL;
+       }
+       return bh;
 }
 
 struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
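
The one-liner bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags in _ext4_get_block() above only works because the EXT4_MAP_* flags are defined to occupy the same bit positions as the corresponding BH_* state bits. A sketch of that correspondence (the bit numbers here are placeholders, not the kernel's):

    enum { BH_Mapped = 5, BH_New = 6 };             /* placeholder positions */
    #define MAP_MAPPED      (1UL << BH_Mapped)      /* mirrors EXT4_MAP_MAPPED */
    #define MAP_NEW         (1UL << BH_New)         /* mirrors EXT4_MAP_NEW */
    #define MAP_FLAGS       (MAP_MAPPED | MAP_NEW)

    static unsigned long apply_map_flags(unsigned long b_state,
                                         unsigned long m_flags)
    {
            /* clear only the mapping-related bits, then copy in new ones */
            return (b_state & ~MAP_FLAGS) | (m_flags & MAP_FLAGS);
    }
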
@@ -1860,7 +1841,7 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
        int retries = 0;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct ext4_inode_info *ei = EXT4_I(inode);
-       unsigned long md_needed, md_reserved;
+       unsigned long md_needed;
        int ret;
 
        /*
@@ -1870,22 +1851,24 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
         */
 repeat:
        spin_lock(&ei->i_block_reservation_lock);
-       md_reserved = ei->i_reserved_meta_blocks;
        md_needed = ext4_calc_metadata_amount(inode, lblock);
        trace_ext4_da_reserve_space(inode, md_needed);
        spin_unlock(&ei->i_block_reservation_lock);
 
        /*
-        * Make quota reservation here to prevent quota overflow
-        * later. Real quota accounting is done at pages writeout
-        * time.
+        * We will charge metadata quota at writeout time; this saves
+        * us from metadata over-estimation, though we may go over by
+        * a small amount in the end.  Here we just reserve for data.
         */
-       ret = dquot_reserve_block(inode, md_needed + 1);
+       ret = dquot_reserve_block(inode, 1);
        if (ret)
                return ret;
-
+       /*
+        * We do still charge estimated metadata to the sb though;
+        * we cannot afford to run out of free blocks.
+        */
        if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
-               dquot_release_reservation_block(inode, md_needed + 1);
+               dquot_release_reservation_block(inode, 1);
                if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
                        yield();
                        goto repeat;
@@ -1910,6 +1893,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 
        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 
+       trace_ext4_da_release_space(inode, to_free);
        if (unlikely(to_free > ei->i_reserved_data_blocks)) {
                /*
                 * if there aren't enough reserved blocks, then the
@@ -1932,12 +1916,13 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
                 * only when we have written all of the delayed
                 * allocation blocks.
                 */
-               to_free += ei->i_reserved_meta_blocks;
+               percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+                                  ei->i_reserved_meta_blocks);
                ei->i_reserved_meta_blocks = 0;
                ei->i_da_metadata_calc_len = 0;
        }
 
-       /* update fs dirty blocks counter */
+       /* update fs dirty data blocks counter */
        percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
 
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
@@ -2042,28 +2027,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
 /*
  * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
  *
- * @mpd->inode - inode to walk through
- * @exbh->b_blocknr - first block on a disk
- * @exbh->b_size - amount of space in bytes
- * @logical - first logical block to start assignment with
- *
  * the function goes through all passed space and puts actual disk
  * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
  */
-static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
-                                struct buffer_head *exbh)
+static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
+                                struct ext4_map_blocks *map)
 {
        struct inode *inode = mpd->inode;
        struct address_space *mapping = inode->i_mapping;
-       int blocks = exbh->b_size >> inode->i_blkbits;
-       sector_t pblock = exbh->b_blocknr, cur_logical;
+       int blocks = map->m_len;
+       sector_t pblock = map->m_pblk, cur_logical;
        struct buffer_head *head, *bh;
        pgoff_t index, end;
        struct pagevec pvec;
        int nr_pages, i;
 
-       index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-       end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
        cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
 
        pagevec_init(&pvec, 0);
@@ -2090,17 +2070,16 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
 
                        /* skip blocks out of the range */
                        do {
-                               if (cur_logical >= logical)
+                               if (cur_logical >= map->m_lblk)
                                        break;
                                cur_logical++;
                        } while ((bh = bh->b_this_page) != head);
 
                        do {
-                               if (cur_logical >= logical + blocks)
+                               if (cur_logical >= map->m_lblk + blocks)
                                        break;
 
-                               if (buffer_delay(bh) ||
-                                               buffer_unwritten(bh)) {
+                               if (buffer_delay(bh) || buffer_unwritten(bh)) {
 
                                        BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
 
@@ -2119,7 +2098,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
                                } else if (buffer_mapped(bh))
                                        BUG_ON(bh->b_blocknr != pblock);
 
-                               if (buffer_uninit(exbh))
+                               if (map->m_flags & EXT4_MAP_UNINIT)
                                        set_buffer_uninit(bh);
                                cur_logical++;
                                pblock++;
@@ -2130,21 +2109,6 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
 }
 
 
-/*
- * __unmap_underlying_blocks - just a helper function to unmap
- * set of blocks described by @bh
- */
-static inline void __unmap_underlying_blocks(struct inode *inode,
-                                            struct buffer_head *bh)
-{
-       struct block_device *bdev = inode->i_sb->s_bdev;
-       int blocks, i;
-
-       blocks = bh->b_size >> inode->i_blkbits;
-       for (i = 0; i < blocks; i++)
-               unmap_underlying_metadata(bdev, bh->b_blocknr + i);
-}
-
 static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
                                        sector_t logical, long blk_cnt)
 {
@@ -2206,7 +2170,7 @@ static void ext4_print_free_blocks(struct inode *inode)
 static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 {
        int err, blks, get_blocks_flags;
-       struct buffer_head new;
+       struct ext4_map_blocks map;
        sector_t next = mpd->b_blocknr;
        unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
        loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
@@ -2247,15 +2211,15 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
         * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
         * variables are updated after the blocks have been allocated.
         */
-       new.b_state = 0;
+       map.m_lblk = next;
+       map.m_len = max_blocks;
        get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
        if (ext4_should_dioread_nolock(mpd->inode))
                get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
        if (mpd->b_state & (1 << BH_Delay))
                get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
 
-       blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
-                              &new, get_blocks_flags);
+       blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
        if (blks < 0) {
                err = blks;
                /*
@@ -2282,7 +2246,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
                ext4_msg(mpd->inode->i_sb, KERN_CRIT,
                         "delayed block allocation failed for inode %lu at "
                         "logical offset %llu with max blocks %zd with "
-                        "error %d\n", mpd->inode->i_ino,
+                        "error %d", mpd->inode->i_ino,
                         (unsigned long long) next,
                         mpd->b_size >> mpd->inode->i_blkbits, err);
                printk(KERN_CRIT "This should not happen!!  "
@@ -2297,10 +2261,13 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
        }
        BUG_ON(blks == 0);
 
-       new.b_size = (blks << mpd->inode->i_blkbits);
+       if (map.m_flags & EXT4_MAP_NEW) {
+               struct block_device *bdev = mpd->inode->i_sb->s_bdev;
+               int i;
 
-       if (buffer_new(&new))
-               __unmap_underlying_blocks(mpd->inode, &new);
+               for (i = 0; i < map.m_len; i++)
+                       unmap_underlying_metadata(bdev, map.m_pblk + i);
+       }
 
        /*
         * If blocks are delayed marked, we need to
@@ -2308,7 +2275,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
         */
        if ((mpd->b_state & (1 << BH_Delay)) ||
            (mpd->b_state & (1 << BH_Unwritten)))
-               mpage_put_bnr_to_bhs(mpd, next, &new);
+               mpage_put_bnr_to_bhs(mpd, &map);
 
        if (ext4_should_order_data(mpd->inode)) {
                err = ext4_jbd2_file_inode(handle, mpd->inode);
@@ -2349,8 +2316,17 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
        sector_t next;
        int nrblocks = mpd->b_size >> mpd->inode->i_blkbits;
 
+       /*
+        * XXX Don't go larger than mballoc is willing to allocate
+        * This is a stopgap solution.  We eventually need to fold
+        * mpage_da_submit_io() into this function and then call
+        * ext4_get_blocks() multiple times in a loop
+        */
+       if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize)
+               goto flush_it;
+
        /* check if the reserved journal credits might overflow */
-       if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
+       if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) {
                if (nrblocks >= EXT4_MAX_TRANS_DATA) {
                        /*
                         * With non-extent format we are limited by the journal
@@ -2423,17 +2399,6 @@ static int __mpage_da_writepage(struct page *page,
        struct buffer_head *bh, *head;
        sector_t logical;
 
-       if (mpd->io_done) {
-               /*
-                * Rest of the page in the page_vec
-                * redirty then and skip then. We will
-                * try to write them again after
-                * starting a new transaction
-                */
-               redirty_page_for_writepage(wbc, page);
-               unlock_page(page);
-               return MPAGE_DA_EXTENT_TAIL;
-       }
        /*
         * Can we merge this page to current extent?
         */
@@ -2528,8 +2493,9 @@ static int __mpage_da_writepage(struct page *page,
  * initialized properly.
  */
 static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
-                                 struct buffer_head *bh_result, int create)
+                                 struct buffer_head *bh, int create)
 {
+       struct ext4_map_blocks map;
        int ret = 0;
        sector_t invalid_block = ~((sector_t) 0xffff);
 
@@ -2537,16 +2503,22 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
                invalid_block = ~0;
 
        BUG_ON(create == 0);
-       BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
+       BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
+
+       map.m_lblk = iblock;
+       map.m_len = 1;
 
        /*
         * first, we need to know whether the block is allocated already;
         * preallocated blocks are unmapped but should be treated
         * the same as allocated blocks.
         */
-       ret = ext4_get_blocks(NULL, inode, iblock, 1,  bh_result, 0);
-       if ((ret == 0) && !buffer_delay(bh_result)) {
-               /* the block isn't (pre)allocated yet, let's reserve space */
+       ret = ext4_map_blocks(NULL, inode, &map, 0);
+       if (ret < 0)
+               return ret;
+       if (ret == 0) {
+               if (buffer_delay(bh))
+                       return 0; /* Not sure this could or should happen */
                /*
                 * XXX: __block_prepare_write() unmaps passed block,
                 * is it OK?
@@ -2556,26 +2528,26 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
                        /* not enough space to reserve */
                        return ret;
 
-               map_bh(bh_result, inode->i_sb, invalid_block);
-               set_buffer_new(bh_result);
-               set_buffer_delay(bh_result);
-       } else if (ret > 0) {
-               bh_result->b_size = (ret << inode->i_blkbits);
-               if (buffer_unwritten(bh_result)) {
-                       /* A delayed write to unwritten bh should
-                        * be marked new and mapped.  Mapped ensures
-                        * that we don't do get_block multiple times
-                        * when we write to the same offset and new
-                        * ensures that we do proper zero out for
-                        * partial write.
-                        */
-                       set_buffer_new(bh_result);
-                       set_buffer_mapped(bh_result);
-               }
-               ret = 0;
+               map_bh(bh, inode->i_sb, invalid_block);
+               set_buffer_new(bh);
+               set_buffer_delay(bh);
+               return 0;
        }
 
-       return ret;
+       map_bh(bh, inode->i_sb, map.m_pblk);
+       bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+
+       if (buffer_unwritten(bh)) {
+               /* A delayed write to unwritten bh should be marked
+                * new and mapped.  Mapped ensures that we don't do
+                * get_block multiple times when we write to the same
+                * offset and new ensures that we do proper zero out
+                * for partial write.
+                */
+               set_buffer_new(bh);
+               set_buffer_mapped(bh);
+       }
+       return 0;
 }
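
The conversion above is representative of the ext4_map_blocks() callers changed in this series: the caller fills in m_lblk/m_len, the return value is the number of contiguous blocks mapped (0 for a hole, negative for an error), and the physical start plus state flags come back in m_pblk/m_flags. A minimal sketch of the convention, using only the fields that appear in this hunk (handle_hole() is a hypothetical stand-in for the caller's unmapped-block handling):

	struct ext4_map_blocks map;
	int ret;

	map.m_lblk = iblock;	/* first logical block to look up */
	map.m_len = 1;		/* how many blocks we want mapped */

	ret = ext4_map_blocks(NULL, inode, &map, 0);	/* flags 0: no allocation */
	if (ret < 0)
		return ret;			/* hard error */
	if (ret == 0)
		return handle_hole();		/* hypothetical: nothing mapped */

	/* ret > 0: map.m_pblk / map.m_flags describe the mapped extent */
	map_bh(bh, inode->i_sb, map.m_pblk);
	bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;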
 
 /*
@@ -2597,21 +2569,8 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
                                   struct buffer_head *bh_result, int create)
 {
-       int ret = 0;
-       unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-
        BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
-
-       /*
-        * we don't want to do block allocation in writepage
-        * so call get_block_wrap with create = 0
-        */
-       ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
-       if (ret > 0) {
-               bh_result->b_size = (ret << inode->i_blkbits);
-               ret = 0;
-       }
-       return ret;
+       return _ext4_get_block(inode, iblock, bh_result, 0);
 }
 
 static int bget_one(handle_t *handle, struct buffer_head *bh)
@@ -2821,13 +2780,131 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
         * number of contiguous blocks. So we will limit
         * number of contiguous blocks to a sane value
         */
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) &&
            (max_blocks > EXT4_MAX_TRANS_DATA))
                max_blocks = EXT4_MAX_TRANS_DATA;
 
        return ext4_chunk_trans_blocks(inode, max_blocks);
 }
 
+/*
+ * write_cache_pages_da - walk the list of dirty pages of the given
+ * address space and call the callback function (which usually writes
+ * the pages).
+ *
+ * This is a forked version of write_cache_pages().  Differences:
+ *     Range cyclic is ignored.
+ *     no_nrwrite_index_update is always presumed true
+ */
+static int write_cache_pages_da(struct address_space *mapping,
+                               struct writeback_control *wbc,
+                               struct mpage_da_data *mpd)
+{
+       int ret = 0;
+       int done = 0;
+       struct pagevec pvec;
+       int nr_pages;
+       pgoff_t index;
+       pgoff_t end;            /* Inclusive */
+       long nr_to_write = wbc->nr_to_write;
+
+       pagevec_init(&pvec, 0);
+       index = wbc->range_start >> PAGE_CACHE_SHIFT;
+       end = wbc->range_end >> PAGE_CACHE_SHIFT;
+
+       while (!done && (index <= end)) {
+               int i;
+
+               nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+                             PAGECACHE_TAG_DIRTY,
+                             min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+               if (nr_pages == 0)
+                       break;
+
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+
+                       /*
+                        * At this point, the page may be truncated or
+                        * invalidated (changing page->mapping to NULL), or
+                        * even swizzled back from swapper_space to tmpfs file
+                        * mapping. However, page->index will not change
+                        * because we have a reference on the page.
+                        */
+                       if (page->index > end) {
+                               done = 1;
+                               break;
+                       }
+
+                       lock_page(page);
+
+                       /*
+                        * Page truncated or invalidated. We can freely skip it
+                        * then, even for data integrity operations: the page
+                        * has disappeared concurrently, so there could be no
+                        * real expectation of this data integrity operation
+                        * even if there is now a new, dirty page at the same
+                        * pagecache address.
+                        */
+                       if (unlikely(page->mapping != mapping)) {
+continue_unlock:
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       if (!PageDirty(page)) {
+                               /* someone wrote it for us */
+                               goto continue_unlock;
+                       }
+
+                       if (PageWriteback(page)) {
+                               if (wbc->sync_mode != WB_SYNC_NONE)
+                                       wait_on_page_writeback(page);
+                               else
+                                       goto continue_unlock;
+                       }
+
+                       BUG_ON(PageWriteback(page));
+                       if (!clear_page_dirty_for_io(page))
+                               goto continue_unlock;
+
+                       ret = __mpage_da_writepage(page, wbc, mpd);
+                       if (unlikely(ret)) {
+                               if (ret == AOP_WRITEPAGE_ACTIVATE) {
+                                       unlock_page(page);
+                                       ret = 0;
+                               } else {
+                                       done = 1;
+                                       break;
+                               }
+                       }
+
+                       if (nr_to_write > 0) {
+                               nr_to_write--;
+                               if (nr_to_write == 0 &&
+                                   wbc->sync_mode == WB_SYNC_NONE) {
+                                       /*
+                                        * We stop writing back only if we are
+                                        * not doing integrity sync. In case of
+                                        * integrity sync we have to keep going
+                                        * because someone may be concurrently
+                                        * dirtying pages, and we might have
+                                        * synced a lot of newly appeared dirty
+                                        * pages, but have not synced all of the
+                                        * old dirty pages.
+                                        */
+                                       done = 1;
+                                       break;
+                               }
+                       }
+               }
+               pagevec_release(&pvec);
+               cond_resched();
+       }
+       return ret;
+}
+
+
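The forked walker is driven from ext4_da_writepages() exactly where write_cache_pages() used to be called; condensed from the hunks below (only the mpd fields visible in this diff are shown):

	struct mpage_da_data mpd;

	mpd.wbc = wbc;
	mpd.inode = mapping->host;
	mpd.io_done = 0;
	mpd.pages_written = 0;
	mpd.retval = 0;

	/* feeds each dirty page to __mpage_da_writepage() */
	ret = write_cache_pages_da(mapping, wbc, &mpd);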
 static int ext4_da_writepages(struct address_space *mapping,
                              struct writeback_control *wbc)
 {
@@ -2836,7 +2913,6 @@ static int ext4_da_writepages(struct address_space *mapping,
        handle_t *handle = NULL;
        struct mpage_da_data mpd;
        struct inode *inode = mapping->host;
-       int no_nrwrite_index_update;
        int pages_written = 0;
        long pages_skipped;
        unsigned int max_pages;
@@ -2916,12 +2992,6 @@ static int ext4_da_writepages(struct address_space *mapping,
        mpd.wbc = wbc;
        mpd.inode = mapping->host;
 
-       /*
-        * we don't want write_cache_pages to update
-        * nr_to_write and writeback_index
-        */
-       no_nrwrite_index_update = wbc->no_nrwrite_index_update;
-       wbc->no_nrwrite_index_update = 1;
        pages_skipped = wbc->pages_skipped;
 
 retry:
@@ -2941,7 +3011,7 @@ retry:
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
                        ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
-                              "%ld pages, ino %lu; err %d\n", __func__,
+                              "%ld pages, ino %lu; err %d", __func__,
                                wbc->nr_to_write, inode->i_ino, ret);
                        goto out_writepages;
                }
@@ -2963,8 +3033,7 @@ retry:
                mpd.io_done = 0;
                mpd.pages_written = 0;
                mpd.retval = 0;
-               ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
-                                       &mpd);
+               ret = write_cache_pages_da(mapping, wbc, &mpd);
                /*
                 * If we have a contiguous extent of pages and we
                 * haven't done the I/O yet, map the blocks and submit
@@ -3016,7 +3085,7 @@ retry:
        if (pages_skipped != wbc->pages_skipped)
                ext4_msg(inode->i_sb, KERN_CRIT,
                         "This should not happen leaving %s "
-                        "with nr_to_write = %ld ret = %d\n",
+                        "with nr_to_write = %ld ret = %d",
                         __func__, wbc->nr_to_write, ret);
 
        /* Update index */
@@ -3030,8 +3099,6 @@ retry:
                mapping->writeback_index = index;
 
 out_writepages:
-       if (!no_nrwrite_index_update)
-               wbc->no_nrwrite_index_update = 0;
        wbc->nr_to_write -= nr_to_writebump;
        wbc->range_start = range_start;
        trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
@@ -3076,7 +3143,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
                               loff_t pos, unsigned len, unsigned flags,
                               struct page **pagep, void **fsdata)
 {
-       int ret, retries = 0, quota_retries = 0;
+       int ret, retries = 0;
        struct page *page;
        pgoff_t index;
        unsigned from, to;
@@ -3135,22 +3202,6 @@ retry:
 
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
                goto retry;
-
-       if ((ret == -EDQUOT) &&
-           EXT4_I(inode)->i_reserved_meta_blocks &&
-           (quota_retries++ < 3)) {
-               /*
-                * Since we often over-estimate the number of meta
-                * data blocks required, we may sometimes get a
-                * spurios out of quota error even though there would
-                * be enough space once we write the data blocks and
-                * find out how many meta data blocks were _really_
-                * required.  So try forcing the inode write to see if
-                * that helps.
-                */
-               write_inode_now(inode, (quota_retries == 3));
-               goto retry;
-       }
 out:
        return ret;
 }
@@ -3546,46 +3597,18 @@ out:
        return ret;
 }
 
+/*
+ * ext4_get_block used when preparing for a DIO write or buffer write.
+ * We allocate an uninitialized extent if blocks haven't been allocated.
+ * The extent will be converted to initialized after the IO is complete.
+ */
 static int ext4_get_block_write(struct inode *inode, sector_t iblock,
                   struct buffer_head *bh_result, int create)
 {
-       handle_t *handle = ext4_journal_current_handle();
-       int ret = 0;
-       unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-       int dio_credits;
-       int started = 0;
-
        ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
                   inode->i_ino, create);
-       /*
-        * ext4_get_block in prepare for a DIO write or buffer write.
-        * We allocate an uinitialized extent if blocks haven't been allocated.
-        * The extent will be converted to initialized after IO complete.
-        */
-       create = EXT4_GET_BLOCKS_IO_CREATE_EXT;
-
-       if (!handle) {
-               if (max_blocks > DIO_MAX_BLOCKS)
-                       max_blocks = DIO_MAX_BLOCKS;
-               dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
-               handle = ext4_journal_start(inode, dio_credits);
-               if (IS_ERR(handle)) {
-                       ret = PTR_ERR(handle);
-                       goto out;
-               }
-               started = 1;
-       }
-
-       ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
-                             create);
-       if (ret > 0) {
-               bh_result->b_size = (ret << inode->i_blkbits);
-               ret = 0;
-       }
-       if (started)
-               ext4_journal_stop(handle);
-out:
-       return ret;
+       return _ext4_get_block(inode, iblock, bh_result,
+                              EXT4_GET_BLOCKS_IO_CREATE_EXT);
 }
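
With the handle management folded into _ext4_get_block(), the DIO write path reduces to allocating unwritten extents up front and converting them once the data is on disk. A rough sketch of the two halves, assuming (as elsewhere in this series) that ext4_convert_unwritten_extents() is the completion-side helper:

	/* submission: allocate as unwritten so stale data is never exposed */
	ret = _ext4_get_block(inode, iblock, bh_result,
			      EXT4_GET_BLOCKS_IO_CREATE_EXT);

	/* completion (end_io/workqueue context): mark the range written */
	err = ext4_convert_unwritten_extents(inode, offset, size);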
 
 static void dump_completed_IO(struct inode * inode)
@@ -3973,7 +3996,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
 
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
 
        return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
@@ -4302,10 +4325,9 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
 
        if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
                                   count)) {
-               ext4_error(inode->i_sb, "inode #%lu: "
-                          "attempt to clear blocks %llu len %lu, invalid",
-                          inode->i_ino, (unsigned long long) block_to_free,
-                          count);
+               EXT4_ERROR_INODE(inode, "attempt to clear invalid "
+                                "blocks %llu len %lu",
+                                (unsigned long long) block_to_free, count);
                return 1;
        }
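
This and the following hunks convert ext4_error() calls that manually printed inode->i_ino to EXT4_ERROR_INODE(), which supplies the inode identification itself. The macro is roughly of this shape (a sketch; the authoritative definition lives in fs/ext4/ext4.h):

	#define EXT4_ERROR_INODE(inode, fmt, a...) \
		ext4_error_inode(__func__, (inode), (fmt), ## a)

	/* before: ext4_error(sb, "inode #%lu: ...", inode->i_ino, ...);
	 * after:  EXT4_ERROR_INODE(inode, "...", ...);
	 */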
 
@@ -4410,11 +4432,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
                if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
                        ext4_handle_dirty_metadata(handle, inode, this_bh);
                else
-                       ext4_error(inode->i_sb,
-                                  "circular indirect block detected, "
-                                  "inode=%lu, block=%llu",
-                                  inode->i_ino,
-                                  (unsigned long long) this_bh->b_blocknr);
+                       EXT4_ERROR_INODE(inode,
+                                        "circular indirect block detected at "
+                                        "block %llu",
+                               (unsigned long long) this_bh->b_blocknr);
        }
 }
 
@@ -4452,11 +4473,10 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
 
                        if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
                                                   nr, 1)) {
-                               ext4_error(inode->i_sb,
-                                          "indirect mapped block in inode "
-                                          "#%lu invalid (level %d, blk #%lu)",
-                                          inode->i_ino, depth,
-                                          (unsigned long) nr);
+                               EXT4_ERROR_INODE(inode,
+                                                "invalid indirect mapped "
+                                                "block %lu (level %d)",
+                                                (unsigned long) nr, depth);
                                break;
                        }
 
@@ -4468,9 +4488,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
                         * (should be rare).
                         */
                        if (!bh) {
-                               ext4_error(inode->i_sb,
-                                          "Read failure, inode=%lu, block=%llu",
-                                          inode->i_ino, nr);
+                               EXT4_ERROR_INODE(inode,
+                                                "Read failure block=%llu",
+                                                (unsigned long long) nr);
                                continue;
                        }
 
@@ -4612,12 +4632,12 @@ void ext4_truncate(struct inode *inode)
        if (!ext4_can_truncate(inode))
                return;
 
-       EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
+       ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
 
        if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
                ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
 
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                ext4_ext_truncate(inode);
                return;
        }
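
The flag tests here follow the i_flags bitops conversion: direct masking of EXT4_*_FL bits in EXT4_I(inode)->i_flags is replaced by per-bit accessors, so concurrent read-modify-write sequences can no longer lose updates. Assuming the helpers wrap the standard atomic bitops (a sketch consistent with these call sites):

	static inline int ext4_test_inode_flag(struct inode *inode, int bit)
	{
		return test_bit(bit, &EXT4_I(inode)->i_flags);
	}

	static inline void ext4_set_inode_flag(struct inode *inode, int bit)
	{
		set_bit(bit, &EXT4_I(inode)->i_flags);
	}

	static inline void ext4_clear_inode_flag(struct inode *inode, int bit)
	{
		clear_bit(bit, &EXT4_I(inode)->i_flags);
	}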
@@ -4785,8 +4805,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
 
        bh = sb_getblk(sb, block);
        if (!bh) {
-               ext4_error(sb, "unable to read inode block - "
-                          "inode=%lu, block=%llu", inode->i_ino, block);
+               EXT4_ERROR_INODE(inode, "unable to read inode block - "
+                                "block %llu", block);
                return -EIO;
        }
        if (!buffer_uptodate(bh)) {
@@ -4884,8 +4904,8 @@ make_io:
                submit_bh(READ_META, bh);
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh)) {
-                       ext4_error(sb, "unable to read inode block - inode=%lu,"
-                                  " block=%llu", inode->i_ino, block);
+                       EXT4_ERROR_INODE(inode, "unable to read inode "
+                                        "block %llu", block);
                        brelse(bh);
                        return -EIO;
                }
@@ -5096,8 +5116,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        ret = 0;
        if (ei->i_file_acl &&
            !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
-               ext4_error(sb, "bad extended attribute block %llu inode #%lu",
-                          ei->i_file_acl, inode->i_ino);
+               EXT4_ERROR_INODE(inode, "bad extended attribute block %llu",
+                                ei->i_file_acl);
                ret = -EIO;
                goto bad_inode;
        } else if (ei->i_flags & EXT4_EXTENTS_FL) {
@@ -5142,8 +5162,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                           new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
        } else {
                ret = -EIO;
-               ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu",
-                          inode->i_mode, inode->i_ino);
+               EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
                goto bad_inode;
        }
        brelse(iloc.bh);
@@ -5381,9 +5400,9 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
                if (wbc->sync_mode == WB_SYNC_ALL)
                        sync_dirty_buffer(iloc.bh);
                if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
-                       ext4_error(inode->i_sb, "IO error syncing inode, "
-                                  "inode=%lu, block=%llu", inode->i_ino,
-                                  (unsigned long long)iloc.bh->b_blocknr);
+                       EXT4_ERROR_INODE(inode,
+                               "IO error syncing inode (block=%llu)",
+                               (unsigned long long) iloc.bh->b_blocknr);
                        err = -EIO;
                }
                brelse(iloc.bh);
@@ -5455,7 +5474,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
        }
 
        if (attr->ia_valid & ATTR_SIZE) {
-               if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) {
+               if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
                        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 
                        if (attr->ia_size > sbi->s_bitmap_maxbytes) {
@@ -5468,7 +5487,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
        if (S_ISREG(inode->i_mode) &&
            attr->ia_valid & ATTR_SIZE &&
            (attr->ia_size < inode->i_size ||
-            (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) {
+            (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) {
                handle_t *handle;
 
                handle = ext4_journal_start(inode, 3);
@@ -5500,7 +5519,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                        }
                }
                /* ext4_truncate will clear the flag */
-               if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))
+               if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))
                        ext4_truncate(inode);
        }
 
@@ -5576,7 +5595,7 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
 
 static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 {
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return ext4_indirect_trans_blocks(inode, nrblocks, chunk);
        return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
 }
@@ -5911,9 +5930,9 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
         */
 
        if (val)
-               EXT4_I(inode)->i_flags |= EXT4_JOURNAL_DATA_FL;
+               ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
        else
-               EXT4_I(inode)->i_flags &= ~EXT4_JOURNAL_DATA_FL;
+               ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
        ext4_set_aops(inode);
 
        jbd2_journal_unlock_updates(journal);
index 016d024..bf5ae88 100644 (file)
@@ -258,7 +258,7 @@ setversion_out:
                if (me.moved_len > 0)
                        file_remove_suid(donor_filp);
 
-               if (copy_to_user((struct move_extent __user *)arg, 
+               if (copy_to_user((struct move_extent __user *)arg,
                                 &me, sizeof(me)))
                        err = -EFAULT;
 mext_out:
@@ -373,7 +373,30 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case EXT4_IOC32_SETRSVSZ:
                cmd = EXT4_IOC_SETRSVSZ;
                break;
-       case EXT4_IOC_GROUP_ADD:
+       case EXT4_IOC32_GROUP_ADD: {
+               struct compat_ext4_new_group_input __user *uinput;
+               struct ext4_new_group_input input;
+               mm_segment_t old_fs;
+               int err;
+
+               uinput = compat_ptr(arg);
+               err = get_user(input.group, &uinput->group);
+               err |= get_user(input.block_bitmap, &uinput->block_bitmap);
+               err |= get_user(input.inode_bitmap, &uinput->inode_bitmap);
+               err |= get_user(input.inode_table, &uinput->inode_table);
+               err |= get_user(input.blocks_count, &uinput->blocks_count);
+               err |= get_user(input.reserved_blocks,
+                               &uinput->reserved_blocks);
+               if (err)
+                       return -EFAULT;
+               old_fs = get_fs();
+               set_fs(KERNEL_DS);
+               err = ext4_ioctl(file, EXT4_IOC_GROUP_ADD,
+                                (unsigned long) &input);
+               set_fs(old_fs);
+               return err;
+       }
+       case EXT4_IOC_MOVE_EXT:
                break;
        default:
                return -ENOIOCTLCMD;
index b423a36..12b3bc0 100644 (file)
@@ -658,6 +658,27 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
        }
 }
 
+/*
+ * Cache the order of the largest free extent we have available in this block
+ * group.
+ */
+static void
+mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
+{
+       int i;
+       int bits;
+
+       grp->bb_largest_free_order = -1; /* uninit */
+
+       bits = sb->s_blocksize_bits + 1;
+       for (i = bits; i >= 0; i--) {
+               if (grp->bb_counters[i] > 0) {
+                       grp->bb_largest_free_order = i;
+                       break;
+               }
+       }
+}
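+
Caching the largest free order lets ext4_mb_good_group() reject a group without touching its buddy page at all; the cr == 0 fast path later in this file reduces to a single comparison (condensed from the hunk further down):

	case 0:
		/* power-of-two request: the cached order answers it outright */
		if (grp->bb_largest_free_order < ac->ac_2order)
			return 0;	/* no free extent of the needed order */
		/* ... flexgroup placement checks ... */
		return 1;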
+
 static noinline_for_stack
 void ext4_mb_generate_buddy(struct super_block *sb,
                                void *buddy, void *bitmap, ext4_group_t group)
@@ -700,6 +721,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
                 */
                grp->bb_free = free;
        }
+       mb_set_largest_free_order(sb, grp);
 
        clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
 
@@ -725,6 +747,9 @@ void ext4_mb_generate_buddy(struct super_block *sb,
  * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize)  blocks.
  * So it can have information regarding groups_per_page which
  * is blocks_per_page/2
+ *
+ * Locking note:  This routine takes the block group lock of all groups
+ * for this page; do not hold this lock when calling this routine!
  */
 
 static int ext4_mb_init_cache(struct page *page, char *incore)
@@ -865,6 +890,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                        BUG_ON(incore == NULL);
                        mb_debug(1, "put buddy for group %u in page %lu/%x\n",
                                group, page->index, i * blocksize);
+                       trace_ext4_mb_buddy_bitmap_load(sb, group);
                        grinfo = ext4_get_group_info(sb, group);
                        grinfo->bb_fragments = 0;
                        memset(grinfo->bb_counters, 0,
@@ -882,6 +908,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                        BUG_ON(incore != NULL);
                        mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
                                group, page->index, i * blocksize);
+                       trace_ext4_mb_bitmap_load(sb, group);
 
                        /* see comments in ext4_mb_put_pa() */
                        ext4_lock_group(sb, group);
@@ -910,6 +937,11 @@ out:
        return err;
 }
 
+/*
+ * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
+ * block group lock of all groups for this page; do not hold the BG lock when
+ * calling this routine!
+ */
 static noinline_for_stack
 int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 {
@@ -1004,6 +1036,11 @@ err:
        return ret;
 }
 
+/*
+ * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
+ * block group lock of all groups for this page; do not hold the BG lock when
+ * calling this routine!
+ */
 static noinline_for_stack int
 ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
                                        struct ext4_buddy *e4b)
@@ -1150,7 +1187,7 @@ err:
        return ret;
 }
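
Taken together, the locking notes above prescribe a fixed sequence, which the allocator loop further down now follows; condensed from the ext4_mb_regular_allocator() hunk:

	/* 1: cheap check, no BG lock held (may init the group) */
	if (!ext4_mb_good_group(ac, group, cr))
		continue;

	/* 2: load the buddy page -- takes the BG locks internally */
	err = ext4_mb_load_buddy(sb, group, &e4b);
	if (err)
		goto out;

	/* 3: lock, re-check (another CPU may have allocated), scan */
	ext4_lock_group(sb, group);
	if (!ext4_mb_good_group(ac, group, cr)) {
		ext4_unlock_group(sb, group);
		ext4_mb_unload_buddy(&e4b);
		continue;
	}
	/* ... scan the group ... */
	ext4_unlock_group(sb, group);
	ext4_mb_unload_buddy(&e4b);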
 
-static void ext4_mb_release_desc(struct ext4_buddy *e4b)
+static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
 {
        if (e4b->bd_bitmap_page)
                page_cache_release(e4b->bd_bitmap_page);
@@ -1299,6 +1336,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                        buddy = buddy2;
                } while (1);
        }
+       mb_set_largest_free_order(sb, e4b->bd_info);
        mb_check_buddy(e4b);
 }
 
@@ -1427,6 +1465,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
                e4b->bd_info->bb_counters[ord]++;
                e4b->bd_info->bb_counters[ord]++;
        }
+       mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
 
        mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
        mb_check_buddy(e4b);
@@ -1617,7 +1656,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
        }
 
        ext4_unlock_group(ac->ac_sb, group);
-       ext4_mb_release_desc(e4b);
+       ext4_mb_unload_buddy(e4b);
 
        return 0;
 }
@@ -1672,7 +1711,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
                ext4_mb_use_best_found(ac, e4b);
        }
        ext4_unlock_group(ac->ac_sb, group);
-       ext4_mb_release_desc(e4b);
+       ext4_mb_unload_buddy(e4b);
 
        return 0;
 }
@@ -1821,16 +1860,22 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
        }
 }
 
+/* This is now called BEFORE we load the buddy bitmap. */
 static int ext4_mb_good_group(struct ext4_allocation_context *ac,
                                ext4_group_t group, int cr)
 {
        unsigned free, fragments;
-       unsigned i, bits;
        int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
        struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
 
        BUG_ON(cr < 0 || cr >= 4);
-       BUG_ON(EXT4_MB_GRP_NEED_INIT(grp));
+
+       /* We only do this if the grp has never been initialized */
+       if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
+               int ret = ext4_mb_init_group(ac->ac_sb, group);
+               if (ret)
+                       return 0;
+       }
 
        free = grp->bb_free;
        fragments = grp->bb_fragments;
@@ -1843,17 +1888,16 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
        case 0:
                BUG_ON(ac->ac_2order == 0);
 
+               if (grp->bb_largest_free_order < ac->ac_2order)
+                       return 0;
+
                /* Avoid using the first bg of a flexgroup for data files */
                if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
                    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
                    ((group % flex_size) == 0))
                        return 0;
 
-               bits = ac->ac_sb->s_blocksize_bits + 1;
-               for (i = ac->ac_2order; i <= bits; i++)
-                       if (grp->bb_counters[i] > 0)
-                               return 1;
-               break;
+               return 1;
        case 1:
                if ((free / fragments) >= ac->ac_g_ex.fe_len)
                        return 1;
@@ -1964,7 +2008,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
        sbi = EXT4_SB(sb);
        ngroups = ext4_get_groups_count(sb);
        /* non-extent files are limited to low blocks/groups */
-       if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL))
+       if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
                ngroups = sbi->s_blockfile_groups;
 
        BUG_ON(ac->ac_status == AC_STATUS_FOUND);
@@ -2024,15 +2068,11 @@ repeat:
                group = ac->ac_g_ex.fe_group;
 
                for (i = 0; i < ngroups; group++, i++) {
-                       struct ext4_group_info *grp;
-                       struct ext4_group_desc *desc;
-
                        if (group == ngroups)
                                group = 0;
 
-                       /* quick check to skip empty groups */
-                       grp = ext4_get_group_info(sb, group);
-                       if (grp->bb_free == 0)
+                       /* This now checks without needing the buddy page */
+                       if (!ext4_mb_good_group(ac, group, cr))
                                continue;
 
                        err = ext4_mb_load_buddy(sb, group, &e4b);
@@ -2040,15 +2080,18 @@ repeat:
                                goto out;
 
                        ext4_lock_group(sb, group);
+
+                       /*
+                        * We need to check again after locking the
+                        * block group
+                        */
                        if (!ext4_mb_good_group(ac, group, cr)) {
-                               /* someone did allocation from this group */
                                ext4_unlock_group(sb, group);
-                               ext4_mb_release_desc(&e4b);
+                               ext4_mb_unload_buddy(&e4b);
                                continue;
                        }
 
                        ac->ac_groups_scanned++;
-                       desc = ext4_get_group_desc(sb, group, NULL);
                        if (cr == 0)
                                ext4_mb_simple_scan_group(ac, &e4b);
                        else if (cr == 1 &&
@@ -2058,7 +2101,7 @@ repeat:
                                ext4_mb_complex_scan_group(ac, &e4b);
 
                        ext4_unlock_group(sb, group);
-                       ext4_mb_release_desc(&e4b);
+                       ext4_mb_unload_buddy(&e4b);
 
                        if (ac->ac_status != AC_STATUS_CONTINUE)
                                break;
@@ -2148,7 +2191,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
        ext4_lock_group(sb, group);
        memcpy(&sg, ext4_get_group_info(sb, group), i);
        ext4_unlock_group(sb, group);
-       ext4_mb_release_desc(&e4b);
+       ext4_mb_unload_buddy(&e4b);
 
        seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
                        sg.info.bb_fragments, sg.info.bb_first_free);
@@ -2255,6 +2298,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
        INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
        init_rwsem(&meta_group_info[i]->alloc_sem);
        meta_group_info[i]->bb_free_root = RB_ROOT;
+       meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */
 
 #ifdef DOUBLE_CHECK
        {
@@ -2536,6 +2580,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                         entry->count, entry->group, entry);
 
                if (test_opt(sb, DISCARD)) {
+                       int ret;
                        ext4_fsblk_t discard_block;
 
                        discard_block = entry->start_blk +
@@ -2543,7 +2588,12 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                        trace_ext4_discard_blocks(sb,
                                        (unsigned long long)discard_block,
                                        entry->count);
-                       sb_issue_discard(sb, discard_block, entry->count);
+                       ret = sb_issue_discard(sb, discard_block, entry->count);
+                       if (ret == -EOPNOTSUPP) {
+                               ext4_warning(sb,
+                                       "discard not supported, disabling");
+                               clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
+                       }
                }
 
                err = ext4_mb_load_buddy(sb, entry->group, &e4b);
@@ -2568,7 +2618,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                }
                ext4_unlock_group(sb, entry->group);
                kmem_cache_free(ext4_free_ext_cachep, entry);
-               ext4_mb_release_desc(&e4b);
+               ext4_mb_unload_buddy(&e4b);
        }
 
        mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
@@ -2641,7 +2691,7 @@ int __init init_ext4_mballoc(void)
 
 void exit_ext4_mballoc(void)
 {
-       /* 
+       /*
         * Wait for completion of call_rcu()'s on ext4_pspace_cachep
         * before destroying the slab cache.
         */
@@ -2981,7 +3031,7 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
        if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
                atomic_inc(&sbi->s_bal_reqs);
                atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
-               if (ac->ac_o_ex.fe_len >= ac->ac_g_ex.fe_len)
+               if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
                        atomic_inc(&sbi->s_bal_success);
                atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
                if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
@@ -3123,7 +3173,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
                        continue;
 
                /* non-extent files can't have physical blocks past 2^32 */
-               if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL) &&
+               if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
                        pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS)
                        continue;
 
@@ -3280,7 +3330,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
        spin_unlock(&pa->pa_lock);
 
        grp_blk = pa->pa_pstart;
-       /* 
+       /*
         * If doing group-based preallocation, pa_pstart may be in the
         * next group when pa is used up
         */
@@ -3697,7 +3747,7 @@ out:
        ext4_unlock_group(sb, group);
        if (ac)
                kmem_cache_free(ext4_ac_cachep, ac);
-       ext4_mb_release_desc(&e4b);
+       ext4_mb_unload_buddy(&e4b);
        put_bh(bitmap_bh);
        return free;
 }
@@ -3801,7 +3851,7 @@ repeat:
                if (bitmap_bh == NULL) {
                        ext4_error(sb, "Error reading block bitmap for %u",
                                        group);
-                       ext4_mb_release_desc(&e4b);
+                       ext4_mb_unload_buddy(&e4b);
                        continue;
                }
 
@@ -3810,7 +3860,7 @@ repeat:
                ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
                ext4_unlock_group(sb, group);
 
-               ext4_mb_release_desc(&e4b);
+               ext4_mb_unload_buddy(&e4b);
                put_bh(bitmap_bh);
 
                list_del(&pa->u.pa_tmp_list);
@@ -4074,7 +4124,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
                ext4_mb_release_group_pa(&e4b, pa, ac);
                ext4_unlock_group(sb, group);
 
-               ext4_mb_release_desc(&e4b);
+               ext4_mb_unload_buddy(&e4b);
                list_del(&pa->u.pa_tmp_list);
                call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
        }
@@ -4484,12 +4534,12 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
                        if (!bh)
                                tbh = sb_find_get_block(inode->i_sb,
                                                        block + i);
-                       ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, 
+                       ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
                                    inode, tbh, block + i);
                }
        }
 
-       /* 
+       /*
         * We need to make sure we don't reuse the freed block until
         * after the transaction is committed, which we can do by
         * treating the block as metadata, below.  We make an
@@ -4610,7 +4660,7 @@ do_more:
                atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
        }
 
-       ext4_mb_release_desc(&e4b);
+       ext4_mb_unload_buddy(&e4b);
 
        freed += count;
 
index 34dcfc5..6f3a27e 100644 (file)
@@ -475,7 +475,7 @@ int ext4_ext_migrate(struct inode *inode)
         */
        if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
                                       EXT4_FEATURE_INCOMPAT_EXTENTS) ||
-           (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+           (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return -EINVAL;
 
        if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
index d1fc662..3a6c92a 100644 (file)
@@ -482,6 +482,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
        int depth = ext_depth(orig_inode);
        int ret;
 
+       start_ext.ee_block = end_ext.ee_block = 0;
        o_start = o_end = oext = orig_path[depth].p_ext;
        oext_alen = ext4_ext_get_actual_len(oext);
        start_ext.ee_len = end_ext.ee_len = 0;
@@ -529,7 +530,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
         * new_ext       |-------|
         */
        if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
-               ext4_error(orig_inode->i_sb,
+               EXT4_ERROR_INODE(orig_inode,
                        "new_ext_end(%u) should be less than or equal to "
                        "oext->ee_block(%u) + oext_alen(%d) - 1",
                        new_ext_end, le32_to_cpu(oext->ee_block),
@@ -692,12 +693,12 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
        while (1) {
                /* The extent for donor must be found. */
                if (!dext) {
-                       ext4_error(donor_inode->i_sb,
+                       EXT4_ERROR_INODE(donor_inode,
                                   "The extent for donor must be found");
                        *err = -EIO;
                        goto out;
                } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
-                       ext4_error(donor_inode->i_sb,
+                       EXT4_ERROR_INODE(donor_inode,
                                "Donor offset(%u) and the first block of donor "
                                "extent(%u) should be equal",
                                donor_off,
@@ -976,11 +977,11 @@ mext_check_arguments(struct inode *orig_inode,
        }
 
        /* Ext4 move extent supports only extent based file */
-       if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) {
+       if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
                ext4_debug("ext4 move extent: orig file is not extents "
                        "based file [ino:orig %lu]\n", orig_inode->i_ino);
                return -EOPNOTSUPP;
-       } else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) {
+       } else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) {
                ext4_debug("ext4 move extent: donor file is not extents "
                        "based file [ino:donor %lu]\n", donor_inode->i_ino);
                return -EOPNOTSUPP;
@@ -1354,7 +1355,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
                        if (ret1 < 0)
                                break;
                        if (*moved_len > len) {
-                               ext4_error(orig_inode->i_sb,
+                               EXT4_ERROR_INODE(orig_inode,
                                        "We replaced blocks too much! "
                                        "sum of replaced: %llu requested: %llu",
                                        *moved_len, len);
index 0c070fa..a43e661 100644 (file)
@@ -187,7 +187,7 @@ unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
                return blocksize;
        return (len & 65532) | ((len & 3) << 16);
 }
-  
+
 __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
 {
        if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
@@ -197,7 +197,7 @@ __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
        if (len == blocksize) {
                if (blocksize == 65536)
                        return cpu_to_le16(EXT4_MAX_REC_LEN);
-               else 
+               else
                        return cpu_to_le16(0);
        }
        return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
@@ -349,7 +349,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
                brelse(bh);
        }
        if (bcount)
-               printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", 
+               printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
                       levels ? "" : "   ", names, space/bcount,
                       (space/bcount)*100/blocksize);
        return (struct stats) { names, space, bcount};
@@ -653,10 +653,10 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
        int ret, err;
        __u32 hashval;
 
-       dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", 
+       dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
                       start_hash, start_minor_hash));
        dir = dir_file->f_path.dentry->d_inode;
-       if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
+       if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
                hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
                if (hinfo.hash_version <= DX_HASH_TEA)
                        hinfo.hash_version +=
@@ -801,7 +801,7 @@ static void ext4_update_dx_flag(struct inode *inode)
 {
        if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
                                     EXT4_FEATURE_COMPAT_DIR_INDEX))
-               EXT4_I(inode)->i_flags &= ~EXT4_INDEX_FL;
+               ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
 }
 
 /*
@@ -943,8 +943,8 @@ restart:
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh)) {
                        /* read error, skip block & hope for the best */
-                       ext4_error(sb, "reading directory #%lu offset %lu",
-                                  dir->i_ino, (unsigned long)block);
+                       EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
+                                        (unsigned long) block);
                        brelse(bh);
                        goto next;
                }
@@ -1066,15 +1066,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
                __u32 ino = le32_to_cpu(de->inode);
                brelse(bh);
                if (!ext4_valid_inum(dir->i_sb, ino)) {
-                       ext4_error(dir->i_sb, "bad inode number: %u", ino);
+                       EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
                        return ERR_PTR(-EIO);
                }
                inode = ext4_iget(dir->i_sb, ino);
                if (unlikely(IS_ERR(inode))) {
                        if (PTR_ERR(inode) == -ESTALE) {
-                               ext4_error(dir->i_sb,
-                                               "deleted inode referenced: %u",
-                                               ino);
+                               EXT4_ERROR_INODE(dir,
+                                                "deleted inode referenced: %u",
+                                                ino);
                                return ERR_PTR(-EIO);
                        } else {
                                return ERR_CAST(inode);
@@ -1104,8 +1104,8 @@ struct dentry *ext4_get_parent(struct dentry *child)
        brelse(bh);
 
        if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
-               ext4_error(child->d_inode->i_sb,
-                          "bad inode number: %u", ino);
+               EXT4_ERROR_INODE(child->d_inode,
+                                "bad parent inode number: %u", ino);
                return ERR_PTR(-EIO);
        }
 
@@ -1141,7 +1141,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
        unsigned rec_len = 0;
 
        while (count--) {
-               struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) 
+               struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
                                                (from + (map->offs<<2));
                rec_len = EXT4_DIR_REC_LEN(de->name_len);
                memcpy (to, de, rec_len);
@@ -1404,9 +1404,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
        de = (struct ext4_dir_entry_2 *)((char *)fde +
                ext4_rec_len_from_disk(fde->rec_len, blocksize));
        if ((char *) de >= (((char *) root) + blocksize)) {
-               ext4_error(dir->i_sb,
-                          "invalid rec_len for '..' in inode %lu",
-                          dir->i_ino);
+               EXT4_ERROR_INODE(dir, "invalid rec_len for '..'");
                brelse(bh);
                return -EIO;
        }
@@ -1418,7 +1416,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
                brelse(bh);
                return retval;
        }
-       EXT4_I(dir)->i_flags |= EXT4_INDEX_FL;
+       ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
        data1 = bh2->b_data;
 
        memcpy (data1, de, len);
@@ -1491,7 +1489,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
                retval = ext4_dx_add_entry(handle, dentry, inode);
                if (!retval || (retval != ERR_BAD_DX_DIR))
                        return retval;
-               EXT4_I(dir)->i_flags &= ~EXT4_INDEX_FL;
+               ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
                dx_fallback++;
                ext4_mark_inode_dirty(handle, dir);
        }
@@ -1519,6 +1517,8 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
        de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
        retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
        brelse(bh);
+       if (retval == 0)
+               ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
        return retval;
 }
 
@@ -1915,9 +1915,8 @@ static int empty_dir(struct inode *inode)
        if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
            !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
                if (err)
-                       ext4_error(inode->i_sb,
-                                  "error %d reading directory #%lu offset 0",
-                                  err, inode->i_ino);
+                       EXT4_ERROR_INODE(inode,
+                               "error %d reading directory lblock 0", err);
                else
                        ext4_warning(inode->i_sb,
                                     "bad directory (dir #%lu) - no data block",
@@ -1941,17 +1940,17 @@ static int empty_dir(struct inode *inode)
        de = ext4_next_entry(de1, sb->s_blocksize);
        while (offset < inode->i_size) {
                if (!bh ||
-                       (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
+                   (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
+                       unsigned int lblock;
                        err = 0;
                        brelse(bh);
-                       bh = ext4_bread(NULL, inode,
-                               offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
+                       lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
+                       bh = ext4_bread(NULL, inode, lblock, 0, &err);
                        if (!bh) {
                                if (err)
-                                       ext4_error(sb,
-                                                  "error %d reading directory"
-                                                  " #%lu offset %u",
-                                                  err, inode->i_ino, offset);
+                                       EXT4_ERROR_INODE(inode,
+                                               "error %d reading directory "
+                                               "lblock %u", err, lblock);
                                offset += sb->s_blocksize;
                                continue;
                        }
@@ -2297,7 +2296,7 @@ retry:
                }
        } else {
                /* clear the extent format for fast symlink */
-               EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
+               ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
                inode->i_op = &ext4_fast_symlink_inode_operations;
                memcpy((char *)&EXT4_I(inode)->i_data, symname, l);
                inode->i_size = l-1;
index 5692c48..6df797e 100644 (file)
@@ -911,7 +911,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
        percpu_counter_add(&sbi->s_freeinodes_counter,
                           EXT4_INODES_PER_GROUP(sb));
 
-       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
+           sbi->s_log_groups_per_flex) {
                ext4_group_t flex_group;
                flex_group = ext4_flex_group(sbi, input->group);
                atomic_add(input->free_blocks_count,
index e14d22c..49d88c0 100644 (file)
@@ -241,6 +241,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
        if (sb->s_flags & MS_RDONLY)
                return ERR_PTR(-EROFS);
 
+       vfs_check_frozen(sb, SB_FREEZE_WRITE);
        /* Special case here: if the journal has aborted behind our
         * backs (eg. EIO in the commit thread), then we still need to
         * take the FS itself readonly cleanly. */
@@ -941,6 +942,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
        seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
        if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
                seq_puts(seq, ",journal_async_commit");
+       else if (test_opt(sb, JOURNAL_CHECKSUM))
+               seq_puts(seq, ",journal_checksum");
        if (test_opt(sb, NOBH))
                seq_puts(seq, ",nobh");
        if (test_opt(sb, I_VERSION))
@@ -2213,7 +2216,7 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
 struct ext4_attr {
        struct attribute attr;
        ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
-       ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, 
+       ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
                         const char *, size_t);
        int offset;
 };
@@ -2430,6 +2433,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                                __releases(kernel_lock)
                                __acquires(kernel_lock)
 {
+       char *orig_data = kstrdup(data, GFP_KERNEL);
        struct buffer_head *bh;
        struct ext4_super_block *es = NULL;
        struct ext4_sb_info *sbi;
@@ -2793,24 +2797,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
        spin_lock_init(&sbi->s_next_gen_lock);
 
-       err = percpu_counter_init(&sbi->s_freeblocks_counter,
-                       ext4_count_free_blocks(sb));
-       if (!err) {
-               err = percpu_counter_init(&sbi->s_freeinodes_counter,
-                               ext4_count_free_inodes(sb));
-       }
-       if (!err) {
-               err = percpu_counter_init(&sbi->s_dirs_counter,
-                               ext4_count_dirs(sb));
-       }
-       if (!err) {
-               err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
-       }
-       if (err) {
-               ext4_msg(sb, KERN_ERR, "insufficient memory");
-               goto failed_mount3;
-       }
-
        sbi->s_stripe = ext4_get_stripe_size(sbi);
        sbi->s_max_writeback_mb_bump = 128;
 
@@ -2910,6 +2896,20 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
 
 no_journal:
+       err = percpu_counter_init(&sbi->s_freeblocks_counter,
+                                 ext4_count_free_blocks(sb));
+       if (!err)
+               err = percpu_counter_init(&sbi->s_freeinodes_counter,
+                                         ext4_count_free_inodes(sb));
+       if (!err)
+               err = percpu_counter_init(&sbi->s_dirs_counter,
+                                         ext4_count_dirs(sb));
+       if (!err)
+               err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+       if (err) {
+               ext4_msg(sb, KERN_ERR, "insufficient memory");
+               goto failed_mount_wq;
+       }
        if (test_opt(sb, NOBH)) {
                if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
                        ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
@@ -3001,7 +3001,7 @@ no_journal:
        err = ext4_setup_system_zone(sb);
        if (err) {
                ext4_msg(sb, KERN_ERR, "failed to initialize system "
-                        "zone (%d)\n", err);
+                        "zone (%d)", err);
                goto failed_mount4;
        }
 
@@ -3040,9 +3040,11 @@ no_journal:
        } else
                descr = "out journal";
 
-       ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr);
+       ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
+               "Opts: %s", descr, orig_data);
 
        lock_kernel();
+       kfree(orig_data);
        return 0;
 
 cantfind_ext4:
@@ -3059,6 +3061,10 @@ failed_mount_wq:
                jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
        }
+       percpu_counter_destroy(&sbi->s_freeblocks_counter);
+       percpu_counter_destroy(&sbi->s_freeinodes_counter);
+       percpu_counter_destroy(&sbi->s_dirs_counter);
+       percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount3:
        if (sbi->s_flex_groups) {
                if (is_vmalloc_addr(sbi->s_flex_groups))
@@ -3066,10 +3072,6 @@ failed_mount3:
                else
                        kfree(sbi->s_flex_groups);
        }
-       percpu_counter_destroy(&sbi->s_freeblocks_counter);
-       percpu_counter_destroy(&sbi->s_freeinodes_counter);
-       percpu_counter_destroy(&sbi->s_dirs_counter);
-       percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount2:
        for (i = 0; i < db_count; i++)
                brelse(sbi->s_group_desc[i]);
@@ -3089,6 +3091,7 @@ out_fail:
        kfree(sbi->s_blockgroup_lock);
        kfree(sbi);
        lock_kernel();
+       kfree(orig_data);
        return ret;
 }
 
@@ -3380,7 +3383,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
        if (!(sb->s_flags & MS_RDONLY))
                es->s_wtime = cpu_to_le32(get_seconds());
        es->s_kbytes_written =
-               cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 
+               cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
                            ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
                              EXT4_SB(sb)->s_sectors_written_start) >> 1));
        ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
@@ -3485,8 +3488,10 @@ int ext4_force_commit(struct super_block *sb)
                return 0;
 
        journal = EXT4_SB(sb)->s_journal;
-       if (journal)
+       if (journal) {
+               vfs_check_frozen(sb, SB_FREEZE_WRITE);
                ret = ext4_journal_force_commit(journal);
+       }
 
        return ret;
 }
@@ -3535,18 +3540,16 @@ static int ext4_freeze(struct super_block *sb)
         * the journal.
         */
        error = jbd2_journal_flush(journal);
-       if (error < 0) {
-       out:
-               jbd2_journal_unlock_updates(journal);
-               return error;
-       }
+       if (error < 0)
+               goto out;
 
        /* Journal blocked and flushed, clear needs_recovery flag. */
        EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
        error = ext4_commit_super(sb, 1);
-       if (error)
-               goto out;
-       return 0;
+out:
+       /* we rely on s_frozen to stop further updates */
+       jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+       return error;
 }
 
 /*
@@ -3563,7 +3566,6 @@ static int ext4_unfreeze(struct super_block *sb)
        EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
        ext4_commit_super(sb, 1);
        unlock_super(sb);
-       jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
        return 0;
 }
 
@@ -3580,6 +3582,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 #ifdef CONFIG_QUOTA
        int i;
 #endif
+       char *orig_data = kstrdup(data, GFP_KERNEL);
 
        lock_kernel();
 
@@ -3713,6 +3716,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 #endif
        unlock_super(sb);
        unlock_kernel();
+
+       ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
+       kfree(orig_data);
        return 0;
 
 restore_opts:
@@ -3734,6 +3740,7 @@ restore_opts:
 #endif
        unlock_super(sb);
        unlock_kernel();
+       kfree(orig_data);
        return err;
 }
 
@@ -4141,6 +4148,7 @@ static int __init init_ext4_fs(void)
 {
        int err;
 
+       ext4_check_flag_values();
        err = init_ext4_system_zone();
        if (err)
                return err;
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 00740cb..ed9354a 100644 (file)
@@ -34,6 +34,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
        .readlink       = generic_readlink,
        .follow_link    = page_follow_link_light,
        .put_link       = page_put_link,
+       .setattr        = ext4_setattr,
 #ifdef CONFIG_EXT4_FS_XATTR
        .setxattr       = generic_setxattr,
        .getxattr       = generic_getxattr,
@@ -45,6 +46,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
 const struct inode_operations ext4_fast_symlink_inode_operations = {
        .readlink       = generic_readlink,
        .follow_link    = ext4_follow_link,
+       .setattr        = ext4_setattr,
 #ifdef CONFIG_EXT4_FS_XATTR
        .setxattr       = generic_setxattr,
        .getxattr       = generic_getxattr,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 2de0e95..0433800 100644 (file)
@@ -228,9 +228,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
                atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
        if (ext4_xattr_check_block(bh)) {
 bad_block:
-               ext4_error(inode->i_sb,
-                          "inode %lu: bad block %llu", inode->i_ino,
-                          EXT4_I(inode)->i_file_acl);
+               EXT4_ERROR_INODE(inode, "bad block %llu",
+                                EXT4_I(inode)->i_file_acl);
                error = -EIO;
                goto cleanup;
        }
@@ -372,9 +371,8 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
        ea_bdebug(bh, "b_count=%d, refcount=%d",
                atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
        if (ext4_xattr_check_block(bh)) {
-               ext4_error(inode->i_sb,
-                          "inode %lu: bad block %llu", inode->i_ino,
-                          EXT4_I(inode)->i_file_acl);
+               EXT4_ERROR_INODE(inode, "bad block %llu",
+                                EXT4_I(inode)->i_file_acl);
                error = -EIO;
                goto cleanup;
        }
@@ -666,8 +664,8 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
                        atomic_read(&(bs->bh->b_count)),
                        le32_to_cpu(BHDR(bs->bh)->h_refcount));
                if (ext4_xattr_check_block(bs->bh)) {
-                       ext4_error(sb, "inode %lu: bad block %llu",
-                                  inode->i_ino, EXT4_I(inode)->i_file_acl);
+                       EXT4_ERROR_INODE(inode, "bad block %llu",
+                                        EXT4_I(inode)->i_file_acl);
                        error = -EIO;
                        goto cleanup;
                }
@@ -820,7 +818,7 @@ inserted:
                                                EXT4_I(inode)->i_block_group);
 
                        /* non-extent files can't have physical blocks past 2^32 */
-                       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+                       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                                goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
 
                        block = ext4_new_meta_blocks(handle, inode,
@@ -828,7 +826,7 @@ inserted:
                        if (error)
                                goto cleanup;
 
-                       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+                       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                                BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
 
                        ea_idebug(inode, "creating block %d", block);
@@ -880,8 +878,8 @@ cleanup_dquot:
        goto cleanup;
 
 bad_block:
-       ext4_error(inode->i_sb, "inode %lu: bad block %llu",
-                  inode->i_ino, EXT4_I(inode)->i_file_acl);
+       EXT4_ERROR_INODE(inode, "bad block %llu",
+                        EXT4_I(inode)->i_file_acl);
        goto cleanup;
 
 #undef header
@@ -1194,8 +1192,8 @@ retry:
                if (!bh)
                        goto cleanup;
                if (ext4_xattr_check_block(bh)) {
-                       ext4_error(inode->i_sb, "inode %lu: bad block %llu",
-                                  inode->i_ino, EXT4_I(inode)->i_file_acl);
+                       EXT4_ERROR_INODE(inode, "bad block %llu",
+                                        EXT4_I(inode)->i_file_acl);
                        error = -EIO;
                        goto cleanup;
                }
@@ -1372,14 +1370,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
                goto cleanup;
        bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
        if (!bh) {
-               ext4_error(inode->i_sb, "inode %lu: block %llu read error",
-                          inode->i_ino, EXT4_I(inode)->i_file_acl);
+               EXT4_ERROR_INODE(inode, "block %llu read error",
+                                EXT4_I(inode)->i_file_acl);
                goto cleanup;
        }
        if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
            BHDR(bh)->h_blocks != cpu_to_le32(1)) {
-               ext4_error(inode->i_sb, "inode %lu: bad block %llu",
-                          inode->i_ino, EXT4_I(inode)->i_file_acl);
+               EXT4_ERROR_INODE(inode, "bad block %llu",
+                                EXT4_I(inode)->i_file_acl);
                goto cleanup;
        }
        ext4_xattr_release_block(handle, inode, bh);
@@ -1504,9 +1502,8 @@ again:
                }
                bh = sb_bread(inode->i_sb, ce->e_block);
                if (!bh) {
-                       ext4_error(inode->i_sb,
-                               "inode %lu: block %lu read error",
-                               inode->i_ino, (unsigned long) ce->e_block);
+                       EXT4_ERROR_INODE(inode, "block %lu read error",
+                                        (unsigned long) ce->e_block);
                } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
                                EXT4_XATTR_REFCOUNT_MAX) {
                        ea_idebug(inode, "block %lu refcount %d>=%d",
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index bfc70f5..e214d68 100644 (file)
@@ -1311,7 +1311,6 @@ int jbd2_journal_stop(handle_t *handle)
        if (handle->h_sync)
                transaction->t_synchronous_commit = 1;
        current->journal_info = NULL;
-       spin_lock(&journal->j_state_lock);
        spin_lock(&transaction->t_handle_lock);
        transaction->t_outstanding_credits -= handle->h_buffer_credits;
        transaction->t_updates--;
@@ -1340,8 +1339,7 @@ int jbd2_journal_stop(handle_t *handle)
                jbd_debug(2, "transaction too old, requesting commit for "
                                        "handle %p\n", handle);
                /* This is non-blocking */
-               __jbd2_log_start_commit(journal, transaction->t_tid);
-               spin_unlock(&journal->j_state_lock);
+               jbd2_log_start_commit(journal, transaction->t_tid);
 
                /*
                 * Special case: JBD2_SYNC synchronous updates require us
@@ -1351,7 +1349,6 @@ int jbd2_journal_stop(handle_t *handle)
                        err = jbd2_log_wait_commit(journal, tid);
        } else {
                spin_unlock(&transaction->t_handle_lock);
-               spin_unlock(&journal->j_state_lock);
        }
 
        lock_map_release(&handle->h_lockdep_map);
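
A sketch of the locking convention the jbd2_journal_stop() hunk above relies on (assumed from the jbd2 code of this period, not quoted from the patch): the double-underscore __jbd2_log_start_commit() expects j_state_lock to be held by the caller, while plain jbd2_log_start_commit() takes the lock itself, which is why dropping j_state_lock from jbd2_journal_stop() requires switching to the latter. A simplified sketch:

int jbd2_log_start_commit(journal_t *journal, tid_t tid)
{
	int ret;

	/* Take j_state_lock, then defer to the caller-locked variant. */
	spin_lock(&journal->j_state_lock);
	ret = __jbd2_log_start_commit(journal, tid);
	spin_unlock(&journal->j_state_lock);
	return ret;
}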
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 655a4c5..1ad8bf0 100644 (file)
@@ -1514,11 +1514,13 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
 /*
  * This operation can block, but only after everything is updated
  */
-int __dquot_alloc_space(struct inode *inode, qsize_t number,
-               int warn, int reserve)
+int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
 {
        int cnt, ret = 0;
        char warntype[MAXQUOTAS];
+       int warn = flags & DQUOT_SPACE_WARN;
+       int reserve = flags & DQUOT_SPACE_RESERVE;
+       int nofail = flags & DQUOT_SPACE_NOFAIL;
 
        /*
         * First test before acquiring mutex - solves deadlocks when we
@@ -1539,7 +1541,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
                        continue;
                ret = check_bdq(inode->i_dquot[cnt], number, !warn,
                                warntype+cnt);
-               if (ret) {
+               if (ret && !nofail) {
                        spin_unlock(&dq_data_lock);
                        goto out_flush_warn;
                }
@@ -1638,10 +1640,11 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty);
 /*
  * This operation can block, but only after everything is updated
  */
-void __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
+void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
 {
        unsigned int cnt;
        char warntype[MAXQUOTAS];
+       int reserve = flags & DQUOT_SPACE_RESERVE;
 
        /* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 370abb1..e38ae53 100644 (file)
@@ -9,6 +9,10 @@
 
 #include <linux/fs.h>
 
+#define DQUOT_SPACE_WARN       0x1
+#define DQUOT_SPACE_RESERVE    0x2
+#define DQUOT_SPACE_NOFAIL     0x4
+
 static inline struct quota_info *sb_dqopt(struct super_block *sb)
 {
        return &sb->s_dquot;
@@ -41,9 +45,8 @@ int dquot_scan_active(struct super_block *sb,
 struct dquot *dquot_alloc(struct super_block *sb, int type);
 void dquot_destroy(struct dquot *dquot);
 
-int __dquot_alloc_space(struct inode *inode, qsize_t number,
-               int warn, int reserve);
-void __dquot_free_space(struct inode *inode, qsize_t number, int reserve);
+int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags);
+void __dquot_free_space(struct inode *inode, qsize_t number, int flags);
 
 int dquot_alloc_inode(const struct inode *inode);
 
@@ -242,17 +245,17 @@ static inline int dquot_transfer(struct inode *inode, struct iattr *iattr)
 }
 
 static inline int __dquot_alloc_space(struct inode *inode, qsize_t number,
-               int warn, int reserve)
+               int flags)
 {
-       if (!reserve)
+       if (!(flags & DQUOT_SPACE_RESERVE))
                inode_add_bytes(inode, number);
        return 0;
 }
 
 static inline void __dquot_free_space(struct inode *inode, qsize_t number,
-               int reserve)
+               int flags)
 {
-       if (!reserve)
+       if (!(flags & DQUOT_SPACE_RESERVE))
                inode_sub_bytes(inode, number);
 }
 
@@ -268,7 +271,13 @@ static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
 
 static inline int dquot_alloc_space_nodirty(struct inode *inode, qsize_t nr)
 {
-       return __dquot_alloc_space(inode, nr, 1, 0);
+       return __dquot_alloc_space(inode, nr, DQUOT_SPACE_WARN);
+}
+
+static inline void dquot_alloc_space_nofail(struct inode *inode, qsize_t nr)
+{
+       __dquot_alloc_space(inode, nr, DQUOT_SPACE_WARN|DQUOT_SPACE_NOFAIL);
+       mark_inode_dirty(inode);
 }
 
 static inline int dquot_alloc_space(struct inode *inode, qsize_t nr)
@@ -286,6 +295,11 @@ static inline int dquot_alloc_block_nodirty(struct inode *inode, qsize_t nr)
        return dquot_alloc_space_nodirty(inode, nr << inode->i_blkbits);
 }
 
+static inline void dquot_alloc_block_nofail(struct inode *inode, qsize_t nr)
+{
+       dquot_alloc_space_nofail(inode, nr << inode->i_blkbits);
+}
+
 static inline int dquot_alloc_block(struct inode *inode, qsize_t nr)
 {
        return dquot_alloc_space(inode, nr << inode->i_blkbits);
@@ -293,7 +307,7 @@ static inline int dquot_alloc_block(struct inode *inode, qsize_t nr)
 
 static inline int dquot_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
 {
-       return __dquot_alloc_space(inode, nr << inode->i_blkbits, 0, 0);
+       return __dquot_alloc_space(inode, nr << inode->i_blkbits, 0);
 }
 
 static inline int dquot_prealloc_block(struct inode *inode, qsize_t nr)
@@ -308,7 +322,8 @@ static inline int dquot_prealloc_block(struct inode *inode, qsize_t nr)
 
 static inline int dquot_reserve_block(struct inode *inode, qsize_t nr)
 {
-       return __dquot_alloc_space(inode, nr << inode->i_blkbits, 1, 1);
+       return __dquot_alloc_space(inode, nr << inode->i_blkbits,
+                               DQUOT_SPACE_WARN|DQUOT_SPACE_RESERVE);
 }
 
 static inline int dquot_claim_block(struct inode *inode, qsize_t nr)
@@ -345,7 +360,7 @@ static inline void dquot_free_block(struct inode *inode, qsize_t nr)
 static inline void dquot_release_reservation_block(struct inode *inode,
                qsize_t nr)
 {
-       __dquot_free_space(inode, nr << inode->i_blkbits, 1);
+       __dquot_free_space(inode, nr << inode->i_blkbits, DQUOT_SPACE_RESERVE);
 }
 
 #endif /* _LINUX_QUOTAOPS_ */
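
A minimal caller sketch showing how the reworked flag-based interface above is meant to be used. dquot_alloc_block() and dquot_alloc_block_nofail() are the helpers defined in this header; the example_* functions are hypothetical wrappers, not part of the patch.

#include <linux/fs.h>
#include <linux/quotaops.h>

/* Data block: charge quota up front and fail the operation on EDQUOT. */
static int example_alloc_data_block(struct inode *inode)
{
	int err = dquot_alloc_block(inode, 1);

	if (err)
		return err;	/* over quota: abort the allocation */
	/* ... allocate the block on disk ... */
	return 0;
}

/*
 * Metadata backing already-reserved delalloc data: the allocation cannot
 * be unwound at this point, so account for it unconditionally via the
 * DQUOT_SPACE_NOFAIL path instead of failing mid-operation.
 */
static void example_account_meta_block(struct inode *inode)
{
	dquot_alloc_block_nofail(inode, 1);
}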
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 2aa6aa3..5d60ad4 100644 (file)
@@ -353,7 +353,7 @@ TRACE_EVENT(ext4_discard_blocks,
                  jbd2_dev_to_name(__entry->dev), __entry->blk, __entry->count)
 );
 
-TRACE_EVENT(ext4_mb_new_inode_pa,
+DECLARE_EVENT_CLASS(ext4__mb_new_pa,
        TP_PROTO(struct ext4_allocation_context *ac,
                 struct ext4_prealloc_space *pa),
 
@@ -381,32 +381,20 @@ TRACE_EVENT(ext4_mb_new_inode_pa,
                  __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart)
 );
 
-TRACE_EVENT(ext4_mb_new_group_pa,
+DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_inode_pa,
+
        TP_PROTO(struct ext4_allocation_context *ac,
                 struct ext4_prealloc_space *pa),
 
-       TP_ARGS(ac, pa),
-
-       TP_STRUCT__entry(
-               __field(        dev_t,  dev                     )
-               __field(        ino_t,  ino                     )
-               __field(        __u64,  pa_pstart               )
-               __field(        __u32,  pa_len                  )
-               __field(        __u64,  pa_lstart               )
+       TP_ARGS(ac, pa)
+);
 
-       ),
+DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa,
 
-       TP_fast_assign(
-               __entry->dev            = ac->ac_sb->s_dev;
-               __entry->ino            = ac->ac_inode->i_ino;
-               __entry->pa_pstart      = pa->pa_pstart;
-               __entry->pa_len         = pa->pa_len;
-               __entry->pa_lstart      = pa->pa_lstart;
-       ),
+       TP_PROTO(struct ext4_allocation_context *ac,
+                struct ext4_prealloc_space *pa),
 
-       TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu",
-                 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-                 __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart)
+       TP_ARGS(ac, pa)
 );
 
 TRACE_EVENT(ext4_mb_release_inode_pa,
@@ -790,7 +778,7 @@ TRACE_EVENT(ext4_mballoc_prealloc,
                  __entry->result_len, __entry->result_logical)
 );
 
-TRACE_EVENT(ext4_mballoc_discard,
+DECLARE_EVENT_CLASS(ext4__mballoc,
        TP_PROTO(struct ext4_allocation_context *ac),
 
        TP_ARGS(ac),
@@ -819,33 +807,18 @@ TRACE_EVENT(ext4_mballoc_discard,
                  __entry->result_len, __entry->result_logical)
 );
 
-TRACE_EVENT(ext4_mballoc_free,
+DEFINE_EVENT(ext4__mballoc, ext4_mballoc_discard,
+
        TP_PROTO(struct ext4_allocation_context *ac),
 
-       TP_ARGS(ac),
+       TP_ARGS(ac)
+);
 
-       TP_STRUCT__entry(
-               __field(        dev_t,  dev                     )
-               __field(        ino_t,  ino                     )
-               __field(        __u32,  result_logical          )
-               __field(          int,  result_start            )
-               __field(        __u32,  result_group            )
-               __field(          int,  result_len              )
-       ),
+DEFINE_EVENT(ext4__mballoc, ext4_mballoc_free,
 
-       TP_fast_assign(
-               __entry->dev            = ac->ac_inode->i_sb->s_dev;
-               __entry->ino            = ac->ac_inode->i_ino;
-               __entry->result_logical = ac->ac_b_ex.fe_logical;
-               __entry->result_start   = ac->ac_b_ex.fe_start;
-               __entry->result_group   = ac->ac_b_ex.fe_group;
-               __entry->result_len     = ac->ac_b_ex.fe_len;
-       ),
+       TP_PROTO(struct ext4_allocation_context *ac),
 
-       TP_printk("dev %s inode %lu extent %u/%d/%u@%u ",
-                 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-                 __entry->result_group, __entry->result_start,
-                 __entry->result_len, __entry->result_logical)
+       TP_ARGS(ac)
 );
 
 TRACE_EVENT(ext4_forget,
@@ -974,6 +947,39 @@ TRACE_EVENT(ext4_da_release_space,
                  __entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
 );
 
+DECLARE_EVENT_CLASS(ext4__bitmap_load,
+       TP_PROTO(struct super_block *sb, unsigned long group),
+
+       TP_ARGS(sb, group),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        __u32,  group                   )
+
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = sb->s_dev;
+               __entry->group  = group;
+       ),
+
+       TP_printk("dev %s group %u",
+                 jbd2_dev_to_name(__entry->dev), __entry->group)
+);
+
+DEFINE_EVENT(ext4__bitmap_load, ext4_mb_bitmap_load,
+
+       TP_PROTO(struct super_block *sb, unsigned long group),
+
+       TP_ARGS(sb, group)
+);
+
+DEFINE_EVENT(ext4__bitmap_load, ext4_mb_buddy_bitmap_load,
+
+       TP_PROTO(struct super_block *sb, unsigned long group),
+
+       TP_ARGS(sb, group)
+);
 
 #endif /* _TRACE_EXT4_H */
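
The DECLARE_EVENT_CLASS/DEFINE_EVENT conversions above are behavior-neutral: each event still gets its own trace_<name>() stub. A hedged call-site sketch (example_load_group() is hypothetical; the two tracepoints are the ones defined above):

#include <trace/events/ext4.h>

static void example_load_group(struct super_block *sb, unsigned long group)
{
	/* after reading the on-disk block bitmap for this group ... */
	trace_ext4_mb_bitmap_load(sb, group);

	/* ... and after building the in-memory buddy bitmap */
	trace_ext4_mb_buddy_bitmap_load(sb, group);
}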