Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 2 Nov 2011 17:06:20 +0000 (10:06 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 2 Nov 2011 17:06:20 +0000 (10:06 -0700)
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (97 commits)
  jbd2: Unify log messages in jbd2 code
  jbd/jbd2: validate sb->s_first in journal_get_superblock()
  ext4: let ext4_ext_rm_leaf work with EXT_DEBUG defined
  ext4: fix a syntax error in ext4_ext_insert_extent when debugging enabled
  ext4: fix a typo in struct ext4_allocation_context
  ext4: Don't normalize an falloc request if it can fit in 1 extent.
  ext4: remove comments about extent mount option in ext4_new_inode()
  ext4: let ext4_discard_partial_buffers handle unaligned range correctly
  ext4: return ENOMEM if find_or_create_pages fails
  ext4: move vars to local scope in ext4_discard_partial_page_buffers_no_lock()
  ext4: Create helper function for EXT4_IO_END_UNWRITTEN and i_aiodio_unwritten
  ext4: optimize locking for end_io extent conversion
  ext4: remove unnecessary call to waitqueue_active()
  ext4: Use correct locking for ext4_end_io_nolock()
  ext4: fix race in xattr block allocation path
  ext4: trace punch_hole correctly in ext4_ext_map_blocks
  ext4: clean up AGGRESSIVE_TEST code
  ext4: move variables to their scope
  ext4: fix quota accounting during migration
  ext4: migrate cleanup
  ...

fs/ext4/ext4.h
fs/ext4/file.c
fs/ext4/inode.c
fs/ext4/namei.c
include/linux/ext3_fs.h
include/linux/fs.h

diff --combined fs/ext4/ext4.h
@@@ -144,9 -144,17 +144,17 @@@ struct ext4_allocation_request 
  #define EXT4_MAP_UNWRITTEN    (1 << BH_Unwritten)
  #define EXT4_MAP_BOUNDARY     (1 << BH_Boundary)
  #define EXT4_MAP_UNINIT               (1 << BH_Uninit)
+ /* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
+  * ext4_map_blocks wants to know whether or not the underlying cluster has
+  * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
+  * the requested mapping was from a previously mapped (or delayed-allocated)
+  * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
+  * should never appear in a buffer_head's state flags.
+  */
+ #define EXT4_MAP_FROM_CLUSTER (1 << BH_AllocFromCluster)
  #define EXT4_MAP_FLAGS                (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
                                 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
-                                EXT4_MAP_UNINIT)
+                                EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
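
Illustrative sketch (not part of the patch): a delalloc-path caller of
ext4_map_blocks() consumes this flag roughly the way the new
ext4_da_map_blocks() further down in this diff does, clearing it before
it can leak into bh->b_state:

	if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER))
		/* cluster not yet accounted for -- reserve space for it */
		ret = ext4_da_reserve_space(inode, iblock);
	/* flag has served its purpose; it must never reach bh->b_state */
	map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;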
  
  struct ext4_map_blocks {
        ext4_fsblk_t m_pblk;
@@@ -239,8 -247,11 +247,11 @@@ struct ext4_io_submit 
  # define EXT4_BLOCK_SIZE(s)           (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
  #endif
  #define       EXT4_ADDR_PER_BLOCK(s)          (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
+ #define EXT4_CLUSTER_SIZE(s)          (EXT4_BLOCK_SIZE(s) << \
+                                        EXT4_SB(s)->s_cluster_bits)
  #ifdef __KERNEL__
  # define EXT4_BLOCK_SIZE_BITS(s)      ((s)->s_blocksize_bits)
+ # define EXT4_CLUSTER_BITS(s)         (EXT4_SB(s)->s_cluster_bits)
  #else
  # define EXT4_BLOCK_SIZE_BITS(s)      ((s)->s_log_block_size + 10)
  #endif
  #endif
  #define EXT4_BLOCK_ALIGN(size, blkbits)               ALIGN((size), (1 << (blkbits)))
  
+ /* Translate a block number to a cluster number */
+ #define EXT4_B2C(sbi, blk)    ((blk) >> (sbi)->s_cluster_bits)
+ /* Translate a cluster number to a block number */
+ #define EXT4_C2B(sbi, cluster)        ((cluster) << (sbi)->s_cluster_bits)
+ /* Translate # of blks to # of clusters */
+ #define EXT4_NUM_B2C(sbi, blks)       (((blks) + (sbi)->s_cluster_ratio - 1) >> \
+                                (sbi)->s_cluster_bits)
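
Worked example (hypothetical geometry: 4KiB blocks, cluster ratio 16,
so s_cluster_bits == 4):

	EXT4_B2C(sbi, 35);	/* 35 >> 4 == 2: block 35 lives in cluster 2 */
	EXT4_C2B(sbi, 2);	/* 2 << 4 == 32: cluster 2 starts at block 32 */
	EXT4_NUM_B2C(sbi, 20);	/* (20 + 15) >> 4 == 2 clusters cover 20 blocks */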
  /*
   * Structure of a blocks group descriptor
   */
@@@ -289,7 -308,7 +308,7 @@@ struct ext4_group_des
  
  struct flex_groups {
        atomic_t free_inodes;
-       atomic_t free_blocks;
+       atomic_t free_clusters;
        atomic_t used_dirs;
  };
  
  #define EXT4_DESC_SIZE(s)             (EXT4_SB(s)->s_desc_size)
  #ifdef __KERNEL__
  # define EXT4_BLOCKS_PER_GROUP(s)     (EXT4_SB(s)->s_blocks_per_group)
+ # define EXT4_CLUSTERS_PER_GROUP(s)   (EXT4_SB(s)->s_clusters_per_group)
  # define EXT4_DESC_PER_BLOCK(s)               (EXT4_SB(s)->s_desc_per_block)
  # define EXT4_INODES_PER_GROUP(s)     (EXT4_SB(s)->s_inodes_per_group)
  # define EXT4_DESC_PER_BLOCK_BITS(s)  (EXT4_SB(s)->s_desc_per_block_bits)
  
  /* Flags that should be inherited by new inodes from their parent. */
  #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
-                          EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
-                          EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
+                          EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
                           EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
                           EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
  
@@@ -520,6 -539,8 +539,8 @@@ struct ext4_new_group_data 
  #define EXT4_GET_BLOCKS_PUNCH_OUT_EXT         0x0020
        /* Don't normalize allocation size (used for fallocate) */
  #define EXT4_GET_BLOCKS_NO_NORMALIZE          0x0040
+       /* Request will not result in inode size update (used for fallocate) */
+ #define EXT4_GET_BLOCKS_KEEP_SIZE             0x0080
  
  /*
   * Flags used by ext4_free_blocks
  #define EXT4_FREE_BLOCKS_FORGET               0x0002
  #define EXT4_FREE_BLOCKS_VALIDATED    0x0004
  #define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE       0x0008
+ #define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
+ #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER  0x0020
+ /*
+  * Flags used by ext4_discard_partial_page_buffers
+  */
+ #define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED 0x0001
  
  /*
   * ioctl commands
  #define       EXT4_IOC_SETVERSION             _IOW('f', 4, long)
  #define       EXT4_IOC_GETVERSION_OLD         FS_IOC_GETVERSION
  #define       EXT4_IOC_SETVERSION_OLD         FS_IOC_SETVERSION
- #ifdef CONFIG_JBD2_DEBUG
- #define EXT4_IOC_WAIT_FOR_READONLY    _IOR('f', 99, long)
- #endif
  #define EXT4_IOC_GETRSVSZ             _IOR('f', 5, long)
  #define EXT4_IOC_SETRSVSZ             _IOW('f', 6, long)
  #define EXT4_IOC_GROUP_EXTEND         _IOW('f', 7, unsigned long)
  #define EXT4_IOC32_SETRSVSZ           _IOW('f', 6, int)
  #define EXT4_IOC32_GROUP_EXTEND               _IOW('f', 7, unsigned int)
  #define EXT4_IOC32_GROUP_ADD          _IOW('f', 8, struct compat_ext4_new_group_input)
- #ifdef CONFIG_JBD2_DEBUG
- #define EXT4_IOC32_WAIT_FOR_READONLY  _IOR('f', 99, int)
- #endif
  #define EXT4_IOC32_GETVERSION_OLD     FS_IOC32_GETVERSION
  #define EXT4_IOC32_SETVERSION_OLD     FS_IOC32_SETVERSION
  #endif
@@@ -837,6 -859,7 +859,7 @@@ struct ext4_inode_info 
        ext4_group_t    i_last_alloc_group;
  
        /* allocation reservation info for delalloc */
+       /* In case of bigalloc, these refer to clusters rather than blocks */
        unsigned int i_reserved_data_blocks;
        unsigned int i_reserved_meta_blocks;
        unsigned int i_allocated_meta_blocks;
  /*
   * Mount flags
   */
- #define EXT4_MOUNT_OLDALLOC           0x00002  /* Don't use the new Orlov allocator */
  #define EXT4_MOUNT_GRPID              0x00004 /* Create files with directory's group */
  #define EXT4_MOUNT_DEBUG              0x00008 /* Some debugging messages */
  #define EXT4_MOUNT_ERRORS_CONT                0x00010 /* Continue on errors */
  #define EXT4_MOUNT_DISCARD            0x40000000 /* Issue DISCARD requests */
  #define EXT4_MOUNT_INIT_INODE_TABLE   0x80000000 /* Initialize uninitialized itables */
  
+ #define EXT4_MOUNT2_EXPLICIT_DELALLOC 0x00000001 /* User explicitly
+                                                     specified delalloc */
  #define clear_opt(sb, opt)            EXT4_SB(sb)->s_mount_opt &= \
                                                ~EXT4_MOUNT_##opt
  #define set_opt(sb, opt)              EXT4_SB(sb)->s_mount_opt |= \
@@@ -968,9 -993,9 +993,9 @@@ struct ext4_super_block 
  /*10*/        __le32  s_free_inodes_count;    /* Free inodes count */
        __le32  s_first_data_block;     /* First Data Block */
        __le32  s_log_block_size;       /* Block size */
-       __le32  s_obso_log_frag_size;   /* Obsoleted fragment size */
+       __le32  s_log_cluster_size;     /* Allocation cluster size */
  /*20*/        __le32  s_blocks_per_group;     /* # Blocks per group */
-       __le32  s_obso_frags_per_group; /* Obsoleted fragments per group */
+       __le32  s_clusters_per_group;   /* # Clusters per group */
        __le32  s_inodes_per_group;     /* # Inodes per group */
        __le32  s_mtime;                /* Mount time */
  /*30*/        __le32  s_wtime;                /* Write time */
        __u8    s_last_error_func[32];  /* function where the error happened */
  #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
        __u8    s_mount_opts[64];
-       __le32  s_reserved[112];        /* Padding to the end of the block */
+       __le32  s_usr_quota_inum;       /* inode for tracking user quota */
+       __le32  s_grp_quota_inum;       /* inode for tracking group quota */
+       __le32  s_overhead_clusters;    /* overhead blocks/clusters in fs */
+       __le32  s_reserved[109];        /* Padding to the end of the block */
  };
  
  #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
@@@ -1086,6 -1114,7 +1114,7 @@@ struct ext4_sb_info 
        unsigned long s_desc_size;      /* Size of a group descriptor in bytes */
        unsigned long s_inodes_per_block;/* Number of inodes per block */
        unsigned long s_blocks_per_group;/* Number of blocks in a group */
+       unsigned long s_clusters_per_group; /* Number of clusters in a group */
        unsigned long s_inodes_per_group;/* Number of inodes in a group */
        unsigned long s_itb_per_group;  /* Number of inode table blocks per group */
        unsigned long s_gdb_count;      /* Number of group descriptor blocks */
        ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
        unsigned long s_overhead_last;  /* Last calculated overhead */
        unsigned long s_blocks_last;    /* Last seen block count */
+       unsigned int s_cluster_ratio;   /* Number of blocks per cluster */
+       unsigned int s_cluster_bits;    /* log2 of s_cluster_ratio */
        loff_t s_bitmap_maxbytes;       /* max bytes for bitmap files */
        struct buffer_head * s_sbh;     /* Buffer containing the super block */
        struct ext4_super_block *s_es;  /* Pointer to the super block in the buffer */
        u32 s_hash_seed[4];
        int s_def_hash_version;
        int s_hash_unsigned;    /* 3 if hash should be signed, 0 if not */
-       struct percpu_counter s_freeblocks_counter;
+       struct percpu_counter s_freeclusters_counter;
        struct percpu_counter s_freeinodes_counter;
        struct percpu_counter s_dirs_counter;
-       struct percpu_counter s_dirtyblocks_counter;
+       struct percpu_counter s_dirtyclusters_counter;
        struct blockgroup_lock *s_blockgroup_lock;
        struct proc_dir_entry *s_proc;
        struct kobject s_kobj;
        u32 s_max_batch_time;
        u32 s_min_batch_time;
        struct block_device *journal_bdev;
- #ifdef CONFIG_JBD2_DEBUG
-       struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
-       wait_queue_head_t ro_wait_queue;        /* For people waiting for the fs to go read-only */
- #endif
  #ifdef CONFIG_QUOTA
        char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
        int s_jquota_fmt;                       /* Format of quota to use */
@@@ -1248,6 -1275,15 +1275,15 @@@ static inline int ext4_valid_inum(struc
                 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
  }
  
+ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
+                                             struct ext4_io_end *io_end)
+ {
+       if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+               io_end->flag |= EXT4_IO_END_UNWRITTEN;
+               atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+       }
+ }
  /*
   * Inode dynamic state flags
   */
@@@ -1360,6 -1396,7 +1396,7 @@@ static inline void ext4_clear_state_fla
  #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK      0x0020
  #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE    0x0040
  #define EXT4_FEATURE_RO_COMPAT_QUOTA          0x0100
+ #define EXT4_FEATURE_RO_COMPAT_BIGALLOC               0x0200
  
  #define EXT4_FEATURE_INCOMPAT_COMPRESSION     0x0001
  #define EXT4_FEATURE_INCOMPAT_FILETYPE                0x0002
                                         EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
                                         EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
                                         EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
-                                        EXT4_FEATURE_RO_COMPAT_HUGE_FILE)
+                                        EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
+                                        EXT4_FEATURE_RO_COMPAT_BIGALLOC)
  
  /*
   * Default values for user and/or group using reserved blocks
@@@ -1735,9 -1773,9 +1773,9 @@@ extern ext4_fsblk_t ext4_new_meta_block
                                         unsigned int flags,
                                         unsigned long *count,
                                         int *errp);
- extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
-                                 s64 nblocks, unsigned int flags);
- extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
+ extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
+                                   s64 nclusters, unsigned int flags);
+ extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
  extern void ext4_check_blocks_bitmap(struct super_block *);
  extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
                                                    ext4_group_t block_group,
  extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
  struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
                                      ext4_group_t block_group);
- extern unsigned ext4_init_block_bitmap(struct super_block *sb,
-                                      struct buffer_head *bh,
-                                      ext4_group_t group,
-                                      struct ext4_group_desc *desc);
- #define ext4_free_blocks_after_init(sb, group, desc)                  \
-               ext4_init_block_bitmap(sb, NULL, group, desc)
+ extern void ext4_init_block_bitmap(struct super_block *sb,
+                                  struct buffer_head *bh,
+                                  ext4_group_t group,
+                                  struct ext4_group_desc *desc);
+ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
+                                             ext4_group_t block_group,
+                                             struct ext4_group_desc *gdp);
+ extern unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+                                           ext4_group_t block_group);
+ extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
+                                          ext4_group_t block_group,
+                                          struct ext4_group_desc *gdp);
  ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
  
  /* dir.c */
@@@ -1776,7 -1820,8 +1820,8 @@@ extern int ext4fs_dirhash(const char *n
  
  /* ialloc.c */
  extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
-                                   const struct qstr *qstr, __u32 goal);
+                                   const struct qstr *qstr, __u32 goal,
+                                   uid_t *owner);
  extern void ext4_free_inode(handle_t *, struct inode *);
  extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
  extern unsigned long ext4_count_free_inodes(struct super_block *);
@@@ -1839,6 -1884,12 +1884,12 @@@ extern int ext4_block_truncate_page(han
                struct address_space *mapping, loff_t from);
  extern int ext4_block_zero_page_range(handle_t *handle,
                struct address_space *mapping, loff_t from, loff_t length);
+ extern int ext4_discard_partial_page_buffers(handle_t *handle,
+               struct address_space *mapping, loff_t from,
+               loff_t length, int flags);
+ extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+               struct inode *inode, struct page *page, loff_t from,
+               loff_t length, int flags);
  extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
  extern qsize_t *ext4_get_reserved_space(struct inode *inode);
  extern void ext4_da_update_reserve_space(struct inode *inode,
@@@ -1878,40 -1929,40 +1929,40 @@@ extern int ext4_group_extend(struct sup
  extern void *ext4_kvmalloc(size_t size, gfp_t flags);
  extern void *ext4_kvzalloc(size_t size, gfp_t flags);
  extern void ext4_kvfree(void *ptr);
 -extern void __ext4_error(struct super_block *, const char *, unsigned int,
 -                       const char *, ...)
 -      __attribute__ ((format (printf, 4, 5)));
 +extern __printf(4, 5)
 +void __ext4_error(struct super_block *, const char *, unsigned int,
 +                const char *, ...);
  #define ext4_error(sb, message...)    __ext4_error(sb, __func__,      \
                                                     __LINE__, ## message)
 -extern void ext4_error_inode(struct inode *, const char *, unsigned int,
 -                           ext4_fsblk_t, const char *, ...)
 -      __attribute__ ((format (printf, 5, 6)));
 -extern void ext4_error_file(struct file *, const char *, unsigned int,
 -                          ext4_fsblk_t, const char *, ...)
 -      __attribute__ ((format (printf, 5, 6)));
 +extern __printf(5, 6)
 +void ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t,
 +                    const char *, ...);
 +extern __printf(5, 6)
 +void ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t,
 +                   const char *, ...);
  extern void __ext4_std_error(struct super_block *, const char *,
                             unsigned int, int);
 -extern void __ext4_abort(struct super_block *, const char *, unsigned int,
 -                     const char *, ...)
 -      __attribute__ ((format (printf, 4, 5)));
 +extern __printf(4, 5)
 +void __ext4_abort(struct super_block *, const char *, unsigned int,
 +                const char *, ...);
  #define ext4_abort(sb, message...)    __ext4_abort(sb, __func__, \
                                                       __LINE__, ## message)
 -extern void __ext4_warning(struct super_block *, const char *, unsigned int,
 -                        const char *, ...)
 -      __attribute__ ((format (printf, 4, 5)));
 +extern __printf(4, 5)
 +void __ext4_warning(struct super_block *, const char *, unsigned int,
 +                  const char *, ...);
  #define ext4_warning(sb, message...)  __ext4_warning(sb, __func__, \
                                                       __LINE__, ## message)
 -extern void ext4_msg(struct super_block *, const char *, const char *, ...)
 -      __attribute__ ((format (printf, 3, 4)));
 +extern __printf(3, 4)
 +void ext4_msg(struct super_block *, const char *, const char *, ...);
  extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
                           const char *, unsigned int, const char *);
  #define dump_mmp_msg(sb, mmp, msg)    __dump_mmp_msg(sb, mmp, __func__, \
                                                       __LINE__, msg)
 -extern void __ext4_grp_locked_error(const char *, unsigned int, \
 -                                  struct super_block *, ext4_group_t, \
 -                                  unsigned long, ext4_fsblk_t, \
 -                                  const char *, ...)
 -      __attribute__ ((format (printf, 7, 8)));
 +extern __printf(7, 8)
 +void __ext4_grp_locked_error(const char *, unsigned int,
 +                           struct super_block *, ext4_group_t,
 +                           unsigned long, ext4_fsblk_t,
 +                           const char *, ...);
  #define ext4_grp_locked_error(sb, grp, message...) \
        __ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message)
  extern void ext4_update_dynamic_rev(struct super_block *sb);
@@@ -1927,8 -1978,8 +1978,8 @@@ extern ext4_fsblk_t ext4_inode_bitmap(s
                                      struct ext4_group_desc *bg);
  extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
                                     struct ext4_group_desc *bg);
- extern __u32 ext4_free_blks_count(struct super_block *sb,
-                               struct ext4_group_desc *bg);
+ extern __u32 ext4_free_group_clusters(struct super_block *sb,
+                                     struct ext4_group_desc *bg);
  extern __u32 ext4_free_inodes_count(struct super_block *sb,
                                 struct ext4_group_desc *bg);
  extern __u32 ext4_used_dirs_count(struct super_block *sb,
@@@ -1941,8 -1992,9 +1992,9 @@@ extern void ext4_inode_bitmap_set(struc
                                  struct ext4_group_desc *bg, ext4_fsblk_t blk);
  extern void ext4_inode_table_set(struct super_block *sb,
                                 struct ext4_group_desc *bg, ext4_fsblk_t blk);
- extern void ext4_free_blks_set(struct super_block *sb,
-                              struct ext4_group_desc *bg, __u32 count);
+ extern void ext4_free_group_clusters_set(struct super_block *sb,
+                                        struct ext4_group_desc *bg,
+                                        __u32 count);
  extern void ext4_free_inodes_set(struct super_block *sb,
                                struct ext4_group_desc *bg, __u32 count);
  extern void ext4_used_dirs_set(struct super_block *sb,
@@@ -2051,13 -2103,13 +2103,13 @@@ do {                                                         
  } while (0)
  
  #ifdef CONFIG_SMP
- /* Each CPU can accumulate percpu_counter_batch blocks in their local
-  * counters. So we need to make sure we have free blocks more
+ /* Each CPU can accumulate percpu_counter_batch clusters in its local
+  * counters, so we need to make sure we have more free clusters
   * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times.
   */
- #define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
+ #define EXT4_FREECLUSTERS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
  #else
- #define EXT4_FREEBLOCKS_WATERMARK 0
+ #define EXT4_FREECLUSTERS_WATERMARK 0
  #endif
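
For a rough sense of scale (illustrative numbers only): with
percpu_counter_batch == 32 and nr_cpu_ids == 4, the SMP watermark
works out to 4 * (32 * 4) == 512 free clusters.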
  
  static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
@@@ -2243,10 -2295,19 +2295,19 @@@ extern int ext4_multi_mount_protect(str
  enum ext4_state_bits {
        BH_Uninit       /* blocks are allocated but uninitialized on disk */
          = BH_JBDPrivateStart,
+       BH_AllocFromCluster,    /* allocated blocks were part of an already
+                                * allocated cluster. Note that this flag will
+                                * never, ever appear in a buffer_head's state
+                                * flag. See EXT4_MAP_FROM_CLUSTER to see where
+                                * this is used. */
+       BH_Da_Mapped,   /* Delayed allocated block that now has a mapping. This
+                        * flag is set when ext4_map_blocks is called on a
+                        * delayed allocated block to get its real mapping. */
  };
  
  BUFFER_FNS(Uninit, uninit)
  TAS_BUFFER_FNS(Uninit, uninit)
+ BUFFER_FNS(Da_Mapped, da_mapped)
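
BUFFER_FNS() is the stock helper generator from <linux/buffer_head.h>;
the Da_Mapped instance above expands to roughly the following (a sketch
of the generated helpers, not literal patch content):

	static inline void set_buffer_da_mapped(struct buffer_head *bh)
	{
		set_bit(BH_Da_Mapped, &bh->b_state);
	}
	static inline void clear_buffer_da_mapped(struct buffer_head *bh)
	{
		clear_bit(BH_Da_Mapped, &bh->b_state);
	}
	static inline int buffer_da_mapped(struct buffer_head *bh)
	{
		return test_bit(BH_Da_Mapped, &bh->b_state);
	}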
  
  /*
  * Add new method to test whether block and inode bitmaps are properly
@@@ -2282,4 -2343,6 +2343,6 @@@ extern void ext4_resize_end(struct supe
  
  #endif        /* __KERNEL__ */
  
+ #include "ext4_extents.h"
  #endif        /* _EXT4_H */
diff --combined fs/ext4/file.c
@@@ -181,8 -181,8 +181,8 @@@ static int ext4_file_open(struct inode 
                path.dentry = mnt->mnt_root;
                cp = d_path(&path, buf, sizeof(buf));
                if (!IS_ERR(cp)) {
-                       memcpy(sbi->s_es->s_last_mounted, cp,
-                              sizeof(sbi->s_es->s_last_mounted));
+                       strlcpy(sbi->s_es->s_last_mounted, cp,
+                               sizeof(sbi->s_es->s_last_mounted));
                        ext4_mark_super_dirty(sb);
                }
        }
@@@ -224,8 -224,53 +224,8 @@@ loff_t ext4_llseek(struct file *file, l
                maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
        else
                maxbytes = inode->i_sb->s_maxbytes;
 -      mutex_lock(&inode->i_mutex);
 -      switch (origin) {
 -      case SEEK_END:
 -              offset += inode->i_size;
 -              break;
 -      case SEEK_CUR:
 -              if (offset == 0) {
 -                      mutex_unlock(&inode->i_mutex);
 -                      return file->f_pos;
 -              }
 -              offset += file->f_pos;
 -              break;
 -      case SEEK_DATA:
 -              /*
 -               * In the generic case the entire file is data, so as long as
 -               * offset isn't at the end of the file then the offset is data.
 -               */
 -              if (offset >= inode->i_size) {
 -                      mutex_unlock(&inode->i_mutex);
 -                      return -ENXIO;
 -              }
 -              break;
 -      case SEEK_HOLE:
 -              /*
 -               * There is a virtual hole at the end of the file, so as long as
 -               * offset isn't i_size or larger, return i_size.
 -               */
 -              if (offset >= inode->i_size) {
 -                      mutex_unlock(&inode->i_mutex);
 -                      return -ENXIO;
 -              }
 -              offset = inode->i_size;
 -              break;
 -      }
 -
 -      if (offset < 0 || offset > maxbytes) {
 -              mutex_unlock(&inode->i_mutex);
 -              return -EINVAL;
 -      }
 -
 -      if (offset != file->f_pos) {
 -              file->f_pos = offset;
 -              file->f_version = 0;
 -      }
 -      mutex_unlock(&inode->i_mutex);
  
 -      return offset;
 +      return generic_file_llseek_size(file, offset, origin, maxbytes);
  }
  
  const struct file_operations ext4_file_operations = {
diff --combined fs/ext4/inode.c
@@@ -42,7 -42,6 +42,6 @@@
  #include "ext4_jbd2.h"
  #include "xattr.h"
  #include "acl.h"
- #include "ext4_extents.h"
  #include "truncate.h"
  
  #include <trace/events/ext4.h>
@@@ -268,7 -267,7 +267,7 @@@ void ext4_da_update_reserve_space(struc
        struct ext4_inode_info *ei = EXT4_I(inode);
  
        spin_lock(&ei->i_block_reservation_lock);
-       trace_ext4_da_update_reserve_space(inode, used);
+       trace_ext4_da_update_reserve_space(inode, used, quota_claim);
        if (unlikely(used > ei->i_reserved_data_blocks)) {
                ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
                         "with only %d reserved data blocks\n",
        /* Update per-inode reservations */
        ei->i_reserved_data_blocks -= used;
        ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
-       percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+       percpu_counter_sub(&sbi->s_dirtyclusters_counter,
                           used + ei->i_allocated_meta_blocks);
        ei->i_allocated_meta_blocks = 0;
  
                 * only when we have written all of the delayed
                 * allocation blocks.
                 */
-               percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+               percpu_counter_sub(&sbi->s_dirtyclusters_counter,
                                   ei->i_reserved_meta_blocks);
                ei->i_reserved_meta_blocks = 0;
                ei->i_da_metadata_calc_len = 0;
  
        /* Update quota subsystem for data blocks */
        if (quota_claim)
-               dquot_claim_block(inode, used);
+               dquot_claim_block(inode, EXT4_C2B(sbi, used));
        else {
                /*
                 * We did fallocate with an offset that is already delayed
                 * allocated. So on delayed allocated writeback we should
                 * not re-claim the quota for fallocated blocks.
                 */
-               dquot_release_reservation_block(inode, used);
+               dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
        }
  
        /*
@@@ -398,6 -397,49 +397,49 @@@ static pgoff_t ext4_num_dirty_pages(str
        return num;
  }
  
+ /*
+  * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map.
+  */
+ static void set_buffers_da_mapped(struct inode *inode,
+                                  struct ext4_map_blocks *map)
+ {
+       struct address_space *mapping = inode->i_mapping;
+       struct pagevec pvec;
+       int i, nr_pages;
+       pgoff_t index, end;
+       index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       end = (map->m_lblk + map->m_len - 1) >>
+               (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       pagevec_init(&pvec, 0);
+       while (index <= end) {
+               nr_pages = pagevec_lookup(&pvec, mapping, index,
+                                         min(end - index + 1,
+                                             (pgoff_t)PAGEVEC_SIZE));
+               if (nr_pages == 0)
+                       break;
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+                       struct buffer_head *bh, *head;
+                       if (unlikely(page->mapping != mapping) ||
+                           !PageDirty(page))
+                               break;
+                       if (page_has_buffers(page)) {
+                               bh = head = page_buffers(page);
+                               do {
+                                       set_buffer_da_mapped(bh);
+                                       bh = bh->b_this_page;
+                               } while (bh != head);
+                       }
+                       index++;
+               }
+               pagevec_release(&pvec);
+       }
+ }
  /*
   * The ext4_map_blocks() function tries to look up the requested blocks,
   * and returns if the blocks are already mapped.
   * the buffer head is mapped.
   *
   * It returns 0 if plain look up failed (blocks have not been allocated), in
-  * that casem, buffer head is unmapped
+  * that case, buffer head is unmapped
   *
   * It returns the error in case of allocation failure.
   */
@@@ -435,9 -477,11 +477,11 @@@ int ext4_map_blocks(handle_t *handle, s
         */
        down_read((&EXT4_I(inode)->i_data_sem));
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-               retval = ext4_ext_map_blocks(handle, inode, map, 0);
+               retval = ext4_ext_map_blocks(handle, inode, map, flags &
+                                            EXT4_GET_BLOCKS_KEEP_SIZE);
        } else {
-               retval = ext4_ind_map_blocks(handle, inode, map, 0);
+               retval = ext4_ind_map_blocks(handle, inode, map, flags &
+                                            EXT4_GET_BLOCKS_KEEP_SIZE);
        }
        up_read((&EXT4_I(inode)->i_data_sem));
  
         * Returns if the blocks have already been allocated
         *
         * Note that if blocks have been preallocated
-        * ext4_ext_get_block() returns th create = 0
+        * ext4_ext_get_block() returns the create = 0
         * with buffer head unmapped.
         */
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
                        (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
                        ext4_da_update_reserve_space(inode, retval, 1);
        }
-       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
                ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
  
+               /* If we have successfully mapped the delayed allocated blocks,
+                * set the BH_Da_Mapped bit on them. It's important to do this
+                * under the protection of i_data_sem.
+                */
+               if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
+                       set_buffers_da_mapped(inode, map);
+       }
        up_write((&EXT4_I(inode)->i_data_sem));
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
                int ret = check_block_validity(inode, map);
@@@ -647,7 -699,7 +699,7 @@@ struct buffer_head *ext4_bread(handle_
                return bh;
        if (buffer_uptodate(bh))
                return bh;
 -      ll_rw_block(READ_META, 1, &bh);
 +      ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
        wait_on_buffer(bh);
        if (buffer_uptodate(bh))
                return bh;
@@@ -909,7 -961,11 +961,11 @@@ static int ext4_ordered_write_end(struc
                        ext4_orphan_add(handle, inode);
                if (ret2 < 0)
                        ret = ret2;
+       } else {
+               unlock_page(page);
+               page_cache_release(page);
        }
        ret2 = ext4_journal_stop(handle);
        if (!ret)
                ret = ret2;
@@@ -1037,14 -1093,14 +1093,14 @@@ static int ext4_journalled_write_end(st
  }
  
  /*
-  * Reserve a single block located at lblock
+  * Reserve a single cluster located at lblock
   */
  static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
  {
        int retries = 0;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct ext4_inode_info *ei = EXT4_I(inode);
-       unsigned long md_needed;
+       unsigned int md_needed;
        int ret;
  
        /*
         */
  repeat:
        spin_lock(&ei->i_block_reservation_lock);
-       md_needed = ext4_calc_metadata_amount(inode, lblock);
+       md_needed = EXT4_NUM_B2C(sbi,
+                                ext4_calc_metadata_amount(inode, lblock));
        trace_ext4_da_reserve_space(inode, md_needed);
        spin_unlock(&ei->i_block_reservation_lock);
  
         * us from metadata over-estimation, though we may go over by
         * a small amount in the end.  Here we just reserve for data.
         */
-       ret = dquot_reserve_block(inode, 1);
+       ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
        if (ret)
                return ret;
        /*
         * We do still charge estimated metadata to the sb though;
         * we cannot afford to run out of free blocks.
         */
-       if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
-               dquot_release_reservation_block(inode, 1);
+       if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
+               dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
                if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
                        yield();
                        goto repeat;
@@@ -1118,19 -1175,21 +1175,21 @@@ static void ext4_da_release_space(struc
                 * We can release all of the reserved metadata blocks
                 * only when we have written all of the delayed
                 * allocation blocks.
+                * Note that in case of bigalloc, i_reserved_meta_blocks,
+                * i_reserved_data_blocks, etc. refer to number of clusters.
                 */
-               percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+               percpu_counter_sub(&sbi->s_dirtyclusters_counter,
                                   ei->i_reserved_meta_blocks);
                ei->i_reserved_meta_blocks = 0;
                ei->i_da_metadata_calc_len = 0;
        }
  
        /* update fs dirty data blocks counter */
-       percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
+       percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
  
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
  
-       dquot_release_reservation_block(inode, to_free);
+       dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
  }
  
  static void ext4_da_page_release_reservation(struct page *page,
        int to_release = 0;
        struct buffer_head *head, *bh;
        unsigned int curr_off = 0;
+       struct inode *inode = page->mapping->host;
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       int num_clusters;
  
        head = page_buffers(page);
        bh = head;
                if ((offset <= curr_off) && (buffer_delay(bh))) {
                        to_release++;
                        clear_buffer_delay(bh);
+                       clear_buffer_da_mapped(bh);
                }
                curr_off = next_off;
        } while ((bh = bh->b_this_page) != head);
-       ext4_da_release_space(page->mapping->host, to_release);
+       /* If we have released all the blocks belonging to a cluster, then we
+        * need to release the reserved space for that cluster. */
+       num_clusters = EXT4_NUM_B2C(sbi, to_release);
+       while (num_clusters > 0) {
+               ext4_fsblk_t lblk;
+               lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
+                       ((num_clusters - 1) << sbi->s_cluster_bits);
+               if (sbi->s_cluster_ratio == 1 ||
+                   !ext4_find_delalloc_cluster(inode, lblk, 1))
+                       ext4_da_release_space(inode, 1);
+               num_clusters--;
+       }
  }
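
Worked example for the release loop above (illustrative: cluster ratio
4, one page with four delayed blocks invalidated): to_release == 4, so
EXT4_NUM_B2C(sbi, 4) == 1 candidate cluster is examined, and its
reservation is returned only if ext4_find_delalloc_cluster() reports no
other delayed block still pinning that cluster.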
  
  /*
@@@ -1253,6 -1329,8 +1329,8 @@@ static int mpage_da_submit_io(struct mp
                                                clear_buffer_delay(bh);
                                                bh->b_blocknr = pblock;
                                        }
+                                       if (buffer_da_mapped(bh))
+                                               clear_buffer_da_mapped(bh);
                                        if (buffer_unwritten(bh) ||
                                            buffer_mapped(bh))
                                                BUG_ON(bh->b_blocknr != pblock);
@@@ -1346,12 -1424,15 +1424,15 @@@ static void ext4_print_free_blocks(stru
  {
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        printk(KERN_CRIT "Total free blocks count %lld\n",
-              ext4_count_free_blocks(inode->i_sb));
+              EXT4_C2B(EXT4_SB(inode->i_sb),
+                       ext4_count_free_clusters(inode->i_sb)));
        printk(KERN_CRIT "Free/Dirty block details\n");
        printk(KERN_CRIT "free_blocks=%lld\n",
-              (long long) percpu_counter_sum(&sbi->s_freeblocks_counter));
+              (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
+               percpu_counter_sum(&sbi->s_freeclusters_counter)));
        printk(KERN_CRIT "dirty_blocks=%lld\n",
-              (long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
+              (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
+               percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
        printk(KERN_CRIT "Block reservation details\n");
        printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
               EXT4_I(inode)->i_reserved_data_blocks);
@@@ -1430,8 -1511,7 +1511,7 @@@ static void mpage_da_map_and_submit(str
                if (err == -EAGAIN)
                        goto submit_io;
  
-               if (err == -ENOSPC &&
-                   ext4_count_free_blocks(sb)) {
+               if (err == -ENOSPC && ext4_count_free_clusters(sb)) {
                        mpd->retval = err;
                        goto submit_io;
                }
  
                for (i = 0; i < map.m_len; i++)
                        unmap_underlying_metadata(bdev, map.m_pblk + i);
-       }
  
-       if (ext4_should_order_data(mpd->inode)) {
-               err = ext4_jbd2_file_inode(handle, mpd->inode);
-               if (err)
-                       /* This only happens if the journal is aborted */
-                       return;
+               if (ext4_should_order_data(mpd->inode)) {
+                       err = ext4_jbd2_file_inode(handle, mpd->inode);
+                       if (err) {
+                               /* Only if the journal is aborted */
+                               mpd->retval = err;
+                               goto submit_io;
+                       }
+               }
        }
  
        /*
@@@ -1583,6 -1665,66 +1665,66 @@@ static int ext4_bh_delay_or_unwritten(h
        return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
  }
  
+ /*
+  * This function grabs code from the very beginning of
+  * ext4_map_blocks, but assumes that the caller is from delayed write
+  * time. This function looks up the requested blocks and sets the
+  * buffer delay bit under the protection of i_data_sem.
+  */
+ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+                             struct ext4_map_blocks *map,
+                             struct buffer_head *bh)
+ {
+       int retval;
+       sector_t invalid_block = ~((sector_t) 0xffff);
+       if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
+               invalid_block = ~0;
+       map->m_flags = 0;
+       ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u, "
+                 "logical block %lu\n", inode->i_ino, map->m_len,
+                 (unsigned long) map->m_lblk);
+       /*
+        * Try to see if we can get the block without requesting a new
+        * file system block.
+        */
+       down_read((&EXT4_I(inode)->i_data_sem));
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+               retval = ext4_ext_map_blocks(NULL, inode, map, 0);
+       else
+               retval = ext4_ind_map_blocks(NULL, inode, map, 0);
+       if (retval == 0) {
+               /*
+                * XXX: __block_prepare_write() unmaps passed block,
+                * is it OK?
+                */
+               /* If the block was allocated from a previously allocated
+                * cluster, then we don't need to reserve it again. */
+               if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
+                       retval = ext4_da_reserve_space(inode, iblock);
+                       if (retval)
+                               /* not enough space to reserve */
+                               goto out_unlock;
+               }
+               /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
+                * and it should not appear in bh->b_state.
+                */
+               map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
+               map_bh(bh, inode->i_sb, invalid_block);
+               set_buffer_new(bh);
+               set_buffer_delay(bh);
+       }
+ out_unlock:
+       up_read((&EXT4_I(inode)->i_data_sem));
+       return retval;
+ }
  /*
   * This is a special get_blocks_t callback which is used by
   * ext4_da_write_begin().  It will either return mapped block or
@@@ -1600,10 -1742,6 +1742,6 @@@ static int ext4_da_get_block_prep(struc
  {
        struct ext4_map_blocks map;
        int ret = 0;
-       sector_t invalid_block = ~((sector_t) 0xffff);
-       if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
-               invalid_block = ~0;
  
        BUG_ON(create == 0);
        BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
         * preallocated blocks are unmapped but should be treated
         * the same as allocated blocks.
         */
-       ret = ext4_map_blocks(NULL, inode, &map, 0);
-       if (ret < 0)
+       ret = ext4_da_map_blocks(inode, iblock, &map, bh);
+       if (ret <= 0)
                return ret;
-       if (ret == 0) {
-               if (buffer_delay(bh))
-                       return 0; /* Not sure this could or should happen */
-               /*
-                * XXX: __block_write_begin() unmaps passed block, is it OK?
-                */
-               ret = ext4_da_reserve_space(inode, iblock);
-               if (ret)
-                       /* not enough space to reserve */
-                       return ret;
-               map_bh(bh, inode->i_sb, invalid_block);
-               set_buffer_new(bh);
-               set_buffer_delay(bh);
-               return 0;
-       }
  
        map_bh(bh, inode->i_sb, map.m_pblk);
        bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
@@@ -1811,12 -1933,8 +1933,12 @@@ static int ext4_writepage(struct page *
                 * We don't want to do block allocation, so redirty
                 * the page and return.  We may reach here when we do
                 * a journal commit via journal_submit_inode_data_buffers.
 -               * We can also reach here via shrink_page_list
 +               * We can also reach here via shrink_page_list but it
 +               * should never be for direct reclaim so warn if that
 +               * happens
                 */
 +              WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
 +                                                              PF_MEMALLOC);
                goto redirty_page;
        }
        if (commit_write)
@@@ -2050,6 -2168,7 +2172,7 @@@ static int ext4_da_writepages(struct ad
        struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
        pgoff_t done_index = 0;
        pgoff_t end;
+       struct blk_plug plug;
  
        trace_ext4_da_writepages(inode, wbc);
  
@@@ -2128,6 -2247,7 +2251,7 @@@ retry
        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                tag_pages_for_writeback(mapping, index, end);
  
+       blk_start_plug(&plug);
        while (!ret && wbc->nr_to_write > 0) {
  
                /*
                        ret = 0;
                } else if (ret == MPAGE_DA_EXTENT_TAIL) {
                        /*
-                        * got one extent now try with
-                        * rest of the pages
+                        * Got one extent now try with rest of the pages.
+                        * If mpd.retval is set to -EIO, the journal is
+                        * aborted, so we don't need to write any more.
                         */
                        pages_written += mpd.pages_written;
-                       ret = 0;
+                       ret = mpd.retval;
                        io_done = 1;
                } else if (wbc->nr_to_write)
                        /*
                         */
                        break;
        }
+       blk_finish_plug(&plug);
        if (!io_done && !cycled) {
                cycled = 1;
                index = 0;
@@@ -2230,10 -2352,11 +2356,11 @@@ static int ext4_nonda_switch(struct sup
         * Delalloc need an accurate free block accounting. So switch
         * to non delalloc when we are near to error range.
         */
-       free_blocks  = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
-       dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter);
+       free_blocks  = EXT4_C2B(sbi,
+               percpu_counter_read_positive(&sbi->s_freeclusters_counter));
+       dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
        if (2 * free_blocks < 3 * dirty_blocks ||
-               free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
+               free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
                /*
                 * free block count is less than 150% of dirty blocks
                 * or free blocks is less than watermark
@@@ -2259,6 -2382,7 +2386,7 @@@ static int ext4_da_write_begin(struct f
        pgoff_t index;
        struct inode *inode = mapping->host;
        handle_t *handle;
+       loff_t page_len;
  
        index = pos >> PAGE_CACHE_SHIFT;
  
@@@ -2305,6 -2429,13 +2433,13 @@@ retry
                 */
                if (pos + len > inode->i_size)
                        ext4_truncate_failed_write(inode);
+       } else {
+               page_len = pos & (PAGE_CACHE_SIZE - 1);
+               if (page_len > 0) {
+                       ret = ext4_discard_partial_page_buffers_no_lock(handle,
+                               inode, page, pos - page_len, page_len,
+                               EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
+               }
        }
  
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@@ -2347,6 -2478,7 +2482,7 @@@ static int ext4_da_write_end(struct fil
        loff_t new_i_size;
        unsigned long start, end;
        int write_mode = (int)(unsigned long)fsdata;
+       loff_t page_len;
  
        if (write_mode == FALL_BACK_TO_NONDELALLOC) {
                if (ext4_should_order_data(inode)) {
        }
        ret2 = generic_write_end(file, mapping, pos, len, copied,
                                                        page, fsdata);
+       page_len = PAGE_CACHE_SIZE -
+                       ((pos + copied - 1) & (PAGE_CACHE_SIZE - 1));
+       if (page_len > 0) {
+               ret = ext4_discard_partial_page_buffers_no_lock(handle,
+                       inode, page, pos + copied - 1, page_len,
+                       EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
+       }
        copied = ret2;
        if (ret2 < 0)
                ret = ret2;
@@@ -2689,10 -2831,7 +2835,7 @@@ static void ext4_end_io_buffer_write(st
         * but being more careful is always safe for the future change.
         */
        inode = io_end->inode;
-       if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
-               io_end->flag |= EXT4_IO_END_UNWRITTEN;
-               atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
-       }
+       ext4_set_io_unwritten_flag(inode, io_end);
  
        /* Add the io_end to per-inode completed io list*/
        spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
@@@ -2858,6 -2997,12 +3001,12 @@@ static ssize_t ext4_direct_IO(int rw, s
        struct inode *inode = file->f_mapping->host;
        ssize_t ret;
  
+       /*
+        * If we are doing data journalling we don't support O_DIRECT
+        */
+       if (ext4_should_journal_data(inode))
+               return 0;
        trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
@@@ -2927,6 -3072,7 +3076,7 @@@ static const struct address_space_opera
        .bmap                   = ext4_bmap,
        .invalidatepage         = ext4_invalidatepage,
        .releasepage            = ext4_releasepage,
+       .direct_IO              = ext4_direct_IO,
        .is_partially_uptodate  = block_is_partially_uptodate,
        .error_remove_page      = generic_error_remove_page,
  };
@@@ -2963,6 -3109,227 +3113,227 @@@ void ext4_set_aops(struct inode *inode
                inode->i_mapping->a_ops = &ext4_journalled_aops;
  }
  
+ /*
+  * ext4_discard_partial_page_buffers()
+  * Wrapper function for ext4_discard_partial_page_buffers_no_lock.
+  * This function finds and locks the page containing the offset
+  * "from" and passes it to ext4_discard_partial_page_buffers_no_lock.
+  * Calling functions that already have the page locked should call
+  * ext4_discard_partial_page_buffers_no_lock directly.
+  */
+ int ext4_discard_partial_page_buffers(handle_t *handle,
+               struct address_space *mapping, loff_t from,
+               loff_t length, int flags)
+ {
+       struct inode *inode = mapping->host;
+       struct page *page;
+       int err = 0;
+       page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
+                                  mapping_gfp_mask(mapping) & ~__GFP_FS);
+       if (!page)
+               return -ENOMEM;
+       err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page,
+               from, length, flags);
+       unlock_page(page);
+       page_cache_release(page);
+       return err;
+ }
+ /*
+  * ext4_discard_partial_page_buffers_no_lock()
+  * Zeros a page range of length 'length' starting from offset 'from'.
+  * Buffer heads that correspond to the block-aligned regions of the
+  * zeroed range will be unmapped.  Non-block-aligned regions
+  * will have the corresponding buffer head mapped if needed so that
+  * that region of the page can be updated with the partial zero out.
+  *
+  * This function assumes that the page has already been locked.  The
+  * range to be discarded must be contained within the given page.
+  * If the specified range exceeds the end of the page it will be shortened
+  * to the end of the page that corresponds to 'from'.  This function is
+  * appropriate for updating a page and its buffer heads to be unmapped and
+  * zeroed for blocks that have been either released, or are going to be
+  * released.
+  *
+  * handle: The journal handle
+  * inode:  The file's inode
+  * page:   A locked page that contains the offset "from"
+  * from:   The starting byte offset (from the beginning of the file)
+  *         to begin discarding
+  * len:    The length of bytes to discard
+  * flags:  Optional flags that may be used:
+  *
+  *         EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED
+  *         Only zero the regions of the page whose buffer heads
+  *         have already been unmapped.  This flag is appropriate
+  *         for updating the contents of a page whose blocks may
+  *         have already been released, and we only want to zero
+  *         out the regions that correspond to those released blocks.
+  *
+  * Returns zero on success or negative on failure.
+  */
+ int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+               struct inode *inode, struct page *page, loff_t from,
+               loff_t length, int flags)
+ {
+       ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
+       unsigned int offset = from & (PAGE_CACHE_SIZE-1);
+       unsigned int blocksize, max, pos;
+       ext4_lblk_t iblock;
+       struct buffer_head *bh;
+       int err = 0;
+       blocksize = inode->i_sb->s_blocksize;
+       max = PAGE_CACHE_SIZE - offset;
+       if (index != page->index)
+               return -EINVAL;
+       /*
+        * correct length if it does not fall between
+        * 'from' and the end of the page
+        */
+       if (length > max || length < 0)
+               length = max;
+       iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+       if (!page_has_buffers(page)) {
+               /*
+                * If the range to be discarded covers a partial block
+                * we need to get the page buffers.  This is because
+                * partial blocks cannot be released and the page needs
+                * to be updated with the contents of the block before
+                * we write the zeros on top of it.
+                */
+               if ((from & (blocksize - 1)) ||
+                   ((from + length) & (blocksize - 1))) {
+                       create_empty_buffers(page, blocksize, 0);
+               } else {
+                       /*
+                        * If there are no partial blocks,
+                        * there is nothing to update,
+                        * so we can return now
+                        */
+                       return 0;
+               }
+       }
+       /* Find the buffer that contains "offset" */
+       bh = page_buffers(page);
+       pos = blocksize;
+       while (offset >= pos) {
+               bh = bh->b_this_page;
+               iblock++;
+               pos += blocksize;
+       }
+       pos = offset;
+       while (pos < offset + length) {
+               unsigned int end_of_block, range_to_discard;
+               err = 0;
+               /* The length of space left to zero and unmap */
+               range_to_discard = offset + length - pos;
+               /* The length of space until the end of the block */
+               end_of_block = blocksize - (pos & (blocksize-1));
+               /*
+                * Do not unmap or zero past end of block
+                * for this buffer head
+                */
+               if (range_to_discard > end_of_block)
+                       range_to_discard = end_of_block;
+               /*
+                * Skip this buffer head if we are only zeroing unmapped
+                * regions of the page
+                */
+               if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED &&
+                       buffer_mapped(bh))
+                               goto next;
+               /* If the range is block aligned, unmap */
+               if (range_to_discard == blocksize) {
+                       clear_buffer_dirty(bh);
+                       bh->b_bdev = NULL;
+                       clear_buffer_mapped(bh);
+                       clear_buffer_req(bh);
+                       clear_buffer_new(bh);
+                       clear_buffer_delay(bh);
+                       clear_buffer_unwritten(bh);
+                       clear_buffer_uptodate(bh);
+                       zero_user(page, pos, range_to_discard);
+                       BUFFER_TRACE(bh, "Buffer discarded");
+                       goto next;
+               }
+               /*
+                * If this block is not completely contained in the range
+                * to be discarded, then it is not going to be released.
+                * Since we need to keep this block, we need to make sure
+                * this part of the page is uptodate before we modify it
+                * by writing partial zeros on it.
+                */
+               if (!buffer_mapped(bh)) {
+                       /*
+                        * Buffer head must be mapped before we can read
+                        * from the block
+                        */
+                       BUFFER_TRACE(bh, "unmapped");
+                       ext4_get_block(inode, iblock, bh, 0);
+                       /* unmapped? It's a hole - nothing to do */
+                       if (!buffer_mapped(bh)) {
+                               BUFFER_TRACE(bh, "still unmapped");
+                               goto next;
+                       }
+               }
+               /* Ok, it's mapped. Make sure it's up-to-date */
+               if (PageUptodate(page))
+                       set_buffer_uptodate(bh);
+               if (!buffer_uptodate(bh)) {
+                       err = -EIO;
+                       ll_rw_block(READ, 1, &bh);
+                       wait_on_buffer(bh);
+               /* Uhhuh. Read error. Complain and punt. */
+                       if (!buffer_uptodate(bh))
+                               goto next;
+               }
+               if (ext4_should_journal_data(inode)) {
+                       BUFFER_TRACE(bh, "get write access");
+                       err = ext4_journal_get_write_access(handle, bh);
+                       if (err)
+                               goto next;
+               }
+               zero_user(page, pos, range_to_discard);
+               err = 0;
+               if (ext4_should_journal_data(inode))
+                       err = ext4_handle_dirty_metadata(handle, inode, bh);
+               else
+                       mark_buffer_dirty(bh);
+               BUFFER_TRACE(bh, "Partial buffer zeroed");
+ next:
+               bh = bh->b_this_page;
+               iblock++;
+               pos += range_to_discard;
+       }
+       return err;
+ }
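
For illustration, a caller of the _no_lock variant above is expected to hold the page lock itself. A minimal sketch of such a wrapper, assuming the usual kernel headers and simplified error handling (the function name below is hypothetical, not part of this patch):

        /* Hypothetical wrapper: zero the part of 'page' covered by the file
         * byte range [from, from + length), taking the page lock around the
         * unlocked helper above. */
        static int example_discard_partial_page(handle_t *handle,
                        struct inode *inode, struct page *page,
                        loff_t from, loff_t length)
        {
                int err;

                lock_page(page);
                err = ext4_discard_partial_page_buffers_no_lock(handle, inode,
                                                                page, from,
                                                                length, 0);
                unlock_page(page);
                return err;
        }
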
  /*
   * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
   * up to the end of the block which corresponds to `from'.
@@@ -3005,7 -3372,7 +3376,7 @@@ int ext4_block_zero_page_range(handle_
        page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
                                   mapping_gfp_mask(mapping) & ~__GFP_FS);
        if (!page)
-               return -EINVAL;
+               return -ENOMEM;
  
        blocksize = inode->i_sb->s_blocksize;
        max = blocksize - (offset & (blocksize - 1));
        err = 0;
        if (ext4_should_journal_data(inode)) {
                err = ext4_handle_dirty_metadata(handle, inode, bh);
-       } else {
-               if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode)
-                       err = ext4_jbd2_file_inode(handle, inode);
+       } else
                mark_buffer_dirty(bh);
-       }
  
  unlock:
        unlock_page(page);
@@@ -3119,6 -3483,11 +3487,11 @@@ int ext4_punch_hole(struct file *file, 
                return -ENOTSUPP;
        }
  
+       if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
+               /* TODO: Add support for bigalloc file systems */
+               return -ENOTSUPP;
+       }
        return ext4_ext_punch_hole(file, offset, length);
  }
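
The new guard refuses punch-hole on bigalloc filesystems, where one allocation cluster spans several blocks (s_cluster_ratio > 1). A self-contained userspace sketch of the relationship, with example constants that are not taken from the patch:

        #include <stdio.h>

        int main(void)
        {
                unsigned int block_size = 4096;  /* example: 4 KiB blocks */
                unsigned int cluster_bits = 4;   /* example: log2(blocks per cluster) */
                unsigned int cluster_ratio = 1u << cluster_bits;

                /* cluster_ratio == 1 means no bigalloc; > 1 trips the guard above */
                printf("cluster ratio %u, cluster size %u bytes\n",
                       cluster_ratio, block_size * cluster_ratio);
                return 0;
        }
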
  
@@@ -3302,7 -3671,7 +3675,7 @@@ make_io
                trace_ext4_load_inode(inode);
                get_bh(bh);
                bh->b_end_io = end_buffer_read_sync;
 -              submit_bh(READ_META, bh);
 +              submit_bh(READ | REQ_META | REQ_PRIO, bh);
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh)) {
                        EXT4_ERROR_INODE_BLOCK(inode, block,
@@@ -4420,6 -4789,7 +4793,7 @@@ retry_alloc
                          PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
                        unlock_page(page);
                        ret = VM_FAULT_SIGBUS;
+                       ext4_journal_stop(handle);
                        goto out;
                }
                ext4_set_inode_state(inode, EXT4_STATE_JDATA);
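
The added ext4_journal_stop() plugs a handle leak on the page-fault error path: a handle that was successfully started must be stopped on every exit. A minimal sketch of the pairing, with a hypothetical function name and a simplified credit count:

        static int example_journalled_op(struct inode *inode)
        {
                handle_t *handle = ext4_journal_start(inode, 1);
                int err;

                if (IS_ERR(handle))
                        return PTR_ERR(handle);

                err = ext4_mark_inode_dirty(handle, inode);
                /* stop the handle on the error path as well as on success */
                ext4_journal_stop(handle);
                return err;
        }
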
diff --combined fs/ext4/namei.c
@@@ -922,8 -922,7 +922,8 @@@ restart
                                bh = ext4_getblk(NULL, dir, b++, 0, &err);
                                bh_use[ra_max] = bh;
                                if (bh)
 -                                      ll_rw_block(READ_META, 1, &bh);
 +                                      ll_rw_block(READ | REQ_META | REQ_PRIO,
 +                                                  1, &bh);
                        }
                }
                if ((bh = bh_use[ra_ptr++]) == NULL)
@@@ -1586,7 -1585,7 +1586,7 @@@ static int ext4_dx_add_entry(handle_t *
                        dxtrace(dx_show_index("node", frames[1].entries));
                        dxtrace(dx_show_index("node",
                               ((struct dx_node *) bh2->b_data)->entries));
-                       err = ext4_handle_dirty_metadata(handle, inode, bh2);
+                       err = ext4_handle_dirty_metadata(handle, dir, bh2);
                        if (err)
                                goto journal_error;
                        brelse (bh2);
                        if (err)
                                goto journal_error;
                }
-               err = ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
+               err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh);
                if (err) {
                        ext4_std_error(inode->i_sb, err);
                        goto cleanup;
@@@ -1707,9 -1706,8 +1707,8 @@@ static void ext4_inc_count(handle_t *ha
   */
  static void ext4_dec_count(handle_t *handle, struct inode *inode)
  {
-       drop_nlink(inode);
-       if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0)
-               inc_nlink(inode);
+       if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
+               drop_nlink(inode);
  }
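
The rewritten helper keeps a directory's link count from dropping below 2 (its "." entry plus the link from its parent), while regular files always drop. A standalone userspace model of the new rule, for illustration only:

        #include <stdbool.h>
        #include <stdio.h>

        static unsigned int dec_count(bool is_dir, unsigned int nlink)
        {
                /* mirrors the rewritten ext4_dec_count() condition */
                if (!is_dir || nlink > 2)
                        nlink--;
                return nlink;
        }

        int main(void)
        {
                printf("dir at 2 stays at %u\n", dec_count(true, 2));
                printf("file at 1 drops to %u\n", dec_count(false, 1));
                return 0;
        }
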
  
  
@@@ -1756,7 -1754,7 +1755,7 @@@ retry
        if (IS_DIRSYNC(dir))
                ext4_handle_sync(handle);
  
-       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
+       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
        err = PTR_ERR(inode);
        if (!IS_ERR(inode)) {
                inode->i_op = &ext4_file_inode_operations;
@@@ -1792,7 -1790,7 +1791,7 @@@ retry
        if (IS_DIRSYNC(dir))
                ext4_handle_sync(handle);
  
-       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
+       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
        err = PTR_ERR(inode);
        if (!IS_ERR(inode)) {
                init_special_inode(inode, inode->i_mode, rdev);
@@@ -1832,7 -1830,7 +1831,7 @@@ retry
                ext4_handle_sync(handle);
  
        inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
-                              &dentry->d_name, 0);
+                              &dentry->d_name, 0, NULL);
        err = PTR_ERR(inode);
        if (IS_ERR(inode))
                goto out_stop;
        ext4_set_de_type(dir->i_sb, de, S_IFDIR);
        inode->i_nlink = 2;
        BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
-       err = ext4_handle_dirty_metadata(handle, dir, dir_block);
+       err = ext4_handle_dirty_metadata(handle, inode, dir_block);
        if (err)
                goto out_clear_inode;
        err = ext4_mark_inode_dirty(handle, inode);
@@@ -2279,7 -2277,7 +2278,7 @@@ retry
                ext4_handle_sync(handle);
  
        inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
-                              &dentry->d_name, 0);
+                              &dentry->d_name, 0, NULL);
        err = PTR_ERR(inode);
        if (IS_ERR(inode))
                goto out_stop;
@@@ -2530,7 -2528,7 +2529,7 @@@ static int ext4_rename(struct inode *ol
                PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
                                                cpu_to_le32(new_dir->i_ino);
                BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
-               retval = ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
+               retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh);
                if (retval) {
                        ext4_std_error(old_dir->i_sb, retval);
                        goto end_rename;
diff --combined include/linux/ext3_fs.h
@@@ -180,8 -180,8 +180,8 @@@ struct ext3_group_des
  
  /* Flags that should be inherited by new inodes from their parent. */
  #define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
-                          EXT3_SYNC_FL | EXT3_IMMUTABLE_FL | EXT3_APPEND_FL |\
-                          EXT3_NODUMP_FL | EXT3_NOATIME_FL | EXT3_COMPRBLK_FL|\
+                          EXT3_SYNC_FL | EXT3_NODUMP_FL |\
+                          EXT3_NOATIME_FL | EXT3_COMPRBLK_FL |\
                           EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\
                           EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL)
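
With EXT3_IMMUTABLE_FL and EXT3_APPEND_FL dropped from the mask, a file created in an immutable or append-only directory no longer inherits those flags. A simplified sketch of how such a mask is applied at inode creation (not the exact ext3 code path):

        static __u32 example_inherit_flags(__u32 dir_flags)
        {
                /* immutable/append-only no longer propagate to new children */
                return dir_flags & EXT3_FL_INHERITED;
        }
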
  
@@@ -381,7 -381,7 +381,7 @@@ struct ext3_inode 
   * Mount flags
   */
  #define EXT3_MOUNT_CHECK              0x00001 /* Do mount-time checks */
 -#define EXT3_MOUNT_OLDALLOC           0x00002  /* Don't use the new Orlov allocator */
 +/* EXT3_MOUNT_OLDALLOC was here */
  #define EXT3_MOUNT_GRPID              0x00004 /* Create files with directory's group */
  #define EXT3_MOUNT_DEBUG              0x00008 /* Some debugging messages */
  #define EXT3_MOUNT_ERRORS_CONT                0x00010 /* Continue on errors */
@@@ -937,15 -937,15 +937,15 @@@ extern int ext3_group_extend(struct sup
                                ext3_fsblk_t n_blocks_count);
  
  /* super.c */
 -extern void ext3_error (struct super_block *, const char *, const char *, ...)
 -      __attribute__ ((format (printf, 3, 4)));
 +extern __printf(3, 4)
 +void ext3_error(struct super_block *, const char *, const char *, ...);
  extern void __ext3_std_error (struct super_block *, const char *, int);
 -extern void ext3_abort (struct super_block *, const char *, const char *, ...)
 -      __attribute__ ((format (printf, 3, 4)));
 -extern void ext3_warning (struct super_block *, const char *, const char *, ...)
 -      __attribute__ ((format (printf, 3, 4)));
 -extern void ext3_msg(struct super_block *, const char *, const char *, ...)
 -      __attribute__ ((format (printf, 3, 4)));
 +extern __printf(3, 4)
 +void ext3_abort(struct super_block *, const char *, const char *, ...);
 +extern __printf(3, 4)
 +void ext3_warning(struct super_block *, const char *, const char *, ...);
 +extern __printf(3, 4)
 +void ext3_msg(struct super_block *, const char *, const char *, ...);
  extern void ext3_update_dynamic_rev (struct super_block *sb);
  
  #define ext3_std_error(sb, errno)                             \
diff --combined include/linux/fs.h
@@@ -58,15 -58,14 +58,15 @@@ struct inodes_stat_t 
  
  #define NR_FILE  8192 /* this can well be larger on a larger system */
  
 -#define MAY_EXEC 1
 -#define MAY_WRITE 2
 -#define MAY_READ 4
 -#define MAY_APPEND 8
 -#define MAY_ACCESS 16
 -#define MAY_OPEN 32
 -#define MAY_CHDIR 64
 -#define MAY_NOT_BLOCK 128     /* called from RCU mode, don't block */
 +#define MAY_EXEC              0x00000001
 +#define MAY_WRITE             0x00000002
 +#define MAY_READ              0x00000004
 +#define MAY_APPEND            0x00000008
 +#define MAY_ACCESS            0x00000010
 +#define MAY_OPEN              0x00000020
 +#define MAY_CHDIR             0x00000040
 +/* called from RCU mode, don't block */
 +#define MAY_NOT_BLOCK         0x00000080
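
The renumbering to hex makes it obvious that the MAY_* values are single bits of one permission mask. A sketch of how they compose (the helper name is made up; inode_permission() itself is real):

        static int example_rw_permission(struct inode *inode)
        {
                /* single-bit values, so they combine with bitwise OR */
                return inode_permission(inode, MAY_READ | MAY_WRITE);
        }
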
  
  /*
   * flags in file.f_mode.  Note that FMODE_READ and FMODE_WRITE must correspond
  #define READA                 RWA_MASK
  
  #define READ_SYNC             (READ | REQ_SYNC)
 -#define READ_META             (READ | REQ_META)
  #define WRITE_SYNC            (WRITE | REQ_SYNC | REQ_NOIDLE)
  #define WRITE_ODIRECT         (WRITE | REQ_SYNC)
 -#define WRITE_META            (WRITE | REQ_META)
  #define WRITE_FLUSH           (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH)
  #define WRITE_FUA             (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA)
  #define WRITE_FLUSH_FUA               (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
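
With READ_META and WRITE_META removed, callers spell out the flag composition themselves, as the ext4 hunks above now do. A minimal sketch (the helper name is hypothetical):

        static void example_read_meta_block(struct buffer_head *bh)
        {
                /* explicit composition replaces the removed READ_META shorthand */
                ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
        }
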
@@@ -770,12 -771,13 +770,13 @@@ struct inode 
        unsigned long           i_ino;
        unsigned int            i_nlink;
        dev_t                   i_rdev;
-       loff_t                  i_size;
        struct timespec         i_atime;
        struct timespec         i_mtime;
        struct timespec         i_ctime;
-       unsigned int            i_blkbits;
+       spinlock_t              i_lock; /* i_blocks, i_bytes, maybe i_size */
+       unsigned short          i_bytes;
        blkcnt_t                i_blocks;
+       loff_t                  i_size;
  
  #ifdef __NEED_I_SIZE_ORDERED
        seqcount_t              i_size_seqcount;
  
        /* Misc */
        unsigned long           i_state;
-       spinlock_t              i_lock; /* i_blocks, i_bytes, maybe i_size */
        struct mutex            i_mutex;
  
        unsigned long           dirtied_when;   /* jiffies of first dirtying */
                struct rcu_head         i_rcu;
        };
        atomic_t                i_count;
+       unsigned int            i_blkbits;
        u64                     i_version;
-       unsigned short          i_bytes;
        atomic_t                i_dio_count;
+       atomic_t                i_writecount;
        const struct file_operations    *i_fop; /* former ->i_op->default_file_ops */
        struct file_lock        *i_flock;
        struct address_space    i_data;
  #ifdef CONFIG_IMA
        atomic_t                i_readcount; /* struct files open RO */
  #endif
-       atomic_t                i_writecount;
        void                    *i_private; /* fs or device private pointer */
  };
  
@@@ -964,12 -965,7 +964,12 @@@ struct file 
  #define f_dentry      f_path.dentry
  #define f_vfsmnt      f_path.mnt
        const struct file_operations    *f_op;
 -      spinlock_t              f_lock;  /* f_ep_links, f_flags, no IRQ */
 +
 +      /*
 +       * Protects f_ep_links, f_flags, f_pos vs i_size in lseek SEEK_CUR.
 +       * Must not be taken from IRQ context.
 +       */
 +      spinlock_t              f_lock;
  #ifdef CONFIG_SMP
        int                     f_sb_list_cpu;
  #endif
@@@ -1069,8 -1065,6 +1069,8 @@@ static inline int file_check_writeable(
  #define FL_LEASE      32      /* lease held on this file */
  #define FL_CLOSE      64      /* unlock on close */
  #define FL_SLEEP      128     /* A blocking lock */
 +#define FL_DOWNGRADE_PENDING  256 /* Lease is being downgraded */
 +#define FL_UNLOCK_PENDING     512 /* Lease is being broken */
  
  /*
   * Special return value from posix_lock_file() and vfs_lock_file() for
@@@ -1117,7 -1111,7 +1117,7 @@@ struct file_lock 
        struct list_head fl_link;       /* doubly linked list of all locks */
        struct list_head fl_block;      /* circular list of blocked processes */
        fl_owner_t fl_owner;
 -      unsigned char fl_flags;
 +      unsigned int fl_flags;
        unsigned char fl_type;
        unsigned int fl_pid;
        struct pid *fl_nspid;
        loff_t fl_end;
  
        struct fasync_struct *  fl_fasync; /* for lease break notifications */
 -      unsigned long fl_break_time;    /* for nonblocking lease breaks */
 +      /* for lease breaks: */
 +      unsigned long fl_break_time;
 +      unsigned long fl_downgrade_time;
  
        const struct file_lock_operations *fl_ops;      /* Callbacks for filesystems */
        const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */
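
fl_flags widens from unsigned char to unsigned int because the new FL_DOWNGRADE_PENDING (256) and FL_UNLOCK_PENDING (512) bits no longer fit in a byte, and each pending state gets its own deadline field. An illustrative helper, not taken from fs/locks.c:

        static unsigned long example_lease_deadline(struct file_lock *fl)
        {
                if (fl->fl_flags & FL_UNLOCK_PENDING)
                        return fl->fl_break_time;
                if (fl->fl_flags & FL_DOWNGRADE_PENDING)
                        return fl->fl_downgrade_time;
                return 0;       /* no break in progress */
        }
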
@@@ -1633,10 -1625,9 +1633,10 @@@ struct inode_operations 
  struct seq_file;
  
  ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 -                              unsigned long nr_segs, unsigned long fast_segs,
 -                              struct iovec *fast_pointer,
 -                              struct iovec **ret_pointer);
 +                            unsigned long nr_segs, unsigned long fast_segs,
 +                            struct iovec *fast_pointer,
 +                            struct iovec **ret_pointer,
 +                            int check_access);
  
  extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
  extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
@@@ -2327,11 -2318,6 +2327,11 @@@ extern struct inode * iget5_locked(stru
  extern struct inode * iget_locked(struct super_block *, unsigned long);
  extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
  extern int insert_inode_locked(struct inode *);
 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
 +extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
 +#else
 +static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }
 +#endif
  extern void unlock_new_inode(struct inode *);
  extern unsigned int get_next_ino(void);
  
@@@ -2408,8 -2394,8 +2408,8 @@@ file_ra_state_init(struct file_ra_stat
  extern loff_t noop_llseek(struct file *file, loff_t offset, int origin);
  extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
  extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
 -extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset,
 -                      int origin);
 +extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
 +              int origin, loff_t maxsize);
  extern int generic_file_open(struct inode * inode, struct file * filp);
  extern int nonseekable_open(struct inode * inode, struct file * filp);
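
generic_file_llseek_size() lets the caller supply the maximum offset explicitly rather than relying on the unlocked variant's use of s_maxbytes. A sketch of a caller (the filesystem name and limit are made up):

        static loff_t examplefs_llseek(struct file *file, loff_t offset, int origin)
        {
                /* EXAMPLEFS_MAX_BYTES is a hypothetical per-fs size limit */
                return generic_file_llseek_size(file, offset, origin,
                                                EXAMPLEFS_MAX_BYTES);
        }
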
  
@@@ -2635,8 -2621,8 +2635,8 @@@ static const struct file_operations __f
        .llseek  = generic_file_llseek,                                 \
  };
  
 -static inline void __attribute__((format(printf, 1, 2)))
 -__simple_attr_check_format(const char *fmt, ...)
 +static inline __printf(1, 2)
 +void __simple_attr_check_format(const char *fmt, ...)
  {
        /* don't do anything, just let the compiler check the arguments; */
  }
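
Both the ext3 declarations above and this helper rely on the __printf() shorthand for the compiler's format(printf, ...) attribute. A self-contained userspace demonstration, with the macro expanded by hand:

        #include <stdio.h>
        #include <stdarg.h>

        #define __printf(a, b) __attribute__((format(printf, a, b)))

        static __printf(1, 2) void say(const char *fmt, ...)
        {
                va_list ap;

                va_start(ap, fmt);
                vprintf(fmt, ap);
                va_end(ap);
        }

        int main(void)
        {
                /* format/argument mismatches here are caught at compile time */
                say("%s has %d entries\n", "lost+found", 2);
                return 0;
        }
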