Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 2 Nov 2011 17:06:20 +0000 (10:06 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 2 Nov 2011 17:06:20 +0000 (10:06 -0700)
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (97 commits)
  jbd2: Unify log messages in jbd2 code
  jbd/jbd2: validate sb->s_first in journal_get_superblock()
  ext4: let ext4_ext_rm_leaf work with EXT_DEBUG defined
  ext4: fix a syntax error in ext4_ext_insert_extent when debugging enabled
  ext4: fix a typo in struct ext4_allocation_context
  ext4: Don't normalize an falloc request if it can fit in 1 extent.
  ext4: remove comments about extent mount option in ext4_new_inode()
  ext4: let ext4_discard_partial_buffers handle unaligned range correctly
  ext4: return ENOMEM if find_or_create_pages fails
  ext4: move vars to local scope in ext4_discard_partial_page_buffers_no_lock()
  ext4: Create helper function for EXT4_IO_END_UNWRITTEN and i_aiodio_unwritten
  ext4: optimize locking for end_io extent conversion
  ext4: remove unnecessary call to waitqueue_active()
  ext4: Use correct locking for ext4_end_io_nolock()
  ext4: fix race in xattr block allocation path
  ext4: trace punch_hole correctly in ext4_ext_map_blocks
  ext4: clean up AGGRESSIVE_TEST code
  ext4: move variables to their scope
  ext4: fix quota accounting during migration
  ext4: migrate cleanup
  ...

fs/ext4/ext4.h
fs/ext4/file.c
fs/ext4/inode.c
fs/ext4/namei.c
include/linux/ext3_fs.h
include/linux/fs.h

diff --combined fs/ext4/ext4.h
@@@ -144,9 -144,17 +144,17 @@@ struct ext4_allocation_request 
  #define EXT4_MAP_UNWRITTEN    (1 << BH_Unwritten)
  #define EXT4_MAP_BOUNDARY     (1 << BH_Boundary)
  #define EXT4_MAP_UNINIT               (1 << BH_Uninit)
+ /* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
+  * ext4_map_blocks wants to know whether or not the underlying cluster has
+  * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
+  * the requested mapping was from a previously mapped (or delayed-allocated)
+  * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
+  * should never appear in a buffer_head's state flags.
+  */
+ #define EXT4_MAP_FROM_CLUSTER (1 << BH_AllocFromCluster)
  #define EXT4_MAP_FLAGS                (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
                                 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
-                                EXT4_MAP_UNINIT)
+                                EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
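
Illustrative sketch (not part of the patch): a delalloc-path caller of
ext4_map_blocks() consumes this flag roughly the way the new
ext4_da_map_blocks() further down in this diff does, clearing it before
it can leak into bh->b_state:

	if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER))
		/* cluster not yet accounted for -- reserve space for it */
		ret = ext4_da_reserve_space(inode, iblock);
	/* flag has served its purpose; it must never reach bh->b_state */
	map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;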
  
  struct ext4_map_blocks {
        ext4_fsblk_t m_pblk;
@@@ -239,8 -247,11 +247,11 @@@ struct ext4_io_submit 
  # define EXT4_BLOCK_SIZE(s)           (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
  #endif
  #define       EXT4_ADDR_PER_BLOCK(s)          (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
+ #define EXT4_CLUSTER_SIZE(s)          (EXT4_BLOCK_SIZE(s) << \
+                                        EXT4_SB(s)->s_cluster_bits)
  #ifdef __KERNEL__
  # define EXT4_BLOCK_SIZE_BITS(s)      ((s)->s_blocksize_bits)
+ # define EXT4_CLUSTER_BITS(s)         (EXT4_SB(s)->s_cluster_bits)
  #else
  # define EXT4_BLOCK_SIZE_BITS(s)      ((s)->s_log_block_size + 10)
  #endif
  #endif
  #define EXT4_BLOCK_ALIGN(size, blkbits)               ALIGN((size), (1 << (blkbits)))
  
+ /* Translate a block number to a cluster number */
+ #define EXT4_B2C(sbi, blk)    ((blk) >> (sbi)->s_cluster_bits)
+ /* Translate a cluster number to a block number */
+ #define EXT4_C2B(sbi, cluster)        ((cluster) << (sbi)->s_cluster_bits)
+ /* Translate # of blks to # of clusters */
+ #define EXT4_NUM_B2C(sbi, blks)       (((blks) + (sbi)->s_cluster_ratio - 1) >> \
+                                (sbi)->s_cluster_bits)
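
Worked example (hypothetical geometry: 4KiB blocks, cluster ratio 16,
so s_cluster_bits == 4):

	EXT4_B2C(sbi, 35);	/* 35 >> 4 == 2: block 35 lives in cluster 2 */
	EXT4_C2B(sbi, 2);	/* 2 << 4 == 32: cluster 2 starts at block 32 */
	EXT4_NUM_B2C(sbi, 20);	/* (20 + 15) >> 4 == 2 clusters cover 20 blocks */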
  /*
   * Structure of a blocks group descriptor
   */
@@@ -289,7 -308,7 +308,7 @@@ struct ext4_group_des
  
  struct flex_groups {
        atomic_t free_inodes;
-       atomic_t free_blocks;
+       atomic_t free_clusters;
        atomic_t used_dirs;
  };
  
  #define EXT4_DESC_SIZE(s)             (EXT4_SB(s)->s_desc_size)
  #ifdef __KERNEL__
  # define EXT4_BLOCKS_PER_GROUP(s)     (EXT4_SB(s)->s_blocks_per_group)
+ # define EXT4_CLUSTERS_PER_GROUP(s)   (EXT4_SB(s)->s_clusters_per_group)
  # define EXT4_DESC_PER_BLOCK(s)               (EXT4_SB(s)->s_desc_per_block)
  # define EXT4_INODES_PER_GROUP(s)     (EXT4_SB(s)->s_inodes_per_group)
  # define EXT4_DESC_PER_BLOCK_BITS(s)  (EXT4_SB(s)->s_desc_per_block_bits)
  
  /* Flags that should be inherited by new inodes from their parent. */
  #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
-                          EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
-                          EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
+                          EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
                           EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
                           EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
  
@@@ -520,6 -539,8 +539,8 @@@ struct ext4_new_group_data 
  #define EXT4_GET_BLOCKS_PUNCH_OUT_EXT         0x0020
        /* Don't normalize allocation size (used for fallocate) */
  #define EXT4_GET_BLOCKS_NO_NORMALIZE          0x0040
+       /* Request will not result in inode size update (used for fallocate) */
+ #define EXT4_GET_BLOCKS_KEEP_SIZE             0x0080
  
  /*
   * Flags used by ext4_free_blocks
  #define EXT4_FREE_BLOCKS_FORGET               0x0002
  #define EXT4_FREE_BLOCKS_VALIDATED    0x0004
  #define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE       0x0008
+ #define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
+ #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER  0x0020
+ /*
+  * Flags used by ext4_discard_partial_page_buffers
+  */
+ #define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED 0x0001
  
  /*
   * ioctl commands
  #define       EXT4_IOC_SETVERSION             _IOW('f', 4, long)
  #define       EXT4_IOC_GETVERSION_OLD         FS_IOC_GETVERSION
  #define       EXT4_IOC_SETVERSION_OLD         FS_IOC_SETVERSION
- #ifdef CONFIG_JBD2_DEBUG
- #define EXT4_IOC_WAIT_FOR_READONLY    _IOR('f', 99, long)
- #endif
  #define EXT4_IOC_GETRSVSZ             _IOR('f', 5, long)
  #define EXT4_IOC_SETRSVSZ             _IOW('f', 6, long)
  #define EXT4_IOC_GROUP_EXTEND         _IOW('f', 7, unsigned long)
  #define EXT4_IOC32_SETRSVSZ           _IOW('f', 6, int)
  #define EXT4_IOC32_GROUP_EXTEND               _IOW('f', 7, unsigned int)
  #define EXT4_IOC32_GROUP_ADD          _IOW('f', 8, struct compat_ext4_new_group_input)
- #ifdef CONFIG_JBD2_DEBUG
- #define EXT4_IOC32_WAIT_FOR_READONLY  _IOR('f', 99, int)
- #endif
  #define EXT4_IOC32_GETVERSION_OLD     FS_IOC32_GETVERSION
  #define EXT4_IOC32_SETVERSION_OLD     FS_IOC32_SETVERSION
  #endif
@@@ -837,6 -859,7 +859,7 @@@ struct ext4_inode_info 
        ext4_group_t    i_last_alloc_group;
  
        /* allocation reservation info for delalloc */
+       /* In case of bigalloc, these refer to clusters rather than blocks */
        unsigned int i_reserved_data_blocks;
        unsigned int i_reserved_meta_blocks;
        unsigned int i_allocated_meta_blocks;
  /*
   * Mount flags
   */
- #define EXT4_MOUNT_OLDALLOC           0x00002  /* Don't use the new Orlov allocator */
  #define EXT4_MOUNT_GRPID              0x00004 /* Create files with directory's group */
  #define EXT4_MOUNT_DEBUG              0x00008 /* Some debugging messages */
  #define EXT4_MOUNT_ERRORS_CONT                0x00010 /* Continue on errors */
  #define EXT4_MOUNT_DISCARD            0x40000000 /* Issue DISCARD requests */
  #define EXT4_MOUNT_INIT_INODE_TABLE   0x80000000 /* Initialize uninitialized itables */
  
+ #define EXT4_MOUNT2_EXPLICIT_DELALLOC 0x00000001 /* User explicitly
+                                                     specified delalloc */
  #define clear_opt(sb, opt)            EXT4_SB(sb)->s_mount_opt &= \
                                                ~EXT4_MOUNT_##opt
  #define set_opt(sb, opt)              EXT4_SB(sb)->s_mount_opt |= \
@@@ -968,9 -993,9 +993,9 @@@ struct ext4_super_block 
  /*10*/        __le32  s_free_inodes_count;    /* Free inodes count */
        __le32  s_first_data_block;     /* First Data Block */
        __le32  s_log_block_size;       /* Block size */
-       __le32  s_obso_log_frag_size;   /* Obsoleted fragment size */
+       __le32  s_log_cluster_size;     /* Allocation cluster size */
  /*20*/        __le32  s_blocks_per_group;     /* # Blocks per group */
-       __le32  s_obso_frags_per_group; /* Obsoleted fragments per group */
+       __le32  s_clusters_per_group;   /* # Clusters per group */
        __le32  s_inodes_per_group;     /* # Inodes per group */
        __le32  s_mtime;                /* Mount time */
  /*30*/        __le32  s_wtime;                /* Write time */
        __u8    s_last_error_func[32];  /* function where the error happened */
  #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
        __u8    s_mount_opts[64];
-       __le32  s_reserved[112];        /* Padding to the end of the block */
+       __le32  s_usr_quota_inum;       /* inode for tracking user quota */
+       __le32  s_grp_quota_inum;       /* inode for tracking group quota */
+       __le32  s_overhead_clusters;    /* overhead blocks/clusters in fs */
+       __le32  s_reserved[109];        /* Padding to the end of the block */
  };
  
  #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
@@@ -1086,6 -1114,7 +1114,7 @@@ struct ext4_sb_info 
        unsigned long s_desc_size;      /* Size of a group descriptor in bytes */
        unsigned long s_inodes_per_block;/* Number of inodes per block */
        unsigned long s_blocks_per_group;/* Number of blocks in a group */
+       unsigned long s_clusters_per_group; /* Number of clusters in a group */
        unsigned long s_inodes_per_group;/* Number of inodes in a group */
        unsigned long s_itb_per_group;  /* Number of inode table blocks per group */
        unsigned long s_gdb_count;      /* Number of group descriptor blocks */
        ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
        unsigned long s_overhead_last;  /* Last calculated overhead */
        unsigned long s_blocks_last;    /* Last seen block count */
+       unsigned int s_cluster_ratio;   /* Number of blocks per cluster */
+       unsigned int s_cluster_bits;    /* log2 of s_cluster_ratio */
        loff_t s_bitmap_maxbytes;       /* max bytes for bitmap files */
        struct buffer_head * s_sbh;     /* Buffer containing the super block */
        struct ext4_super_block *s_es;  /* Pointer to the super block in the buffer */
        u32 s_hash_seed[4];
        int s_def_hash_version;
        int s_hash_unsigned;    /* 3 if hash should be signed, 0 if not */
-       struct percpu_counter s_freeblocks_counter;
+       struct percpu_counter s_freeclusters_counter;
        struct percpu_counter s_freeinodes_counter;
        struct percpu_counter s_dirs_counter;
-       struct percpu_counter s_dirtyblocks_counter;
+       struct percpu_counter s_dirtyclusters_counter;
        struct blockgroup_lock *s_blockgroup_lock;
        struct proc_dir_entry *s_proc;
        struct kobject s_kobj;
        u32 s_max_batch_time;
        u32 s_min_batch_time;
        struct block_device *journal_bdev;
- #ifdef CONFIG_JBD2_DEBUG
-       struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
-       wait_queue_head_t ro_wait_queue;        /* For people waiting for the fs to go read-only */
- #endif
  #ifdef CONFIG_QUOTA
        char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
        int s_jquota_fmt;                       /* Format of quota to use */
@@@ -1248,6 -1275,15 +1275,15 @@@ static inline int ext4_valid_inum(struc
                 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
  }
  
+ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
+                                             struct ext4_io_end *io_end)
+ {
+       if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+               io_end->flag |= EXT4_IO_END_UNWRITTEN;
+               atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+       }
+ }
  /*
   * Inode dynamic state flags
   */
@@@ -1360,6 -1396,7 +1396,7 @@@ static inline void ext4_clear_state_fla
  #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK      0x0020
  #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE    0x0040
  #define EXT4_FEATURE_RO_COMPAT_QUOTA          0x0100
+ #define EXT4_FEATURE_RO_COMPAT_BIGALLOC               0x0200
  
  #define EXT4_FEATURE_INCOMPAT_COMPRESSION     0x0001
  #define EXT4_FEATURE_INCOMPAT_FILETYPE                0x0002
                                         EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
                                         EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
                                         EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
-                                        EXT4_FEATURE_RO_COMPAT_HUGE_FILE)
+                                        EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
+                                        EXT4_FEATURE_RO_COMPAT_BIGALLOC)
  
  /*
   * Default values for user and/or group using reserved blocks
@@@ -1735,9 -1773,9 +1773,9 @@@ extern ext4_fsblk_t ext4_new_meta_block
                                         unsigned int flags,
                                         unsigned long *count,
                                         int *errp);
- extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
-                                 s64 nblocks, unsigned int flags);
- extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
+ extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
+                                   s64 nclusters, unsigned int flags);
+ extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
  extern void ext4_check_blocks_bitmap(struct super_block *);
  extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
                                                    ext4_group_t block_group,
  extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
  struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
                                      ext4_group_t block_group);
- extern unsigned ext4_init_block_bitmap(struct super_block *sb,
-                                      struct buffer_head *bh,
-                                      ext4_group_t group,
-                                      struct ext4_group_desc *desc);
- #define ext4_free_blocks_after_init(sb, group, desc)                  \
-               ext4_init_block_bitmap(sb, NULL, group, desc)
+ extern void ext4_init_block_bitmap(struct super_block *sb,
+                                  struct buffer_head *bh,
+                                  ext4_group_t group,
+                                  struct ext4_group_desc *desc);
+ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
+                                             ext4_group_t block_group,
+                                             struct ext4_group_desc *gdp);
+ extern unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+                                           ext4_group_t block_group);
+ extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
+                                          ext4_group_t block_group,
+                                          struct ext4_group_desc *gdp);
  ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
  
  /* dir.c */
@@@ -1776,7 -1820,8 +1820,8 @@@ extern int ext4fs_dirhash(const char *n
  
  /* ialloc.c */
  extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
-                                   const struct qstr *qstr, __u32 goal);
+                                   const struct qstr *qstr, __u32 goal,
+                                   uid_t *owner);
  extern void ext4_free_inode(handle_t *, struct inode *);
  extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
  extern unsigned long ext4_count_free_inodes(struct super_block *);
@@@ -1839,6 -1884,12 +1884,12 @@@ extern int ext4_block_truncate_page(han
                struct address_space *mapping, loff_t from);
  extern int ext4_block_zero_page_range(handle_t *handle,
                struct address_space *mapping, loff_t from, loff_t length);
+ extern int ext4_discard_partial_page_buffers(handle_t *handle,
+               struct address_space *mapping, loff_t from,
+               loff_t length, int flags);
+ extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+               struct inode *inode, struct page *page, loff_t from,
+               loff_t length, int flags);
  extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
  extern qsize_t *ext4_get_reserved_space(struct inode *inode);
  extern void ext4_da_update_reserve_space(struct inode *inode,
@@@ -1878,40 -1929,40 +1929,40 @@@ extern int ext4_group_extend(struct sup
  extern void *ext4_kvmalloc(size_t size, gfp_t flags);
  extern void *ext4_kvzalloc(size_t size, gfp_t flags);
  extern void ext4_kvfree(void *ptr);
 -extern void __ext4_error(struct super_block *, const char *, unsigned int,
 -                       const char *, ...)
 -      __attribute__ ((format (printf, 4, 5)));
 +extern __printf(4, 5)
 +void __ext4_error(struct super_block *, const char *, unsigned int,
 +                const char *, ...);
  #define ext4_error(sb, message...)    __ext4_error(sb, __func__,      \
                                                     __LINE__, ## message)
 -extern void ext4_error_inode(struct inode *, const char *, unsigned int,
 -                           ext4_fsblk_t, const char *, ...)
 -      __attribute__ ((format (printf, 5, 6)));
 -extern void ext4_error_file(struct file *, const char *, unsigned int,
 -                          ext4_fsblk_t, const char *, ...)
 -      __attribute__ ((format (printf, 5, 6)));
 +extern __printf(5, 6)
 +void ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t,
 +                    const char *, ...);
 +extern __printf(5, 6)
 +void ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t,
 +                   const char *, ...);
  extern void __ext4_std_error(struct super_block *, const char *,
                             unsigned int, int);
 -extern void __ext4_abort(struct super_block *, const char *, unsigned int,
 -                     const char *, ...)
 -      __attribute__ ((format (printf, 4, 5)));
 +extern __printf(4, 5)
 +void __ext4_abort(struct super_block *, const char *, unsigned int,
 +                const char *, ...);
  #define ext4_abort(sb, message...)    __ext4_abort(sb, __func__, \
                                                       __LINE__, ## message)
 -extern void __ext4_warning(struct super_block *, const char *, unsigned int,
 -                        const char *, ...)
 -      __attribute__ ((format (printf, 4, 5)));
 +extern __printf(4, 5)
 +void __ext4_warning(struct super_block *, const char *, unsigned int,
 +                  const char *, ...);
  #define ext4_warning(sb, message...)  __ext4_warning(sb, __func__, \
                                                       __LINE__, ## message)
 -extern void ext4_msg(struct super_block *, const char *, const char *, ...)
 -      __attribute__ ((format (printf, 3, 4)));
 +extern __printf(3, 4)
 +void ext4_msg(struct super_block *, const char *, const char *, ...);
  extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
                           const char *, unsigned int, const char *);
  #define dump_mmp_msg(sb, mmp, msg)    __dump_mmp_msg(sb, mmp, __func__, \
                                                       __LINE__, msg)
 -extern void __ext4_grp_locked_error(const char *, unsigned int, \
 -                                  struct super_block *, ext4_group_t, \
 -                                  unsigned long, ext4_fsblk_t, \
 -                                  const char *, ...)
 -      __attribute__ ((format (printf, 7, 8)));
 +extern __printf(7, 8)
 +void __ext4_grp_locked_error(const char *, unsigned int,
 +                           struct super_block *, ext4_group_t,
 +                           unsigned long, ext4_fsblk_t,
 +                           const char *, ...);
  #define ext4_grp_locked_error(sb, grp, message...) \
        __ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message)
  extern void ext4_update_dynamic_rev(struct super_block *sb);
@@@ -1927,8 -1978,8 +1978,8 @@@ extern ext4_fsblk_t ext4_inode_bitmap(s
                                      struct ext4_group_desc *bg);
  extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
                                     struct ext4_group_desc *bg);
- extern __u32 ext4_free_blks_count(struct super_block *sb,
-                               struct ext4_group_desc *bg);
+ extern __u32 ext4_free_group_clusters(struct super_block *sb,
+                                     struct ext4_group_desc *bg);
  extern __u32 ext4_free_inodes_count(struct super_block *sb,
                                 struct ext4_group_desc *bg);
  extern __u32 ext4_used_dirs_count(struct super_block *sb,
@@@ -1941,8 -1992,9 +1992,9 @@@ extern void ext4_inode_bitmap_set(struc
                                  struct ext4_group_desc *bg, ext4_fsblk_t blk);
  extern void ext4_inode_table_set(struct super_block *sb,
                                 struct ext4_group_desc *bg, ext4_fsblk_t blk);
- extern void ext4_free_blks_set(struct super_block *sb,
-                              struct ext4_group_desc *bg, __u32 count);
+ extern void ext4_free_group_clusters_set(struct super_block *sb,
+                                        struct ext4_group_desc *bg,
+                                        __u32 count);
  extern void ext4_free_inodes_set(struct super_block *sb,
                                struct ext4_group_desc *bg, __u32 count);
  extern void ext4_used_dirs_set(struct super_block *sb,
@@@ -2051,13 -2103,13 +2103,13 @@@ do {                                                         
  } while (0)
  
  #ifdef CONFIG_SMP
- /* Each CPU can accumulate percpu_counter_batch blocks in their local
-  * counters. So we need to make sure we have free blocks more
+ /* Each CPU can accumulate percpu_counter_batch clusters in its local
+  * counters, so we need to make sure we have more free clusters
   * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times.
   */
- #define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
+ #define EXT4_FREECLUSTERS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
  #else
- #define EXT4_FREEBLOCKS_WATERMARK 0
+ #define EXT4_FREECLUSTERS_WATERMARK 0
  #endif
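
For a rough sense of scale (illustrative numbers only): with
percpu_counter_batch == 32 and nr_cpu_ids == 4, the SMP watermark
works out to 4 * (32 * 4) == 512 free clusters.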
  
  static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
@@@ -2243,10 -2295,19 +2295,19 @@@ extern int ext4_multi_mount_protect(str
  enum ext4_state_bits {
        BH_Uninit       /* blocks are allocated but uninitialized on disk */
          = BH_JBDPrivateStart,
+       BH_AllocFromCluster,    /* allocated blocks were part of an already
+                                * allocated cluster. Note that this flag will
+                                * never, ever appear in a buffer_head's state
+                                * flag. See EXT4_MAP_FROM_CLUSTER to see where
+                                * this is used. */
+       BH_Da_Mapped,   /* Delayed allocated block that now has a mapping. This
+                        * flag is set when ext4_map_blocks is called on a
+                        * delayed allocated block to get its real mapping. */
  };
  
  BUFFER_FNS(Uninit, uninit)
  TAS_BUFFER_FNS(Uninit, uninit)
+ BUFFER_FNS(Da_Mapped, da_mapped)
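
BUFFER_FNS() is the stock helper generator from <linux/buffer_head.h>;
the Da_Mapped instance above expands to roughly the following (a sketch
of the generated helpers, not literal patch content):

	static inline void set_buffer_da_mapped(struct buffer_head *bh)
	{
		set_bit(BH_Da_Mapped, &bh->b_state);
	}
	static inline void clear_buffer_da_mapped(struct buffer_head *bh)
	{
		clear_bit(BH_Da_Mapped, &bh->b_state);
	}
	static inline int buffer_da_mapped(struct buffer_head *bh)
	{
		return test_bit(BH_Da_Mapped, &bh->b_state);
	}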
  
  /*
  * Add new method to test whether block and inode bitmaps are properly
@@@ -2282,4 -2343,6 +2343,6 @@@ extern void ext4_resize_end(struct supe
  
  #endif        /* __KERNEL__ */
  
+ #include "ext4_extents.h"
  #endif        /* _EXT4_H */
diff --combined fs/ext4/file.c
@@@ -181,8 -181,8 +181,8 @@@ static int ext4_file_open(struct inode 
                path.dentry = mnt->mnt_root;
                cp = d_path(&path, buf, sizeof(buf));
                if (!IS_ERR(cp)) {
-                       memcpy(sbi->s_es->s_last_mounted, cp,
-                              sizeof(sbi->s_es->s_last_mounted));
+                       strlcpy(sbi->s_es->s_last_mounted, cp,
+                               sizeof(sbi->s_es->s_last_mounted));
                        ext4_mark_super_dirty(sb);
                }
        }
@@@ -224,8 -224,53 +224,8 @@@ loff_t ext4_llseek(struct file *file, l
                maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
        else
                maxbytes = inode->i_sb->s_maxbytes;
 -      mutex_lock(&inode->i_mutex);
 -      switch (origin) {
 -      case SEEK_END:
 -              offset += inode->i_size;
 -              break;
 -      case SEEK_CUR:
 -              if (offset == 0) {
 -                      mutex_unlock(&inode->i_mutex);
 -                      return file->f_pos;
 -              }
 -              offset += file->f_pos;
 -              break;
 -      case SEEK_DATA:
 -              /*
 -               * In the generic case the entire file is data, so as long as
 -               * offset isn't at the end of the file then the offset is data.
 -               */
 -              if (offset >= inode->i_size) {
 -                      mutex_unlock(&inode->i_mutex);
 -                      return -ENXIO;
 -              }
 -              break;
 -      case SEEK_HOLE:
 -              /*
 -               * There is a virtual hole at the end of the file, so as long as
 -               * offset isn't i_size or larger, return i_size.
 -               */
 -              if (offset >= inode->i_size) {
 -                      mutex_unlock(&inode->i_mutex);
 -                      return -ENXIO;
 -              }
 -              offset = inode->i_size;
 -              break;
 -      }
 -
 -      if (offset < 0 || offset > maxbytes) {
 -              mutex_unlock(&inode->i_mutex);
 -              return -EINVAL;
 -      }
 -
 -      if (offset != file->f_pos) {
 -              file->f_pos = offset;
 -              file->f_version = 0;
 -      }
 -      mutex_unlock(&inode->i_mutex);
  
 -      return offset;
 +      return generic_file_llseek_size(file, offset, origin, maxbytes);
  }
  
  const struct file_operations ext4_file_operations = {
diff --combined fs/ext4/inode.c
@@@ -42,7 -42,6 +42,6 @@@
  #include "ext4_jbd2.h"
  #include "xattr.h"
  #include "acl.h"
- #include "ext4_extents.h"
  #include "truncate.h"
  
  #include <trace/events/ext4.h>
@@@ -268,7 -267,7 +267,7 @@@ void ext4_da_update_reserve_space(struc
        struct ext4_inode_info *ei = EXT4_I(inode);
  
        spin_lock(&ei->i_block_reservation_lock);
-       trace_ext4_da_update_reserve_space(inode, used);
+       trace_ext4_da_update_reserve_space(inode, used, quota_claim);
        if (unlikely(used > ei->i_reserved_data_blocks)) {
                ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
                         "with only %d reserved data blocks\n",
        /* Update per-inode reservations */
        ei->i_reserved_data_blocks -= used;
        ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
-       percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+       percpu_counter_sub(&sbi->s_dirtyclusters_counter,
                           used + ei->i_allocated_meta_blocks);
        ei->i_allocated_meta_blocks = 0;
  
                 * only when we have written all of the delayed
                 * allocation blocks.
                 */
-               percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+               percpu_counter_sub(&sbi->s_dirtyclusters_counter,
                                   ei->i_reserved_meta_blocks);
                ei->i_reserved_meta_blocks = 0;
                ei->i_da_metadata_calc_len = 0;
  
        /* Update quota subsystem for data blocks */
        if (quota_claim)
-               dquot_claim_block(inode, used);
+               dquot_claim_block(inode, EXT4_C2B(sbi, used));
        else {
                /*
                 * We did fallocate with an offset that is already delayed
                 * allocated. So on delayed allocated writeback we should
                 * not re-claim the quota for fallocated blocks.
                 */
-               dquot_release_reservation_block(inode, used);
+               dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
        }
  
        /*
@@@ -398,6 -397,49 +397,49 @@@ static pgoff_t ext4_num_dirty_pages(str
        return num;
  }
  
+ /*
+  * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map.
+  */
+ static void set_buffers_da_mapped(struct inode *inode,
+                                  struct ext4_map_blocks *map)
+ {
+       struct address_space *mapping = inode->i_mapping;
+       struct pagevec pvec;
+       int i, nr_pages;
+       pgoff_t index, end;
+       index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       end = (map->m_lblk + map->m_len - 1) >>
+               (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       pagevec_init(&pvec, 0);
+       while (index <= end) {
+               nr_pages = pagevec_lookup(&pvec, mapping, index,
+                                         min(end - index + 1,
+                                             (pgoff_t)PAGEVEC_SIZE));
+               if (nr_pages == 0)
+                       break;
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+                       struct buffer_head *bh, *head;
+                       if (unlikely(page->mapping != mapping) ||
+                           !PageDirty(page))
+                               break;
+                       if (page_has_buffers(page)) {
+                               bh = head = page_buffers(page);
+                               do {
+                                       set_buffer_da_mapped(bh);
+                                       bh = bh->b_this_page;
+                               } while (bh != head);
+                       }
+                       index++;
+               }
+               pagevec_release(&pvec);
+       }
+ }
  /*
   * The ext4_map_blocks() function tries to look up the requested blocks,
   * and returns if the blocks are already mapped.
   * the buffer head is mapped.
   *
   * It returns 0 if plain look up failed (blocks have not been allocated), in
-  * that casem, buffer head is unmapped
+  * that case, buffer head is unmapped
   *
   * It returns the error in case of allocation failure.
   */
@@@ -435,9 -477,11 +477,11 @@@ int ext4_map_blocks(handle_t *handle, s
         */
        down_read((&EXT4_I(inode)->i_data_sem));
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-               retval = ext4_ext_map_blocks(handle, inode, map, 0);
+               retval = ext4_ext_map_blocks(handle, inode, map, flags &
+                                            EXT4_GET_BLOCKS_KEEP_SIZE);
        } else {
-               retval = ext4_ind_map_blocks(handle, inode, map, 0);
+               retval = ext4_ind_map_blocks(handle, inode, map, flags &
+                                            EXT4_GET_BLOCKS_KEEP_SIZE);
        }
        up_read((&EXT4_I(inode)->i_data_sem));
  
         * Returns if the blocks have already been allocated
         *
         * Note that if blocks have been preallocated
-        * ext4_ext_get_block() returns th create = 0
+        * ext4_ext_get_block() returns the create = 0
         * with buffer head unmapped.
         */
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
                        (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
                        ext4_da_update_reserve_space(inode, retval, 1);
        }
-       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
                ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
  
+               /* If we have successfully mapped the delayed allocated blocks,
+                * set the BH_Da_Mapped bit on them. It's important to do this
+                * under the protection of i_data_sem.
+                */
+               if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
+                       set_buffers_da_mapped(inode, map);
+       }
        up_write((&EXT4_I(inode)->i_data_sem));
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
                int ret = check_block_validity(inode, map);
@@@ -647,7 -699,7 +699,7 @@@ struct buffer_head *ext4_bread(handle_
                return bh;
        if (buffer_uptodate(bh))
                return bh;
 -      ll_rw_block(READ_META, 1, &bh);
 +      ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
        wait_on_buffer(bh);
        if (buffer_uptodate(bh))
                return bh;
@@@ -909,7 -961,11 +961,11 @@@ static int ext4_ordered_write_end(struc
                        ext4_orphan_add(handle, inode);
                if (ret2 < 0)
                        ret = ret2;
+       } else {
+               unlock_page(page);
+               page_cache_release(page);
        }
        ret2 = ext4_journal_stop(handle);
        if (!ret)
                ret = ret2;
@@@ -1037,14 -1093,14 +1093,14 @@@ static int ext4_journalled_write_end(st
  }
  
  /*
-  * Reserve a single block located at lblock
+  * Reserve a single cluster located at lblock
   */
  static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
  {
        int retries = 0;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct ext4_inode_info *ei = EXT4_I(inode);
-       unsigned long md_needed;
+       unsigned int md_needed;
        int ret;
  
        /*
         */
  repeat:
        spin_lock(&ei->i_block_reservation_lock);
-       md_needed = ext4_calc_metadata_amount(inode, lblock);
+       md_needed = EXT4_NUM_B2C(sbi,
+                                ext4_calc_metadata_amount(inode, lblock));
        trace_ext4_da_reserve_space(inode, md_needed);
        spin_unlock(&ei->i_block_reservation_lock);
  
         * us from metadata over-estimation, though we may go over by
         * a small amount in the end.  Here we just reserve for data.
         */
-       ret = dquot_reserve_block(inode, 1);
+       ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
        if (ret)
                return ret;
        /*
         * We do still charge estimated metadata to the sb though;
         * we cannot afford to run out of free blocks.
         */
-       if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
-               dquot_release_reservation_block(inode, 1);
+       if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
+               dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
                if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
                        yield();
                        goto repeat;
@@@ -1118,19 -1175,21 +1175,21 @@@ static void ext4_da_release_space(struc
                 * We can release all of the reserved metadata blocks
                 * only when we have written all of the delayed
                 * allocation blocks.
+                * Note that in case of bigalloc, i_reserved_meta_blocks,
+                * i_reserved_data_blocks, etc. refer to number of clusters.
                 */
-               percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+               percpu_counter_sub(&sbi->s_dirtyclusters_counter,
                                   ei->i_reserved_meta_blocks);
                ei->i_reserved_meta_blocks = 0;
                ei->i_da_metadata_calc_len = 0;
        }
  
        /* update fs dirty data blocks counter */
-       percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
+       percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
  
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
  
-       dquot_release_reservation_block(inode, to_free);
+       dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
  }
  
  static void ext4_da_page_release_reservation(struct page *page,
        int to_release = 0;
        struct buffer_head *head, *bh;
        unsigned int curr_off = 0;
+       struct inode *inode = page->mapping->host;
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       int num_clusters;
  
        head = page_buffers(page);
        bh = head;
                if ((offset <= curr_off) && (buffer_delay(bh))) {
                        to_release++;
                        clear_buffer_delay(bh);
+                       clear_buffer_da_mapped(bh);
                }
                curr_off = next_off;
        } while ((bh = bh->b_this_page) != head);
-       ext4_da_release_space(page->mapping->host, to_release);
+       /* If we have released all the blocks belonging to a cluster, then we
+        * need to release the reserved space for that cluster. */
+       num_clusters = EXT4_NUM_B2C(sbi, to_release);
+       while (num_clusters > 0) {
+               ext4_fsblk_t lblk;
+               lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
+                       ((num_clusters - 1) << sbi->s_cluster_bits);
+               if (sbi->s_cluster_ratio == 1 ||
+                   !ext4_find_delalloc_cluster(inode, lblk, 1))
+                       ext4_da_release_space(inode, 1);
+               num_clusters--;
+       }
  }
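
Worked example for the release loop above (illustrative: cluster ratio
4, one page with four delayed blocks invalidated): to_release == 4, so
EXT4_NUM_B2C(sbi, 4) == 1 candidate cluster is examined, and its
reservation is returned only if ext4_find_delalloc_cluster() reports no
other delayed block still pinning that cluster.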
  
  /*
@@@ -1253,6 -1329,8 +1329,8 @@@ static int mpage_da_submit_io(struct mp
                                                clear_buffer_delay(bh);
                                                bh->b_blocknr = pblock;
                                        }
+                                       if (buffer_da_mapped(bh))
+                                               clear_buffer_da_mapped(bh);
                                        if (buffer_unwritten(bh) ||
                                            buffer_mapped(bh))
                                                BUG_ON(bh->b_blocknr != pblock);
@@@ -1346,12 -1424,15 +1424,15 @@@ static void ext4_print_free_blocks(stru
  {
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        printk(KERN_CRIT "Total free blocks count %lld\n",
-              ext4_count_free_blocks(inode->i_sb));
+              EXT4_C2B(EXT4_SB(inode->i_sb),
+                       ext4_count_free_clusters(inode->i_sb)));
        printk(KERN_CRIT "Free/Dirty block details\n");
        printk(KERN_CRIT "free_blocks=%lld\n",
-              (long long) percpu_counter_sum(&sbi->s_freeblocks_counter));
+              (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
+               percpu_counter_sum(&sbi->s_freeclusters_counter)));
        printk(KERN_CRIT "dirty_blocks=%lld\n",
-              (long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
+              (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
+               percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
        printk(KERN_CRIT "Block reservation details\n");
        printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
               EXT4_I(inode)->i_reserved_data_blocks);
@@@ -1430,8 -1511,7 +1511,7 @@@ static void mpage_da_map_and_submit(str
                if (err == -EAGAIN)
                        goto submit_io;
  
-               if (err == -ENOSPC &&
-                   ext4_count_free_blocks(sb)) {
+               if (err == -ENOSPC && ext4_count_free_clusters(sb)) {
                        mpd->retval = err;
                        goto submit_io;
                }
  
                for (i = 0; i < map.m_len; i++)
                        unmap_underlying_metadata(bdev, map.m_pblk + i);
-       }
  
-       if (ext4_should_order_data(mpd->inode)) {
-               err = ext4_jbd2_file_inode(handle, mpd->inode);
-               if (err)
-                       /* This only happens if the journal is aborted */
-                       return;
+               if (ext4_should_order_data(mpd->inode)) {
+                       err = ext4_jbd2_file_inode(handle, mpd->inode);
+                       if (err) {
+                               /* Only if the journal is aborted */
+                               mpd->retval = err;
+                               goto submit_io;
+                       }
+               }
        }
  
        /*
@@@ -1583,6 -1665,66 +1665,66 @@@ static int ext4_bh_delay_or_unwritten(h
        return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
  }
  
+ /*
+  * This function grabs code from the very beginning of
+  * ext4_map_blocks, but assumes that the caller is from delayed write
+  * time. This function looks up the requested blocks and sets the
+  * buffer delay bit under the protection of i_data_sem.
+  */
+ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+                             struct ext4_map_blocks *map,
+                             struct buffer_head *bh)
+ {
+       int retval;
+       sector_t invalid_block = ~((sector_t) 0xffff);
+       if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
+               invalid_block = ~0;
+       map->m_flags = 0;
+       ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u, "
+                 "logical block %lu\n", inode->i_ino, map->m_len,
+                 (unsigned long) map->m_lblk);
+       /*
+        * Try to see if we can get the block without requesting a new
+        * file system block.
+        */
+       down_read((&EXT4_I(inode)->i_data_sem));
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+               retval = ext4_ext_map_blocks(NULL, inode, map, 0);
+       else
+               retval = ext4_ind_map_blocks(NULL, inode, map, 0);
+       if (retval == 0) {
+               /*
+                * XXX: __block_prepare_write() unmaps passed block,
+                * is it OK?
+                */
+               /* If the block was allocated from a previously allocated
+                * cluster, then we don't need to reserve it again. */
+               if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
+                       retval = ext4_da_reserve_space(inode, iblock);
+                       if (retval)
+                               /* not enough space to reserve */
+                               goto out_unlock;
+               }
+               /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
+                * and it should not appear in bh->b_state.
+                */
+               map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
+               map_bh(bh, inode->i_sb, invalid_block);
+               set_buffer_new(bh);
+               set_buffer_delay(bh);
+       }
+ out_unlock:
+       up_read((&EXT4_I(inode)->i_data_sem));
+       return retval;
+ }
  /*
   * This is a special get_blocks_t callback which is used by
   * ext4_da_write_begin().  It will either return mapped block or
@@@ -1600,10 -1742,6 +1742,6 @@@ static int ext4_da_get_block_prep(struc
  {
        struct ext4_map_blocks map;
        int ret = 0;
-       sector_t invalid_block = ~((sector_t) 0xffff);
-       if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
-               invalid_block = ~0;
  
        BUG_ON(create == 0);
        BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
         * preallocated blocks are unmapped but should be treated
         * the same as allocated blocks.
         */
-       ret = ext4_map_blocks(NULL, inode, &map, 0);
-       if (ret < 0)
+       ret = ext4_da_map_blocks(inode, iblock, &map, bh);
+       if (ret <= 0)
                return ret;
-       if (ret == 0) {
-               if (buffer_delay(bh))
-                       return 0; /* Not sure this could or should happen */
-               /*
-                * XXX: __block_write_begin() unmaps passed block, is it OK?
-                */
-               ret = ext4_da_reserve_space(inode, iblock);
-               if (ret)
-                       /* not enough space to reserve */
-                       return ret;
-               map_bh(bh, inode->i_sb, invalid_block);
-               set_buffer_new(bh);
-               set_buffer_delay(bh);
-               return 0;
-       }
  
        map_bh(bh, inode->i_sb, map.m_pblk);
        bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
@@@ -1811,12 -1933,8 +1933,12 @@@ static int ext4_writepage(struct page *
                 * We don't want to do block allocation, so redirty
                 * the page and return.  We may reach here when we do
                 * a journal commit via journal_submit_inode_data_buffers.
 -               * We can also reach here via shrink_page_list
 +               * We can also reach here via shrink_page_list but it
 +               * should never be for direct reclaim so warn if that
 +               * happens
                 */
 +              WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
 +                                                              PF_MEMALLOC);
                goto redirty_page;
        }
        if (commit_write)
@@@ -2050,6 -2168,7 +2172,7 @@@ static int ext4_da_writepages(struct ad
        struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
        pgoff_t done_index = 0;
        pgoff_t end;
+       struct blk_plug plug;
  
        trace_ext4_da_writepages(inode, wbc);
  
@@@ -2128,6 -2247,7 +2251,7 @@@ retry
        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                tag_pages_for_writeback(mapping, index, end);
  
+       blk_start_plug(&plug);
        while (!ret && wbc->nr_to_write > 0) {
  
                /*
                        ret = 0;
                } else if (ret == MPAGE_DA_EXTENT_TAIL) {
                        /*
-                        * got one extent now try with
-                        * rest of the pages
+                        * Got one extent now try with rest of the pages.
+                        * If mpd.retval is set to -EIO, the journal is
+                        * aborted, so we don't need to write any more.
                         */
                        pages_written += mpd.pages_written;
-                       ret = 0;
+                       ret = mpd.retval;
                        io_done = 1;
                } else if (wbc->nr_to_write)
                        /*
                         */
                        break;
        }
+       blk_finish_plug(&plug);
        if (!io_done && !cycled) {
                cycled = 1;
                index = 0;
@@@ -2230,10 -2352,11 +2356,11 @@@ static int ext4_nonda_switch(struct sup
         * Delalloc need an accurate free block accounting. So switch
         * to non delalloc when we are near to error range.
         */
-       free_blocks  = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
-       dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter);
+       free_blocks  = EXT4_C2B(sbi,
+               percpu_counter_read_positive(&sbi->s_freeclusters_counter));
+       dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
        if (2 * free_blocks < 3 * dirty_blocks ||
-               free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
+               free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
                /*
                 * free block count is less than 150% of dirty blocks
                 * or free blocks is less than watermark
@@@ -2259,6 -2382,7 +2386,7 @@@ static int ext4_da_write_begin(struct f
        pgoff_t index;
        struct inode *inode = mapping->host;
        handle_t *handle;
+       loff_t page_len;
  
        index = pos >> PAGE_CACHE_SHIFT;
  
@@@ -2305,6 -2429,13 +2433,13 @@@ retry
                 */
                if (pos + len > inode->i_size)
                        ext4_truncate_failed_write(inode);
+       } else {
+               page_len = pos & (PAGE_CACHE_SIZE - 1);
+               if (page_len > 0) {
+                       ret = ext4_discard_partial_page_buffers_no_lock(handle,
+                               inode, page, pos - page_len, page_len,
+                               EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
+               }
        }
  
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@@ -2347,6 -2478,7 +2482,7 @@@ static int ext4_da_write_end(struct fil
        loff_t new_i_size;
        unsigned long start, end;
        int write_mode = (int)(unsigned long)fsdata;
+       loff_t page_len;
  
        if (write_mode == FALL_BACK_TO_NONDELALLOC) {
                if (ext4_should_order_data(inode)) {
        }
        ret2 = generic_write_end(file, mapping, pos, len, copied,
                                                        page, fsdata);
+       page_len = PAGE_CACHE_SIZE -
+                       ((pos + copied - 1) & (PAGE_CACHE_SIZE - 1));
+       if (page_len > 0) {
+               ret = ext4_discard_partial_page_buffers_no_lock(handle,
+                       inode, page, pos + copied - 1, page_len,
+                       EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
+       }
        copied = ret2;
        if (ret2 < 0)
                ret = ret2;
@@@ -2689,10 -2831,7 +2835,7 @@@ static void ext4_end_io_buffer_write(st
         * but being more careful is always safe for the future change.
         */
        inode = io_end->inode;
-       if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
-               io_end->flag |= EXT4_IO_END_UNWRITTEN;
-               atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
-       }
+       ext4_set_io_unwritten_flag(inode, io_end);
  
        /* Add the io_end to per-inode completed io list*/
        spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
@@@ -2858,6 -2997,12 +3001,12 @@@ static ssize_t ext4_direct_IO(int rw, s
        struct inode *inode = file->f_mapping->host;
        ssize_t ret;
  
+       /*
+        * If we are doing data journalling we don't support O_DIRECT
+        */
+       if (ext4_should_journal_data(inode))
+               return 0;
        trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
@@@ -2927,6 -3072,7 +3076,7 @@@ static const struct address_space_opera
        .bmap                   = ext4_bmap,
        .invalidatepage         = ext4_invalidatepage,
        .releasepage            = ext4_releasepage,
+       .direct_IO              = ext4_direct_IO,
        .is_partially_uptodate  = block_is_partially_uptodate,
        .error_remove_page      = generic_error_remove_page,
  };
@@@ -2963,6 -3109,227 +3113,227 @@@ void ext4_set_aops(struct inode *inode
                inode->i_mapping->a_ops = &ext4_journalled_aops;
  }
  
+ /*
+  * ext4_discard_partial_page_buffers()
+  * Wrapper function for ext4_discard_partial_page_buffers_no_lock.
+  * This function finds and locks the page containing the offset
+  * "from" and passes it to ext4_discard_partial_page_buffers_no_lock.
+  * Calling functions that already have the page locked should call
+  * ext4_discard_partial_page_buffers_no_lock directly.
+  */
+ int ext4_discard_partial_page_buffers(handle_t *handle,
+               struct address_space *mapping, loff_t from,
+               loff_t length, int flags)
+ {
+       struct inode *inode = mapping->host;
+       struct page *page;
+       int err = 0;
+       page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
+                                  mapping_gfp_mask(mapping) & ~__GFP_FS);
+       if (!page)
+               return -ENOMEM;
+       err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page,
+               from, length, flags);
+       unlock_page(page);
+       page_cache_release(page);
+       return err;
+ }
+ /*
+  * ext4_discard_partial_page_buffers_no_lock()
+  * Zeros a page range of length 'length' starting from offset 'from'.
+  * Buffer heads that correspond to the block-aligned regions of the
+  * zeroed range will be unmapped.  Non-block-aligned regions
+  * will have the corresponding buffer head mapped if needed so that
+  * that region of the page can be updated with the partial zero out.
+  *
+  * This function assumes that the page has already been locked.  The
+  * range to be discarded must be contained within the given page.
+  * If the specified range exceeds the end of the page it will be shortened
+  * to the end of the page that corresponds to 'from'.  This function is
+  * appropriate for updating a page and its buffer heads to be unmapped and
+  * zeroed for blocks that have been either released, or are going to be
+  * released.
+  *
+  * handle: The journal handle
+  * inode:  The file's inode
+  * page:   A locked page that contains the offset "from"
+  * from:   The starting byte offset (from the beginning of the file)
+  *         to begin discarding
+  * len:    The length of bytes to discard
+  * flags:  Optional flags that may be used:
+  *
+  *         EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED
+  *         Only zero the regions of the page whose buffer heads
+  *         have already been unmapped.  This flag is appropriate
+  *         for updating the contents of a page whose blocks may
+  *         have already been released, and we only want to zero
+  *         out the regions that correspond to those released blocks.
+  *
+  * Returns zero on success or negative on failure.
+  */
+ int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+               struct inode *inode, struct page *page, loff_t from,
+               loff_t length, int flags)
+ {
+       ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
+       unsigned int offset = from & (PAGE_CACHE_SIZE-1);
+       unsigned int blocksize, max, pos;
+       ext4_lblk_t iblock;
+       struct buffer_head *bh;
+       int err = 0;
+       blocksize = inode->i_sb->s_blocksize;
+       max = PAGE_CACHE_SIZE - offset;
+       if (index != page->index)
+               return -EINVAL;
+       /*
+        * correct length if it does not fall between
+        * 'from' and the end of the page
+        */
+       if (length > max || length < 0)
+               length = max;
+       iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+       if (!page_has_buffers(page)) {
+               /*
+                * If the range to be discarded covers a partial block
+                * we need to get the page buffers.  This is because
+                * partial blocks cannot be released and the page needs
+                * to be updated with the contents of the block before
+                * we write the zeros on top of it.
+                */
+               if ((from & (blocksize - 1)) ||
+                   ((from + length) & (blocksize - 1))) {
+                       create_empty_buffers(page, blocksize, 0);
+               } else {
+                       /*
+                        * If there are no partial blocks,
+                        * there is nothing to update,
+                        * so we can return now
+                        */
+                       return 0;
+               }
+       }
+       /* Find the buffer that contains "offset" */
+       bh = page_buffers(page);
+       pos = blocksize;
+       while (offset >= pos) {
+               bh = bh->b_this_page;
+               iblock++;
+               pos += blocksize;
+       }
+       pos = offset;
+       while (pos < offset + length) {
+               unsigned int end_of_block, range_to_discard;
+               err = 0;
+               /* The length of space left to zero and unmap */
+               range_to_discard = offset + length - pos;
+               /* The length of space until the end of the block */
+               end_of_block = blocksize - (pos & (blocksize-1));
+               /*
+                * Do not unmap or zero past end of block
+                * for this buffer head
+                */
+               if (range_to_discard > end_of_block)
+                       range_to_discard = end_of_block;
+               /*
+                * Skip this buffer head if we are only zeroing unmapped
+                * regions of the page
+                */
+               if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED &&
+                       buffer_mapped(bh))
+                               goto next;
+               /* If the range is block aligned, unmap */
+               if (range_to_discard == blocksize) {
+                       clear_buffer_dirty(bh);
+                       bh->b_bdev = NULL;
+                       clear_buffer_mapped(bh);
+                       clear_buffer_req(bh);
+                       clear_buffer_new(bh);
+                       clear_buffer_delay(bh);
+                       clear_buffer_unwritten(bh);
+                       clear_buffer_uptodate(bh);
+                       zero_user(page, pos, range_to_discard);
+                       BUFFER_TRACE(bh, "Buffer discarded");
+                       goto next;
+               }
+               /*
+                * If this block is not completely contained in the range
+                * to be discarded, then it is not going to be released.
+                * Since we need to keep this block, we need to make sure
+                * this part of the page is uptodate before we modify it
+                * by writing partial zeros on it.
+                */
+               if (!buffer_mapped(bh)) {
+                       /*
+                        * Buffer head must be mapped before we can read
+                        * from the block
+                        */
+                       BUFFER_TRACE(bh, "unmapped");
+                       ext4_get_block(inode, iblock, bh, 0);
+                       /* unmapped? It's a hole - nothing to do */
+                       if (!buffer_mapped(bh)) {
+                               BUFFER_TRACE(bh, "still unmapped");
+                               goto next;
+                       }
+               }
+               /* Ok, it's mapped. Make sure it's up-to-date */
+               if (PageUptodate(page))
+                       set_buffer_uptodate(bh);
+               if (!buffer_uptodate(bh)) {
+                       err = -EIO;
+                       ll_rw_block(READ, 1, &bh);
+                       wait_on_buffer(bh);
+               /* Uhhuh. Read error. Complain and punt. */
+                       if (!buffer_uptodate(bh))
+                               goto next;
+               }
+               if (ext4_should_journal_data(inode)) {
+                       BUFFER_TRACE(bh, "get write access");
+                       err = ext4_journal_get_write_access(handle, bh);
+                       if (err)
+                               goto next;
+               }
+               zero_user(page, pos, range_to_discard);
+               err = 0;
+               if (ext4_should_journal_data(inode))
+                       err = ext4_handle_dirty_metadata(handle, inode, bh);
+               else
+                       mark_buffer_dirty(bh);
+               BUFFER_TRACE(bh, "Partial buffer zeroed");
+ next:
+               bh = bh->b_this_page;
+               iblock++;
+               pos += range_to_discard;
+       }
+       return err;
+ }
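
For illustration, a caller of the _no_lock variant above is expected to hold the page lock itself. A minimal sketch of such a wrapper, assuming the usual kernel headers and simplified error handling (the function name below is hypothetical, not part of this patch):

        /* Hypothetical wrapper: zero the part of 'page' covered by the file
         * byte range [from, from + length), taking the page lock around the
         * unlocked helper above. */
        static int example_discard_partial_page(handle_t *handle,
                        struct inode *inode, struct page *page,
                        loff_t from, loff_t length)
        {
                int err;

                lock_page(page);
                err = ext4_discard_partial_page_buffers_no_lock(handle, inode,
                                                                page, from,
                                                                length, 0);
                unlock_page(page);
                return err;
        }
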
  /*
   * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
   * up to the end of the block which corresponds to `from'.
@@@ -3005,7 -3372,7 +3376,7 @@@ int ext4_block_zero_page_range(handle_
        page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
                                   mapping_gfp_mask(mapping) & ~__GFP_FS);
        if (!page)
-               return -EINVAL;
+               return -ENOMEM;
  
        blocksize = inode->i_sb->s_blocksize;
        max = blocksize - (offset & (blocksize - 1));
        err = 0;
        if (ext4_should_journal_data(inode)) {
                err = ext4_handle_dirty_metadata(handle, inode, bh);
-       } else {
-               if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode)
-                       err = ext4_jbd2_file_inode(handle, inode);
+       } else
                mark_buffer_dirty(bh);
-       }
  
  unlock:
        unlock_page(page);
@@@ -3119,6 -3483,11 +3487,11 @@@ int ext4_punch_hole(struct file *file, 
                return -ENOTSUPP;
        }
  
+       if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
+               /* TODO: Add support for bigalloc file systems */
+               return -ENOTSUPP;
+       }
        return ext4_ext_punch_hole(file, offset, length);
  }
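
The new guard refuses punch-hole on bigalloc filesystems, where one allocation cluster spans several blocks (s_cluster_ratio > 1). A self-contained userspace sketch of the relationship, with example constants that are not taken from the patch:

        #include <stdio.h>

        int main(void)
        {
                unsigned int block_size = 4096;  /* example: 4 KiB blocks */
                unsigned int cluster_bits = 4;   /* example: log2(blocks per cluster) */
                unsigned int cluster_ratio = 1u << cluster_bits;

                /* cluster_ratio == 1 means no bigalloc; > 1 trips the guard above */
                printf("cluster ratio %u, cluster size %u bytes\n",
                       cluster_ratio, block_size * cluster_ratio);
                return 0;
        }
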
  
@@@ -3302,7 -3671,7 +3675,7 @@@ make_io
                trace_ext4_load_inode(inode);
                get_bh(bh);
                bh->b_end_io = end_buffer_read_sync;
 -              submit_bh(READ_META, bh);
 +              submit_bh(READ | REQ_META | REQ_PRIO, bh);
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh)) {
                        EXT4_ERROR_INODE_BLOCK(inode, block,
@@@ -4420,6 -4789,7 +4793,7 @@@ retry_alloc
                          PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
                        unlock_page(page);
                        ret = VM_FAULT_SIGBUS;
+                       ext4_journal_stop(handle);
                        goto out;
                }
                ext4_set_inode_state(inode, EXT4_STATE_JDATA);
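
The added ext4_journal_stop() plugs a handle leak on the page-fault error path: a handle that was successfully started must be stopped on every exit. A minimal sketch of the pairing, with a hypothetical function name and a simplified credit count:

        static int example_journalled_op(struct inode *inode)
        {
                handle_t *handle = ext4_journal_start(inode, 1);
                int err;

                if (IS_ERR(handle))
                        return PTR_ERR(handle);

                err = ext4_mark_inode_dirty(handle, inode);
                /* stop the handle on the error path as well as on success */
                ext4_journal_stop(handle);
                return err;
        }
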
diff --combined fs/ext4/namei.c
@@@ -922,8 -922,7 +922,8 @@@ restart
                                bh = ext4_getblk(NULL, dir, b++, 0, &err);
                                bh_use[ra_max] = bh;
                                if (bh)
 -                                      ll_rw_block(READ_META, 1, &bh);
 +                                      ll_rw_block(READ | REQ_META | REQ_PRIO,
 +                                                  1, &bh);
                        }
                }
                if ((bh = bh_use[ra_ptr++]) == NULL)
@@@ -1586,7 -1585,7 +1586,7 @@@ static int ext4_dx_add_entry(handle_t *
                        dxtrace(dx_show_index("node", frames[1].entries));
                        dxtrace(dx_show_index("node",
                               ((struct dx_node *) bh2->b_data)->entries));
-                       err = ext4_handle_dirty_metadata(handle, inode, bh2);
+                       err = ext4_handle_dirty_metadata(handle, dir, bh2);
                        if (err)
                                goto journal_error;
                        brelse (bh2);
                        if (err)
                                goto journal_error;
                }
-               err = ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
+               err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh);
                if (err) {
                        ext4_std_error(inode->i_sb, err);
                        goto cleanup;
@@@ -1707,9 -1706,8 +1707,8 @@@ static void ext4_inc_count(handle_t *ha
   */
  static void ext4_dec_count(handle_t *handle, struct inode *inode)
  {
-       drop_nlink(inode);
-       if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0)
-               inc_nlink(inode);
+       if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
+               drop_nlink(inode);
  }
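
The rewritten helper keeps a directory's link count from dropping below 2 (its "." entry plus the link from its parent), while regular files always drop. A standalone userspace model of the new rule, for illustration only:

        #include <stdbool.h>
        #include <stdio.h>

        static unsigned int dec_count(bool is_dir, unsigned int nlink)
        {
                /* mirrors the rewritten ext4_dec_count() condition */
                if (!is_dir || nlink > 2)
                        nlink--;
                return nlink;
        }

        int main(void)
        {
                printf("dir at 2 stays at %u\n", dec_count(true, 2));
                printf("file at 1 drops to %u\n", dec_count(false, 1));
                return 0;
        }
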
  
  
@@@ -1756,7 -1754,7 +1755,7 @@@ retry
        if (IS_DIRSYNC(dir))
                ext4_handle_sync(handle);
  
-       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
+       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
        err = PTR_ERR(inode);
        if (!IS_ERR(inode)) {
                inode->i_op = &ext4_file_inode_operations;
@@@ -1792,7 -1790,7 +1791,7 @@@ retry
        if (IS_DIRSYNC(dir))
                ext4_handle_sync(handle);
  
-       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
+       inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
        err = PTR_ERR(inode);
        if (!IS_ERR(inode)) {
                init_special_inode(inode, inode->i_mode, rdev);
@@@ -1832,7 -1830,7 +1831,7 @@@ retry
                ext4_handle_sync(handle);
  
        inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
-                              &dentry->d_name, 0);
+                              &dentry->d_name, 0, NULL);
        err = PTR_ERR(inode);
        if (IS_ERR(inode))
                goto out_stop;
        ext4_set_de_type(dir->i_sb, de, S_IFDIR);
        inode->i_nlink = 2;
        BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
-       err = ext4_handle_dirty_metadata(handle, dir, dir_block);
+       err = ext4_handle_dirty_metadata(handle, inode, dir_block);
        if (err)
                goto out_clear_inode;
        err = ext4_mark_inode_dirty(handle, inode);
@@@ -2279,7 -2277,7 +2278,7 @@@ retry
                ext4_handle_sync(handle);
  
        inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
-                              &dentry->d_name, 0);
+                              &dentry->d_name, 0, NULL);
        err = PTR_ERR(inode);
        if (IS_ERR(inode))
                goto out_stop;
@@@ -2530,7 -2528,7 +2529,7 @@@ static int ext4_rename(struct inode *ol
                PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
                                                cpu_to_le32(new_dir->i_ino);
                BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
-               retval = ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
+               retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh);
                if (retval) {
                        ext4_std_error(old_dir->i_sb, retval);
                        goto end_rename;
diff --combined include/linux/ext3_fs.h
@@@ -180,8 -180,8 +180,8 @@@ struct ext3_group_des
  
  /* Flags that should be inherited by new inodes from their parent. */
  #define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
-                          EXT3_SYNC_FL | EXT3_IMMUTABLE_FL | EXT3_APPEND_FL |\
-                          EXT3_NODUMP_FL | EXT3_NOATIME_FL | EXT3_COMPRBLK_FL|\
+                          EXT3_SYNC_FL | EXT3_NODUMP_FL |\
+                          EXT3_NOATIME_FL | EXT3_COMPRBLK_FL |\
                           EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\
                           EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL)
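
With EXT3_IMMUTABLE_FL and EXT3_APPEND_FL dropped from the mask, a file created in an immutable or append-only directory no longer inherits those flags. A simplified sketch of how such a mask is applied at inode creation (not the exact ext3 code path):

        static __u32 example_inherit_flags(__u32 dir_flags)
        {
                /* immutable/append-only no longer propagate to new children */
                return dir_flags & EXT3_FL_INHERITED;
        }
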
  
@@@ -381,7 -381,7 +381,7 @@@ struct ext3_inode 
   * Mount flags
   */
  #define EXT3_MOUNT_CHECK              0x00001 /* Do mount-time checks */
 -#define EXT3_MOUNT_OLDALLOC           0x00002  /* Don't use the new Orlov allocator */
 +/* EXT3_MOUNT_OLDALLOC was here */
  #define EXT3_MOUNT_GRPID              0x00004 /* Create files with directory's group */
  #define EXT3_MOUNT_DEBUG              0x00008 /* Some debugging messages */
  #define EXT3_MOUNT_ERRORS_CONT                0x00010 /* Continue on errors */
@@@ -937,15 -937,15 +937,15 @@@ extern int ext3_group_extend(struct sup
                                ext3_fsblk_t n_blocks_count);
  
  /* super.c */
 -extern void ext3_error (struct super_block *, const char *, const char *, ...)
 -      __attribute__ ((format (printf, 3, 4)));
 +extern __printf(3, 4)
 +void ext3_error(struct super_block *, const char *, const char *, ...);
  extern void __ext3_std_error (struct super_block *, const char *, int);
 -extern void ext3_abort (struct super_block *, const char *, const char *, ...)
 -      __attribute__ ((format (printf, 3, 4)));
 -extern void ext3_warning (struct super_block *, const char *, const char *, ...)
 -      __attribute__ ((format (printf, 3, 4)));
 -extern void ext3_msg(struct super_block *, const char *, const char *, ...)
 -      __attribute__ ((format (printf, 3, 4)));
 +extern __printf(3, 4)
 +void ext3_abort(struct super_block *, const char *, const char *, ...);
 +extern __printf(3, 4)
 +void ext3_warning(struct super_block *, const char *, const char *, ...);
 +extern __printf(3, 4)
 +void ext3_msg(struct super_block *, const char *, const char *, ...);
  extern void ext3_update_dynamic_rev (struct super_block *sb);
  
  #define ext3_std_error(sb, errno)                             \
diff --combined include/linux/fs.h
@@@ -58,15 -58,14 +58,15 @@@ struct inodes_stat_t 
  
  #define NR_FILE  8192 /* this can well be larger on a larger system */
  
 -#define MAY_EXEC 1
 -#define MAY_WRITE 2
 -#define MAY_READ 4
 -#define MAY_APPEND 8
 -#define MAY_ACCESS 16
 -#define MAY_OPEN 32
 -#define MAY_CHDIR 64
 -#define MAY_NOT_BLOCK 128     /* called from RCU mode, don't block */
 +#define MAY_EXEC              0x00000001
 +#define MAY_WRITE             0x00000002
 +#define MAY_READ              0x00000004
 +#define MAY_APPEND            0x00000008
 +#define MAY_ACCESS            0x00000010
 +#define MAY_OPEN              0x00000020
 +#define MAY_CHDIR             0x00000040
 +/* called from RCU mode, don't block */
 +#define MAY_NOT_BLOCK         0x00000080
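
The renumbering to hex makes it obvious that the MAY_* values are single bits of one permission mask. A sketch of how they compose (the helper name is made up; inode_permission() itself is real):

        static int example_rw_permission(struct inode *inode)
        {
                /* single-bit values, so they combine with bitwise OR */
                return inode_permission(inode, MAY_READ | MAY_WRITE);
        }
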
  
  /*
   * flags in file.f_mode.  Note that FMODE_READ and FMODE_WRITE must correspond
  #define READA                 RWA_MASK
  
  #define READ_SYNC             (READ | REQ_SYNC)
 -#define READ_META             (READ | REQ_META)
  #define WRITE_SYNC            (WRITE | REQ_SYNC | REQ_NOIDLE)
  #define WRITE_ODIRECT         (WRITE | REQ_SYNC)
 -#define WRITE_META            (WRITE | REQ_META)
  #define WRITE_FLUSH           (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH)
  #define WRITE_FUA             (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA)
  #define WRITE_FLUSH_FUA               (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
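
With READ_META and WRITE_META removed, callers spell out the flag composition themselves, as the ext4 hunks above now do. A minimal sketch (the helper name is hypothetical):

        static void example_read_meta_block(struct buffer_head *bh)
        {
                /* explicit composition replaces the removed READ_META shorthand */
                ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
        }
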
@@@ -770,12 -771,13 +770,13 @@@ struct inode 
        unsigned long           i_ino;
        unsigned int            i_nlink;
        dev_t                   i_rdev;
-       loff_t                  i_size;
        struct timespec         i_atime;
        struct timespec         i_mtime;
        struct timespec         i_ctime;
-       unsigned int            i_blkbits;
+       spinlock_t              i_lock; /* i_blocks, i_bytes, maybe i_size */
+       unsigned short          i_bytes;
        blkcnt_t                i_blocks;
+       loff_t                  i_size;
  
  #ifdef __NEED_I_SIZE_ORDERED
        seqcount_t              i_size_seqcount;
  
        /* Misc */
        unsigned long           i_state;
-       spinlock_t              i_lock; /* i_blocks, i_bytes, maybe i_size */
        struct mutex            i_mutex;
  
        unsigned long           dirtied_when;   /* jiffies of first dirtying */
                struct rcu_head         i_rcu;
        };
        atomic_t                i_count;
+       unsigned int            i_blkbits;
        u64                     i_version;
-       unsigned short          i_bytes;
        atomic_t                i_dio_count;
+       atomic_t                i_writecount;
        const struct file_operations    *i_fop; /* former ->i_op->default_file_ops */
        struct file_lock        *i_flock;
        struct address_space    i_data;
  #ifdef CONFIG_IMA
        atomic_t                i_readcount; /* struct files open RO */
  #endif
-       atomic_t                i_writecount;
        void                    *i_private; /* fs or device private pointer */
  };
  
@@@ -964,12 -965,7 +964,12 @@@ struct file 
  #define f_dentry      f_path.dentry
  #define f_vfsmnt      f_path.mnt
        const struct file_operations    *f_op;
 -      spinlock_t              f_lock;  /* f_ep_links, f_flags, no IRQ */
 +
 +      /*
 +       * Protects f_ep_links, f_flags, f_pos vs i_size in lseek SEEK_CUR.
 +       * Must not be taken from IRQ context.
 +       */
 +      spinlock_t              f_lock;
  #ifdef CONFIG_SMP
        int                     f_sb_list_cpu;
  #endif
@@@ -1069,8 -1065,6 +1069,8 @@@ static inline int file_check_writeable(
  #define FL_LEASE      32      /* lease held on this file */
  #define FL_CLOSE      64      /* unlock on close */
  #define FL_SLEEP      128     /* A blocking lock */
 +#define FL_DOWNGRADE_PENDING  256 /* Lease is being downgraded */
 +#define FL_UNLOCK_PENDING     512 /* Lease is being broken */
  
  /*
   * Special return value from posix_lock_file() and vfs_lock_file() for
@@@ -1117,7 -1111,7 +1117,7 @@@ struct file_lock 
        struct list_head fl_link;       /* doubly linked list of all locks */
        struct list_head fl_block;      /* circular list of blocked processes */
        fl_owner_t fl_owner;
 -      unsigned char fl_flags;
 +      unsigned int fl_flags;
        unsigned char fl_type;
        unsigned int fl_pid;
        struct pid *fl_nspid;
        loff_t fl_end;
  
        struct fasync_struct *  fl_fasync; /* for lease break notifications */
 -      unsigned long fl_break_time;    /* for nonblocking lease breaks */
 +      /* for lease breaks: */
 +      unsigned long fl_break_time;
 +      unsigned long fl_downgrade_time;
  
        const struct file_lock_operations *fl_ops;      /* Callbacks for filesystems */
        const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */
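
fl_flags widens from unsigned char to unsigned int because the new FL_DOWNGRADE_PENDING (256) and FL_UNLOCK_PENDING (512) bits no longer fit in a byte, and each pending state gets its own deadline field. An illustrative helper, not taken from fs/locks.c:

        static unsigned long example_lease_deadline(struct file_lock *fl)
        {
                if (fl->fl_flags & FL_UNLOCK_PENDING)
                        return fl->fl_break_time;
                if (fl->fl_flags & FL_DOWNGRADE_PENDING)
                        return fl->fl_downgrade_time;
                return 0;       /* no break in progress */
        }
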
@@@ -1633,10 -1625,9 +1633,10 @@@ struct inode_operations 
  struct seq_file;
  
  ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 -                              unsigned long nr_segs, unsigned long fast_segs,
 -                              struct iovec *fast_pointer,
 -                              struct iovec **ret_pointer);
 +                            unsigned long nr_segs, unsigned long fast_segs,
 +                            struct iovec *fast_pointer,
 +                            struct iovec **ret_pointer,
 +                            int check_access);
  
  extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
  extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
@@@ -2327,11 -2318,6 +2327,11 @@@ extern struct inode * iget5_locked(stru
  extern struct inode * iget_locked(struct super_block *, unsigned long);
  extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
  extern int insert_inode_locked(struct inode *);
 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
 +extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
 +#else
 +static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }
 +#endif
  extern void unlock_new_inode(struct inode *);
  extern unsigned int get_next_ino(void);
  
@@@ -2408,8 -2394,8 +2408,8 @@@ file_ra_state_init(struct file_ra_stat
  extern loff_t noop_llseek(struct file *file, loff_t offset, int origin);
  extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
  extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
 -extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset,
 -                      int origin);
 +extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
 +              int origin, loff_t maxsize);
  extern int generic_file_open(struct inode * inode, struct file * filp);
  extern int nonseekable_open(struct inode * inode, struct file * filp);
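
generic_file_llseek_size() lets the caller supply the maximum offset explicitly rather than relying on the unlocked variant's use of s_maxbytes. A sketch of a caller (the filesystem name and limit are made up):

        static loff_t examplefs_llseek(struct file *file, loff_t offset, int origin)
        {
                /* EXAMPLEFS_MAX_BYTES is a hypothetical per-fs size limit */
                return generic_file_llseek_size(file, offset, origin,
                                                EXAMPLEFS_MAX_BYTES);
        }
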
  
@@@ -2635,8 -2621,8 +2635,8 @@@ static const struct file_operations __f
        .llseek  = generic_file_llseek,                                 \
  };
  
 -static inline void __attribute__((format(printf, 1, 2)))
 -__simple_attr_check_format(const char *fmt, ...)
 +static inline __printf(1, 2)
 +void __simple_attr_check_format(const char *fmt, ...)
  {
        /* don't do anything, just let the compiler check the arguments; */
  }
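
Both the ext3 declarations above and this helper rely on the __printf() shorthand for the compiler's format(printf, ...) attribute. A self-contained userspace demonstration, with the macro expanded by hand:

        #include <stdio.h>
        #include <stdarg.h>

        #define __printf(a, b) __attribute__((format(printf, a, b)))

        static __printf(1, 2) void say(const char *fmt, ...)
        {
                va_list ap;

                va_start(ap, fmt);
                vprintf(fmt, ap);
                va_end(ap);
        }

        int main(void)
        {
                /* format/argument mismatches here are caught at compile time */
                say("%s has %d entries\n", "lost+found", 2);
                return 0;
        }
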