Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/djm/tmem
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 26 May 2011 17:50:56 +0000 (10:50 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 26 May 2011 17:50:56 +0000 (10:50 -0700)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/djm/tmem:
  xen: cleancache shim to Xen Transcendent Memory
  ocfs2: add cleancache support
  ext4: add cleancache support
  btrfs: add cleancache support
  ext3: add cleancache support
  mm/fs: add hooks to support cleancache
  mm: cleancache core ops functions and config
  fs: add field to superblock to support cleancache
  mm/fs: cleancache documentation

Fix up trivial conflict in fs/btrfs/extent_io.c due to includes

1  2 
drivers/xen/Makefile
fs/btrfs/extent_io.c
fs/buffer.c
fs/ext4/super.c
fs/super.c
include/linux/fs.h
mm/filemap.c

diff --combined drivers/xen/Makefile
@@@ -1,24 -1,25 +1,25 @@@
  obj-y += grant-table.o features.o events.o manage.o balloon.o
  obj-y += xenbus/
+ obj-y += tmem.o
  
  nostackp := $(call cc-option, -fno-stack-protector)
  CFLAGS_features.o                     := $(nostackp)
  
 -obj-$(CONFIG_BLOCK)           += biomerge.o
 -obj-$(CONFIG_HOTPLUG_CPU)     += cpu_hotplug.o
 -obj-$(CONFIG_XEN_XENCOMM)     += xencomm.o
 -obj-$(CONFIG_XEN_BALLOON)     += xen-balloon.o
 -obj-$(CONFIG_XEN_DEV_EVTCHN)  += xen-evtchn.o
 -obj-$(CONFIG_XEN_GNTDEV)      += xen-gntdev.o
 +obj-$(CONFIG_BLOCK)                   += biomerge.o
 +obj-$(CONFIG_HOTPLUG_CPU)             += cpu_hotplug.o
 +obj-$(CONFIG_XEN_XENCOMM)             += xencomm.o
 +obj-$(CONFIG_XEN_BALLOON)             += xen-balloon.o
 +obj-$(CONFIG_XEN_DEV_EVTCHN)          += xen-evtchn.o
 +obj-$(CONFIG_XEN_GNTDEV)              += xen-gntdev.o
  obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)     += xen-gntalloc.o
 -obj-$(CONFIG_XENFS)           += xenfs/
 +obj-$(CONFIG_XENFS)                   += xenfs/
  obj-$(CONFIG_XEN_SYS_HYPERVISOR)      += sys-hypervisor.o
 -obj-$(CONFIG_XEN_PLATFORM_PCI)        += xen-platform-pci.o
 -obj-$(CONFIG_SWIOTLB_XEN)     += swiotlb-xen.o
 -obj-$(CONFIG_XEN_DOM0)                += pci.o
 +obj-$(CONFIG_XEN_PLATFORM_PCI)                += xen-platform-pci.o
 +obj-$(CONFIG_SWIOTLB_XEN)             += swiotlb-xen.o
 +obj-$(CONFIG_XEN_DOM0)                        += pci.o
  
 -xen-evtchn-y                  := evtchn.o
 +xen-evtchn-y                          := evtchn.o
  xen-gntdev-y                          := gntdev.o
  xen-gntalloc-y                                := gntalloc.o
  
 -xen-platform-pci-y            := platform-pci.o
 +xen-platform-pci-y                    := platform-pci.o
diff --combined fs/btrfs/extent_io.c
@@@ -10,7 -10,7 +10,8 @@@
  #include <linux/swap.h>
  #include <linux/writeback.h>
  #include <linux/pagevec.h>
 +#include <linux/prefetch.h>
+ #include <linux/cleancache.h>
  #include "extent_io.h"
  #include "extent_map.h"
  #include "compat.h"
@@@ -2016,6 -2016,13 +2017,13 @@@ static int __extent_read_full_page(stru
  
        set_page_extent_mapped(page);
  
+       if (!PageUptodate(page)) {
+               if (cleancache_get_page(page) == 0) {
+                       BUG_ON(blocksize != PAGE_SIZE);
+                       goto out;
+               }
+       }
        end = page_end;
        while (1) {
                lock_extent(tree, start, end, GFP_NOFS);
                cur = cur + iosize;
                page_offset += iosize;
        }
+ out:
        if (!nr) {
                if (!PageError(page))
                        SetPageUptodate(page);
diff --combined fs/buffer.c
@@@ -41,6 -41,7 +41,7 @@@
  #include <linux/bitops.h>
  #include <linux/mpage.h>
  #include <linux/bit_spinlock.h>
+ #include <linux/cleancache.h>
  
  static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
  
@@@ -269,6 -270,10 +270,10 @@@ void invalidate_bdev(struct block_devic
        invalidate_bh_lrus();
        lru_add_drain_all();    /* make sure all lru add caches are flushed */
        invalidate_mapping_pages(mapping, 0, -1);
+       /* 99% of the time, we don't need to flush the cleancache on the bdev.
+        * But, for the strange corners, lets be cautious
+        */
+       cleancache_flush_inode(mapping);
  }
  EXPORT_SYMBOL(invalidate_bdev);
  
@@@ -2331,26 -2336,24 +2336,26 @@@ EXPORT_SYMBOL(block_commit_write)
   * page lock we can determine safely if the page is beyond EOF. If it is not
   * beyond EOF, then the page is guaranteed safe against truncation until we
   * unlock the page.
 + *
 + * Direct callers of this function should call vfs_check_frozen() so that page
 + * fault does not busyloop until the fs is thawed.
   */
 -int
 -block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 -                 get_block_t get_block)
 +int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 +                       get_block_t get_block)
  {
        struct page *page = vmf->page;
        struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
        unsigned long end;
        loff_t size;
 -      int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
 +      int ret;
  
        lock_page(page);
        size = i_size_read(inode);
        if ((page->mapping != inode->i_mapping) ||
            (page_offset(page) > size)) {
 -              /* page got truncated out from underneath us */
 -              unlock_page(page);
 -              goto out;
 +              /* We overload EFAULT to mean page got truncated */
 +              ret = -EFAULT;
 +              goto out_unlock;
        }
  
        /* page is wholly or partially inside EOF */
        if (!ret)
                ret = block_commit_write(page, 0, end);
  
 -      if (unlikely(ret)) {
 -              unlock_page(page);
 -              if (ret == -ENOMEM)
 -                      ret = VM_FAULT_OOM;
 -              else /* -ENOSPC, -EIO, etc */
 -                      ret = VM_FAULT_SIGBUS;
 -      } else
 -              ret = VM_FAULT_LOCKED;
 -
 -out:
 +      if (unlikely(ret < 0))
 +              goto out_unlock;
 +      /*
 +       * Freezing in progress? We check after the page is marked dirty and
 +       * with page lock held so if the test here fails, we are sure freezing
 +       * code will wait during syncing until the page fault is done - at that
 +       * point page will be dirty and unlocked so freezing code will write it
 +       * and writeprotect it again.
 +       */
 +      set_page_dirty(page);
 +      if (inode->i_sb->s_frozen != SB_UNFROZEN) {
 +              ret = -EAGAIN;
 +              goto out_unlock;
 +      }
 +      return 0;
 +out_unlock:
 +      unlock_page(page);
        return ret;
 +}
 +EXPORT_SYMBOL(__block_page_mkwrite);
 +
 +int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 +                 get_block_t get_block)
 +{
 +      int ret;
 +      struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
 +
 +      /*
 +       * This check is racy but catches the common case. The check in
 +       * __block_page_mkwrite() is reliable.
 +       */
 +      vfs_check_frozen(sb, SB_FREEZE_WRITE);
 +      ret = __block_page_mkwrite(vma, vmf, get_block);
 +      return block_page_mkwrite_return(ret);
  }
  EXPORT_SYMBOL(block_page_mkwrite);
  
diff --combined fs/ext4/super.c
@@@ -38,6 -38,7 +38,7 @@@
  #include <linux/ctype.h>
  #include <linux/log2.h>
  #include <linux/crc16.h>
+ #include <linux/cleancache.h>
  #include <asm/uaccess.h>
  
  #include <linux/kthread.h>
@@@ -75,27 -76,11 +76,27 @@@ static void ext4_write_super(struct sup
  static int ext4_freeze(struct super_block *sb);
  static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
                       const char *dev_name, void *data);
 +static inline int ext2_feature_set_ok(struct super_block *sb);
 +static inline int ext3_feature_set_ok(struct super_block *sb);
  static int ext4_feature_set_ok(struct super_block *sb, int readonly);
  static void ext4_destroy_lazyinit_thread(void);
  static void ext4_unregister_li_request(struct super_block *sb);
  static void ext4_clear_request_list(void);
  
 +#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
 +static struct file_system_type ext2_fs_type = {
 +      .owner          = THIS_MODULE,
 +      .name           = "ext2",
 +      .mount          = ext4_mount,
 +      .kill_sb        = kill_block_super,
 +      .fs_flags       = FS_REQUIRES_DEV,
 +};
 +#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
 +#else
 +#define IS_EXT2_SB(sb) (0)
 +#endif
 +
 +
  #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
  static struct file_system_type ext3_fs_type = {
        .owner          = THIS_MODULE,
@@@ -822,8 -807,6 +823,8 @@@ static void ext4_put_super(struct super
                invalidate_bdev(sbi->journal_bdev);
                ext4_blkdev_remove(sbi);
        }
 +      if (sbi->s_mmp_tsk)
 +              kthread_stop(sbi->s_mmp_tsk);
        sb->s_fs_info = NULL;
        /*
         * Now that we are completely done shutting down the
@@@ -1114,7 -1097,7 +1115,7 @@@ static int ext4_show_options(struct seq
  
        if (!test_opt(sb, INIT_INODE_TABLE))
                seq_puts(seq, ",noinit_inode_table");
 -      else if (sbi->s_li_wait_mult)
 +      else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)
                seq_printf(seq, ",init_inode_table=%u",
                           (unsigned) sbi->s_li_wait_mult);
  
@@@ -1205,7 -1188,9 +1206,7 @@@ static ssize_t ext4_quota_write(struct 
                                const char *data, size_t len, loff_t off);
  
  static const struct dquot_operations ext4_quota_operations = {
 -#ifdef CONFIG_QUOTA
        .get_reserved_space = ext4_get_reserved_space,
 -#endif
        .write_dquot    = ext4_write_dquot,
        .acquire_dquot  = ext4_acquire_dquot,
        .release_dquot  = ext4_release_dquot,
@@@ -1916,7 -1901,7 +1917,7 @@@ static int ext4_setup_super(struct supe
                ext4_msg(sb, KERN_WARNING,
                         "warning: mounting fs with errors, "
                         "running e2fsck is recommended");
 -      else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
 +      else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
                 le16_to_cpu(es->s_mnt_count) >=
                 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
                ext4_msg(sb, KERN_WARNING,
                        EXT4_INODES_PER_GROUP(sb),
                        sbi->s_mount_opt, sbi->s_mount_opt2);
  
+       cleancache_init_fs(sb);
        return res;
  }
  
@@@ -2441,18 -2427,6 +2443,18 @@@ static ssize_t lifetime_write_kbytes_sh
                          EXT4_SB(sb)->s_sectors_written_start) >> 1)));
  }
  
 +static ssize_t extent_cache_hits_show(struct ext4_attr *a,
 +                                    struct ext4_sb_info *sbi, char *buf)
 +{
 +      return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_hits);
 +}
 +
 +static ssize_t extent_cache_misses_show(struct ext4_attr *a,
 +                                      struct ext4_sb_info *sbi, char *buf)
 +{
 +      return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_misses);
 +}
 +
  static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
                                          struct ext4_sb_info *sbi,
                                          const char *buf, size_t count)
@@@ -2510,8 -2484,6 +2512,8 @@@ static struct ext4_attr ext4_attr_##nam
  EXT4_RO_ATTR(delayed_allocation_blocks);
  EXT4_RO_ATTR(session_write_kbytes);
  EXT4_RO_ATTR(lifetime_write_kbytes);
 +EXT4_RO_ATTR(extent_cache_hits);
 +EXT4_RO_ATTR(extent_cache_misses);
  EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
                 inode_readahead_blks_store, s_inode_readahead_blks);
  EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
@@@ -2527,8 -2499,6 +2529,8 @@@ static struct attribute *ext4_attrs[] 
        ATTR_LIST(delayed_allocation_blocks),
        ATTR_LIST(session_write_kbytes),
        ATTR_LIST(lifetime_write_kbytes),
 +      ATTR_LIST(extent_cache_hits),
 +      ATTR_LIST(extent_cache_misses),
        ATTR_LIST(inode_readahead_blks),
        ATTR_LIST(inode_goal),
        ATTR_LIST(mb_stats),
@@@ -2691,6 -2661,12 +2693,6 @@@ static void print_daily_error_info(unsi
        mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
  }
  
 -static void ext4_lazyinode_timeout(unsigned long data)
 -{
 -      struct task_struct *p = (struct task_struct *)data;
 -      wake_up_process(p);
 -}
 -
  /* Find next suitable group and run ext4_init_inode_table */
  static int ext4_run_li_request(struct ext4_li_request *elr)
  {
                ret = ext4_init_inode_table(sb, group,
                                            elr->lr_timeout ? 0 : 1);
                if (elr->lr_timeout == 0) {
 -                      timeout = jiffies - timeout;
 -                      if (elr->lr_sbi->s_li_wait_mult)
 -                              timeout *= elr->lr_sbi->s_li_wait_mult;
 -                      else
 -                              timeout *= 20;
 +                      timeout = (jiffies - timeout) *
 +                                elr->lr_sbi->s_li_wait_mult;
                        elr->lr_timeout = timeout;
                }
                elr->lr_next_sched = jiffies + elr->lr_timeout;
  
  /*
   * Remove lr_request from the list_request and free the
 - * request tructure. Should be called with li_list_mtx held
 + * request structure. Should be called with li_list_mtx held
   */
  static void ext4_remove_li_request(struct ext4_li_request *elr)
  {
  
  static void ext4_unregister_li_request(struct super_block *sb)
  {
 -      struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request;
 -
 -      if (!ext4_li_info)
 +      mutex_lock(&ext4_li_mtx);
 +      if (!ext4_li_info) {
 +              mutex_unlock(&ext4_li_mtx);
                return;
 +      }
  
        mutex_lock(&ext4_li_info->li_list_mtx);
 -      ext4_remove_li_request(elr);
 +      ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
        mutex_unlock(&ext4_li_info->li_list_mtx);
 +      mutex_unlock(&ext4_li_mtx);
  }
  
  static struct task_struct *ext4_lazyinit_task;
@@@ -2781,10 -2758,17 +2783,10 @@@ static int ext4_lazyinit_thread(void *a
        struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
        struct list_head *pos, *n;
        struct ext4_li_request *elr;
 -      unsigned long next_wakeup;
 -      DEFINE_WAIT(wait);
 +      unsigned long next_wakeup, cur;
  
        BUG_ON(NULL == eli);
  
 -      eli->li_timer.data = (unsigned long)current;
 -      eli->li_timer.function = ext4_lazyinode_timeout;
 -
 -      eli->li_task = current;
 -      wake_up(&eli->li_wait_task);
 -
  cont_thread:
        while (true) {
                next_wakeup = MAX_JIFFY_OFFSET;
                if (freezing(current))
                        refrigerator();
  
 -              if ((time_after_eq(jiffies, next_wakeup)) ||
 +              cur = jiffies;
 +              if ((time_after_eq(cur, next_wakeup)) ||
                    (MAX_JIFFY_OFFSET == next_wakeup)) {
                        cond_resched();
                        continue;
                }
  
 -              eli->li_timer.expires = next_wakeup;
 -              add_timer(&eli->li_timer);
 -              prepare_to_wait(&eli->li_wait_daemon, &wait,
 -                              TASK_INTERRUPTIBLE);
 -              if (time_before(jiffies, next_wakeup))
 -                      schedule();
 -              finish_wait(&eli->li_wait_daemon, &wait);
 +              schedule_timeout_interruptible(next_wakeup - cur);
 +
                if (kthread_should_stop()) {
                        ext4_clear_request_list();
                        goto exit_thread;
@@@ -2847,7 -2835,12 +2849,7 @@@ exit_thread
                goto cont_thread;
        }
        mutex_unlock(&eli->li_list_mtx);
 -      del_timer_sync(&ext4_li_info->li_timer);
 -      eli->li_task = NULL;
 -      wake_up(&eli->li_wait_task);
 -
        kfree(ext4_li_info);
 -      ext4_lazyinit_task = NULL;
        ext4_li_info = NULL;
        mutex_unlock(&ext4_li_mtx);
  
@@@ -2875,6 -2868,7 +2877,6 @@@ static int ext4_run_lazyinit_thread(voi
        if (IS_ERR(ext4_lazyinit_task)) {
                int err = PTR_ERR(ext4_lazyinit_task);
                ext4_clear_request_list();
 -              del_timer_sync(&ext4_li_info->li_timer);
                kfree(ext4_li_info);
                ext4_li_info = NULL;
                printk(KERN_CRIT "EXT4: error %d creating inode table "
                return err;
        }
        ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
 -
 -      wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL);
        return 0;
  }
  
@@@ -2917,9 -2913,13 +2919,9 @@@ static int ext4_li_info_new(void
        if (!eli)
                return -ENOMEM;
  
 -      eli->li_task = NULL;
        INIT_LIST_HEAD(&eli->li_request_list);
        mutex_init(&eli->li_list_mtx);
  
 -      init_waitqueue_head(&eli->li_wait_daemon);
 -      init_waitqueue_head(&eli->li_wait_task);
 -      init_timer(&eli->li_timer);
        eli->li_state |= EXT4_LAZYINIT_QUIT;
  
        ext4_li_info = eli;
@@@ -2962,19 -2962,20 +2964,19 @@@ static int ext4_register_li_request(str
        ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
        int ret = 0;
  
 -      if (sbi->s_li_request != NULL)
 +      if (sbi->s_li_request != NULL) {
 +              /*
 +               * Reset timeout so it can be computed again, because
 +               * s_li_wait_mult might have changed.
 +               */
 +              sbi->s_li_request->lr_timeout = 0;
                return 0;
 +      }
  
        if (first_not_zeroed == ngroups ||
            (sb->s_flags & MS_RDONLY) ||
 -          !test_opt(sb, INIT_INODE_TABLE)) {
 -              sbi->s_li_request = NULL;
 +          !test_opt(sb, INIT_INODE_TABLE))
                return 0;
 -      }
 -
 -      if (first_not_zeroed == ngroups) {
 -              sbi->s_li_request = NULL;
 -              return 0;
 -      }
  
        elr = ext4_li_request_new(sb, first_not_zeroed);
        if (!elr)
@@@ -3167,12 -3168,6 +3169,12 @@@ static int ext4_fill_super(struct super
            ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
                set_opt(sb, DELALLOC);
  
 +      /*
 +       * set default s_li_wait_mult for lazyinit, for the case there is
 +       * no mount option specified.
 +       */
 +      sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
 +
        if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
                           &journal_devnum, &journal_ioprio, NULL, 0)) {
                ext4_msg(sb, KERN_WARNING,
                       "feature flags set on rev 0 fs, "
                       "running e2fsck is recommended");
  
 +      if (IS_EXT2_SB(sb)) {
 +              if (ext2_feature_set_ok(sb))
 +                      ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
 +                               "using the ext4 subsystem");
 +              else {
 +                      ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
 +                               "to feature incompatibilities");
 +                      goto failed_mount;
 +              }
 +      }
 +
 +      if (IS_EXT3_SB(sb)) {
 +              if (ext3_feature_set_ok(sb))
 +                      ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
 +                               "using the ext4 subsystem");
 +              else {
 +                      ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
 +                               "to feature incompatibilities");
 +                      goto failed_mount;
 +              }
 +      }
 +
        /*
         * Check feature flags regardless of the revision level, since we
         * previously didn't change the revision level when setting the flags,
                          EXT4_HAS_INCOMPAT_FEATURE(sb,
                                    EXT4_FEATURE_INCOMPAT_RECOVER));
  
 +      if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
 +          !(sb->s_flags & MS_RDONLY))
 +              if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
 +                      goto failed_mount3;
 +
        /*
         * The first inode we look at is the journal inode.  Don't try
         * root first: it may be modified in the journal!
                goto failed_mount_wq;
        } else {
                clear_opt(sb, DATA_FLAGS);
 -              set_opt(sb, WRITEBACK_DATA);
                sbi->s_journal = NULL;
                needs_recovery = 0;
                goto no_journal;
@@@ -3740,8 -3709,6 +3742,8 @@@ failed_mount3
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
        percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 +      if (sbi->s_mmp_tsk)
 +              kthread_stop(sbi->s_mmp_tsk);
  failed_mount2:
        for (i = 0; i < db_count; i++)
                brelse(sbi->s_group_desc[i]);
@@@ -4277,7 -4244,7 +4279,7 @@@ static int ext4_remount(struct super_bl
        int enable_quota = 0;
        ext4_group_t g;
        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
 -      int err;
 +      int err = 0;
  #ifdef CONFIG_QUOTA
        int i;
  #endif
                                goto restore_opts;
                        if (!ext4_setup_super(sb, es, 0))
                                sb->s_flags &= ~MS_RDONLY;
 +                      if (EXT4_HAS_INCOMPAT_FEATURE(sb,
 +                                                   EXT4_FEATURE_INCOMPAT_MMP))
 +                              if (ext4_multi_mount_protect(sb,
 +                                              le64_to_cpu(es->s_mmp_block))) {
 +                                      err = -EROFS;
 +                                      goto restore_opts;
 +                              }
                        enable_quota = 1;
                }
        }
@@@ -4474,7 -4434,6 +4476,7 @@@ static int ext4_statfs(struct dentry *d
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
        u64 fsid;
 +      s64 bfree;
  
        if (test_opt(sb, MINIX_DF)) {
                sbi->s_overhead_last = 0;
        buf->f_type = EXT4_SUPER_MAGIC;
        buf->f_bsize = sb->s_blocksize;
        buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
 -      buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
 +      bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
                       percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
 +      /* prevent underflow in case that few free space is available */
 +      buf->f_bfree = max_t(s64, bfree, 0);
        buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
        if (buf->f_bfree < ext4_r_blocks_count(es))
                buf->f_bavail = 0;
@@@ -4697,9 -4654,6 +4699,9 @@@ static int ext4_quota_off(struct super_
        if (test_opt(sb, DELALLOC))
                sync_filesystem(sb);
  
 +      if (!inode)
 +              goto out;
 +
        /* Update modification times of quota files when userspace can
         * start looking at them */
        handle = ext4_journal_start(inode, 1);
@@@ -4820,6 -4774,14 +4822,6 @@@ static struct dentry *ext4_mount(struc
  }
  
  #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
 -static struct file_system_type ext2_fs_type = {
 -      .owner          = THIS_MODULE,
 -      .name           = "ext2",
 -      .mount          = ext4_mount,
 -      .kill_sb        = kill_block_super,
 -      .fs_flags       = FS_REQUIRES_DEV,
 -};
 -
  static inline void register_as_ext2(void)
  {
        int err = register_filesystem(&ext2_fs_type);
@@@ -4832,22 -4794,10 +4834,22 @@@ static inline void unregister_as_ext2(v
  {
        unregister_filesystem(&ext2_fs_type);
  }
 +
 +static inline int ext2_feature_set_ok(struct super_block *sb)
 +{
 +      if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))
 +              return 0;
 +      if (sb->s_flags & MS_RDONLY)
 +              return 1;
 +      if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))
 +              return 0;
 +      return 1;
 +}
  MODULE_ALIAS("ext2");
  #else
  static inline void register_as_ext2(void) { }
  static inline void unregister_as_ext2(void) { }
 +static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
  #endif
  
  #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
@@@ -4863,24 -4813,10 +4865,24 @@@ static inline void unregister_as_ext3(v
  {
        unregister_filesystem(&ext3_fs_type);
  }
 +
 +static inline int ext3_feature_set_ok(struct super_block *sb)
 +{
 +      if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP))
 +              return 0;
 +      if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
 +              return 0;
 +      if (sb->s_flags & MS_RDONLY)
 +              return 1;
 +      if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP))
 +              return 0;
 +      return 1;
 +}
  MODULE_ALIAS("ext3");
  #else
  static inline void register_as_ext3(void) { }
  static inline void unregister_as_ext3(void) { }
 +static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; }
  #endif
  
  static struct file_system_type ext4_fs_type = {
@@@ -4964,8 -4900,8 +4966,8 @@@ static int __init ext4_init_fs(void
        err = init_inodecache();
        if (err)
                goto out1;
 -      register_as_ext2();
        register_as_ext3();
 +      register_as_ext2();
        err = register_filesystem(&ext4_fs_type);
        if (err)
                goto out;
diff --combined fs/super.c
@@@ -31,6 -31,7 +31,7 @@@
  #include <linux/mutex.h>
  #include <linux/backing-dev.h>
  #include <linux/rculist_bl.h>
+ #include <linux/cleancache.h>
  #include "internal.h"
  
  
@@@ -112,6 -113,7 +113,7 @@@ static struct super_block *alloc_super(
                s->s_maxbytes = MAX_NON_LFS;
                s->s_op = &default_op;
                s->s_time_gran = 1000000000;
+               s->cleancache_poolid = -1;
        }
  out:
        return s;
@@@ -177,6 -179,7 +179,7 @@@ void deactivate_locked_super(struct sup
  {
        struct file_system_type *fs = s->s_type;
        if (atomic_dec_and_test(&s->s_active)) {
+               cleancache_flush_fs(s);
                fs->kill_sb(s);
                /*
                 * We need to call rcu_barrier so all the delayed rcu free
@@@ -948,7 -951,8 +951,7 @@@ mount_fs(struct file_system_type *type
         * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
         * but s_maxbytes was an unsigned long long for many releases. Throw
         * this warning for a little while to try and catch filesystems that
 -       * violate this rule. This warning should be either removed or
 -       * converted to a BUG() in 2.6.34.
 +       * violate this rule.
         */
        WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
                "negative value (%lld)\n", type->name, sb->s_maxbytes);
diff --combined include/linux/fs.h
@@@ -23,8 -23,7 +23,8 @@@
  
  /* Fixed constants first: */
  #undef NR_OPEN
 -#define INR_OPEN 1024         /* Initial setting for nfile rlimits */
 +#define INR_OPEN_CUR 1024     /* Initial setting for nfile rlimits */
 +#define INR_OPEN_MAX 4096     /* Hard limit for nfile rlimits */
  
  #define BLOCK_SIZE_BITS 10
  #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
@@@ -635,7 -634,8 +635,7 @@@ struct address_space 
        unsigned int            i_mmap_writable;/* count VM_SHARED mappings */
        struct prio_tree_root   i_mmap;         /* tree of private and shared mappings */
        struct list_head        i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
 -      spinlock_t              i_mmap_lock;    /* protect tree, count, list */
 -      unsigned int            truncate_count; /* Cover race condition with truncate */
 +      struct mutex            i_mmap_mutex;   /* protect tree, count, list */
        unsigned long           nrpages;        /* number of total pages */
        pgoff_t                 writeback_index;/* writeback starts here */
        const struct address_space_operations *a_ops;   /* methods */
        spinlock_t              private_lock;   /* for use by the address_space */
        struct list_head        private_list;   /* ditto */
        struct address_space    *assoc_mapping; /* ditto */
 -      struct mutex            unmap_mutex;    /* to protect unmapping */
  } __attribute__((aligned(sizeof(long))));
        /*
         * On most architectures that alignment is already the case; but
@@@ -1428,6 -1429,11 +1428,11 @@@ struct super_block 
         */
        char __rcu *s_options;
        const struct dentry_operations *s_d_op; /* default d_op for dentries */
+       /*
+        * Saved pool identifier for cleancache (-1 means none)
+        */
+       int cleancache_poolid;
  };
  
  extern struct timespec current_fs_time(struct super_block *sb);
diff --combined mm/filemap.c
@@@ -34,6 -34,7 +34,7 @@@
  #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
  #include <linux/memcontrol.h>
  #include <linux/mm_inline.h> /* for page_is_file_cache() */
+ #include <linux/cleancache.h>
  #include "internal.h"
  
  /*
  /*
   * Lock ordering:
   *
 - *  ->i_mmap_lock             (truncate_pagecache)
 + *  ->i_mmap_mutex            (truncate_pagecache)
   *    ->private_lock          (__free_pte->__set_page_dirty_buffers)
   *      ->swap_lock           (exclusive_swap_page, others)
   *        ->mapping->tree_lock
   *
   *  ->i_mutex
 - *    ->i_mmap_lock           (truncate->unmap_mapping_range)
 + *    ->i_mmap_mutex          (truncate->unmap_mapping_range)
   *
   *  ->mmap_sem
 - *    ->i_mmap_lock
 + *    ->i_mmap_mutex
   *      ->page_table_lock or pte_lock (various, mainly in memory.c)
   *        ->mapping->tree_lock        (arch-dependent flush_dcache_mmap_lock)
   *
@@@ -84,7 -85,7 +85,7 @@@
   *    sb_lock                 (fs/fs-writeback.c)
   *    ->mapping->tree_lock    (__sync_single_inode)
   *
 - *  ->i_mmap_lock
 + *  ->i_mmap_mutex
   *    ->anon_vma.lock         (vma_adjust)
   *
   *  ->anon_vma.lock
   *
   *  (code doesn't rely on that order, so you could switch it around)
   *  ->tasklist_lock             (memory_failure, collect_procs_ao)
 - *    ->i_mmap_lock
 + *    ->i_mmap_mutex
   */
  
  /*
@@@ -118,6 -119,16 +119,16 @@@ void __delete_from_page_cache(struct pa
  {
        struct address_space *mapping = page->mapping;
  
+       /*
+        * if we're uptodate, flush out into the cleancache, otherwise
+        * invalidate any existing cleancache entries.  We can't leave
+        * stale data around in the cleancache once our page is gone
+        */
+       if (PageUptodate(page) && PageMappedToDisk(page))
+               cleancache_put_page(page);
+       else
+               cleancache_flush_page(mapping, page);
        radix_tree_delete(&mapping->page_tree, page->index);
        page->mapping = NULL;
        mapping->nrpages--;
@@@ -562,17 -573,6 +573,17 @@@ void wait_on_page_bit(struct page *page
  }
  EXPORT_SYMBOL(wait_on_page_bit);
  
 +int wait_on_page_bit_killable(struct page *page, int bit_nr)
 +{
 +      DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
 +
 +      if (!test_bit(bit_nr, &page->flags))
 +              return 0;
 +
 +      return __wait_on_bit(page_waitqueue(page), &wait,
 +                           sleep_on_page_killable, TASK_KILLABLE);
 +}
 +
  /**
   * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
   * @page: Page defining the wait queue of interest
@@@ -654,32 -654,15 +665,32 @@@ EXPORT_SYMBOL_GPL(__lock_page_killable)
  int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
                         unsigned int flags)
  {
 -      if (!(flags & FAULT_FLAG_ALLOW_RETRY)) {
 -              __lock_page(page);
 -              return 1;
 -      } else {
 -              if (!(flags & FAULT_FLAG_RETRY_NOWAIT)) {
 -                      up_read(&mm->mmap_sem);
 +      if (flags & FAULT_FLAG_ALLOW_RETRY) {
 +              /*
 +               * CAUTION! In this case, mmap_sem is not released
 +               * even though return 0.
 +               */
 +              if (flags & FAULT_FLAG_RETRY_NOWAIT)
 +                      return 0;
 +
 +              up_read(&mm->mmap_sem);
 +              if (flags & FAULT_FLAG_KILLABLE)
 +                      wait_on_page_locked_killable(page);
 +              else
                        wait_on_page_locked(page);
 -              }
                return 0;
 +      } else {
 +              if (flags & FAULT_FLAG_KILLABLE) {
 +                      int ret;
 +
 +                      ret = __lock_page_killable(page);
 +                      if (ret) {
 +                              up_read(&mm->mmap_sem);
 +                              return 0;
 +                      }
 +              } else
 +                      __lock_page(page);
 +              return 1;
        }
  }
  
@@@ -1556,17 -1539,15 +1567,17 @@@ static void do_sync_mmap_readahead(stru
        /* If we don't want any read-ahead, don't bother */
        if (VM_RandomReadHint(vma))
                return;
 +      if (!ra->ra_pages)
 +              return;
  
 -      if (VM_SequentialReadHint(vma) ||
 -                      offset - 1 == (ra->prev_pos >> PAGE_CACHE_SHIFT)) {
 +      if (VM_SequentialReadHint(vma)) {
                page_cache_sync_readahead(mapping, ra, file, offset,
                                          ra->ra_pages);
                return;
        }
  
 -      if (ra->mmap_miss < INT_MAX)
 +      /* Avoid banging the cache line if not needed */
 +      if (ra->mmap_miss < MMAP_LOTSAMISS * 10)
                ra->mmap_miss++;
  
        /*
         * mmap read-around
         */
        ra_pages = max_sane_readahead(ra->ra_pages);
 -      if (ra_pages) {
 -              ra->start = max_t(long, 0, offset - ra_pages/2);
 -              ra->size = ra_pages;
 -              ra->async_size = 0;
 -              ra_submit(ra, mapping, file);
 -      }
 +      ra->start = max_t(long, 0, offset - ra_pages / 2);
 +      ra->size = ra_pages;
 +      ra->async_size = ra_pages / 4;
 +      ra_submit(ra, mapping, file);
  }
  
  /*
@@@ -1688,6 -1671,7 +1699,6 @@@ retry_find
                return VM_FAULT_SIGBUS;
        }
  
 -      ra->prev_pos = (loff_t)offset << PAGE_CACHE_SHIFT;
        vmf->page = page;
        return ret | VM_FAULT_LOCKED;