Merge branch 'mlx4-net'

[pandora-kernel.git] / fs / super.c
diff --git a/fs/super.c b/fs/super.c

index 05a0216..2b7dc90 100644 (file)
--- a/fs/super.c
+++ b/fs/super.c
@@ -71,14 +71,14 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
         if (!(sc->gfp_mask & __GFP_FS))
                 return SHRINK_STOP;
  
-       if (!grab_super_passive(sb))
+       if (!trylock_super(sb))
                 return SHRINK_STOP;
  
         if (sb->s_op->nr_cached_objects)
-               fs_objects = sb->s_op->nr_cached_objects(sb, sc->nid);
+               fs_objects = sb->s_op->nr_cached_objects(sb, sc);
  
-       inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid);
-       dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid);
+       inodes = list_lru_shrink_count(&sb->s_inode_lru, sc);
+       dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc);
         total_objects = dentries + inodes + fs_objects + 1;
         if (!total_objects)
                 total_objects = 1;
@@ -86,22 +86,26 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
         /* proportion the scan between the caches */
         dentries = mult_frac(sc->nr_to_scan, dentries, total_objects);
         inodes = mult_frac(sc->nr_to_scan, inodes, total_objects);
+       fs_objects = mult_frac(sc->nr_to_scan, fs_objects, total_objects);
  
         /*
          * prune the dcache first as the icache is pinned by it, then
          * prune the icache, followed by the filesystem specific caches
+        *
+        * Ensure that we always scan at least one object - memcg kmem
+        * accounting uses this to fully empty the caches.
          */
-       freed = prune_dcache_sb(sb, dentries, sc->nid);
-       freed += prune_icache_sb(sb, inodes, sc->nid);
+       sc->nr_to_scan = dentries + 1;
+       freed = prune_dcache_sb(sb, sc);
+       sc->nr_to_scan = inodes + 1;
+       freed += prune_icache_sb(sb, sc);
  
         if (fs_objects) {
-               fs_objects = mult_frac(sc->nr_to_scan, fs_objects,
-                                                               total_objects);
-               freed += sb->s_op->free_cached_objects(sb, fs_objects,
-                                                      sc->nid);
+               sc->nr_to_scan = fs_objects + 1;
+               freed += sb->s_op->free_cached_objects(sb, sc);
         }
  
-       drop_super(sb);
+       up_read(&sb->s_umount);
         return freed;
  }
  
@@ -114,21 +118,18 @@ static unsigned long super_cache_count(struct shrinker *shrink,
         sb = container_of(shrink, struct super_block, s_shrink);
  
         /*
-        * Don't call grab_super_passive as it is a potential
+        * Don't call trylock_super as it is a potential
          * scalability bottleneck. The counts could get updated
          * between super_cache_count and super_cache_scan anyway.
          * Call to super_cache_count with shrinker_rwsem held
-        * ensures the safety of call to list_lru_count_node() and
+        * ensures the safety of call to list_lru_shrink_count() and
          * s_op->nr_cached_objects().
          */
         if (sb->s_op && sb->s_op->nr_cached_objects)
-               total_objects = sb->s_op->nr_cached_objects(sb,
-                                                sc->nid);
+               total_objects = sb->s_op->nr_cached_objects(sb, sc);
  
-       total_objects += list_lru_count_node(&sb->s_dentry_lru,
-                                                sc->nid);
-       total_objects += list_lru_count_node(&sb->s_inode_lru,
-                                                sc->nid);
+       total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc);
+       total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc);
  
         total_objects = vfs_pressure_ratio(total_objects);
         return total_objects;
@@ -191,9 +192,9 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
         INIT_HLIST_BL_HEAD(&s->s_anon);
         INIT_LIST_HEAD(&s->s_inodes);
  
-       if (list_lru_init(&s->s_dentry_lru))
+       if (list_lru_init_memcg(&s->s_dentry_lru))
                 goto fail;
-       if (list_lru_init(&s->s_inode_lru))
+       if (list_lru_init_memcg(&s->s_inode_lru))
                 goto fail;
  
         init_rwsem(&s->s_umount);
@@ -229,7 +230,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
         s->s_shrink.scan_objects = super_cache_scan;
         s->s_shrink.count_objects = super_cache_count;
         s->s_shrink.batch = 1024;
-       s->s_shrink.flags = SHRINKER_NUMA_AWARE;
+       s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE;
         return s;
  
  fail:
@@ -284,6 +285,14 @@ void deactivate_locked_super(struct super_block *s)
                 unregister_shrinker(&s->s_shrink);
                 fs->kill_sb(s);
  
+               /*
+                * Since list_lru_destroy() may sleep, we cannot call it from
+                * put_super(), where we hold the sb_lock. Therefore we destroy
+                * the lru lists right now.
+                */
+               list_lru_destroy(&s->s_dentry_lru);
+               list_lru_destroy(&s->s_inode_lru);
+
                 put_filesystem(fs);
                 put_super(s);
         } else {
@@ -339,35 +348,31 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
  }
  
  /*
- *     grab_super_passive - acquire a passive reference
+ *     trylock_super - try to grab ->s_umount shared
   *     @sb: reference we are trying to grab
   *
- *     Tries to acquire a passive reference. This is used in places where we
+ *     Try to prevent fs shutdown.  This is used in places where we
   *     cannot take an active reference but we need to ensure that the
- *     superblock does not go away while we are working on it. It returns
- *     false if a reference was not gained, and returns true with the s_umount
- *     lock held in read mode if a reference is gained. On successful return,
- *     the caller must drop the s_umount lock and the passive reference when
- *     done.
+ *     filesystem is not shut down while we are working on it. It returns
+ *     false if we cannot acquire s_umount or if we lose the race and
+ *     filesystem already got into shutdown, and returns true with the s_umount
+ *     lock held in read mode in case of success. On successful return,
+ *     the caller must drop the s_umount lock when done.
+ *
+ *     Note that unlike get_super() et al. this one does *not* bump ->s_count.
+ *     The reason why it's safe is that we are OK with doing trylock instead
+ *     of down_read().  There are a couple of places that are OK with that, but
+ *     it's very much not a general-purpose interface.
   */
-bool grab_super_passive(struct super_block *sb)
+bool trylock_super(struct super_block *sb)
  {
-       spin_lock(&sb_lock);
-       if (hlist_unhashed(&sb->s_instances)) {
-               spin_unlock(&sb_lock);
-               return false;
-       }
-
-       sb->s_count++;
-       spin_unlock(&sb_lock);
-
         if (down_read_trylock(&sb->s_umount)) {
-               if (sb->s_root && (sb->s_flags & MS_BORN))
+               if (!hlist_unhashed(&sb->s_instances) &&
+                   sb->s_root && (sb->s_flags & MS_BORN))
                         return true;
                 up_read(&sb->s_umount);
         }
  
-       put_super(sb);
         return false;
  }
  
@@ -706,9 +711,9 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
         remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
  
         if (remount_ro) {
-               if (sb->s_pins.first) {
+               if (!hlist_empty(&sb->s_pins)) {
                         up_write(&sb->s_umount);
-                       sb_pin_kill(sb);
+                       group_pin_kill(&sb->s_pins);
                         down_write(&sb->s_umount);
                         if (!sb->s_root)
                                 return 0;