swap: make each swap partition have one address_space
author Shaohua Li <shli@kernel.org>
Sat, 23 Feb 2013 00:34:37 +0000 (16:34 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 24 Feb 2013 01:50:17 +0000 (17:50 -0800)
When I use several fast SSDs for swap, swapper_space.tree_lock is
heavily contended.  This patch gives each swap partition its own
address_space to reduce the lock contention.  There is now an array of
address_spaces for swap, and the swap entry type is the index into the
array.

In my test with 3 SSDs, this increases the swapout throughput by 20%.

[akpm@linux-foundation.org: revert unneeded change to __add_to_swap_cache]
Signed-off-by: Shaohua Li <shli@fusionio.com>
Cc: Hugh Dickins <hughd@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/proc/meminfo.c
include/linux/swap.h
mm/memcontrol.c
mm/mincore.c
mm/swap.c
mm/swap_state.c
mm/swapfile.c
mm/util.c

index c3dac61..1efaaa1 100644 (file)
@@ -40,7 +40,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                * sysctl_overcommit_ratio / 100) + total_swap_pages;
 
        cached = global_page_state(NR_FILE_PAGES) -
-                       total_swapcache_pages - i.bufferram;
+                       total_swapcache_pages() - i.bufferram;
        if (cached < 0)
                cached = 0;
 
@@ -109,7 +109,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                K(i.freeram),
                K(i.bufferram),
                K(cached),
-               K(total_swapcache_pages),
+               K(total_swapcache_pages()),
                K(pages[LRU_ACTIVE_ANON]   + pages[LRU_ACTIVE_FILE]),
                K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
                K(pages[LRU_ACTIVE_ANON]),
index 8c66486..235c039 100644 (file)
@@ -8,7 +8,7 @@
 #include <linux/memcontrol.h>
 #include <linux/sched.h>
 #include <linux/node.h>
-
+#include <linux/fs.h>
 #include <linux/atomic.h>
 #include <asm/page.h>
 
@@ -330,8 +330,9 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *,
                sector_t *);
 
 /* linux/mm/swap_state.c */
-extern struct address_space swapper_space;
-#define total_swapcache_pages  swapper_space.nrpages
+extern struct address_space swapper_spaces[];
+#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)])
+extern unsigned long total_swapcache_pages(void);
 extern void show_swap_cache_info(void);
 extern int add_to_swap(struct page *);
 extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
@@ -382,7 +383,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
 
 #define nr_swap_pages                          0L
 #define total_swap_pages                       0L
-#define total_swapcache_pages                  0UL
+#define total_swapcache_pages()                        0UL
 
 #define si_swapinfo(val) \
        do { (val)->freeswap = (val)->totalswap = 0; } while (0)
index c878b1c..f858615 100644 (file)
@@ -6307,7 +6307,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
         * Because lookup_swap_cache() updates some statistics counter,
         * we call find_get_page() with swapper_space directly.
         */
-       page = find_get_page(&swapper_space, ent.val);
+       page = find_get_page(swap_address_space(ent), ent.val);
        if (do_swap_account)
                entry->val = ent.val;
 
@@ -6348,7 +6348,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
                swp_entry_t swap = radix_to_swp_entry(page);
                if (do_swap_account)
                        *entry = swap;
-               page = find_get_page(&swapper_space, swap.val);
+               page = find_get_page(swap_address_space(swap), swap.val);
        }
 #endif
        return page;
index 936b4ce..da2be56 100644 (file)
@@ -75,7 +75,7 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
        /* shmem/tmpfs may return swap: account for swapcache page too. */
        if (radix_tree_exceptional_entry(page)) {
                swp_entry_t swap = radix_to_swp_entry(page);
-               page = find_get_page(&swapper_space, swap.val);
+               page = find_get_page(swap_address_space(swap), swap.val);
        }
 #endif
        if (page) {
@@ -135,7 +135,8 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                        } else {
 #ifdef CONFIG_SWAP
                                pgoff = entry.val;
-                               *vec = mincore_page(&swapper_space, pgoff);
+                               *vec = mincore_page(swap_address_space(entry),
+                                       pgoff);
 #else
                                WARN_ON(1);
                                *vec = 1;
index 6310dc2..8a529a0 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -855,9 +855,14 @@ EXPORT_SYMBOL(pagevec_lookup_tag);
 void __init swap_setup(void)
 {
        unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
-
 #ifdef CONFIG_SWAP
-       bdi_init(swapper_space.backing_dev_info);
+       int i;
+
+       bdi_init(swapper_spaces[0].backing_dev_info);
+       for (i = 0; i < MAX_SWAPFILES; i++) {
+               spin_lock_init(&swapper_spaces[i].tree_lock);
+               INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear);
+       }
 #endif
 
        /* Use a smaller cluster for small-memory machines */
index 0cb36fb..8d6644c 100644 (file)
@@ -36,12 +36,12 @@ static struct backing_dev_info swap_backing_dev_info = {
        .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
 };
 
-struct address_space swapper_space = {
-       .page_tree      = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
-       .tree_lock      = __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock),
-       .a_ops          = &swap_aops,
-       .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
-       .backing_dev_info = &swap_backing_dev_info,
+struct address_space swapper_spaces[MAX_SWAPFILES] = {
+       [0 ... MAX_SWAPFILES - 1] = {
+               .page_tree      = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
+               .a_ops          = &swap_aops,
+               .backing_dev_info = &swap_backing_dev_info,
+       }
 };
 
 #define INC_CACHE_INFO(x)      do { swap_cache_info.x++; } while (0)
@@ -53,9 +53,19 @@ static struct {
        unsigned long find_total;
 } swap_cache_info;
 
+unsigned long total_swapcache_pages(void)
+{
+       int i;
+       unsigned long ret = 0;
+
+       for (i = 0; i < MAX_SWAPFILES; i++)
+               ret += swapper_spaces[i].nrpages;
+       return ret;
+}
+
 void show_swap_cache_info(void)
 {
-       printk("%lu pages in swap cache\n", total_swapcache_pages);
+       printk("%lu pages in swap cache\n", total_swapcache_pages());
        printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
                swap_cache_info.add_total, swap_cache_info.del_total,
                swap_cache_info.find_success, swap_cache_info.find_total);
@@ -70,6 +80,7 @@ void show_swap_cache_info(void)
 static int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 {
        int error;
+       struct address_space *address_space;
 
        VM_BUG_ON(!PageLocked(page));
        VM_BUG_ON(PageSwapCache(page));
@@ -79,14 +90,16 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry)
        SetPageSwapCache(page);
        set_page_private(page, entry.val);
 
-       spin_lock_irq(&swapper_space.tree_lock);
-       error = radix_tree_insert(&swapper_space.page_tree, entry.val, page);
+       address_space = swap_address_space(entry);
+       spin_lock_irq(&address_space->tree_lock);
+       error = radix_tree_insert(&address_space->page_tree,
+                                       entry.val, page);
        if (likely(!error)) {
-               total_swapcache_pages++;
+               address_space->nrpages++;
                __inc_zone_page_state(page, NR_FILE_PAGES);
                INC_CACHE_INFO(add_total);
        }
-       spin_unlock_irq(&swapper_space.tree_lock);
+       spin_unlock_irq(&address_space->tree_lock);
 
        if (unlikely(error)) {
                /*
@@ -122,14 +135,19 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
  */
 void __delete_from_swap_cache(struct page *page)
 {
+       swp_entry_t entry;
+       struct address_space *address_space;
+
        VM_BUG_ON(!PageLocked(page));
        VM_BUG_ON(!PageSwapCache(page));
        VM_BUG_ON(PageWriteback(page));
 
-       radix_tree_delete(&swapper_space.page_tree, page_private(page));
+       entry.val = page_private(page);
+       address_space = swap_address_space(entry);
+       radix_tree_delete(&address_space->page_tree, page_private(page));
        set_page_private(page, 0);
        ClearPageSwapCache(page);
-       total_swapcache_pages--;
+       address_space->nrpages--;
        __dec_zone_page_state(page, NR_FILE_PAGES);
        INC_CACHE_INFO(del_total);
 }
@@ -195,12 +213,14 @@ int add_to_swap(struct page *page)
 void delete_from_swap_cache(struct page *page)
 {
        swp_entry_t entry;
+       struct address_space *address_space;
 
        entry.val = page_private(page);
 
-       spin_lock_irq(&swapper_space.tree_lock);
+       address_space = swap_address_space(entry);
+       spin_lock_irq(&address_space->tree_lock);
        __delete_from_swap_cache(page);
-       spin_unlock_irq(&swapper_space.tree_lock);
+       spin_unlock_irq(&address_space->tree_lock);
 
        swapcache_free(entry, page);
        page_cache_release(page);
@@ -263,7 +283,7 @@ struct page * lookup_swap_cache(swp_entry_t entry)
 {
        struct page *page;
 
-       page = find_get_page(&swapper_space, entry.val);
+       page = find_get_page(swap_address_space(entry), entry.val);
 
        if (page)
                INC_CACHE_INFO(find_success);
@@ -290,7 +310,8 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                 * called after lookup_swap_cache() failed, re-calling
                 * that would confuse statistics.
                 */
-               found_page = find_get_page(&swapper_space, entry.val);
+               found_page = find_get_page(swap_address_space(entry),
+                                       entry.val);
                if (found_page)
                        break;
 
index e97a0e5..e51864e 100644 (file)
@@ -79,7 +79,7 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
        struct page *page;
        int ret = 0;
 
-       page = find_get_page(&swapper_space, entry.val);
+       page = find_get_page(swap_address_space(entry), entry.val);
        if (!page)
                return 0;
        /*
@@ -699,7 +699,8 @@ int free_swap_and_cache(swp_entry_t entry)
        p = swap_info_get(entry);
        if (p) {
                if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) {
-                       page = find_get_page(&swapper_space, entry.val);
+                       page = find_get_page(swap_address_space(entry),
+                                               entry.val);
                        if (page && !trylock_page(page)) {
                                page_cache_release(page);
                                page = NULL;
index 16a7319..ab1424d 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -6,6 +6,7 @@
 #include <linux/sched.h>
 #include <linux/security.h>
 #include <linux/swap.h>
+#include <linux/swapops.h>
 #include <asm/uaccess.h>
 
 #include "internal.h"
@@ -389,9 +390,12 @@ struct address_space *page_mapping(struct page *page)
 
        VM_BUG_ON(PageSlab(page));
 #ifdef CONFIG_SWAP
-       if (unlikely(PageSwapCache(page)))
-               mapping = &swapper_space;
-       else
+       if (unlikely(PageSwapCache(page))) {
+               swp_entry_t entry;
+
+               entry.val = page_private(page);
+               mapping = swap_address_space(entry);
+       } else
 #endif
        if ((unsigned long)mapping & PAGE_MAPPING_ANON)
                mapping = NULL;