swapfile: swapon use discard (trim)
author Hugh Dickins <hugh@veritas.com>
Tue, 6 Jan 2009 22:39:51 +0000 (14:39 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 6 Jan 2009 23:59:05 +0000 (15:59 -0800)
When adding swap, all the old data on swap can be forgotten: sys_swapon()
discards all but the header page of the swap partition (or every extent but
the header of the swap file), to give a solid-state swap device the
opportunity to optimize its wear-levelling.

If that succeeds, note SWP_DISCARDABLE for later use, and report it with a
"D" at the right end of the kernel's "Adding ...  swap" message.  Perhaps
something should be shown in /proc/swaps (swapon -s), but we have to be
more cautious before making any addition to that format.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Joern Engel <joern@logfs.org>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Donjun Shin <djshin90@gmail.com>
Cc: Tejun Heo <teheo@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/swap.h
mm/swapfile.c

index 9cabb8b..0b9210e 100644 (file)
@@ -120,6 +120,7 @@ struct swap_extent {
 enum {
        SWP_USED        = (1 << 0),     /* is slot in swap_info[] used? */
        SWP_WRITEOK     = (1 << 1),     /* ok to write to this swap?    */
+       SWP_DISCARDABLE = (1 << 2),     /* blkdev supports discard */
                                        /* add others here before... */
        SWP_SCANNING    = (1 << 8),     /* refcount in scan_swap_map */
 };
index 4d9855f..fbeb4bb 100644 (file)
@@ -84,6 +84,37 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
        up_read(&swap_unplug_sem);
 }
 
+/*
+ * swapon tells the device that all the old swap contents can be discarded,
+ * to allow the swap device to optimize its wear-levelling.
+ */
+static int discard_swap(struct swap_info_struct *si)
+{
+       struct swap_extent *se;
+       int err = 0;            /* stays 0 if nothing gets discarded */
+
+       list_for_each_entry(se, &si->extent_list, list) {
+               sector_t start_block = se->start_block << (PAGE_SHIFT - 9);
+               pgoff_t nr_blocks = se->nr_pages << (PAGE_SHIFT - 9);
+
+               if (se->start_page == 0) {
+                       /* Do not discard the swap header page! */
+                       start_block += 1 << (PAGE_SHIFT - 9);
+                       nr_blocks -= 1 << (PAGE_SHIFT - 9);
+                       if (!nr_blocks)
+                               continue;       /* extent was only the header */
+               }
+
+               err = blkdev_issue_discard(si->bdev, start_block,
+                                               nr_blocks, GFP_KERNEL);
+               if (err)
+                       break;          /* stop at the first failure */
+
+               cond_resched();         /* may yield the CPU between extents */
+       }
+       return err;             /* That will often be -EOPNOTSUPP */
+}
+
 #define SWAPFILE_CLUSTER       256
 #define LATENCY_LIMIT          256
 
@@ -1658,6 +1689,9 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
                goto bad_swap;
        }
 
+       if (discard_swap(p) == 0)
+               p->flags |= SWP_DISCARDABLE;
+
        mutex_lock(&swapon_mutex);
        spin_lock(&swap_lock);
        if (swap_flags & SWAP_FLAG_PREFER)
@@ -1671,9 +1705,10 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
        total_swap_pages += nr_good_pages;
 
        printk(KERN_INFO "Adding %uk swap on %s.  "
-                       "Priority:%d extents:%d across:%lluk\n",
+                       "Priority:%d extents:%d across:%lluk%s\n",
                nr_good_pages<<(PAGE_SHIFT-10), name, p->prio,
-               nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10));
+               nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
+               (p->flags & SWP_DISCARDABLE) ? " D" : "");
 
        /* insert swap space into swap_list: */
        prev = -1;