return ret;
}
-/*
- * We need this because the bdev->unplug_fn can sleep and we cannot
- * hold swap_lock while calling the unplug_fn. And swap_lock
- * cannot be turned into a mutex.
- */
-static DECLARE_RWSEM(swap_unplug_sem);
-
-void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
-{
- swp_entry_t entry;
-
- down_read(&swap_unplug_sem);
- entry.val = page_private(page);
- if (PageSwapCache(page)) {
- struct block_device *bdev = swap_info[swp_type(entry)]->bdev;
- struct backing_dev_info *bdi;
-
- /*
- * If the page is removed from swapcache from under us (with a
- * racy try_to_unuse/swapoff) we need an additional reference
- * count to avoid reading garbage from page_private(page) above.
- * If the WARN_ON triggers during a swapoff it maybe the race
- * condition and it's harmless. However if it triggers without
- * swapoff it signals a problem.
- */
- WARN_ON(page_count(page) <= 1);
-
- bdi = bdev->bd_inode->i_mapping->backing_dev_info;
- blk_run_backing_dev(bdi, page);
- }
- up_read(&swap_unplug_sem);
-}
-
/*
* swapon tell device that all the old swap contents can be discarded,
* to allow the swap device to optimize its wear-levelling.
#define SWAPFILE_CLUSTER 256
#define LATENCY_LIMIT 256
-static inline unsigned long scan_swap_map(struct swap_info_struct *si,
- unsigned char usage)
+static unsigned long scan_swap_map(struct swap_info_struct *si,
+ unsigned char usage)
{
unsigned long offset;
unsigned long scan_base;
static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, swp_entry_t entry, struct page *page)
{
- struct mem_cgroup *ptr = NULL;
+ struct mem_cgroup *ptr;
spinlock_t *ptl;
pte_t *pte;
int ret = 1;
goto out;
}
+static void enable_swap_info(struct swap_info_struct *p, int prio,
+ unsigned char *swap_map)
+{
+ int i, prev;
+
+ spin_lock(&swap_lock);
+ if (prio >= 0)
+ p->prio = prio;
+ else
+ p->prio = --least_priority;
+ p->swap_map = swap_map;
+ p->flags |= SWP_WRITEOK;
+ nr_swap_pages += p->pages;
+ total_swap_pages += p->pages;
+
+ /* insert swap space into swap_list: */
+ prev = -1;
+ for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
+ if (p->prio >= swap_info[i]->prio)
+ break;
+ prev = i;
+ }
+ p->next = i;
+ if (prev < 0)
+ swap_list.head = swap_list.next = p->type;
+ else
+ swap_info[prev]->next = p->type;
+ spin_unlock(&swap_lock);
+}
+
SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
{
struct swap_info_struct *p = NULL;
current->flags &= ~PF_OOM_ORIGIN;
if (err) {
+ /*
+ * reading p->prio and p->swap_map outside the lock is
+ * safe here because only sys_swapon and sys_swapoff
+ * change them, and there can be no other sys_swapon or
+ * sys_swapoff for this swap_info_struct at this point.
+ */
/* re-insert swap space back into swap_list */
- spin_lock(&swap_lock);
- if (p->prio < 0)
- p->prio = --least_priority;
- prev = -1;
- for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
- if (p->prio >= swap_info[i]->prio)
- break;
- prev = i;
- }
- p->next = i;
- if (prev < 0)
- swap_list.head = swap_list.next = type;
- else
- swap_info[prev]->next = type;
- nr_swap_pages += p->pages;
- total_swap_pages += p->pages;
- p->flags |= SWP_WRITEOK;
- spin_unlock(&swap_lock);
+ enable_swap_info(p, p->prio, p->swap_map);
goto out_dput;
}
- /* wait for any unplug function to finish */
- down_write(&swap_unplug_sem);
- up_write(&swap_unplug_sem);
-
destroy_swap_extents(p);
if (p->flags & SWP_CONTINUED)
free_swap_count_continuations(p);
sys_swapon);
if (error < 0) {
p->bdev = NULL;
- error = -EINVAL;
- goto bad_swap;
+ return -EINVAL;
}
p->old_block_size = block_size(p->bdev);
error = set_blocksize(p->bdev, PAGE_SIZE);
if (error < 0)
- goto bad_swap;
+ return error;
p->flags |= SWP_BLKDEV;
} else if (S_ISREG(inode->i_mode)) {
p->bdev = inode->i_sb->s_bdev;
mutex_lock(&inode->i_mutex);
- if (IS_SWAPFILE(inode)) {
- error = -EBUSY;
- goto bad_swap;
+ if (IS_SWAPFILE(inode))
+ return -EBUSY;
+ } else
+ return -EINVAL;
+
+ return 0;
+}
+
+static unsigned long read_swap_header(struct swap_info_struct *p,
+ union swap_header *swap_header,
+ struct inode *inode)
+{
+ int i;
+ unsigned long maxpages;
+ unsigned long swapfilepages;
+
+ if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
+ printk(KERN_ERR "Unable to find swap-space signature\n");
+ return 0;
+ }
+
+ /* swap partition endianess hack... */
+ if (swab32(swap_header->info.version) == 1) {
+ swab32s(&swap_header->info.version);
+ swab32s(&swap_header->info.last_page);
+ swab32s(&swap_header->info.nr_badpages);
+ for (i = 0; i < swap_header->info.nr_badpages; i++)
+ swab32s(&swap_header->info.badpages[i]);
+ }
+ /* Check the swap header's sub-version */
+ if (swap_header->info.version != 1) {
+ printk(KERN_WARNING
+ "Unable to handle swap header version %d\n",
+ swap_header->info.version);
+ return 0;
+ }
+
+ p->lowest_bit = 1;
+ p->cluster_next = 1;
+ p->cluster_nr = 0;
+
+ /*
+ * Find out how many pages are allowed for a single swap
+ * device. There are two limiting factors: 1) the number of
+ * bits for the swap offset in the swp_entry_t type and
+ * 2) the number of bits in the a swap pte as defined by
+ * the different architectures. In order to find the
+ * largest possible bit mask a swap entry with swap type 0
+ * and swap offset ~0UL is created, encoded to a swap pte,
+ * decoded to a swp_entry_t again and finally the swap
+ * offset is extracted. This will mask all the bits from
+ * the initial ~0UL mask that can't be encoded in either
+ * the swp_entry_t or the architecture definition of a
+ * swap pte.
+ */
+ maxpages = swp_offset(pte_to_swp_entry(
+ swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
+ if (maxpages > swap_header->info.last_page) {
+ maxpages = swap_header->info.last_page + 1;
+ /* p->max is an unsigned int: don't overflow it */
+ if ((unsigned int)maxpages == 0)
+ maxpages = UINT_MAX;
+ }
+ p->highest_bit = maxpages - 1;
+
+ if (!maxpages)
+ return 0;
+ swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
+ if (swapfilepages && maxpages > swapfilepages) {
+ printk(KERN_WARNING
+ "Swap area shorter than signature indicates\n");
+ return 0;
+ }
+ if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
+ return 0;
+ if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
+ return 0;
+
+ return maxpages;
+}
+
+static int setup_swap_map_and_extents(struct swap_info_struct *p,
+ union swap_header *swap_header,
+ unsigned char *swap_map,
+ unsigned long maxpages,
+ sector_t *span)
+{
+ int i;
+ unsigned int nr_good_pages;
+ int nr_extents;
+
+ nr_good_pages = maxpages - 1; /* omit header page */
+
+ for (i = 0; i < swap_header->info.nr_badpages; i++) {
+ unsigned int page_nr = swap_header->info.badpages[i];
+ if (page_nr == 0 || page_nr > swap_header->info.last_page)
+ return -EINVAL;
+ if (page_nr < maxpages) {
+ swap_map[page_nr] = SWAP_MAP_BAD;
+ nr_good_pages--;
}
- } else {
- error = -EINVAL;
- goto bad_swap;
}
- return 0;
+ if (nr_good_pages) {
+ swap_map[0] = SWAP_MAP_BAD;
+ p->max = maxpages;
+ p->pages = nr_good_pages;
+ nr_extents = setup_swap_extents(p, span);
+ if (nr_extents < 0)
+ return nr_extents;
+ nr_good_pages = p->pages;
+ }
+ if (!nr_good_pages) {
+ printk(KERN_WARNING "Empty swap-file\n");
+ return -EINVAL;
+ }
-bad_swap:
- return error;
+ return nr_extents;
}
SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
char *name;
struct file *swap_file = NULL;
struct address_space *mapping;
- int i, prev;
+ int i;
+ int prio;
int error;
union swap_header *swap_header;
- unsigned int nr_good_pages;
- int nr_extents = 0;
+ int nr_extents;
sector_t span;
unsigned long maxpages;
- unsigned long swapfilepages;
unsigned char *swap_map = NULL;
struct page *page = NULL;
struct inode *inode = NULL;
p->swap_file = swap_file;
mapping = swap_file->f_mapping;
- inode = mapping->host;
for (i = 0; i < nr_swapfiles; i++) {
struct swap_info_struct *q = swap_info[i];
}
}
+ inode = mapping->host;
+ /* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */
error = claim_swapfile(p, inode);
if (unlikely(error))
goto bad_swap;
- swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
-
/*
* Read the swap header.
*/
}
swap_header = kmap(page);
- if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
- printk(KERN_ERR "Unable to find swap-space signature\n");
- error = -EINVAL;
- goto bad_swap;
- }
-
- /* swap partition endianess hack... */
- if (swab32(swap_header->info.version) == 1) {
- swab32s(&swap_header->info.version);
- swab32s(&swap_header->info.last_page);
- swab32s(&swap_header->info.nr_badpages);
- for (i = 0; i < swap_header->info.nr_badpages; i++)
- swab32s(&swap_header->info.badpages[i]);
- }
- /* Check the swap header's sub-version */
- if (swap_header->info.version != 1) {
- printk(KERN_WARNING
- "Unable to handle swap header version %d\n",
- swap_header->info.version);
+ maxpages = read_swap_header(p, swap_header, inode);
+ if (unlikely(!maxpages)) {
error = -EINVAL;
goto bad_swap;
}
- p->lowest_bit = 1;
- p->cluster_next = 1;
- p->cluster_nr = 0;
-
- /*
- * Find out how many pages are allowed for a single swap
- * device. There are two limiting factors: 1) the number of
- * bits for the swap offset in the swp_entry_t type and
- * 2) the number of bits in the a swap pte as defined by
- * the different architectures. In order to find the
- * largest possible bit mask a swap entry with swap type 0
- * and swap offset ~0UL is created, encoded to a swap pte,
- * decoded to a swp_entry_t again and finally the swap
- * offset is extracted. This will mask all the bits from
- * the initial ~0UL mask that can't be encoded in either
- * the swp_entry_t or the architecture definition of a
- * swap pte.
- */
- maxpages = swp_offset(pte_to_swp_entry(
- swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
- if (maxpages > swap_header->info.last_page) {
- maxpages = swap_header->info.last_page + 1;
- /* p->max is an unsigned int: don't overflow it */
- if ((unsigned int)maxpages == 0)
- maxpages = UINT_MAX;
- }
- p->highest_bit = maxpages - 1;
-
- error = -EINVAL;
- if (!maxpages)
- goto bad_swap;
- if (swapfilepages && maxpages > swapfilepages) {
- printk(KERN_WARNING
- "Swap area shorter than signature indicates\n");
- goto bad_swap;
- }
- if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
- goto bad_swap;
- if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
- goto bad_swap;
-
/* OK, set up the swap map and apply the bad block list */
swap_map = vzalloc(maxpages);
if (!swap_map) {
goto bad_swap;
}
- nr_good_pages = maxpages - 1; /* omit header page */
-
- for (i = 0; i < swap_header->info.nr_badpages; i++) {
- unsigned int page_nr = swap_header->info.badpages[i];
- if (page_nr == 0 || page_nr > swap_header->info.last_page) {
- error = -EINVAL;
- goto bad_swap;
- }
- if (page_nr < maxpages) {
- swap_map[page_nr] = SWAP_MAP_BAD;
- nr_good_pages--;
- }
- }
-
error = swap_cgroup_swapon(p->type, maxpages);
if (error)
goto bad_swap;
- if (nr_good_pages) {
- swap_map[0] = SWAP_MAP_BAD;
- p->max = maxpages;
- p->pages = nr_good_pages;
- nr_extents = setup_swap_extents(p, &span);
- if (nr_extents < 0) {
- error = nr_extents;
- goto bad_swap;
- }
- nr_good_pages = p->pages;
- }
- if (!nr_good_pages) {
- printk(KERN_WARNING "Empty swap-file\n");
- error = -EINVAL;
+ nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map,
+ maxpages, &span);
+ if (unlikely(nr_extents < 0)) {
+ error = nr_extents;
goto bad_swap;
}
}
mutex_lock(&swapon_mutex);
- spin_lock(&swap_lock);
+ prio = -1;
if (swap_flags & SWAP_FLAG_PREFER)
- p->prio =
+ prio =
(swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
- else
- p->prio = --least_priority;
- p->swap_map = swap_map;
- p->flags |= SWP_WRITEOK;
- nr_swap_pages += nr_good_pages;
- total_swap_pages += nr_good_pages;
+ enable_swap_info(p, prio, swap_map);
printk(KERN_INFO "Adding %uk swap on %s. "
"Priority:%d extents:%d across:%lluk %s%s\n",
- nr_good_pages<<(PAGE_SHIFT-10), name, p->prio,
+ p->pages<<(PAGE_SHIFT-10), name, p->prio,
nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
(p->flags & SWP_SOLIDSTATE) ? "SS" : "",
(p->flags & SWP_DISCARDABLE) ? "D" : "");
- /* insert swap space into swap_list: */
- prev = -1;
- for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
- if (p->prio >= swap_info[i]->prio)
- break;
- prev = i;
- }
- p->next = i;
- if (prev < 0)
- swap_list.head = swap_list.next = p->type;
- else
- swap_info[prev]->next = p->type;
- spin_unlock(&swap_lock);
mutex_unlock(&swapon_mutex);
atomic_inc(&proc_poll_event);
wake_up_interruptible(&proc_poll_wait);
spin_unlock(&swap_lock);
vfree(swap_map);
if (swap_file) {
- if (inode && S_ISREG(inode->i_mode))
+ if (inode && S_ISREG(inode->i_mode)) {
mutex_unlock(&inode->i_mutex);
+ inode = NULL;
+ }
filp_close(swap_file, NULL);
}
out: