hugepages: fix use after free bug in "quota" handling

author David Gibson <david@gibson.dropbear.id.au>

Wed, 21 Mar 2012 23:34:12 +0000 (16:34 -0700)

committer Ben Hutchings <ben@decadent.org.uk>

Wed, 25 Jul 2012 03:11:01 +0000 (04:11 +0100)
author David Gibson <david@gibson.dropbear.id.au>
Wed, 21 Mar 2012 23:34:12 +0000 (16:34 -0700)
committer Ben Hutchings <ben@decadent.org.uk>
Wed, 25 Jul 2012 03:11:01 +0000 (04:11 +0100)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c

index 2d0ca24..ebc2f4d 100644 (file)
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -592,9 +592,15 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
                 spin_lock(&sbinfo->stat_lock);
                 /* If no limits set, just report 0 for max/free/used
                  * blocks, like simple_statfs() */
-               if (sbinfo->max_blocks >= 0) {
-                       buf->f_blocks = sbinfo->max_blocks;
-                       buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
+               if (sbinfo->spool) {
+                       long free_pages;
+
+                       spin_lock(&sbinfo->spool->lock);
+                       buf->f_blocks = sbinfo->spool->max_hpages;
+                       free_pages = sbinfo->spool->max_hpages
+                               - sbinfo->spool->used_hpages;
+                       buf->f_bavail = buf->f_bfree = free_pages;
+                       spin_unlock(&sbinfo->spool->lock);
                         buf->f_files = sbinfo->max_inodes;
                         buf->f_ffree = sbinfo->free_inodes;
                 }
@@ -610,6 +616,10 @@ static void hugetlbfs_put_super(struct super_block *sb)
  
         if (sbi) {
                 sb->s_fs_info = NULL;
+
+               if (sbi->spool)
+                       hugepage_put_subpool(sbi->spool);
+
                 kfree(sbi);
         }
  }
@@ -841,10 +851,14 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
         sb->s_fs_info = sbinfo;
         sbinfo->hstate = config.hstate;
         spin_lock_init(&sbinfo->stat_lock);
-       sbinfo->max_blocks = config.nr_blocks;
-       sbinfo->free_blocks = config.nr_blocks;
         sbinfo->max_inodes = config.nr_inodes;
         sbinfo->free_inodes = config.nr_inodes;
+       sbinfo->spool = NULL;
+       if (config.nr_blocks != -1) {
+               sbinfo->spool = hugepage_new_subpool(config.nr_blocks);
+               if (!sbinfo->spool)
+                       goto out_free;
+       }
         sb->s_maxbytes = MAX_LFS_FILESIZE;
         sb->s_blocksize = huge_page_size(config.hstate);
         sb->s_blocksize_bits = huge_page_shift(config.hstate);
@@ -864,38 +878,12 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
         sb->s_root = root;
         return 0;
  out_free:
+       if (sbinfo->spool)
+               kfree(sbinfo->spool);
         kfree(sbinfo);
         return -ENOMEM;
  }
  
-int hugetlb_get_quota(struct address_space *mapping, long delta)
-{
-       int ret = 0;
-       struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
-
-       if (sbinfo->free_blocks > -1) {
-               spin_lock(&sbinfo->stat_lock);
-               if (sbinfo->free_blocks - delta >= 0)
-                       sbinfo->free_blocks -= delta;
-               else
-                       ret = -ENOMEM;
-               spin_unlock(&sbinfo->stat_lock);
-       }
-
-       return ret;
-}
-
-void hugetlb_put_quota(struct address_space *mapping, long delta)
-{
-       struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
-
-       if (sbinfo->free_blocks > -1) {
-               spin_lock(&sbinfo->stat_lock);
-               sbinfo->free_blocks += delta;
-               spin_unlock(&sbinfo->stat_lock);
-       }
-}
-
  static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
         int flags, const char *dev_name, void *data)
  {
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h

index d9d6c86..c5ed2f1 100644 (file)
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -14,6 +14,15 @@ struct user_struct;
  #include <linux/shm.h>
  #include <asm/tlbflush.h>
  
+struct hugepage_subpool {
+       spinlock_t lock;
+       long count;
+       long max_hpages, used_hpages;
+};
+
+struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
+void hugepage_put_subpool(struct hugepage_subpool *spool);
+
  int PageHuge(struct page *page);
  
  void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
@@ -138,12 +147,11 @@ struct hugetlbfs_config {
  };
  
  struct hugetlbfs_sb_info {
-       long    max_blocks;   /* blocks allowed */
-       long    free_blocks;  /* blocks free */
         long    max_inodes;   /* inodes allowed */
         long    free_inodes;  /* inodes free */
         spinlock_t      stat_lock;
         struct hstate *hstate;
+       struct hugepage_subpool *spool;
  };
  
  
@@ -166,8 +174,6 @@ extern const struct file_operations hugetlbfs_file_operations;
  extern const struct vm_operations_struct hugetlb_vm_ops;
  struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
                                 struct user_struct **user, int creat_flags);
-int hugetlb_get_quota(struct address_space *mapping, long delta);
-void hugetlb_put_quota(struct address_space *mapping, long delta);
  
  static inline int is_file_hugepages(struct file *file)
  {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index 5f5c545..7c535b0 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -53,6 +53,84 @@ static unsigned long __initdata default_hstate_size;
   */
  static DEFINE_SPINLOCK(hugetlb_lock);
  
+static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
+{
+       bool free = (spool->count == 0) && (spool->used_hpages == 0);
+
+       spin_unlock(&spool->lock);
+
+       /* If no pages are used, and no other handles to the subpool
+        * remain, free the subpool the subpool remain */
+       if (free)
+               kfree(spool);
+}
+
+struct hugepage_subpool *hugepage_new_subpool(long nr_blocks)
+{
+       struct hugepage_subpool *spool;
+
+       spool = kmalloc(sizeof(*spool), GFP_KERNEL);
+       if (!spool)
+               return NULL;
+
+       spin_lock_init(&spool->lock);
+       spool->count = 1;
+       spool->max_hpages = nr_blocks;
+       spool->used_hpages = 0;
+
+       return spool;
+}
+
+void hugepage_put_subpool(struct hugepage_subpool *spool)
+{
+       spin_lock(&spool->lock);
+       BUG_ON(!spool->count);
+       spool->count--;
+       unlock_or_release_subpool(spool);
+}
+
+static int hugepage_subpool_get_pages(struct hugepage_subpool *spool,
+                                     long delta)
+{
+       int ret = 0;
+
+       if (!spool)
+               return 0;
+
+       spin_lock(&spool->lock);
+       if ((spool->used_hpages + delta) <= spool->max_hpages) {
+               spool->used_hpages += delta;
+       } else {
+               ret = -ENOMEM;
+       }
+       spin_unlock(&spool->lock);
+
+       return ret;
+}
+
+static void hugepage_subpool_put_pages(struct hugepage_subpool *spool,
+                                      long delta)
+{
+       if (!spool)
+               return;
+
+       spin_lock(&spool->lock);
+       spool->used_hpages -= delta;
+       /* If hugetlbfs_put_super couldn't free spool due to
+       * an outstanding quota reference, free it now. */
+       unlock_or_release_subpool(spool);
+}
+
+static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
+{
+       return HUGETLBFS_SB(inode->i_sb)->spool;
+}
+
+static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
+{
+       return subpool_inode(vma->vm_file->f_dentry->d_inode);
+}
+
  /*
   * Region tracking -- allows tracking of reservations and instantiated pages
   *                    across the pages in a mapping.
@@ -533,9 +611,9 @@ static void free_huge_page(struct page *page)
          */
         struct hstate *h = page_hstate(page);
         int nid = page_to_nid(page);
-       struct address_space *mapping;
+       struct hugepage_subpool *spool =
+               (struct hugepage_subpool *)page_private(page);
  
-       mapping = (struct address_space *) page_private(page);
         set_page_private(page, 0);
         page->mapping = NULL;
         BUG_ON(page_count(page));
@@ -551,8 +629,7 @@ static void free_huge_page(struct page *page)
                 enqueue_huge_page(h, page);
         }
         spin_unlock(&hugetlb_lock);
-       if (mapping)
-               hugetlb_put_quota(mapping, 1);
+       hugepage_subpool_put_pages(spool, 1);
  }
  
  static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
@@ -966,11 +1043,12 @@ static void return_unused_surplus_pages(struct hstate *h,
  /*
   * Determine if the huge page at addr within the vma has an associated
   * reservation.  Where it does not we will need to logically increase
- * reservation and actually increase quota before an allocation can occur.
- * Where any new reservation would be required the reservation change is
- * prepared, but not committed.  Once the page has been quota'd allocated
- * an instantiated the change should be committed via vma_commit_reservation.
- * No action is required on failure.
+ * reservation and actually increase subpool usage before an allocation
+ * can occur.  Where any new reservation would be required the
+ * reservation change is prepared, but not committed.  Once the page
+ * has been allocated from the subpool and instantiated the change should
+ * be committed via vma_commit_reservation.  No action is required on
+ * failure.
   */
  static long vma_needs_reservation(struct hstate *h,
                         struct vm_area_struct *vma, unsigned long addr)
@@ -1019,24 +1097,24 @@ static void vma_commit_reservation(struct hstate *h,
  static struct page *alloc_huge_page(struct vm_area_struct *vma,
                                     unsigned long addr, int avoid_reserve)
  {
+       struct hugepage_subpool *spool = subpool_vma(vma);
         struct hstate *h = hstate_vma(vma);
         struct page *page;
-       struct address_space *mapping = vma->vm_file->f_mapping;
-       struct inode *inode = mapping->host;
         long chg;
  
         /*
-        * Processes that did not create the mapping will have no reserves and
-        * will not have accounted against quota. Check that the quota can be
-        * made before satisfying the allocation
-        * MAP_NORESERVE mappings may also need pages and quota allocated
-        * if no reserve mapping overlaps.
+        * Processes that did not create the mapping will have no
+        * reserves and will not have accounted against subpool
+        * limit. Check that the subpool limit can be made before
+        * satisfying the allocation MAP_NORESERVE mappings may also
+        * need pages and subpool limit allocated allocated if no reserve
+        * mapping overlaps.
          */
         chg = vma_needs_reservation(h, vma, addr);
         if (chg < 0)
                 return ERR_PTR(-VM_FAULT_OOM);
         if (chg)
-               if (hugetlb_get_quota(inode->i_mapping, chg))
+               if (hugepage_subpool_get_pages(spool, chg))
                         return ERR_PTR(-VM_FAULT_SIGBUS);
  
         spin_lock(&hugetlb_lock);
@@ -1046,12 +1124,12 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
         if (!page) {
                 page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
                 if (!page) {
-                       hugetlb_put_quota(inode->i_mapping, chg);
+                       hugepage_subpool_put_pages(spool, chg);
                         return ERR_PTR(-VM_FAULT_SIGBUS);
                 }
         }
  
-       set_page_private(page, (unsigned long) mapping);
+       set_page_private(page, (unsigned long)spool);
  
         vma_commit_reservation(h, vma, addr);
  
@@ -2081,6 +2159,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
  {
         struct hstate *h = hstate_vma(vma);
         struct resv_map *reservations = vma_resv_map(vma);
+       struct hugepage_subpool *spool = subpool_vma(vma);
         unsigned long reserve;
         unsigned long start;
         unsigned long end;
@@ -2096,7 +2175,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
  
                 if (reserve) {
                         hugetlb_acct_memory(h, -reserve);
-                       hugetlb_put_quota(vma->vm_file->f_mapping, reserve);
+                       hugepage_subpool_put_pages(spool, reserve);
                 }
         }
  }
@@ -2326,7 +2405,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
         address = address & huge_page_mask(h);
         pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
                 + (vma->vm_pgoff >> PAGE_SHIFT);
-       mapping = (struct address_space *)page_private(page);
+       mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
  
         /*
          * Take the mapping lock for the duration of the table walk. As
@@ -2865,11 +2944,12 @@ int hugetlb_reserve_pages(struct inode *inode,
  {
         long ret, chg;
         struct hstate *h = hstate_inode(inode);
+       struct hugepage_subpool *spool = subpool_inode(inode);
  
         /*
          * Only apply hugepage reservation if asked. At fault time, an
          * attempt will be made for VM_NORESERVE to allocate a page
-        * and filesystem quota without using reserves
+        * without using reserves
          */
         if (vm_flags & VM_NORESERVE)
                 return 0;
@@ -2898,19 +2978,19 @@ int hugetlb_reserve_pages(struct inode *inode,
                 goto out_err;
         }
  
-       /* There must be enough filesystem quota for the mapping */
-       if (hugetlb_get_quota(inode->i_mapping, chg)) {
+       /* There must be enough pages in the subpool for the mapping */
+       if (hugepage_subpool_get_pages(spool, chg)) {
                 ret = -ENOSPC;
                 goto out_err;
         }
  
         /*
          * Check enough hugepages are available for the reservation.
-        * Hand back the quota if there are not
+        * Hand the pages back to the subpool if there are not
          */
         ret = hugetlb_acct_memory(h, chg);
         if (ret < 0) {
-               hugetlb_put_quota(inode->i_mapping, chg);
+               hugepage_subpool_put_pages(spool, chg);
                 goto out_err;
         }
  
@@ -2938,12 +3018,13 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
  {
         struct hstate *h = hstate_inode(inode);
         long chg = region_truncate(&inode->i_mapping->private_list, offset);
+       struct hugepage_subpool *spool = subpool_inode(inode);
  
         spin_lock(&inode->i_lock);
         inode->i_blocks -= (blocks_per_huge_page(h) * freed);
         spin_unlock(&inode->i_lock);
  
-       hugetlb_put_quota(inode->i_mapping, (chg - freed));
+       hugepage_subpool_put_pages(spool, (chg - freed));
         hugetlb_acct_memory(h, -(chg - freed));
  }
author	David Gibson <david@gibson.dropbear.id.au>
	Wed, 21 Mar 2012 23:34:12 +0000 (16:34 -0700)
committer	Ben Hutchings <ben@decadent.org.uk>
	Wed, 25 Jul 2012 03:11:01 +0000 (04:11 +0100)
fs/hugetlbfs/inode.c		patch \| blob \| history
include/linux/hugetlb.h		patch \| blob \| history
mm/hugetlb.c		patch \| blob \| history