Merge branch 'stable-3.2' into pandora-3.2
author Grazvydas Ignotas <notasas@gmail.com>
Fri, 11 Jul 2014 22:32:15 +0000 (01:32 +0300)
committer Grazvydas Ignotas <notasas@gmail.com>
Fri, 11 Jul 2014 22:32:15 +0000 (01:32 +0300)
arch/x86/include/asm/hugetlb.h
fs/ubifs/file.c
mm/hugetlb.c
mm/memory-failure.c
mm/memory.c
mm/page-writeback.c
mm/vmscan.c

@@@ -51,6 -51,7 +51,7 @@@ static inline pte_t huge_ptep_get_and_c
  static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
                                         unsigned long addr, pte_t *ptep)
  {
+       ptep_clear_flush(vma, addr, ptep);
  }
  
  static inline int huge_pte_none(pte_t pte)
@@@ -90,8 -91,4 +91,8 @@@ static inline void arch_release_hugepag
  {
  }
  
 +static inline void arch_clear_hugepage_flags(struct page *page)
 +{
 +}
 +
  #endif /* _ASM_X86_HUGETLB_H */
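
The arch/x86 hunk above makes huge_ptep_clear_flush() perform a real flush via ptep_clear_flush() and adds an empty arch_clear_hugepage_flags() stub, the per-architecture hook that the mm/hugetlb.c hunk further down calls when a huge page goes back on the free list. A minimal userspace sketch of that hook pattern, using a hypothetical stand-in struct page rather than the kernel's:

#include <stdio.h>

struct page { unsigned long flags; };   /* hypothetical stand-in for struct page */

/* x86 variant from the hunk above: nothing arch-specific to clear, so the
 * hook is an empty inline; other architectures can give it a real body. */
static inline void arch_clear_hugepage_flags(struct page *page)
{
        (void)page;
}

static void enqueue_huge_page(struct page *page)
{
        printf("huge page back on the free list, flags=%#lx\n", page->flags);
}

int main(void)
{
        struct page p = { .flags = 0 };

        /* Mirrors the free_huge_page() hunk in mm/hugetlb.c below: clear any
         * arch-specific flags before the page is reused. */
        arch_clear_hugepage_flags(&p);
        enqueue_huge_page(&p);
        return 0;
}
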
diff --combined fs/ubifs/file.c
@@@ -97,7 -97,7 +97,7 @@@ static int read_block(struct inode *ino
  dump:
        ubifs_err("bad data node (block %u, inode %lu)",
                  block, inode->i_ino);
 -      dbg_dump_node(c, dn);
 +      ubifs_dump_node(c, dn);
        return -EINVAL;
  }
  
@@@ -1486,8 -1486,8 +1486,8 @@@ static int ubifs_vm_page_mkwrite(struc
        err = ubifs_budget_space(c, &req);
        if (unlikely(err)) {
                if (err == -ENOSPC)
 -                      ubifs_warn("out of space for mmapped file "
 -                                 "(inode number %lu)", inode->i_ino);
 +                      ubifs_warn("out of space for mmapped file (inode number %lu)",
 +                                 inode->i_ino);
                return VM_FAULT_SIGBUS;
        }
  
                        ubifs_release_dirty_inode_budget(c, ui);
        }
  
-       unlock_page(page);
-       return 0;
+       return VM_FAULT_LOCKED;
  
  out_unlock:
        unlock_page(page);
@@@ -1562,10 -1561,12 +1561,10 @@@ const struct address_space_operations u
  const struct inode_operations ubifs_file_inode_operations = {
        .setattr     = ubifs_setattr,
        .getattr     = ubifs_getattr,
 -#ifdef CONFIG_UBIFS_FS_XATTR
        .setxattr    = ubifs_setxattr,
        .getxattr    = ubifs_getxattr,
        .listxattr   = ubifs_listxattr,
        .removexattr = ubifs_removexattr,
 -#endif
  };
  
  const struct inode_operations ubifs_symlink_inode_operations = {
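
In fs/ubifs/file.c the interesting change is in ubifs_vm_page_mkwrite(): instead of unlocking the page and returning 0, the success path now returns VM_FAULT_LOCKED, which tells the generic fault code that the page comes back still locked. A rough userspace model of that contract, with made-up types standing in for the real mm structures:

#include <stdbool.h>
#include <stdio.h>

#define VM_FAULT_LOCKED 0x0200          /* same value the kernel of this era uses */

struct fake_page { bool locked; };      /* hypothetical stand-in for struct page */

/* Old behaviour: the handler unlocked the page itself and returned 0. */
static int mkwrite_old(struct fake_page *page)
{
        page->locked = false;           /* unlock_page(page) */
        return 0;
}

/* New behaviour: keep the page locked and report that via the return value. */
static int mkwrite_new(struct fake_page *page)
{
        (void)page;                     /* page stays locked */
        return VM_FAULT_LOCKED;
}

int main(void)
{
        struct fake_page page = { .locked = true };
        int ret;

        ret = mkwrite_old(&page);       /* page now unlocked, ret == 0 */
        page.locked = true;             /* re-lock for the second call */

        ret = mkwrite_new(&page);
        if (!(ret & VM_FAULT_LOCKED))   /* generic fault code unlocks only then */
                page.locked = false;
        printf("ret=%#x, page still locked=%d\n", ret, page.locked);
        return 0;
}
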
diff --combined mm/hugetlb.c
@@@ -633,7 -633,6 +633,7 @@@ static void free_huge_page(struct page 
                h->surplus_huge_pages--;
                h->surplus_huge_pages_node[nid]--;
        } else {
 +              arch_clear_hugepage_flags(page);
                enqueue_huge_page(h, page);
        }
        spin_unlock(&hugetlb_lock);
@@@ -1079,6 -1078,7 +1079,7 @@@ static void return_unused_surplus_pages
        while (nr_pages--) {
                if (!free_pool_huge_page(h, &node_states[N_HIGH_MEMORY], 1))
                        break;
+               cond_resched_lock(&hugetlb_lock);
        }
  }
  
@@@ -1713,9 -1713,9 +1714,9 @@@ static void __init hugetlb_sysfs_init(v
  
  /*
   * node_hstate/s - associate per node hstate attributes, via their kobjects,
 - * with node sysdevs in node_devices[] using a parallel array.  The array
 - * index of a node sysdev or _hstate == node id.
 - * This is here to avoid any static dependency of the node sysdev driver, in
 + * with node devices in node_devices[] using a parallel array.  The array
 + * index of a node device or _hstate == node id.
 + * This is here to avoid any static dependency of the node device driver, in
   * the base kernel, on the hugetlb module.
   */
  struct node_hstate {
  struct node_hstate node_hstates[MAX_NUMNODES];
  
  /*
 - * A subset of global hstate attributes for node sysdevs
 + * A subset of global hstate attributes for node devices
   */
  static struct attribute *per_node_hstate_attrs[] = {
        &nr_hugepages_attr.attr,
@@@ -1739,7 -1739,7 +1740,7 @@@ static struct attribute_group per_node_
  };
  
  /*
 - * kobj_to_node_hstate - lookup global hstate for node sysdev hstate attr kobj.
 + * kobj_to_node_hstate - lookup global hstate for node device hstate attr kobj.
   * Returns node id via non-NULL nidp.
   */
  static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
  }
  
  /*
 - * Unregister hstate attributes from a single node sysdev.
 + * Unregister hstate attributes from a single node device.
   * No-op if no hstate attributes attached.
   */
  void hugetlb_unregister_node(struct node *node)
  {
        struct hstate *h;
 -      struct node_hstate *nhs = &node_hstates[node->sysdev.id];
 +      struct node_hstate *nhs = &node_hstates[node->dev.id];
  
        if (!nhs->hugepages_kobj)
                return;         /* no hstate attributes */
  }
  
  /*
 - * hugetlb module exit:  unregister hstate attributes from node sysdevs
 + * hugetlb module exit:  unregister hstate attributes from node devices
   * that have them.
   */
  static void hugetlb_unregister_all_nodes(void)
        int nid;
  
        /*
 -       * disable node sysdev registrations.
 +       * disable node device registrations.
         */
        register_hugetlbfs_with_node(NULL, NULL);
  
  }
  
  /*
 - * Register hstate attributes for a single node sysdev.
 + * Register hstate attributes for a single node device.
   * No-op if attributes already registered.
   */
  void hugetlb_register_node(struct node *node)
  {
        struct hstate *h;
 -      struct node_hstate *nhs = &node_hstates[node->sysdev.id];
 +      struct node_hstate *nhs = &node_hstates[node->dev.id];
        int err;
  
        if (nhs->hugepages_kobj)
                return;         /* already allocated */
  
        nhs->hugepages_kobj = kobject_create_and_add("hugepages",
 -                                                      &node->sysdev.kobj);
 +                                                      &node->dev.kobj);
        if (!nhs->hugepages_kobj)
                return;
  
                if (err) {
                        printk(KERN_ERR "Hugetlb: Unable to add hstate %s"
                                        " for node %d\n",
 -                                              h->name, node->sysdev.id);
 +                                              h->name, node->dev.id);
                        hugetlb_unregister_node(node);
                        break;
                }
  
  /*
   * hugetlb init time:  register hstate attributes for all registered node
 - * sysdevs of nodes that have memory.  All on-line nodes should have
 - * registered their associated sysdev by this time.
 + * devices of nodes that have memory.  All on-line nodes should have
 + * registered their associated device by this time.
   */
  static void hugetlb_register_all_nodes(void)
  {
  
        for_each_node_state(nid, N_HIGH_MEMORY) {
                struct node *node = &node_devices[nid];
 -              if (node->sysdev.id == nid)
 +              if (node->dev.id == nid)
                        hugetlb_register_node(node);
        }
  
        /*
 -       * Let the node sysdev driver know we're here so it can
 +       * Let the node device driver know we're here so it can
         * [un]register hstate attributes on node hotplug.
         */
        register_hugetlbfs_with_node(hugetlb_register_node,
@@@ -2272,6 -2272,31 +2273,31 @@@ static void set_huge_ptep_writable(stru
                update_mmu_cache(vma, address, ptep);
  }
  
+ static int is_hugetlb_entry_migration(pte_t pte)
+ {
+       swp_entry_t swp;
+       if (huge_pte_none(pte) || pte_present(pte))
+               return 0;
+       swp = pte_to_swp_entry(pte);
+       if (non_swap_entry(swp) && is_migration_entry(swp))
+               return 1;
+       else
+               return 0;
+ }
+ static int is_hugetlb_entry_hwpoisoned(pte_t pte)
+ {
+       swp_entry_t swp;
+       if (huge_pte_none(pte) || pte_present(pte))
+               return 0;
+       swp = pte_to_swp_entry(pte);
+       if (non_swap_entry(swp) && is_hwpoison_entry(swp))
+               return 1;
+       else
+               return 0;
+ }
  
  int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                            struct vm_area_struct *vma)
  
                spin_lock(&dst->page_table_lock);
                spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING);
-               if (!huge_pte_none(huge_ptep_get(src_pte))) {
+               entry = huge_ptep_get(src_pte);
+               if (huge_pte_none(entry)) { /* skip none entry */
+                       ;
+               } else if (unlikely(is_hugetlb_entry_migration(entry) ||
+                                   is_hugetlb_entry_hwpoisoned(entry))) {
+                       swp_entry_t swp_entry = pte_to_swp_entry(entry);
+                       if (is_write_migration_entry(swp_entry) && cow) {
+                               /*
+                                * COW mappings require pages in both
+                                * parent and child to be set to read.
+                                */
+                               make_migration_entry_read(&swp_entry);
+                               entry = swp_entry_to_pte(swp_entry);
+                               set_huge_pte_at(src, addr, src_pte, entry);
+                       }
+                       set_huge_pte_at(dst, addr, dst_pte, entry);
+               } else {
                        if (cow)
                                huge_ptep_set_wrprotect(src, addr, src_pte);
-                       entry = huge_ptep_get(src_pte);
                        ptepage = pte_page(entry);
                        get_page(ptepage);
                        page_dup_rmap(ptepage);
@@@ -2317,32 -2358,6 +2359,6 @@@ nomem
        return -ENOMEM;
  }
  
- static int is_hugetlb_entry_migration(pte_t pte)
- {
-       swp_entry_t swp;
-       if (huge_pte_none(pte) || pte_present(pte))
-               return 0;
-       swp = pte_to_swp_entry(pte);
-       if (non_swap_entry(swp) && is_migration_entry(swp))
-               return 1;
-       else
-               return 0;
- }
- static int is_hugetlb_entry_hwpoisoned(pte_t pte)
- {
-       swp_entry_t swp;
-       if (huge_pte_none(pte) || pte_present(pte))
-               return 0;
-       swp = pte_to_swp_entry(pte);
-       if (non_swap_entry(swp) && is_hwpoison_entry(swp))
-               return 1;
-       else
-               return 0;
- }
  void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
                            unsigned long end, struct page *ref_page)
  {
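
The largest mm/hugetlb.c change moves the is_hugetlb_entry_migration()/is_hugetlb_entry_hwpoisoned() helpers above copy_hugetlb_page_range() and teaches the fork path to handle three cases instead of just "none vs. present". A simplified userspace model of that decision tree; the enum and struct are hypothetical, since real huge PTEs encode this state in swap entries:

#include <stdbool.h>
#include <stdio.h>

enum hpte_kind { HPTE_NONE, HPTE_PRESENT, HPTE_MIGRATION, HPTE_HWPOISON };

struct hpte {
        enum hpte_kind kind;
        bool writable;
};

/* Copy one parent entry into the child at fork time, mirroring the three
 * cases in the copy_hugetlb_page_range() hunk above. */
static void copy_one_entry(struct hpte *dst, struct hpte *src, bool cow)
{
        if (src->kind == HPTE_NONE) {
                return;                         /* skip none entry */
        } else if (src->kind == HPTE_MIGRATION || src->kind == HPTE_HWPOISON) {
                if (src->kind == HPTE_MIGRATION && src->writable && cow)
                        src->writable = false;  /* both sides become read */
                *dst = *src;                    /* propagate the swap entry */
        } else {
                if (cow)
                        src->writable = false;  /* huge_ptep_set_wrprotect() */
                *dst = *src;                    /* parent and child share the page */
        }
}

int main(void)
{
        struct hpte parent = { HPTE_MIGRATION, true };
        struct hpte child  = { HPTE_NONE, false };

        copy_one_entry(&child, &parent, true);
        printf("child kind=%d writable=%d, parent writable=%d\n",
               child.kind, child.writable, parent.writable);
        return 0;
}
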
diff --combined mm/memory-failure.c
@@@ -1033,15 -1033,16 +1033,16 @@@ int __memory_failure(unsigned long pfn
                        return 0;
                } else if (PageHuge(hpage)) {
                        /*
-                        * Check "just unpoisoned", "filter hit", and
-                        * "race with other subpage."
+                        * Check "filter hit" and "race with other subpage."
                         */
                        lock_page(hpage);
-                       if (!PageHWPoison(hpage)
-                           || (hwpoison_filter(p) && TestClearPageHWPoison(p))
-                           || (p != hpage && TestSetPageHWPoison(hpage))) {
-                               atomic_long_sub(nr_pages, &mce_bad_pages);
-                               return 0;
+                       if (PageHWPoison(hpage)) {
+                               if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
+                                   || (p != hpage && TestSetPageHWPoison(hpage))) {
+                                       atomic_long_sub(nr_pages, &mce_bad_pages);
+                                       unlock_page(hpage);
+                                       return 0;
+                               }
                        }
                        set_page_hwpoison_huge_page(hpage);
                        res = dequeue_hwpoisoned_huge_page(hpage);
         */
        if (!PageHWPoison(p)) {
                printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn);
+               atomic_long_sub(nr_pages, &mce_bad_pages);
+               put_page(hpage);
                res = 0;
                goto out;
        }
@@@ -1400,7 -1403,7 +1403,7 @@@ static int get_any_page(struct page *p
                /* Not a free page */
                ret = 1;
        }
 -      unset_migratetype_isolate(p);
 +      unset_migratetype_isolate(p, MIGRATE_MOVABLE);
        unlock_memory_hotplug();
        return ret;
  }
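
The mm/memory-failure.c hunks tighten the early-return paths for huge pages: the race check now unlocks the page before returning, and the "just unpoisoned" case undoes the earlier mce_bad_pages increment and drops the extra page reference. A small userspace model of that bump-then-undo invariant; the names below are stand-ins, not the real kernel API:

#include <stdio.h>

static long mce_bad_pages;              /* stand-in for the global counter */

struct page { int refcount; };

static void get_page(struct page *p) { p->refcount++; }
static void put_page(struct page *p) { p->refcount--; }

/* Model of the "just unpoisoned" early return fixed above: the counter was
 * bumped by nr_pages and an extra reference was taken earlier, so both must
 * be undone before bailing out, or they leak. */
static int memory_failure_model(struct page *hpage, long nr_pages, int poisoned)
{
        int res = 0;

        mce_bad_pages += nr_pages;      /* atomic_long_add() in the kernel */
        get_page(hpage);

        if (!poisoned) {
                /* the two lines the hunk adds before "res = 0" */
                mce_bad_pages -= nr_pages;
                put_page(hpage);
                goto out;
        }
        /* ... actual poison handling would continue here ... */
out:
        return res;
}

int main(void)
{
        struct page hpage = { .refcount = 1 };

        memory_failure_model(&hpage, 512, 0);
        printf("mce_bad_pages=%ld, refcount=%d\n", mce_bad_pages, hpage.refcount);
        return 0;
}
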
diff --combined mm/memory.c
@@@ -1401,7 -1401,6 +1401,7 @@@ unsigned long zap_page_range(struct vm_
        tlb_finish_mmu(&tlb, address, end);
        return end;
  }
 +EXPORT_SYMBOL_GPL(zap_page_range);
  
  /**
   * zap_vma_ptes - remove ptes mapping the vma
@@@ -1853,12 -1852,17 +1853,17 @@@ int fixup_user_fault(struct task_struc
                     unsigned long address, unsigned int fault_flags)
  {
        struct vm_area_struct *vma;
+       vm_flags_t vm_flags;
        int ret;
  
        vma = find_extend_vma(mm, address);
        if (!vma || address < vma->vm_start)
                return -EFAULT;
  
+       vm_flags = (fault_flags & FAULT_FLAG_WRITE) ? VM_WRITE : VM_READ;
+       if (!(vm_flags & vma->vm_flags))
+               return -EFAULT;
        ret = handle_mm_fault(mm, vma, address, fault_flags);
        if (ret & VM_FAULT_ERROR) {
                if (ret & VM_FAULT_OOM)
@@@ -3540,9 -3544,8 +3545,9 @@@ retry
  
                barrier();
                if (pmd_trans_huge(orig_pmd)) {
 -                      if (flags & FAULT_FLAG_WRITE &&
 -                          !pmd_write(orig_pmd) &&
 +                      unsigned int dirty = flags & FAULT_FLAG_WRITE;
 +
 +                      if (dirty && !pmd_write(orig_pmd) &&
                            !pmd_trans_splitting(orig_pmd)) {
                                ret = do_huge_pmd_wp_page(mm, vma, address, pmd,
                                                          orig_pmd);
                                if (unlikely(ret & VM_FAULT_OOM))
                                        goto retry;
                                return ret;
 +                      } else {
 +                              huge_pmd_set_accessed(mm, vma, address, pmd,
 +                                                    orig_pmd, dirty);
                        }
                        return 0;
                }
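
In mm/memory.c, fixup_user_fault() now refuses to handle a fault when the VMA does not permit the requested access type, mirroring what the normal fault path enforces. A compact userspace model of the added check, using the same flag values the kernel defines:

#include <errno.h>
#include <stdio.h>

#define VM_READ          0x00000001UL   /* same bit values as the kernel's vm_flags */
#define VM_WRITE         0x00000002UL
#define FAULT_FLAG_WRITE 0x01

struct vma_model { unsigned long vm_flags; };   /* stand-in for vm_area_struct */

/* Model of the check added to fixup_user_fault(): reject the fault when the
 * VMA does not allow the requested access type. */
static int check_access(const struct vma_model *vma, unsigned int fault_flags)
{
        unsigned long vm_flags;

        vm_flags = (fault_flags & FAULT_FLAG_WRITE) ? VM_WRITE : VM_READ;
        if (!(vm_flags & vma->vm_flags))
                return -EFAULT;
        return 0;
}

int main(void)
{
        struct vma_model readonly = { .vm_flags = VM_READ };

        printf("read  on read-only vma -> %d\n", check_access(&readonly, 0));
        printf("write on read-only vma -> %d\n",
               check_access(&readonly, FAULT_FLAG_WRITE));
        return 0;
}
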
diff --combined mm/page-writeback.c
@@@ -129,67 -129,6 +129,67 @@@ unsigned long global_dirty_limit
   */
  static struct prop_descriptor vm_completions;
  
 +/*
 + * Work out the current dirty-memory clamping and background writeout
 + * thresholds.
 + *
 + * The main aim here is to lower them aggressively if there is a lot of mapped
 + * memory around.  To avoid stressing page reclaim with lots of unreclaimable
 + * pages.  It is better to clamp down on writers than to start swapping, and
 + * performing lots of scanning.
 + *
 + * We only allow 1/2 of the currently-unmapped memory to be dirtied.
 + *
 + * We don't permit the clamping level to fall below 5% - that is getting rather
 + * excessive.
 + *
 + * We make sure that the background writeout level is below the adjusted
 + * clamping level.
 + */
 +static unsigned long highmem_dirtyable_memory(unsigned long total)
 +{
 +#ifdef CONFIG_HIGHMEM
 +      int node;
 +      unsigned long x = 0;
 +
 +      for_each_node_state(node, N_HIGH_MEMORY) {
 +              struct zone *z =
 +                      &NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
 +
 +              x += zone_page_state(z, NR_FREE_PAGES) +
 +                   zone_reclaimable_pages(z) - z->dirty_balance_reserve;
 +      }
 +      /*
 +       * Make sure that the number of highmem pages is never larger
 +       * than the number of the total dirtyable memory. This can only
 +       * occur in very strange VM situations but we want to make sure
 +       * that this does not occur.
 +       */
 +      return min(x, total);
 +#else
 +      return 0;
 +#endif
 +}
 +
 +/**
 + * determine_dirtyable_memory - amount of memory that may be used
 + *
 + * Returns the number of pages that can currently be freed and used
 + * by the kernel for direct mappings.
 + */
 +static unsigned long determine_dirtyable_memory(void)
 +{
 +      unsigned long x;
 +
 +      x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages() -
 +          dirty_balance_reserve;
 +
 +      if (!vm_highmem_is_dirtyable)
 +              x -= highmem_dirtyable_memory(x);
 +
 +      return x + 1;   /* Ensure that we never return 0 */
 +}
 +
  /*
   * couple the period to the dirty_ratio:
   *
@@@ -257,6 -196,7 +257,6 @@@ int dirty_ratio_handler(struct ctl_tabl
        return ret;
  }
  
 -
  int dirty_bytes_handler(struct ctl_table *table, int write,
                void __user *buffer, size_t *lenp,
                loff_t *ppos)
@@@ -351,6 -291,67 +351,6 @@@ int bdi_set_max_ratio(struct backing_de
  }
  EXPORT_SYMBOL(bdi_set_max_ratio);
  
 -/*
 - * Work out the current dirty-memory clamping and background writeout
 - * thresholds.
 - *
 - * The main aim here is to lower them aggressively if there is a lot of mapped
 - * memory around.  To avoid stressing page reclaim with lots of unreclaimable
 - * pages.  It is better to clamp down on writers than to start swapping, and
 - * performing lots of scanning.
 - *
 - * We only allow 1/2 of the currently-unmapped memory to be dirtied.
 - *
 - * We don't permit the clamping level to fall below 5% - that is getting rather
 - * excessive.
 - *
 - * We make sure that the background writeout level is below the adjusted
 - * clamping level.
 - */
 -
 -static unsigned long highmem_dirtyable_memory(unsigned long total)
 -{
 -#ifdef CONFIG_HIGHMEM
 -      int node;
 -      unsigned long x = 0;
 -
 -      for_each_node_state(node, N_HIGH_MEMORY) {
 -              struct zone *z =
 -                      &NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
 -
 -              x += zone_page_state(z, NR_FREE_PAGES) +
 -                   zone_reclaimable_pages(z);
 -      }
 -      /*
 -       * Make sure that the number of highmem pages is never larger
 -       * than the number of the total dirtyable memory. This can only
 -       * occur in very strange VM situations but we want to make sure
 -       * that this does not occur.
 -       */
 -      return min(x, total);
 -#else
 -      return 0;
 -#endif
 -}
 -
 -/**
 - * determine_dirtyable_memory - amount of memory that may be used
 - *
 - * Returns the number of pages that can currently be freed and used
 - * by the kernel for direct mappings.
 - */
 -unsigned long determine_dirtyable_memory(void)
 -{
 -      unsigned long x;
 -
 -      x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
 -
 -      if (!vm_highmem_is_dirtyable)
 -              x -= highmem_dirtyable_memory(x);
 -
 -      return x + 1;   /* Ensure that we never return 0 */
 -}
 -
  static unsigned long dirty_freerun_ceiling(unsigned long thresh,
                                           unsigned long bg_thresh)
  {
@@@ -558,7 -559,7 +558,7 @@@ static unsigned long bdi_position_ratio
         *     => fast response on large errors; small oscillation near setpoint
         */
        setpoint = (freerun + limit) / 2;
-       x = div_s64((setpoint - dirty) << RATELIMIT_CALC_SHIFT,
+       x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
                    limit - setpoint + 1);
        pos_ratio = x;
        pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
        x_intercept = bdi_setpoint + span;
  
        if (bdi_dirty < x_intercept - span / 4) {
-               pos_ratio = div_u64(pos_ratio * (x_intercept - bdi_dirty),
+               pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty),
                                    x_intercept - bdi_setpoint + 1);
        } else
                pos_ratio /= 4;
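
In mm/page-writeback.c, besides moving highmem_dirtyable_memory()/determine_dirtyable_memory() and subtracting dirty_balance_reserve, the bdi_position_ratio() hunks switch from div_s64()/div_u64() to div64_s64()/div64_u64() and cast setpoint and dirty to s64 before subtracting. The point is that dirty can exceed setpoint, so the difference must be computed as a signed 64-bit value, and the divisor no longer has to fit in 32 bits. A standalone demonstration of the wraparound the cast avoids, assuming RATELIMIT_CALC_SHIFT is 10 as in this kernel:

#include <inttypes.h>
#include <stdio.h>

#define RATELIMIT_CALC_SHIFT 10         /* assumed value, matches the kernel */

int main(void)
{
        unsigned long setpoint = 1000, dirty = 1500, limit = 2000;

        /* Old form: setpoint and dirty are unsigned long, so the subtraction
         * wraps when dirty > setpoint and yields a huge positive dividend. */
        uint64_t wrapped = (uint64_t)((setpoint - dirty) << RATELIMIT_CALC_SHIFT);

        /* New form: cast both operands to signed 64-bit first (the kernel
         * writes the scaling as a left shift; a multiply is used here to
         * keep the example strictly portable). */
        int64_t signed_x = ((int64_t)setpoint - (int64_t)dirty)
                           * (1 << RATELIMIT_CALC_SHIFT);

        printf("unsigned dividend: %" PRIu64 "\n", wrapped);
        printf("signed dividend:   %" PRIi64 ", divisor: %lu\n",
               signed_x, limit - setpoint + 1);
        return 0;
}
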
diff --combined mm/vmscan.c
@@@ -734,6 -734,24 +734,6 @@@ static enum page_references page_check_
        return PAGEREF_RECLAIM;
  }
  
 -static noinline_for_stack void free_page_list(struct list_head *free_pages)
 -{
 -      struct pagevec freed_pvec;
 -      struct page *page, *tmp;
 -
 -      pagevec_init(&freed_pvec, 1);
 -
 -      list_for_each_entry_safe(page, tmp, free_pages, lru) {
 -              list_del(&page->lru);
 -              if (!pagevec_add(&freed_pvec, page)) {
 -                      __pagevec_free(&freed_pvec);
 -                      pagevec_reinit(&freed_pvec);
 -              }
 -      }
 -
 -      pagevec_free(&freed_pvec);
 -}
 -
  /*
   * shrink_page_list() returns the number of reclaimed pages
   */
@@@ -997,7 -1015,7 +997,7 @@@ keep_lumpy
        if (nr_dirty && nr_dirty == nr_congested && scanning_global_lru(sc))
                zone_set_flag(zone, ZONE_CONGESTED);
  
 -      free_page_list(&free_pages);
 +      free_hot_cold_page_list(&free_pages, 1);
  
        list_splice(&ret_pages, page_list);
        count_vm_events(PGACTIVATE, pgactivate);
@@@ -2998,7 -3016,10 +2998,10 @@@ static int kswapd(void *p
                }
        }
  
+       tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
        current->reclaim_state = NULL;
+       lockdep_clear_current_reclaim_state();
        return 0;
  }
  
@@@ -3503,16 -3524,16 +3506,16 @@@ int scan_unevictable_handler(struct ctl
   * a specified node's per zone unevictable lists for evictable pages.
   */
  
 -static ssize_t read_scan_unevictable_node(struct sys_device *dev,
 -                                        struct sysdev_attribute *attr,
 +static ssize_t read_scan_unevictable_node(struct device *dev,
 +                                        struct device_attribute *attr,
                                          char *buf)
  {
        warn_scan_unevictable_pages();
        return sprintf(buf, "0\n");     /* always zero; should fit... */
  }
  
 -static ssize_t write_scan_unevictable_node(struct sys_device *dev,
 -                                         struct sysdev_attribute *attr,
 +static ssize_t write_scan_unevictable_node(struct device *dev,
 +                                         struct device_attribute *attr,
                                        const char *buf, size_t count)
  {
        warn_scan_unevictable_pages();
  }
  
  
 -static SYSDEV_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
 +static DEVICE_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
                        read_scan_unevictable_node,
                        write_scan_unevictable_node);
  
  int scan_unevictable_register_node(struct node *node)
  {
 -      return sysdev_create_file(&node->sysdev, &attr_scan_unevictable_pages);
 +      return device_create_file(&node->dev, &dev_attr_scan_unevictable_pages);
  }
  
  void scan_unevictable_unregister_node(struct node *node)
  {
 -      sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
 +      device_remove_file(&node->dev, &dev_attr_scan_unevictable_pages);
  }
  #endif