Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 23 Oct 2010 00:31:36 +0000 (17:31 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 23 Oct 2010 00:31:36 +0000 (17:31 -0700)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
  percpu: update comments to reflect that percpu allocations are always zero-filled
  percpu: Optimize __get_cpu_var()
  x86, percpu: Optimize this_cpu_ptr
  percpu: clear memory allocated with the km allocator
  percpu: fix build breakage on s390 and cleanup build configuration tests
  percpu: use percpu allocator on UP too
  percpu: reduce PCPU_MIN_UNIT_SIZE to 32k
  vmalloc: pcpu_get/free_vm_areas() aren't needed on UP

Fixed up trivial conflicts in include/linux/percpu.h
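
The practical upshot of the zero-fill and UP-allocator changes above is that
dynamic percpu allocations now behave the same on SMP and UP kernels and
always come back zeroed.  A minimal usage sketch (the struct, variable and
function names here are illustrative, not taken from the tree):

	#include <linux/init.h>
	#include <linux/percpu.h>

	struct hit_counter {
		unsigned long	hits;
	};

	static struct hit_counter __percpu *counters;

	static int __init counters_init(void)
	{
		counters = alloc_percpu(struct hit_counter);
		if (!counters)
			return -ENOMEM;
		/* no memset() needed: percpu allocations are zero-filled */
		return 0;
	}

	static void counters_note_hit(void)
	{
		/* increments this CPU's copy; safe without get_cpu()/put_cpu() */
		this_cpu_inc(counters->hits);
	}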

arch/x86/include/asm/percpu.h
include/asm-generic/percpu.h
include/linux/percpu.h
include/linux/vmalloc.h
mm/Kconfig
mm/Makefile
mm/percpu-km.c
mm/percpu.c
mm/percpu_up.c [deleted file]
mm/vmalloc.c

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index cd28f9a..f899e01 100644
 #ifdef CONFIG_SMP
 #define __percpu_arg(x)                "%%"__stringify(__percpu_seg)":%P" #x
 #define __my_cpu_offset                percpu_read(this_cpu_off)
+
+/*
+ * Compared to the generic __my_cpu_offset version, the following
+ * saves one instruction and avoids clobbering a temp register.
+ */
+#define __this_cpu_ptr(ptr)                            \
+({                                                     \
+       unsigned long tcp_ptr__;                        \
+       __verify_pcpu_ptr(ptr);                         \
+       asm volatile("add " __percpu_arg(1) ", %0"      \
+                    : "=r" (tcp_ptr__)                 \
+                    : "m" (this_cpu_off), "0" (ptr));  \
+       (typeof(*(ptr)) __kernel __force *)tcp_ptr__;   \
+})
 #else
 #define __percpu_arg(x)                "%P" #x
 #endif
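
To make the comment in the hunk above concrete: with the generic definition,
__this_cpu_ptr() first reads this_cpu_off into a scratch register and then
adds it to the pointer, roughly (illustrative AT&T syntax; the actual
registers are the compiler's choice):

	mov %gs:this_cpu_off, %rax	/* scratch register clobbered */
	add %rax, %rdi			/* ptr += this CPU's offset */

The x86 override folds the load into the add's memory operand, so the same
computation becomes a single instruction with no scratch register:

	add %gs:this_cpu_off, %rdi	/* ptr += this CPU's offset */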
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 08923b6..d17784e 100644
@@ -55,14 +55,18 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
  */
 #define per_cpu(var, cpu) \
        (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu)))
-#define __get_cpu_var(var) \
-       (*SHIFT_PERCPU_PTR(&(var), my_cpu_offset))
-#define __raw_get_cpu_var(var) \
-       (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset))
 
-#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
+#ifndef __this_cpu_ptr
 #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)
+#endif
+#ifdef CONFIG_DEBUG_PREEMPT
+#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
+#else
+#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr)
+#endif
 
+#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
+#define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var)))
 
 #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
 extern void setup_per_cpu_areas(void);
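
The net effect of the hunk above: this_cpu_ptr() only pays for the
preemption-checked my_cpu_offset when CONFIG_DEBUG_PREEMPT is enabled and
otherwise collapses to __this_cpu_ptr(), which an arch may now override (as
x86 does above); __get_cpu_var() and __raw_get_cpu_var() are simply
dereferences of those pointers.  A short usage sketch with illustrative
names:

	struct my_stats {
		unsigned long	events;
	};
	static DEFINE_PER_CPU(struct my_stats, my_stats);

	static void update_stats(void)
	{
		struct my_stats *s = &get_cpu_var(my_stats);	/* disables preemption */

		s->events++;
		put_cpu_var(my_stats);
	}

	static void update_stats_nopreempt(void)
	{
		/* caller is already non-preemptible, so the raw accessor is fine */
		__get_cpu_var(my_stats).events++;
	}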
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 0eb5083..5095b83 100644
        preempt_enable();                               \
 } while (0)
 
-#ifdef CONFIG_SMP
-
 /* minimum unit size, also is the maximum supported allocation size */
-#define PCPU_MIN_UNIT_SIZE             PFN_ALIGN(64 << 10)
+#define PCPU_MIN_UNIT_SIZE             PFN_ALIGN(32 << 10)
 
 /*
  * Percpu allocator can serve percpu allocations before slab is
@@ -146,37 +144,20 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size,
  * dynamically allocated. Non-atomic access to the current CPU's
  * version should probably be combined with get_cpu()/put_cpu().
  */
+#ifdef CONFIG_SMP
 #define per_cpu_ptr(ptr, cpu)  SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)))
+#else
+#define per_cpu_ptr(ptr, cpu)  ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); })
+#endif
 
 extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align);
 extern bool is_kernel_percpu_address(unsigned long addr);
 
-#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
+#if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
 extern void __init setup_per_cpu_areas(void);
 #endif
 extern void __init percpu_init_late(void);
 
-#else /* CONFIG_SMP */
-
-#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); })
-
-/* can't distinguish from other static vars, always false */
-static inline bool is_kernel_percpu_address(unsigned long addr)
-{
-       return false;
-}
-
-static inline void __init setup_per_cpu_areas(void) { }
-
-static inline void __init percpu_init_late(void) { }
-
-static inline void *pcpu_lpage_remapped(void *kaddr)
-{
-       return NULL;
-}
-
-#endif /* CONFIG_SMP */
-
 extern void __percpu *__alloc_percpu(size_t size, size_t align);
 extern void free_percpu(void __percpu *__pdata);
 extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 01c2145..63a4fe6 100644
@@ -117,10 +117,12 @@ extern rwlock_t vmlist_lock;
 extern struct vm_struct *vmlist;
 extern __init void vm_area_register_early(struct vm_struct *vm, size_t align);
 
+#ifdef CONFIG_SMP
 struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
                                     const size_t *sizes, int nr_vms,
                                     size_t align, gfp_t gfp_mask);
 
 void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms);
+#endif
 
 #endif /* _LINUX_VMALLOC_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index f0fb912..c2c8a4a 100644
@@ -301,3 +301,11 @@ config NOMMU_INITIAL_TRIM_EXCESS
          of 1 says that all excess pages should be trimmed.
 
          See Documentation/nommu-mmap.txt for more information.
+
+#
+# UP and nommu archs use km based percpu allocator
+#
+config NEED_PER_CPU_KM
+       depends on !SMP
+       bool
+       default y
diff --git a/mm/Makefile b/mm/Makefile
index 34b2546..f73f75a 100644
@@ -11,7 +11,7 @@ obj-y                 := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
                           maccess.o page_alloc.o page-writeback.o \
                           readahead.o swap.o truncate.o vmscan.o shmem.o \
                           prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
-                          page_isolation.o mm_init.o mmu_context.o \
+                          page_isolation.o mm_init.o mmu_context.o percpu.o \
                           $(mmu-y)
 obj-y += init-mm.o
 
@@ -36,11 +36,6 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
-ifdef CONFIG_SMP
-obj-y += percpu.o
-else
-obj-y += percpu_up.o
-endif
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
 obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
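
With mm/percpu_up.c gone, every configuration now builds percpu.o and the UP
case is handled inside the allocator itself.  The chunk backend is selected
at compile time within mm/percpu.c by including one of the backend files,
roughly along these lines (a sketch of the surrounding tree, not changed by
this series):

	#ifdef CONFIG_NEED_PER_CPU_KM
	#include "percpu-km.c"		/* kernel-memory backed chunks (UP, nommu) */
	#else
	#include "percpu-vm.c"		/* vmalloc backed chunks */
	#endif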
diff --git a/mm/percpu-km.c b/mm/percpu-km.c
index df68085..89633fe 100644
@@ -27,7 +27,7 @@
  *   chunk size is not aligned.  percpu-km code will whine about it.
  */
 
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+#if defined(CONFIG_SMP) && defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
 #error "contiguous percpu allocation is incompatible with paged first chunk"
 #endif
 
 
 static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
 {
-       /* noop */
+       unsigned int cpu;
+
+       for_each_possible_cpu(cpu)
+               memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
+
        return 0;
 }
 
diff --git a/mm/percpu.c b/mm/percpu.c
index c76ef38..6fc9015 100644
@@ -76,6 +76,7 @@
 #define PCPU_SLOT_BASE_SHIFT           5       /* 1-31 shares the same slot */
 #define PCPU_DFL_MAP_ALLOC             16      /* start a map with 16 ents */
 
+#ifdef CONFIG_SMP
 /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
 #ifndef __addr_to_pcpu_ptr
 #define __addr_to_pcpu_ptr(addr)                                       \
        (void __percpu *)((unsigned long)(addr) -                       \
                          (unsigned long)pcpu_base_addr +               \
                          (unsigned long)__per_cpu_start)
 #define __pcpu_ptr_to_addr(ptr)                                        \
        (void __force *)((unsigned long)(ptr) +                         \
                         (unsigned long)pcpu_base_addr -                \
                         (unsigned long)__per_cpu_start)
 #endif
+#else  /* CONFIG_SMP */
+/* on UP, it's always identity mapped */
+#define __addr_to_pcpu_ptr(addr)       (void __percpu *)(addr)
+#define __pcpu_ptr_to_addr(ptr)                (void __force *)(ptr)
+#endif /* CONFIG_SMP */
 
 struct pcpu_chunk {
        struct list_head        list;           /* linked to pcpu_slot lists */
@@ -820,8 +826,8 @@ fail_unlock_mutex:
  * @size: size of area to allocate in bytes
  * @align: alignment of area (max PAGE_SIZE)
  *
- * Allocate percpu area of @size bytes aligned at @align.  Might
- * sleep.  Might trigger writeouts.
+ * Allocate zero-filled percpu area of @size bytes aligned at @align.
+ * Might sleep.  Might trigger writeouts.
  *
  * CONTEXT:
  * Does GFP_KERNEL allocation.
@@ -840,9 +846,10 @@ EXPORT_SYMBOL_GPL(__alloc_percpu);
  * @size: size of area to allocate in bytes
  * @align: alignment of area (max PAGE_SIZE)
  *
- * Allocate percpu area of @size bytes aligned at @align from reserved
- * percpu area if arch has set it up; otherwise, allocation is served
- * from the same dynamic area.  Might sleep.  Might trigger writeouts.
+ * Allocate zero-filled percpu area of @size bytes aligned at @align
+ * from reserved percpu area if arch has set it up; otherwise,
+ * allocation is served from the same dynamic area.  Might sleep.
+ * Might trigger writeouts.
  *
  * CONTEXT:
  * Does GFP_KERNEL allocation.
@@ -949,6 +956,7 @@ EXPORT_SYMBOL_GPL(free_percpu);
  */
 bool is_kernel_percpu_address(unsigned long addr)
 {
+#ifdef CONFIG_SMP
        const size_t static_size = __per_cpu_end - __per_cpu_start;
        void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
        unsigned int cpu;
@@ -959,6 +967,8 @@ bool is_kernel_percpu_address(unsigned long addr)
                if ((void *)addr >= start && (void *)addr < start + static_size)
                        return true;
         }
+#endif
+       /* on UP, can't distinguish from other static vars, always false */
        return false;
 }
 
@@ -1066,161 +1076,6 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
        free_bootmem(__pa(ai), ai->__ai_size);
 }
 
-/**
- * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
- * @reserved_size: the size of reserved percpu area in bytes
- * @dyn_size: minimum free size for dynamic allocation in bytes
- * @atom_size: allocation atom size
- * @cpu_distance_fn: callback to determine distance between cpus, optional
- *
- * This function determines grouping of units, their mappings to cpus
- * and other parameters considering needed percpu size, allocation
- * atom size and distances between CPUs.
- *
- * Groups are always multiples of atom size and CPUs which are of
- * LOCAL_DISTANCE both ways are grouped together and share space for
- * units in the same group.  The returned configuration is guaranteed
- * to have CPUs on different nodes on different groups and >=75% usage
- * of allocated virtual address space.
- *
- * RETURNS:
- * On success, pointer to the new allocation_info is returned.  On
- * failure, ERR_PTR value is returned.
- */
-static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
-                               size_t reserved_size, size_t dyn_size,
-                               size_t atom_size,
-                               pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
-{
-       static int group_map[NR_CPUS] __initdata;
-       static int group_cnt[NR_CPUS] __initdata;
-       const size_t static_size = __per_cpu_end - __per_cpu_start;
-       int nr_groups = 1, nr_units = 0;
-       size_t size_sum, min_unit_size, alloc_size;
-       int upa, max_upa, uninitialized_var(best_upa);  /* units_per_alloc */
-       int last_allocs, group, unit;
-       unsigned int cpu, tcpu;
-       struct pcpu_alloc_info *ai;
-       unsigned int *cpu_map;
-
-       /* this function may be called multiple times */
-       memset(group_map, 0, sizeof(group_map));
-       memset(group_cnt, 0, sizeof(group_cnt));
-
-       /* calculate size_sum and ensure dyn_size is enough for early alloc */
-       size_sum = PFN_ALIGN(static_size + reserved_size +
-                           max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
-       dyn_size = size_sum - static_size - reserved_size;
-
-       /*
-        * Determine min_unit_size, alloc_size and max_upa such that
-        * alloc_size is multiple of atom_size and is the smallest
-        * which can accommodate 4k aligned segments which are equal to
-        * or larger than min_unit_size.
-        */
-       min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
-
-       alloc_size = roundup(min_unit_size, atom_size);
-       upa = alloc_size / min_unit_size;
-       while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
-               upa--;
-       max_upa = upa;
-
-       /* group cpus according to their proximity */
-       for_each_possible_cpu(cpu) {
-               group = 0;
-       next_group:
-               for_each_possible_cpu(tcpu) {
-                       if (cpu == tcpu)
-                               break;
-                       if (group_map[tcpu] == group && cpu_distance_fn &&
-                           (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
-                            cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
-                               group++;
-                               nr_groups = max(nr_groups, group + 1);
-                               goto next_group;
-                       }
-               }
-               group_map[cpu] = group;
-               group_cnt[group]++;
-       }
-
-       /*
-        * Expand unit size until address space usage goes over 75%
-        * and then as much as possible without using more address
-        * space.
-        */
-       last_allocs = INT_MAX;
-       for (upa = max_upa; upa; upa--) {
-               int allocs = 0, wasted = 0;
-
-               if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
-                       continue;
-
-               for (group = 0; group < nr_groups; group++) {
-                       int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
-                       allocs += this_allocs;
-                       wasted += this_allocs * upa - group_cnt[group];
-               }
-
-               /*
-                * Don't accept if wastage is over 1/3.  The
-                * greater-than comparison ensures upa==1 always
-                * passes the following check.
-                */
-               if (wasted > num_possible_cpus() / 3)
-                       continue;
-
-               /* and then don't consume more memory */
-               if (allocs > last_allocs)
-                       break;
-               last_allocs = allocs;
-               best_upa = upa;
-       }
-       upa = best_upa;
-
-       /* allocate and fill alloc_info */
-       for (group = 0; group < nr_groups; group++)
-               nr_units += roundup(group_cnt[group], upa);
-
-       ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
-       if (!ai)
-               return ERR_PTR(-ENOMEM);
-       cpu_map = ai->groups[0].cpu_map;
-
-       for (group = 0; group < nr_groups; group++) {
-               ai->groups[group].cpu_map = cpu_map;
-               cpu_map += roundup(group_cnt[group], upa);
-       }
-
-       ai->static_size = static_size;
-       ai->reserved_size = reserved_size;
-       ai->dyn_size = dyn_size;
-       ai->unit_size = alloc_size / upa;
-       ai->atom_size = atom_size;
-       ai->alloc_size = alloc_size;
-
-       for (group = 0, unit = 0; group_cnt[group]; group++) {
-               struct pcpu_group_info *gi = &ai->groups[group];
-
-               /*
-                * Initialize base_offset as if all groups are located
-                * back-to-back.  The caller should update this to
-                * reflect actual allocation.
-                */
-               gi->base_offset = unit * ai->unit_size;
-
-               for_each_possible_cpu(cpu)
-                       if (group_map[cpu] == group)
-                               gi->cpu_map[gi->nr_units++] = cpu;
-               gi->nr_units = roundup(gi->nr_units, upa);
-               unit += gi->nr_units;
-       }
-       BUG_ON(unit != nr_units);
-
-       return ai;
-}
-
 /**
  * pcpu_dump_alloc_info - print out information about pcpu_alloc_info
  * @lvl: loglevel
@@ -1363,7 +1218,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 
        /* sanity checks */
        PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
+#ifdef CONFIG_SMP
        PCPU_SETUP_BUG_ON(!ai->static_size);
+#endif
        PCPU_SETUP_BUG_ON(!base_addr);
        PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
        PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
@@ -1488,6 +1345,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
        return 0;
 }
 
+#ifdef CONFIG_SMP
+
 const char *pcpu_fc_names[PCPU_FC_NR] __initdata = {
        [PCPU_FC_AUTO]  = "auto",
        [PCPU_FC_EMBED] = "embed",
@@ -1515,8 +1374,180 @@ static int __init percpu_alloc_setup(char *str)
 }
 early_param("percpu_alloc", percpu_alloc_setup);
 
+/*
+ * pcpu_embed_first_chunk() is used by the generic percpu setup.
+ * Build it if needed by the arch config or the generic setup is going
+ * to be used.
+ */
 #if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
        !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
+#define BUILD_EMBED_FIRST_CHUNK
+#endif
+
+/* build pcpu_page_first_chunk() iff needed by the arch config */
+#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
+#define BUILD_PAGE_FIRST_CHUNK
+#endif
+
+/* pcpu_build_alloc_info() is used by both embed and page first chunk */
+#if defined(BUILD_EMBED_FIRST_CHUNK) || defined(BUILD_PAGE_FIRST_CHUNK)
+/**
+ * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
+ * @reserved_size: the size of reserved percpu area in bytes
+ * @dyn_size: minimum free size for dynamic allocation in bytes
+ * @atom_size: allocation atom size
+ * @cpu_distance_fn: callback to determine distance between cpus, optional
+ *
+ * This function determines grouping of units, their mappings to cpus
+ * and other parameters considering needed percpu size, allocation
+ * atom size and distances between CPUs.
+ *
+ * Groups are always multiples of atom size and CPUs which are of
+ * LOCAL_DISTANCE both ways are grouped together and share space for
+ * units in the same group.  The returned configuration is guaranteed
+ * to have CPUs on different nodes on different groups and >=75% usage
+ * of allocated virtual address space.
+ *
+ * RETURNS:
+ * On success, pointer to the new allocation_info is returned.  On
+ * failure, ERR_PTR value is returned.
+ */
+static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
+                               size_t reserved_size, size_t dyn_size,
+                               size_t atom_size,
+                               pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
+{
+       static int group_map[NR_CPUS] __initdata;
+       static int group_cnt[NR_CPUS] __initdata;
+       const size_t static_size = __per_cpu_end - __per_cpu_start;
+       int nr_groups = 1, nr_units = 0;
+       size_t size_sum, min_unit_size, alloc_size;
+       int upa, max_upa, uninitialized_var(best_upa);  /* units_per_alloc */
+       int last_allocs, group, unit;
+       unsigned int cpu, tcpu;
+       struct pcpu_alloc_info *ai;
+       unsigned int *cpu_map;
+
+       /* this function may be called multiple times */
+       memset(group_map, 0, sizeof(group_map));
+       memset(group_cnt, 0, sizeof(group_cnt));
+
+       /* calculate size_sum and ensure dyn_size is enough for early alloc */
+       size_sum = PFN_ALIGN(static_size + reserved_size +
+                           max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
+       dyn_size = size_sum - static_size - reserved_size;
+
+       /*
+        * Determine min_unit_size, alloc_size and max_upa such that
+        * alloc_size is multiple of atom_size and is the smallest
+        * which can accommodate 4k aligned segments which are equal to
+        * or larger than min_unit_size.
+        */
+       min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
+
+       alloc_size = roundup(min_unit_size, atom_size);
+       upa = alloc_size / min_unit_size;
+       while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
+               upa--;
+       max_upa = upa;
+
+       /* group cpus according to their proximity */
+       for_each_possible_cpu(cpu) {
+               group = 0;
+       next_group:
+               for_each_possible_cpu(tcpu) {
+                       if (cpu == tcpu)
+                               break;
+                       if (group_map[tcpu] == group && cpu_distance_fn &&
+                           (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
+                            cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
+                               group++;
+                               nr_groups = max(nr_groups, group + 1);
+                               goto next_group;
+                       }
+               }
+               group_map[cpu] = group;
+               group_cnt[group]++;
+       }
+
+       /*
+        * Expand unit size until address space usage goes over 75%
+        * and then as much as possible without using more address
+        * space.
+        */
+       last_allocs = INT_MAX;
+       for (upa = max_upa; upa; upa--) {
+               int allocs = 0, wasted = 0;
+
+               if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
+                       continue;
+
+               for (group = 0; group < nr_groups; group++) {
+                       int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
+                       allocs += this_allocs;
+                       wasted += this_allocs * upa - group_cnt[group];
+               }
+
+               /*
+                * Don't accept if wastage is over 1/3.  The
+                * greater-than comparison ensures upa==1 always
+                * passes the following check.
+                */
+               if (wasted > num_possible_cpus() / 3)
+                       continue;
+
+               /* and then don't consume more memory */
+               if (allocs > last_allocs)
+                       break;
+               last_allocs = allocs;
+               best_upa = upa;
+       }
+       upa = best_upa;
+
+       /* allocate and fill alloc_info */
+       for (group = 0; group < nr_groups; group++)
+               nr_units += roundup(group_cnt[group], upa);
+
+       ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
+       if (!ai)
+               return ERR_PTR(-ENOMEM);
+       cpu_map = ai->groups[0].cpu_map;
+
+       for (group = 0; group < nr_groups; group++) {
+               ai->groups[group].cpu_map = cpu_map;
+               cpu_map += roundup(group_cnt[group], upa);
+       }
+
+       ai->static_size = static_size;
+       ai->reserved_size = reserved_size;
+       ai->dyn_size = dyn_size;
+       ai->unit_size = alloc_size / upa;
+       ai->atom_size = atom_size;
+       ai->alloc_size = alloc_size;
+
+       for (group = 0, unit = 0; group_cnt[group]; group++) {
+               struct pcpu_group_info *gi = &ai->groups[group];
+
+               /*
+                * Initialize base_offset as if all groups are located
+                * back-to-back.  The caller should update this to
+                * reflect actual allocation.
+                */
+               gi->base_offset = unit * ai->unit_size;
+
+               for_each_possible_cpu(cpu)
+                       if (group_map[cpu] == group)
+                               gi->cpu_map[gi->nr_units++] = cpu;
+               gi->nr_units = roundup(gi->nr_units, upa);
+               unit += gi->nr_units;
+       }
+       BUG_ON(unit != nr_units);
+
+       return ai;
+}
+#endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */
+
+#if defined(BUILD_EMBED_FIRST_CHUNK)
 /**
  * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
  * @reserved_size: the size of reserved percpu area in bytes
@@ -1645,10 +1676,9 @@ out_free:
                free_bootmem(__pa(areas), areas_size);
        return rc;
 }
-#endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK ||
-         !CONFIG_HAVE_SETUP_PER_CPU_AREA */
+#endif /* BUILD_EMBED_FIRST_CHUNK */
 
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+#ifdef BUILD_PAGE_FIRST_CHUNK
 /**
  * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
  * @reserved_size: the size of reserved percpu area in bytes
@@ -1756,10 +1786,11 @@ out_free_ar:
        pcpu_free_alloc_info(ai);
        return rc;
 }
-#endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */
+#endif /* BUILD_PAGE_FIRST_CHUNK */
 
+#ifndef        CONFIG_HAVE_SETUP_PER_CPU_AREA
 /*
- * Generic percpu area setup.
+ * Generic SMP percpu area setup.
  *
  * The embedding helper is used because its behavior closely resembles
  * the original non-dynamic generic percpu area setup.  This is
@@ -1770,7 +1801,6 @@ out_free_ar:
  * on the physical linear memory mapping which uses large page
  * mappings on applicable archs.
  */
-#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
 
@@ -1799,13 +1829,48 @@ void __init setup_per_cpu_areas(void)
                                    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
                                    pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
        if (rc < 0)
-               panic("Failed to initialized percpu areas.");
+               panic("Failed to initialize percpu areas.");
 
        delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
        for_each_possible_cpu(cpu)
                __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
 }
-#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
+#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
+
+#else  /* CONFIG_SMP */
+
+/*
+ * UP percpu area setup.
+ *
+ * UP always uses km-based percpu allocator with identity mapping.
+ * Static percpu variables are indistinguishable from the usual static
+ * variables and don't require any special preparation.
+ */
+void __init setup_per_cpu_areas(void)
+{
+       const size_t unit_size =
+               roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE,
+                                        PERCPU_DYNAMIC_RESERVE));
+       struct pcpu_alloc_info *ai;
+       void *fc;
+
+       ai = pcpu_alloc_alloc_info(1, 1);
+       fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+       if (!ai || !fc)
+               panic("Failed to allocate memory for percpu areas.");
+
+       ai->dyn_size = unit_size;
+       ai->unit_size = unit_size;
+       ai->atom_size = unit_size;
+       ai->alloc_size = unit_size;
+       ai->groups[0].nr_units = 1;
+       ai->groups[0].cpu_map[0] = 0;
+
+       if (pcpu_setup_first_chunk(ai, fc) < 0)
+               panic("Failed to initialize percpu areas.");
+}
+
+#endif /* CONFIG_SMP */
 
 /*
  * First and reserved chunks are initialized with temporary allocation
diff --git a/mm/percpu_up.c b/mm/percpu_up.c
deleted file mode 100644
index db884fa..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * mm/percpu_up.c - dummy percpu memory allocator implementation for UP
- */
-
-#include <linux/module.h>
-#include <linux/percpu.h>
-#include <linux/slab.h>
-
-void __percpu *__alloc_percpu(size_t size, size_t align)
-{
-       /*
-        * Can't easily make larger alignment work with kmalloc.  WARN
-        * on it.  Larger alignment should only be used for module
-        * percpu sections on SMP for which this path isn't used.
-        */
-       WARN_ON_ONCE(align > SMP_CACHE_BYTES);
-       return (void __percpu __force *)kzalloc(size, GFP_KERNEL);
-}
-EXPORT_SYMBOL_GPL(__alloc_percpu);
-
-void free_percpu(void __percpu *p)
-{
-       kfree(this_cpu_ptr(p));
-}
-EXPORT_SYMBOL_GPL(free_percpu);
-
-phys_addr_t per_cpu_ptr_to_phys(void *addr)
-{
-       return __pa(addr);
-}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d8087f0..9f90962 100644
@@ -2065,6 +2065,7 @@ void free_vm_area(struct vm_struct *area)
 }
 EXPORT_SYMBOL_GPL(free_vm_area);
 
+#ifdef CONFIG_SMP
 static struct vmap_area *node_to_va(struct rb_node *n)
 {
        return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
@@ -2345,6 +2346,7 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
                free_vm_area(vms[i]);
        kfree(vms);
 }
+#endif /* CONFIG_SMP */
 
 #ifdef CONFIG_PROC_FS
 static void *s_start(struct seq_file *m, loff_t *pos)