+ * @atom_size: allocation atom size
+ * @cpu_distance_fn: callback to determine distance between cpus, optional
+ *
+ * This function determines grouping of units, their mappings to cpus
+ * and other parameters considering needed percpu size, allocation
+ * atom size and distances between CPUs.
+ *
+ * Groups are always mutliples of atom size and CPUs which are of
+ * LOCAL_DISTANCE both ways are grouped together and share space for
+ * units in the same group. The returned configuration is guaranteed
+ * to have CPUs on different nodes on different groups and >=75% usage
+ * of allocated virtual address space.
+ *
+ * RETURNS:
+ * On success, pointer to the new allocation_info is returned. On
+ * failure, ERR_PTR value is returned.
+ */
+struct pcpu_alloc_info * __init pcpu_build_alloc_info(
+ size_t reserved_size, ssize_t dyn_size,
+ size_t atom_size,
+ pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
+{
+ static int group_map[NR_CPUS] __initdata;
+ static int group_cnt[NR_CPUS] __initdata;
+ const size_t static_size = __per_cpu_end - __per_cpu_start;
+ int group_cnt_max = 0, nr_groups = 1, nr_units = 0;
+ size_t size_sum, min_unit_size, alloc_size;
+ int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */
+ int last_allocs, group, unit;
+ unsigned int cpu, tcpu;
+ struct pcpu_alloc_info *ai;
+ unsigned int *cpu_map;
+
+ /*
+ * Determine min_unit_size, alloc_size and max_upa such that
+ * alloc_size is multiple of atom_size and is the smallest
+ * which can accomodate 4k aligned segments which are equal to
+ * or larger than min_unit_size.
+ */
+ size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size);
+ min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
+
+ alloc_size = roundup(min_unit_size, atom_size);
+ upa = alloc_size / min_unit_size;
+ while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
+ upa--;
+ max_upa = upa;
+
+ /* group cpus according to their proximity */
+ for_each_possible_cpu(cpu) {
+ group = 0;
+ next_group:
+ for_each_possible_cpu(tcpu) {
+ if (cpu == tcpu)
+ break;
+ if (group_map[tcpu] == group && cpu_distance_fn &&
+ (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
+ cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
+ group++;
+ nr_groups = max(nr_groups, group + 1);
+ goto next_group;
+ }
+ }
+ group_map[cpu] = group;
+ group_cnt[group]++;
+ group_cnt_max = max(group_cnt_max, group_cnt[group]);
+ }
+
+ /*
+ * Expand unit size until address space usage goes over 75%
+ * and then as much as possible without using more address
+ * space.
+ */
+ last_allocs = INT_MAX;
+ for (upa = max_upa; upa; upa--) {
+ int allocs = 0, wasted = 0;
+
+ if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
+ continue;
+
+ for (group = 0; group < nr_groups; group++) {
+ int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
+ allocs += this_allocs;
+ wasted += this_allocs * upa - group_cnt[group];
+ }
+
+ /*
+ * Don't accept if wastage is over 25%. The
+ * greater-than comparison ensures upa==1 always
+ * passes the following check.
+ */
+ if (wasted > num_possible_cpus() / 3)
+ continue;
+
+ /* and then don't consume more memory */
+ if (allocs > last_allocs)
+ break;
+ last_allocs = allocs;
+ best_upa = upa;
+ }
+ upa = best_upa;
+
+ /* allocate and fill alloc_info */
+ for (group = 0; group < nr_groups; group++)
+ nr_units += roundup(group_cnt[group], upa);
+
+ ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
+ if (!ai)
+ return ERR_PTR(-ENOMEM);
+ cpu_map = ai->groups[0].cpu_map;
+
+ for (group = 0; group < nr_groups; group++) {
+ ai->groups[group].cpu_map = cpu_map;
+ cpu_map += roundup(group_cnt[group], upa);
+ }
+
+ ai->static_size = static_size;
+ ai->reserved_size = reserved_size;
+ ai->dyn_size = dyn_size;
+ ai->unit_size = alloc_size / upa;
+ ai->atom_size = atom_size;
+ ai->alloc_size = alloc_size;
+
+ for (group = 0, unit = 0; group_cnt[group]; group++) {
+ struct pcpu_group_info *gi = &ai->groups[group];
+
+ /*
+ * Initialize base_offset as if all groups are located
+ * back-to-back. The caller should update this to
+ * reflect actual allocation.
+ */
+ gi->base_offset = unit * ai->unit_size;
+
+ for_each_possible_cpu(cpu)
+ if (group_map[cpu] == group)
+ gi->cpu_map[gi->nr_units++] = cpu;
+ gi->nr_units = roundup(gi->nr_units, upa);
+ unit += gi->nr_units;
+ }
+ BUG_ON(unit != nr_units);
+
+ return ai;
+}
+
+/**
+ * pcpu_dump_alloc_info - print out information about pcpu_alloc_info
+ * @lvl: loglevel
+ * @ai: allocation info to dump
+ *
+ * Print out information about @ai using loglevel @lvl.
+ */
+static void pcpu_dump_alloc_info(const char *lvl,
+ const struct pcpu_alloc_info *ai)
+{
+ int group_width = 1, cpu_width = 1, width;
+ char empty_str[] = "--------";
+ int alloc = 0, alloc_end = 0;
+ int group, v;
+ int upa, apl; /* units per alloc, allocs per line */
+
+ v = ai->nr_groups;
+ while (v /= 10)
+ group_width++;
+
+ v = num_possible_cpus();
+ while (v /= 10)
+ cpu_width++;
+ empty_str[min_t(int, cpu_width, sizeof(empty_str) - 1)] = '\0';
+
+ upa = ai->alloc_size / ai->unit_size;
+ width = upa * (cpu_width + 1) + group_width + 3;
+ apl = rounddown_pow_of_two(max(60 / width, 1));
+
+ printk("%spcpu-alloc: s%zu r%zu d%zu u%zu alloc=%zu*%zu",
+ lvl, ai->static_size, ai->reserved_size, ai->dyn_size,
+ ai->unit_size, ai->alloc_size / ai->atom_size, ai->atom_size);
+
+ for (group = 0; group < ai->nr_groups; group++) {
+ const struct pcpu_group_info *gi = &ai->groups[group];
+ int unit = 0, unit_end = 0;
+
+ BUG_ON(gi->nr_units % upa);
+ for (alloc_end += gi->nr_units / upa;
+ alloc < alloc_end; alloc++) {
+ if (!(alloc % apl)) {
+ printk("\n");
+ printk("%spcpu-alloc: ", lvl);
+ }
+ printk("[%0*d] ", group_width, group);
+
+ for (unit_end += upa; unit < unit_end; unit++)
+ if (gi->cpu_map[unit] != NR_CPUS)
+ printk("%0*d ", cpu_width,
+ gi->cpu_map[unit]);
+ else
+ printk("%s ", empty_str);
+ }
+ }
+ printk("\n");
+}
+
+/**
+ * pcpu_setup_first_chunk - initialize the first percpu chunk
+ * @ai: pcpu_alloc_info describing how to percpu area is shaped
+ * @base_addr: mapped address