From: Andy Whitcroft Date: Thu, 23 Jun 2005 07:08:00 +0000 (-0700) Subject: [PATCH] sparsemem hotplug base X-Git-Tag: v2.6.13-rc1~68^2~512 X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=29751f6991e845f7d002a6ae520bf996b38c8dcd;p=pandora-kernel.git [PATCH] sparsemem hotplug base Make sparse's initalization be accessible at runtime. This allows sparse mappings to be created after boot in a hotplug situation. This patch is separated from the previous one just to give an indication how much of the sparse infrastructure is *just* for hotplug memory. The section_mem_map doesn't really store a pointer. It stores something that is convenient to do some math against to get a pointer. It isn't valid to just do *section_mem_map, so I don't think it should be stored as a pointer. There are a couple of things I'd like to store about a section. First of all, the fact that it is !NULL does not mean that it is present. There could be such a combination where section_mem_map *is* NULL, but the math gets you properly to a real mem_map. So, I don't think that check is safe. Since we're storing 32-bit-aligned structures, we have a few bits in the bottom of the pointer to play with. Use one bit to encode whether there's really a mem_map there, and the other one to tell whether there's a valid section there. We need to distinguish between the two because sometimes there's a gap between when a section is discovered to be present and when we can get the mem_map for it. Signed-off-by: Dave Hansen Signed-off-by: Andy Whitcroft Signed-off-by: Jack Steiner Signed-off-by: Bob Picco Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 746b57e3d370..6c90461ed99f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -476,11 +476,56 @@ extern struct pglist_data contig_page_data; struct page; struct mem_section { - struct page *section_mem_map; + /* + * This is, logically, a pointer to an array of struct + * pages. However, it is stored with some other magic. + * (see sparse.c::sparse_init_one_section()) + * + * Making it a UL at least makes someone do a cast + * before using it wrong. + */ + unsigned long section_mem_map; }; extern struct mem_section mem_section[NR_MEM_SECTIONS]; +static inline struct mem_section *__nr_to_section(unsigned long nr) +{ + return &mem_section[nr]; +} + +/* + * We use the lower bits of the mem_map pointer to store + * a little bit of information. There should be at least + * 3 bits here due to 32-bit alignment. + */ +#define SECTION_MARKED_PRESENT (1UL<<0) +#define SECTION_HAS_MEM_MAP (1UL<<1) +#define SECTION_MAP_LAST_BIT (1UL<<2) +#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1)) + +static inline struct page *__section_mem_map_addr(struct mem_section *section) +{ + unsigned long map = section->section_mem_map; + map &= SECTION_MAP_MASK; + return (struct page *)map; +} + +static inline int valid_section(struct mem_section *section) +{ + return (section->section_mem_map & SECTION_MARKED_PRESENT); +} + +static inline int section_has_mem_map(struct mem_section *section) +{ + return (section->section_mem_map & SECTION_HAS_MEM_MAP); +} + +static inline int valid_section_nr(unsigned long nr) +{ + return valid_section(__nr_to_section(nr)); +} + /* * Given a kernel address, find the home node of the underlying memory. */ @@ -488,24 +533,25 @@ extern struct mem_section mem_section[NR_MEM_SECTIONS]; static inline struct mem_section *__pfn_to_section(unsigned long pfn) { - return &mem_section[pfn_to_section_nr(pfn)]; + return __nr_to_section(pfn_to_section_nr(pfn)); } #define pfn_to_page(pfn) \ ({ \ unsigned long __pfn = (pfn); \ - __pfn_to_section(__pfn)->section_mem_map + __pfn; \ + __section_mem_map_addr(__pfn_to_section(__pfn)) + __pfn; \ }) #define page_to_pfn(page) \ ({ \ - page - mem_section[page_to_section(page)].section_mem_map; \ + page - __section_mem_map_addr(__nr_to_section( \ + page_to_section(page))); \ }) static inline int pfn_valid(unsigned long pfn) { if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; - return mem_section[pfn_to_section_nr(pfn)].section_mem_map != 0; + return valid_section(__nr_to_section(pfn_to_section_nr(pfn))); } /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1eb683f9b3af..7ee675ad101e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1650,8 +1650,8 @@ void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone, unsigned long start_pfn) { struct page *page; - int end_pfn = start_pfn + size; - int pfn; + unsigned long end_pfn = start_pfn + size; + unsigned long pfn; for (pfn = start_pfn; pfn < end_pfn; pfn++, page++) { if (!early_pfn_valid(pfn)) diff --git a/mm/sparse.c b/mm/sparse.c index f888385b9e14..b54e304df4a7 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -25,7 +25,7 @@ void memory_present(int nid, unsigned long start, unsigned long end) for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { unsigned long section = pfn_to_section_nr(pfn); if (!mem_section[section].section_mem_map) - mem_section[section].section_mem_map = (void *) -1; + mem_section[section].section_mem_map = SECTION_MARKED_PRESENT; } } @@ -50,6 +50,56 @@ unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn, return nr_pages * sizeof(struct page); } +/* + * Subtle, we encode the real pfn into the mem_map such that + * the identity pfn - section_mem_map will return the actual + * physical page frame number. + */ +static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum) +{ + return (unsigned long)(mem_map - (section_nr_to_pfn(pnum))); +} + +/* + * We need this if we ever free the mem_maps. While not implemented yet, + * this function is included for parity with its sibling. + */ +static __attribute((unused)) +struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum) +{ + return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum); +} + +static int sparse_init_one_section(struct mem_section *ms, + unsigned long pnum, struct page *mem_map) +{ + if (!valid_section(ms)) + return -EINVAL; + + ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum); + + return 1; +} + +static struct page *sparse_early_mem_map_alloc(unsigned long pnum) +{ + struct page *map; + int nid = early_pfn_to_nid(section_nr_to_pfn(pnum)); + + map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION); + if (map) + return map; + + map = alloc_bootmem_node(NODE_DATA(nid), + sizeof(struct page) * PAGES_PER_SECTION); + if (map) + return map; + + printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__); + mem_section[pnum].section_mem_map = 0; + return NULL; +} + /* * Allocate the accumulated non-linear sections, allocate a mem_map * for each and record the physical to section mapping. @@ -58,28 +108,30 @@ void sparse_init(void) { unsigned long pnum; struct page *map; - int nid; for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { - if (!mem_section[pnum].section_mem_map) + if (!valid_section_nr(pnum)) continue; - nid = early_pfn_to_nid(section_nr_to_pfn(pnum)); - map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION); - if (!map) - map = alloc_bootmem_node(NODE_DATA(nid), - sizeof(struct page) * PAGES_PER_SECTION); - if (!map) { - mem_section[pnum].section_mem_map = 0; - continue; - } - - /* - * Subtle, we encode the real pfn into the mem_map such that - * the identity pfn - section_mem_map will return the actual - * physical page frame number. - */ - mem_section[pnum].section_mem_map = map - - section_nr_to_pfn(pnum); + map = sparse_early_mem_map_alloc(pnum); + if (map) + sparse_init_one_section(&mem_section[pnum], pnum, map); } } + +/* + * returns the number of sections whose mem_maps were properly + * set. If this is <=0, then that means that the passed-in + * map was not consumed and must be freed. + */ +int sparse_add_one_section(unsigned long start_pfn, int nr_pages, struct page *map) +{ + struct mem_section *ms = __pfn_to_section(start_pfn); + + if (ms->section_mem_map & SECTION_MARKED_PRESENT) + return -EEXIST; + + ms->section_mem_map |= SECTION_MARKED_PRESENT; + + return sparse_init_one_section(ms, pfn_to_section_nr(start_pfn), map); +}