pandora: defconfig: update
[pandora-kernel.git] / arch / x86 / mm / dump_pagetables.c
1 /*
2  * Debug helper to dump the current kernel pagetables of the system
3  * so that we can see what the various memory ranges are set to.
4  *
5  * (C) Copyright 2008 Intel Corporation
6  *
7  * Author: Arjan van de Ven <arjan@linux.intel.com>
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License
11  * as published by the Free Software Foundation; version 2
12  * of the License.
13  */
14
15 #include <linux/debugfs.h>
16 #include <linux/mm.h>
17 #include <linux/module.h>
18 #include <linux/seq_file.h>
19
20 #include <asm/pgtable.h>
21
22 /*
23  * The dumper groups pagetable entries of the same type into one, and for
24  * that it needs to keep some state when walking, and flush this state
25  * when a "break" in the continuity is found.
26  */
27 struct pg_state {
28         int level;
29         pgprot_t current_prot;
30         unsigned long start_address;
31         unsigned long current_address;
32         const struct addr_marker *marker;
33         unsigned long lines;
34 };
35
36 struct addr_marker {
37         unsigned long start_address;
38         const char *name;
39         unsigned long max_lines;
40 };
41
42 /* indices for address_markers; keep sync'd w/ address_markers below */
43 enum address_markers_idx {
44         USER_SPACE_NR = 0,
45 #ifdef CONFIG_X86_64
46         KERNEL_SPACE_NR,
47         LOW_KERNEL_NR,
48         VMALLOC_START_NR,
49         VMEMMAP_START_NR,
50         ESPFIX_START_NR,
51         HIGH_KERNEL_NR,
52         MODULES_VADDR_NR,
53         MODULES_END_NR,
54 #else
55         KERNEL_SPACE_NR,
56         VMALLOC_START_NR,
57         VMALLOC_END_NR,
58 # ifdef CONFIG_HIGHMEM
59         PKMAP_BASE_NR,
60 # endif
61         FIXADDR_START_NR,
62 #endif
63 };
64
65 /* Address space markers hints */
66 static struct addr_marker address_markers[] = {
67         { 0, "User Space" },
68 #ifdef CONFIG_X86_64
69         { 0x8000000000000000UL, "Kernel Space" },
70         { PAGE_OFFSET,          "Low Kernel Mapping" },
71         { VMALLOC_START,        "vmalloc() Area" },
72         { VMEMMAP_START,        "Vmemmap" },
73         { ESPFIX_BASE_ADDR,     "ESPfix Area", 16 },
74         { __START_KERNEL_map,   "High Kernel Mapping" },
75         { MODULES_VADDR,        "Modules" },
76         { MODULES_END,          "End Modules" },
77 #else
78         { PAGE_OFFSET,          "Kernel Mapping" },
79         { 0/* VMALLOC_START */, "vmalloc() Area" },
80         { 0/*VMALLOC_END*/,     "vmalloc() End" },
81 # ifdef CONFIG_HIGHMEM
82         { 0/*PKMAP_BASE*/,      "Persisent kmap() Area" },
83 # endif
84         { 0/*FIXADDR_START*/,   "Fixmap Area" },
85 #endif
86         { -1, NULL }            /* End of list */
87 };
88
/*
 * Amount of address space covered by one entry at each level. Each level
 * is the level below it multiplied by that lower table's entry count.
 */
#define PTE_LEVEL_MULT	(PAGE_SIZE)
#define PMD_LEVEL_MULT	(PTE_LEVEL_MULT * PTRS_PER_PTE)
#define PUD_LEVEL_MULT	(PMD_LEVEL_MULT * PTRS_PER_PMD)
#define PGD_LEVEL_MULT	(PUD_LEVEL_MULT * PTRS_PER_PUD)
94
95 /*
96  * Print a readable form of a pgprot_t to the seq_file
97  */
98 static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
99 {
100         pgprotval_t pr = pgprot_val(prot);
101         static const char * const level_name[] =
102                 { "cr3", "pgd", "pud", "pmd", "pte" };
103
104         if (!pgprot_val(prot)) {
105                 /* Not present */
106                 seq_printf(m, "                          ");
107         } else {
108                 if (pr & _PAGE_USER)
109                         seq_printf(m, "USR ");
110                 else
111                         seq_printf(m, "    ");
112                 if (pr & _PAGE_RW)
113                         seq_printf(m, "RW ");
114                 else
115                         seq_printf(m, "ro ");
116                 if (pr & _PAGE_PWT)
117                         seq_printf(m, "PWT ");
118                 else
119                         seq_printf(m, "    ");
120                 if (pr & _PAGE_PCD)
121                         seq_printf(m, "PCD ");
122                 else
123                         seq_printf(m, "    ");
124
125                 /* Bit 9 has a different meaning on level 3 vs 4 */
126                 if (level <= 3) {
127                         if (pr & _PAGE_PSE)
128                                 seq_printf(m, "PSE ");
129                         else
130                                 seq_printf(m, "    ");
131                 } else {
132                         if (pr & _PAGE_PAT)
133                                 seq_printf(m, "pat ");
134                         else
135                                 seq_printf(m, "    ");
136                 }
137                 if (pr & _PAGE_GLOBAL)
138                         seq_printf(m, "GLB ");
139                 else
140                         seq_printf(m, "    ");
141                 if (pr & _PAGE_NX)
142                         seq_printf(m, "NX ");
143                 else
144                         seq_printf(m, "x  ");
145         }
146         seq_printf(m, "%s\n", level_name[level]);
147 }
148
/*
 * On 64 bits, sign-extend the 48 bit address to 64 bit: move bit 47 up to
 * the sign position, then shift back down as a signed value so it is
 * replicated into the upper 16 bits (this relies on >> of a negative
 * signed long being an arithmetic shift). Elsewhere the address is
 * returned unchanged.
 */
static unsigned long normalize_addr(unsigned long u)
{
#ifdef CONFIG_X86_64
	signed long shifted_up = (signed long)(u << 16);

	return (unsigned long)(shifted_up >> 16);
#else
	return u;
#endif
}
160
161 /*
162  * This function gets called on a break in a continuous series
163  * of PTE entries; the next one is different so we need to
164  * print what we collected so far.
165  */
166 static void note_page(struct seq_file *m, struct pg_state *st,
167                       pgprot_t new_prot, int level)
168 {
169         pgprotval_t prot, cur;
170         static const char units[] = "BKMGTPE";
171
172         /*
173          * If we have a "break" in the series, we need to flush the state that
174          * we have now. "break" is either changing perms, levels or
175          * address space marker.
176          */
177         prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
178         cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
179
180         if (!st->level) {
181                 /* First entry */
182                 st->current_prot = new_prot;
183                 st->level = level;
184                 st->marker = address_markers;
185                 st->lines = 0;
186                 seq_printf(m, "---[ %s ]---\n", st->marker->name);
187         } else if (prot != cur || level != st->level ||
188                    st->current_address >= st->marker[1].start_address) {
189                 const char *unit = units;
190                 unsigned long delta;
191                 int width = sizeof(unsigned long) * 2;
192
193                 /*
194                  * Now print the actual finished series
195                  */
196                 if (!st->marker->max_lines ||
197                     st->lines < st->marker->max_lines) {
198                         seq_printf(m, "0x%0*lx-0x%0*lx   ",
199                                    width, st->start_address,
200                                    width, st->current_address);
201
202                         delta = (st->current_address - st->start_address);
203                         while (!(delta & 1023) && unit[1]) {
204                                 delta >>= 10;
205                                 unit++;
206                         }
207                         seq_printf(m, "%9lu%c ", delta, *unit);
208                         printk_prot(m, st->current_prot, st->level);
209                 }
210                 st->lines++;
211
212                 /*
213                  * We print markers for special areas of address space,
214                  * such as the start of vmalloc space etc.
215                  * This helps in the interpretation.
216                  */
217                 if (st->current_address >= st->marker[1].start_address) {
218                         if (st->marker->max_lines &&
219                             st->lines > st->marker->max_lines) {
220                                 unsigned long nskip =
221                                         st->lines - st->marker->max_lines;
222                                 seq_printf(m, "... %lu entr%s skipped ... \n",
223                                            nskip, nskip == 1 ? "y" : "ies");
224                         }
225                         st->marker++;
226                         st->lines = 0;
227                         seq_printf(m, "---[ %s ]---\n", st->marker->name);
228                 }
229
230                 st->start_address = st->current_address;
231                 st->current_prot = new_prot;
232                 st->level = level;
233         }
234 }
235
236 static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
237                                                         unsigned long P)
238 {
239         int i;
240         pte_t *start;
241
242         start = (pte_t *) pmd_page_vaddr(addr);
243         for (i = 0; i < PTRS_PER_PTE; i++) {
244                 pgprot_t prot = pte_pgprot(*start);
245
246                 st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
247                 note_page(m, st, prot, 4);
248                 start++;
249         }
250 }
251
#if PTRS_PER_PMD > 1

/*
 * Visit every entry of the PMD table referenced by the PUD entry @addr.
 * Empty entries, large pages and non-present entries are reported
 * directly as level 3; anything else is descended into as a PTE table.
 * @P is the address mapped by the table's first entry, before
 * normalization.
 */
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
							unsigned long P)
{
	pmd_t *table = (pmd_t *) pud_page_vaddr(addr);
	int i;

	for (i = 0; i < PTRS_PER_PMD; i++) {
		pmd_t *pmd = &table[i];
		unsigned long va = P + i * PMD_LEVEL_MULT;

		st->current_address = normalize_addr(va);

		if (pmd_none(*pmd)) {
			note_page(m, st, __pgprot(0), 3);
			continue;
		}

		if (pmd_large(*pmd) || !pmd_present(*pmd)) {
			pgprotval_t flags = pmd_val(*pmd) & PTE_FLAGS_MASK;

			note_page(m, st, __pgprot(flags), 3);
			continue;
		}

		walk_pte_level(m, st, *pmd, va);
	}
}

#else
/* Single-entry PMD level: treat the PUD entry as if it were the PMD entry */
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
#endif
282
#if PTRS_PER_PUD > 1

/*
 * Visit every entry of the PUD table referenced by the PGD entry @addr.
 * Empty entries, large pages and non-present entries are reported
 * directly as level 2; anything else is descended into as a PMD table.
 * @P is the address mapped by the table's first entry, before
 * normalization.
 */
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
							unsigned long P)
{
	pud_t *table = (pud_t *) pgd_page_vaddr(addr);
	int i;

	for (i = 0; i < PTRS_PER_PUD; i++) {
		pud_t *pud = &table[i];
		unsigned long va = P + i * PUD_LEVEL_MULT;

		st->current_address = normalize_addr(va);

		if (pud_none(*pud)) {
			note_page(m, st, __pgprot(0), 2);
			continue;
		}

		if (pud_large(*pud) || !pud_present(*pud)) {
			pgprotval_t flags = pud_val(*pud) & PTE_FLAGS_MASK;

			note_page(m, st, __pgprot(flags), 2);
			continue;
		}

		walk_pmd_level(m, st, *pud, va);
	}
}

#else
/* Single-entry PUD level: treat the PGD entry as if it were the PUD entry */
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
#define pgd_large(a) pud_large(__pud(pgd_val(a)))
#define pgd_none(a)  pud_none(__pud(pgd_val(a)))
#endif
315
316 static void walk_pgd_level(struct seq_file *m)
317 {
318 #ifdef CONFIG_X86_64
319         pgd_t *start = (pgd_t *) &init_level4_pgt;
320 #else
321         pgd_t *start = swapper_pg_dir;
322 #endif
323         int i;
324         struct pg_state st;
325
326         memset(&st, 0, sizeof(st));
327
328         for (i = 0; i < PTRS_PER_PGD; i++) {
329                 st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
330                 if (!pgd_none(*start)) {
331                         pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK;
332
333                         if (pgd_large(*start) || !pgd_present(*start))
334                                 note_page(m, &st, __pgprot(prot), 1);
335                         else
336                                 walk_pud_level(m, &st, *start,
337                                                i * PGD_LEVEL_MULT);
338                 } else
339                         note_page(m, &st, __pgprot(0), 1);
340
341                 start++;
342         }
343
344         /* Flush out the last page */
345         st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
346         note_page(m, &st, __pgprot(0), 0);
347 }
348
349 static int ptdump_show(struct seq_file *m, void *v)
350 {
351         walk_pgd_level(m);
352         return 0;
353 }
354
355 static int ptdump_open(struct inode *inode, struct file *filp)
356 {
357         return single_open(filp, ptdump_show, NULL);
358 }
359
360 static const struct file_operations ptdump_fops = {
361         .open           = ptdump_open,
362         .read           = seq_read,
363         .llseek         = seq_lseek,
364         .release        = single_release,
365 };
366
367 static int pt_dump_init(void)
368 {
369         struct dentry *pe;
370
371 #ifdef CONFIG_X86_32
372         /* Not a compile-time constant on x86-32 */
373         address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
374         address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
375 # ifdef CONFIG_HIGHMEM
376         address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
377 # endif
378         address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
379 #endif
380
381         pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
382                                  &ptdump_fops);
383         if (!pe)
384                 return -ENOMEM;
385
386         return 0;
387 }
388
389 __initcall(pt_dump_init);
390 MODULE_LICENSE("GPL");
391 MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
392 MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");