2 * arch/arm/include/asm/pgtable-2level.h
4 * Copyright (C) 1995-2002 Russell King
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
10 #ifndef _ASM_PGTABLE_2LEVEL_H
11 #define _ASM_PGTABLE_2LEVEL_H
14 * Hardware-wise, we have a two level page table structure, where the first
15 * level has 4096 entries, and the second level has 256 entries. Each entry
16 * is one 32-bit word. Most of the bits in the second level entry are used
17 * by hardware, and there aren't any "accessed" and "dirty" bits.
19 * Linux on the other hand has a three level page table structure, which can
20 * be wrapped to fit a two level page table structure easily - using the PGD
21 * and PTE only. However, Linux also expects one "PTE" table per page, and
22 * at least a "dirty" bit.
24 * Therefore, we tweak the implementation slightly - we tell Linux that we
25 * have 2048 entries in the first level, each of which is 8 bytes (iow, two
26 * hardware pointers to the second level.) The second level contains two
27 * hardware PTE tables arranged contiguously, preceded by Linux versions
28 * which contain the state information Linux needs. We, therefore, end up
29 * with 512 entries in the "PTE" level.
31 * This leads to the page tables having the following layout:
36 * | | +------------+ +0
37 * +- - - - + | Linux pt 0 |
38 * | | +------------+ +1024
39 * +--------+ +0 | Linux pt 1 |
40 * | |-----> +------------+ +2048
41 * +- - - - + +4 | h/w pt 0 |
42 * | |-----> +------------+ +3072
43 * +--------+ +8 | h/w pt 1 |
44 * | | +------------+ +4096
46 * See L_PTE_xxx below for definitions of bits in the "Linux pt", and
47 * PTE_xxx for definitions of bits appearing in the "h/w pt".
49 * PMD_xxx definitions refer to bits in the first level page table.
51 * The "dirty" bit is emulated by only granting hardware write permission
52 * iff the page is marked "writable" and "dirty" in the Linux PTE. This
53 * means that a write to a clean page will cause a permission fault, and
54 * the Linux MM layer will mark the page dirty via handle_pte_fault().
55 * For the hardware to notice the permission change, the TLB entry must
56 * be flushed, and ptep_set_access_flags() does that for us.
58 * The "accessed" or "young" bit is emulated by a similar method; we only
59 * allow accesses to the page if the "young" bit is set. Accesses to the
60 * page will cause a fault, and handle_pte_fault() will set the young bit
61 * for us as long as the page is marked present in the corresponding Linux
62 * PTE entry. Again, ptep_set_access_flags() will ensure that the TLB is up to date.
65 * However, when the "young" bit is cleared, we deny access to the page
66 * by clearing the hardware PTE. Currently Linux does not flush the TLB
67 * for us in this case, which means the TLB will retain the translation
68 * until either the TLB entry is evicted under pressure, or a context
69 * switch which changes the user space mapping occurs.
/*
 * Table geometry as presented to the generic MM code: 2048 first-level
 * entries, a folded (single-entry) PMD level and 512 entries per PTE table.
 */
#define PTRS_PER_PTE		512
#define PTRS_PER_PMD		1
#define PTRS_PER_PGD		2048

/*
 * Entry count, byte offset and byte size of the hardware PTE tables.  The
 * offset is the space taken by PTE_HWTABLE_PTRS pte_t entries; the size is
 * one u32 per PTE.
 */
#define PTE_HWTABLE_PTRS	(PTRS_PER_PTE)
#define PTE_HWTABLE_OFF		(PTE_HWTABLE_PTRS * sizeof(pte_t))
#define PTE_HWTABLE_SIZE	(PTRS_PER_PTE * sizeof(u32))
/*
 * PMD_SHIFT determines the size of the area a second-level page table can map
 * PGDIR_SHIFT determines what a third-level page table entry can map
 *
 * The PMD level is folded into the PGD (PTRS_PER_PMD is 1), so both shifts
 * are identical: one entry spans 2MB.
 */
#define PMD_SHIFT		21
#define PGDIR_SHIFT		21

#define PMD_SIZE		(1UL << PMD_SHIFT)
#define PMD_MASK		(~(PMD_SIZE-1))
#define PGDIR_SIZE		(1UL << PGDIR_SHIFT)
#define PGDIR_MASK		(~(PGDIR_SIZE-1))
/*
 * section address mask and size definitions.
 */
#define SECTION_SHIFT		20
#define SECTION_SIZE		(1UL << SECTION_SHIFT)
#define SECTION_MASK		(~(SECTION_SIZE-1))

/*
 * ARMv6 supersection address mask and size definitions.
 */
#define SUPERSECTION_SHIFT	24
#define SUPERSECTION_SIZE	(1UL << SUPERSECTION_SHIFT)
#define SUPERSECTION_MASK	(~(SUPERSECTION_SIZE-1))

/* Number of first-level entries needed to cover TASK_SIZE. */
#define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
/*
 * "Linux" PTE definitions.
 *
 * We keep two sets of PTEs - the hardware and the linux version.
 * This allows greater flexibility in the way we map the Linux bits
 * onto the hardware tables, and allows us to have YOUNG and DIRTY
 * bits.
 *
 * The PTE table pointer refers to the hardware entries; the "Linux"
 * entries are stored 1024 bytes below.
 */
#define L_PTE_VALID		(_AT(pteval_t, 1) << 0)		/* Valid */
#define L_PTE_PRESENT		(_AT(pteval_t, 1) << 0)
#define L_PTE_YOUNG		(_AT(pteval_t, 1) << 1)
#define L_PTE_FILE		(_AT(pteval_t, 1) << 2)	/* only when !PRESENT */
#define L_PTE_DIRTY		(_AT(pteval_t, 1) << 6)
#define L_PTE_RDONLY		(_AT(pteval_t, 1) << 7)
#define L_PTE_USER		(_AT(pteval_t, 1) << 8)
#define L_PTE_XN		(_AT(pteval_t, 1) << 9)
#define L_PTE_SHARED		(_AT(pteval_t, 1) << 10)	/* shared(v6), coherent(xsc3) */
#define L_PTE_NONE		(_AT(pteval_t, 1) << 11)
/*
 * These are the memory types, defined to be compatible with
 * pre-ARMv6 CPUs cacheable and bufferable bits:   XXCB
 *
 * The four-bit type occupies bits 2-5 of the Linux PTE (L_PTE_MT_MASK).
 */
#define L_PTE_MT_UNCACHED	(_AT(pteval_t, 0x00) << 2)	/* 0000 */
#define L_PTE_MT_BUFFERABLE	(_AT(pteval_t, 0x01) << 2)	/* 0001 */
#define L_PTE_MT_WRITETHROUGH	(_AT(pteval_t, 0x02) << 2)	/* 0010 */
#define L_PTE_MT_WRITEBACK	(_AT(pteval_t, 0x03) << 2)	/* 0011 */
#define L_PTE_MT_MINICACHE	(_AT(pteval_t, 0x06) << 2)	/* 0110 (sa1100, xscale) */
#define L_PTE_MT_WRITEALLOC	(_AT(pteval_t, 0x07) << 2)	/* 0111 */
#define L_PTE_MT_DEV_SHARED	(_AT(pteval_t, 0x04) << 2)	/* 0100 */
#define L_PTE_MT_DEV_NONSHARED	(_AT(pteval_t, 0x0c) << 2)	/* 1100 */
#define L_PTE_MT_DEV_WC		(_AT(pteval_t, 0x09) << 2)	/* 1001 */
#define L_PTE_MT_DEV_CACHED	(_AT(pteval_t, 0x0b) << 2)	/* 1011 */
#define L_PTE_MT_VECTORS	(_AT(pteval_t, 0x0f) << 2)	/* 1111 */
#define L_PTE_MT_MASK		(_AT(pteval_t, 0x0f) << 2)
/*
 * The "pud_xxx()" functions here are trivial when the pmd is folded into
 * the pud: the pud entry is never bad, always exists, and can't be set or
 * cleared.
 */
#define pud_none(pud)		(0)
#define pud_bad(pud)		(0)
#define pud_present(pud)	(1)
#define pud_clear(pudp)		do { } while (0)
#define set_pud(pud,pudp)	do { } while (0)
159 static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
/* Non-zero when bit 1 of the raw pmd value is set. */
#define pmd_bad(pmd)		(pmd_val(pmd) & 2)
/*
 * copy_pmd - copy both pmd words from @pmdps to @pmdpd, then call
 * flush_pmd_entry() on the destination.
 *
 * Each Linux pmd entry is a pair of hardware first-level pointers, hence the
 * two assignments.  The arguments are evaluated more than once, so they must
 * be free of side effects.
 */
#define copy_pmd(pmdpd,pmdps)		\
	do {				\
		(pmdpd)[0] = (pmdps)[0];	\
		(pmdpd)[1] = (pmdps)[1];	\
		flush_pmd_entry(pmdpd);	\
	} while (0)
/*
 * pmd_clear - zero both words of the pmd entry at @pmdp, then call
 * clean_pmd_entry() on it.
 *
 * @pmdp is evaluated more than once, so it must be free of side effects.
 */
#define pmd_clear(pmdp)			\
	do {				\
		(pmdp)[0] = __pmd(0);	\
		(pmdp)[1] = __pmd(0);	\
		clean_pmd_entry(pmdp);	\
	} while (0)
/* we don't need complex calculations here as the pmd is folded into the pgd */
#define pmd_addr_end(addr,end)	(end)

/* A pmd is present when its type field is anything other than FAULT. */
#define pmd_present(pmd)	((pmd_val(pmd) & PMD_TYPE_MASK) != PMD_TYPE_FAULT)

/* Forward PTE updates to the CPU-specific implementation. */
#define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext)
188 #ifdef CONFIG_SYS_SUPPORTS_HUGETLBFS
/*
 * now follows some of the definitions to allow huge page support, we can't put
 * these in the hugetlb source files as they are also required for transparent
 * huge pages.
 */

/* A huge page covers exactly one pmd entry. */
#define HPAGE_SHIFT		PMD_SHIFT
#define HPAGE_SIZE		(_AC(1, UL) << HPAGE_SHIFT)
#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)

/*
 * Table of pte_t pointers with one slot per huge-page-sized region below
 * PAGE_OFFSET, and the slot index for a given address.
 */
#define HUGE_LINUX_PTE_COUNT		(PAGE_OFFSET >> HPAGE_SHIFT)
#define HUGE_LINUX_PTE_SIZE		(HUGE_LINUX_PTE_COUNT * sizeof(pte_t *))
#define HUGE_LINUX_PTE_INDEX(addr)	((addr) >> HPAGE_SHIFT)
/*
 * We re-purpose the following domain bits in the section descriptor
 * (bits 5-8, PMD_DOMAIN_MASK) to hold software state for huge sections.
 */
#define PMD_DOMAIN_MASK		(_AT(pmdval_t, 0xF) << 5)
#define PMD_DSECT_DIRTY		(_AT(pmdval_t, 1) << 5)
#define PMD_DSECT_AF		(_AT(pmdval_t, 1) << 6)
#define PMD_DSECT_SPLITTING	(_AT(pmdval_t, 1) << 7)
/*
 * PMD_BIT_FUNC(fn, op) - define pmd_<fn>(), which applies the compound
 * assignment "op" to the raw value of its (by-value) pmd argument and
 * returns the modified copy.
 *
 * The expansion is a complete function definition, so no ';' is needed
 * after an invocation.
 */
#define PMD_BIT_FUNC(fn,op) \
static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
216 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
217 pmd_t *pmdp, pmd_t pmd)
220 * we can sometimes be passed a pmd pointing to a level 2 descriptor
221 * from collapse_huge_page.
223 if ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_TABLE) {
224 pmdp[0] = __pmd(pmd_val(pmd));
225 pmdp[1] = __pmd(pmd_val(pmd) + 256 * sizeof(pte_t));
227 pmdp[0] = __pmd(pmd_val(pmd)); /* first 1M section */
228 pmdp[1] = __pmd(pmd_val(pmd) + SECTION_SIZE); /* second 1M section */
231 flush_pmd_entry(pmdp);
234 #define pmd_mkhuge(pmd) (__pmd((pmd_val(pmd) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT))
236 PMD_BIT_FUNC(mkold, &= ~PMD_DSECT_AF);
237 PMD_BIT_FUNC(mkdirty, |= PMD_DSECT_DIRTY);
238 PMD_BIT_FUNC(mkclean, &= ~PMD_DSECT_DIRTY);
239 PMD_BIT_FUNC(mkyoung, |= PMD_DSECT_AF);
240 PMD_BIT_FUNC(mkwrite, |= PMD_SECT_AP_WRITE);
241 PMD_BIT_FUNC(wrprotect, &= ~PMD_SECT_AP_WRITE);
242 PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK);
243 PMD_BIT_FUNC(mkexec, &= ~PMD_SECT_XN);
244 PMD_BIT_FUNC(mknexec, |= PMD_SECT_XN);
/*
 * pmd state predicates.  pmd_young(), pmd_write() and pmd_dirty() yield the
 * masked bit (non-zero when set), not a normalised 0/1; pmd_exec() yields
 * 0 or 1.
 */
#define pmd_young(pmd)		(pmd_val(pmd) & PMD_DSECT_AF)
#define pmd_write(pmd)		(pmd_val(pmd) & PMD_SECT_AP_WRITE)
#define pmd_exec(pmd)		(!(pmd_val(pmd) & PMD_SECT_XN))
#define pmd_dirty(pmd)		(pmd_val(pmd) & PMD_DSECT_DIRTY)

#define __HAVE_ARCH_PMD_WRITE
/*
 * pmd_modify - build a section pmd from @pmd with the protections in @prot.
 *
 * Keeps only the address (PMD_MASK) and software-state (PMD_DOMAIN_MASK)
 * bits of @pmd, then reads @prot as a Linux PTE value and translates its
 * dirty/exec/write/young state into the matching pmd bits.  Finally, bits
 * 2-3 of @prot are copied straight across and bit 4 is moved up to bit 12
 * (presumably the C/B and TEX[0] attribute bits - confirm against the
 * section descriptor layout).
 *
 * Evaluates to the resulting pmd_t (GCC statement expression).
 *
 * NOTE(review): PMD_SECT_AP_WRITE is OR-ed in unconditionally at the end,
 * which overrides the pmd_wrprotect() result above - confirm this is
 * intended.
 */
#define pmd_modify(pmd, prot)						\
({									\
	pmd_t pmdret = __pmd(pmd_val(pmd) & (PMD_MASK | PMD_DOMAIN_MASK)); \
	pgprot_t inprot = (prot);					\
	u32 inprotval = pgprot_val(inprot);				\
	pte_t newprot = __pte(inprotval);				\
									\
	if (pte_dirty(newprot))						\
		pmdret = pmd_mkdirty(pmdret);				\
	else								\
		pmdret = pmd_mkclean(pmdret);				\
									\
	if (pte_exec(newprot))						\
		pmdret = pmd_mkexec(pmdret);				\
	else								\
		pmdret = pmd_mknexec(pmdret);				\
									\
	if (pte_write(newprot))						\
		pmdret = pmd_mkwrite(pmdret);				\
	else								\
		pmdret = pmd_wrprotect(pmdret);				\
									\
	if (pte_young(newprot))						\
		pmdret = pmd_mkyoung(pmdret);				\
	else								\
		pmdret = pmd_mkold(pmdret);				\
									\
	pmdret = __pmd(pmd_val(pmdret) | (inprotval & 0x0c)		\
				| ((inprotval << 8) & 0x1000)		\
				| PMD_TYPE_SECT | PMD_SECT_AP_WRITE	\
				| PMD_SECT_AP_READ | PMD_SECT_nG);	\
									\
	pmdret;								\
})
287 #define pmd_hugewillfault(pmd) ( !pmd_young(pmd) || \
290 #define pmd_thp_or_huge(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_SECT)
293 #define pmd_hugewillfault(pmd) (0)
294 #define pmd_thp_or_huge(pmd) (0)
295 #endif /* CONFIG_SYS_SUPPORTS_HUGETLBFS */
297 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
298 #define pmd_mkhuge(pmd) (__pmd((pmd_val(pmd) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT))
300 PMD_BIT_FUNC(mksplitting, |= PMD_DSECT_SPLITTING);
301 #define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_DSECT_SPLITTING)
302 #define pmd_trans_huge(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_SECT)
304 static inline unsigned long pmd_pfn(pmd_t pmd)
307 * for a section, we need to mask off more of the pmd
308 * before looking up the pfn
310 if ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_SECT)
311 return __phys_to_pfn(pmd_val(pmd) & HPAGE_MASK);
313 return __phys_to_pfn(pmd_val(pmd) & PHYS_MASK);
/*
 * Build a huge pmd for a page frame number / struct page with protections
 * @prot.  Both expand to a plain expression (no trailing ';') so they can be
 * used as function arguments, initialisers and operands.
 */
#define pfn_pmd(pfn,prot)	pmd_modify(__pmd(__pfn_to_phys(pfn)),prot)
#define mk_pmd(page,prot)	pfn_pmd(page_to_pfn(page),prot)
/*
 * has_transparent_hugepage - report whether transparent huge pages can be
 * used.  Only defined when CONFIG_TRANSPARENT_HUGEPAGE is enabled.
 *
 * NOTE(review): the function body was missing from this copy of the file;
 * "return 1" is a reconstruction - confirm against the original.
 */
static inline int has_transparent_hugepage(void)
{
	return 1;
}
324 #define _PMD_HUGE(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_SECT)
325 #define _PMD_HPAGE(pmd) (phys_to_page(pmd_val(pmd) & HPAGE_MASK))
327 #define _PMD_HUGE(pmd) (0)
328 #define _PMD_HPAGE(pmd) (0)
329 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
331 static inline struct page *pmd_page(pmd_t pmd)
334 * for a section, we need to mask off more of the pmd
335 * before looking up the page as it is a section descriptor.
338 return _PMD_HPAGE(pmd);
340 return phys_to_page(pmd_val(pmd) & PHYS_MASK);
343 #endif /* __ASSEMBLY__ */
345 #endif /* _ASM_PGTABLE_2LEVEL_H */