/*
 * Copyright IBM Corp. 2007,2009
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */
6 #include <linux/sched.h>
7 #include <linux/kernel.h>
8 #include <linux/errno.h>
11 #include <linux/swap.h>
12 #include <linux/smp.h>
13 #include <linux/highmem.h>
14 #include <linux/pagemap.h>
15 #include <linux/spinlock.h>
16 #include <linux/module.h>
17 #include <linux/quicklist.h>
18 #include <linux/rcupdate.h>
20 #include <asm/system.h>
21 #include <asm/pgtable.h>
22 #include <asm/pgalloc.h>
24 #include <asm/tlbflush.h>
25 #include <asm/mmu_context.h>
/*
 * FRAG_MASK: lower-nibble bitmap of the in-use 1K/2K page-table fragments
 * of one 4K page (mirrored in page->_mapcount, see page_table_alloc/free).
 * NOTE(review): two conflicting definitions appear back to back here; the
 * original file presumably separates them with #ifdef __s390x__ / #else
 * (4 x 1K fragments on 31 bit, 2 x 2K fragments on 64 bit) -- confirm
 * against the complete file.
 */
29 #define FRAG_MASK 0x0f
32 #define FRAG_MASK 0x03
/*
 * Start of the vmalloc area.  Defaults to VMALLOC_SIZE bytes below
 * VMALLOC_END; may be moved further down by the "vmalloc=" early
 * parameter (see parse_vmalloc below).
 */
35 unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
36 EXPORT_SYMBOL(VMALLOC_START);
/*
 * Early-parameter handler for "vmalloc=<size>": resize the vmalloc area
 * by moving VMALLOC_START down from VMALLOC_END by the requested amount,
 * rounded down to a page boundary via PAGE_MASK.
 * NOTE(review): this listing drops lines here -- the braces, the usual
 * NULL-argument check and the return statement are not visible.
 */
38 static int __init parse_vmalloc(char *arg)
42 VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK;
45 early_param("vmalloc", parse_vmalloc);
/*
 * Allocate a crst (region/segment) table for @mm: 2^ALLOC_ORDER pages,
 * returned as the physical address of the table.
 * NOTE(review): the usual "if (!page) return NULL" error check and the
 * braces are not visible in this listing.
 */
47 unsigned long *crst_table_alloc(struct mm_struct *mm)
49 struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
53 return (unsigned long *) page_to_phys(page);
/* Release a crst table allocated by crst_table_alloc(). */
56 void crst_table_free(struct mm_struct *mm, unsigned long *table)
58 free_pages((unsigned long) table, ALLOC_ORDER);
/*
 * Grow the address-space limit of @mm to at least @limit by installing a
 * new, higher-level crst table above the current top-level table.
 * NOTE(review): this listing drops lines -- the declaration of "entry",
 * the allocation-failure return, the else branch around line 82, several
 * closing braces and the final return are not visible.
 */
62 int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
64 unsigned long *table, *pgd;
/* A region-first (4-level) table caps the address space at 2^53 bytes. */
67 BUG_ON(limit > (1UL << 53));
69 table = crst_table_alloc(mm);
/* Re-check under the lock: another thread may have upgraded already. */
72 spin_lock_bh(&mm->page_table_lock);
73 if (mm->context.asce_limit < limit) {
74 pgd = (unsigned long *) mm->pgd;
75 if (mm->context.asce_limit <= (1UL << 31)) {
/* 2 GB -> 4 TB: put a region-third table on top. */
76 entry = _REGION3_ENTRY_EMPTY;
77 mm->context.asce_limit = 1UL << 42;
78 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
/* 4 TB -> 8 PB: put a region-second table on top. */
82 entry = _REGION2_ENTRY_EMPTY;
83 mm->context.asce_limit = 1UL << 53;
84 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
/* Point the new top-level table at the old one and switch mm over. */
88 crst_table_init(table, entry);
89 pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
90 mm->pgd = (pgd_t *) table;
91 mm->task_size = mm->context.asce_limit;
94 spin_unlock_bh(&mm->page_table_lock);
/* Table unused (lost the race or done): give it back. */
96 crst_table_free(mm, table);
97 if (mm->context.asce_limit < limit)
/* Load the new ASCE into the current task's address space controls. */
99 update_mm(mm, current);
/*
 * Shrink the address-space limit of @mm down to @limit by stripping
 * top-level crst tables one at a time until the limit is reached.
 * NOTE(review): this listing drops lines -- the "pgd = mm->pgd"
 * assignment, the asce_bits continuation lines, break statements and
 * closing braces are not visible.
 */
103 void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
/* Nothing to do if we are already at or below the requested limit. */
107 if (mm->context.asce_limit <= limit)
110 while (mm->context.asce_limit > limit) {
112 switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
113 case _REGION_ENTRY_TYPE_R2:
/* Dropping a region-second table: limit falls back to 4 TB. */
114 mm->context.asce_limit = 1UL << 42;
115 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
119 case _REGION_ENTRY_TYPE_R3:
/* Dropping a region-third table: limit falls back to 2 GB. */
120 mm->context.asce_limit = 1UL << 31;
121 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
/* Descend to the table the old top level pointed at, free the old one. */
128 mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
129 mm->task_size = mm->context.asce_limit;
130 crst_table_free(mm, (unsigned long *) pgd);
/* Reload the shrunk ASCE for the current task. */
132 update_mm(mm, current);
/*
 * Atomically xor @bits into *@v with a cmpxchg retry loop and return the
 * resulting value.  Used to flip fragment bits in page->_mapcount.
 * NOTE(review): the "do {" opener, the "new = old ^ bits" line and the
 * final return are not visible in this listing.
 */
136 static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
138 unsigned int old, new;
141 old = atomic_read(v);
143 } while (atomic_cmpxchg(v, old, new) != old);
148 * page table entry allocation/free routines.
/*
 * Allocate a full 4K page-table page for an mm with extended page-table
 * entries (pgstes, used by KVM): the lower half holds the ptes, the
 * upper half the matching pgstes.
 * NOTE(review): the "struct page *page;" declaration, the allocation
 * failure check, the final return and braces are not visible in this
 * listing.
 */
151 static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
154 unsigned long *table;
156 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
159 pgtable_page_ctor(page);
/* Mark both fragments of the page as allocated (bits 0 and 1). */
160 atomic_set(&page->_mapcount, 3);
161 table = (unsigned long *) page_to_phys(page);
/* ptes in the lower half start out empty ... */
162 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
/* ... pgstes in the upper half start out zeroed. */
163 clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
167 static inline void page_table_free_pgste(unsigned long *table)
171 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
172 pgtable_page_ctor(page);
173 atomic_set(&page->_mapcount, -1);
/*
 * Allocate a page table for @mm.  pgste-mms use a full page; otherwise
 * 4K pages are carved into 1K/2K fragments tracked on
 * mm->context.pgtable_list, with the in-use bits kept in the lower
 * nibble of page->_mapcount.
 * NOTE(review): this listing drops lines -- the list_first_entry
 * arguments, the else branch pairing with line 198, the allocation
 * failure check, the final return and several braces are not visible.
 */
178 unsigned long *page_table_alloc(struct mm_struct *mm)
181 unsigned long *table;
182 unsigned int mask, bit;
185 if (mm_has_pgste(mm))
186 return page_table_alloc_pgste(mm);
188 /* Allocate fragments of a 4K page as 1K/2K page table */
189 spin_lock_bh(&mm->context.list_lock);
191 if (!list_empty(&mm->context.pgtable_list)) {
192 page = list_first_entry(&mm->context.pgtable_list,
194 table = (unsigned long *) page_to_phys(page);
195 mask = atomic_read(&page->_mapcount);
/* Fold in the upper nibble so pending-RCU fragments count as busy too. */
196 mask = mask | (mask >> 4);
/* Every fragment busy: drop the lock and get a fresh page instead. */
198 if ((mask & FRAG_MASK) == FRAG_MASK) {
199 spin_unlock_bh(&mm->context.list_lock);
200 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
203 pgtable_page_ctor(page);
/* First fragment of the new page is handed out immediately. */
204 atomic_set(&page->_mapcount, 1);
205 table = (unsigned long *) page_to_phys(page);
206 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
207 spin_lock_bh(&mm->context.list_lock);
208 list_add(&page->lru, &mm->context.pgtable_list);
/* Reuse path: advance to the first free fragment and claim its bit. */
210 for (bit = 1; mask & bit; bit <<= 1)
211 table += PTRS_PER_PTE;
212 mask = atomic_xor_bits(&page->_mapcount, bit);
/* Page now completely used up: take it off the free-fragment list. */
213 if ((mask & FRAG_MASK) == FRAG_MASK)
214 list_del(&page->lru);
216 spin_unlock_bh(&mm->context.list_lock);
/*
 * Return a 1K/2K page-table fragment to @mm's fragment list and free the
 * backing page once its last fragment has been released.
 * NOTE(review): this listing drops lines -- the "struct page *page;"
 * declaration, the "if (!(mask & FRAG_MASK))" guard around the final
 * teardown, the __free_page call and braces are not visible.
 */
220 void page_table_free(struct mm_struct *mm, unsigned long *table)
223 unsigned int bit, mask;
226 if (mm_has_pgste(mm))
227 return page_table_free_pgste(table);
229 /* Free 1K/2K page table fragment of a 4K page */
230 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
/* Derive this fragment's _mapcount bit from its offset in the page. */
231 bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
232 spin_lock_bh(&mm->context.list_lock);
/* Unlink first; the page is re-inserted below if fragments remain. */
233 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
234 list_del(&page->lru);
235 mask = atomic_xor_bits(&page->_mapcount, bit);
236 if (mask & FRAG_MASK)
237 list_add(&page->lru, &mm->context.pgtable_list);
238 spin_unlock_bh(&mm->context.list_lock);
/* Last fragment gone: destroy and release the page. */
240 pgtable_page_dtor(page);
241 atomic_set(&page->_mapcount, -1);
246 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
/*
 * Second stage of an RCU page-table free, run after the grace period.
 * @bit == FRAG_MASK denotes a full pgste page; otherwise @bit is the
 * pending fragment bit to clear, and the page is released once no bits
 * remain set.
 * NOTE(review): the "struct page *page;" declaration, the __free_page
 * call and closing braces are not visible in this listing.
 */
248 static void __page_table_free_rcu(void *table, unsigned bit)
253 if (bit == FRAG_MASK)
254 return page_table_free_pgste(table);
256 /* Free 1K/2K page table fragment of a 4K page */
257 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
258 if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
259 pgtable_page_dtor(page);
260 atomic_set(&page->_mapcount, -1);
/*
 * First stage of an RCU page-table free: move the fragment's _mapcount
 * bit from the in-use nibble to the pending (upper) nibble, then hand
 * the table to the mmu_gather, which runs __tlb_remove_table after the
 * grace period.  The fragment type is smuggled in the low bits of the
 * pointer passed to tlb_remove_table.
 * NOTE(review): the "mm = tlb->mm" assignment, the early return after
 * line 275 and closing braces are not visible in this listing.
 */
265 void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
267 struct mm_struct *mm;
269 unsigned int bit, mask;
273 if (mm_has_pgste(mm)) {
/* Whole pgste page: tag the pointer with FRAG_MASK for stage two. */
274 table = (unsigned long *) (__pa(table) | FRAG_MASK);
275 tlb_remove_table(tlb, table);
279 bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
280 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
281 spin_lock_bh(&mm->context.list_lock);
282 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
283 list_del(&page->lru);
/* Clear the in-use bit and set the matching pending-free bit. */
284 mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
285 if (mask & FRAG_MASK)
286 list_add_tail(&page->lru, &mm->context.pgtable_list);
287 spin_unlock_bh(&mm->context.list_lock);
/* Encode the pending bit in the pointer for __tlb_remove_table. */
288 table = (unsigned long *) (__pa(table) | (bit << 4));
289 tlb_remove_table(tlb, table);
/*
 * mmu_gather callback run once the table is safe to free.  The low bits
 * of @_table carry the type tag set by page_table_free_rcu: non-zero
 * means a pte fragment or pgste page, zero means a crst table freed as
 * 2^ALLOC_ORDER pages.
 * NOTE(review): the if/else selecting between the two free calls is not
 * visible in this listing.
 */
292 void __tlb_remove_table(void *_table)
294 void *table = (void *)((unsigned long) _table & PAGE_MASK);
295 unsigned type = (unsigned long) _table & ~PAGE_MASK;
298 __page_table_free_rcu(table, type);
300 free_pages((unsigned long) table, ALLOC_ORDER);
306 * switch on pgstes for its userspace process (for kvm)
/*
 * Switch the current process over to an mm whose page tables carry
 * pgstes, so it can run KVM guests via SIE.  The existing mm is
 * duplicated with alloc_pgste set and then swapped in -- legal only
 * while the process is still single threaded and unshared, which is
 * checked both before and after the copy.
 * NOTE(review): this listing drops lines -- the dup_mm/mmput calls, the
 * task locking/unlocking, the #ifdef around the ioctx_list check and
 * the return statements are not visible.
 */
308 int s390_enable_sie(void)
310 struct task_struct *tsk = current;
311 struct mm_struct *mm, *old_mm;
313 /* Do we have switched amode? If no, we cannot do sie */
314 if (user_mode == HOME_SPACE_MODE)
317 /* Do we have pgstes? if yes, we are done */
318 if (mm_has_pgste(tsk->mm))
321 /* lets check if we are allowed to replace the mm */
323 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
325 !hlist_empty(&tsk->mm->ioctx_list) ||
327 tsk->mm != tsk->active_mm) {
333 /* we copy the mm and let dup_mm create the page tables with_pgstes */
334 tsk->mm->context.alloc_pgste = 1;
336 tsk->mm->context.alloc_pgste = 0;
340 /* Now lets check again if something happened */
342 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
344 !hlist_empty(&tsk->mm->ioctx_list) ||
346 tsk->mm != tsk->active_mm) {
352 /* ok, we are alone. No ptrace, no threads, etc. */
354 tsk->mm = tsk->active_mm = mm;
/* Keep both mms' attach counts consistent with the switch. */
357 atomic_inc(&mm->context.attach_count);
358 atomic_dec(&old_mm->context.attach_count);
359 cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
365 EXPORT_SYMBOL_GPL(s390_enable_sie);
367 #if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
/*
 * CONFIG_DEBUG_PAGEALLOC helper for hibernation: report whether @page is
 * currently mapped in the kernel address space, probing its physical
 * address with an inline asm whose condition code is captured in "cc".
 * NOTE(review): the local declarations, the asm instruction text and the
 * final return are only partially visible in this listing.
 */
368 bool kernel_page_present(struct page *page)
373 addr = page_to_phys(page);
378 : "=d" (cc), "+a" (addr) : : "cc");
381 #endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */