Merge branch 'for-2.6.31' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6
[pandora-kernel.git] / arch / sh / mm / fault_32.c
1 /*
2  * Page fault handler for SH with an MMU.
3  *
4  *  Copyright (C) 1999  Niibe Yutaka
5  *  Copyright (C) 2003 - 2008  Paul Mundt
6  *
7  *  Based on linux/arch/i386/mm/fault.c:
8  *   Copyright (C) 1995  Linus Torvalds
9  *
10  * This file is subject to the terms and conditions of the GNU General Public
11  * License.  See the file "COPYING" in the main directory of this archive
12  * for more details.
13  */
14 #include <linux/kernel.h>
15 #include <linux/mm.h>
16 #include <linux/hardirq.h>
17 #include <linux/kprobes.h>
18 #include <linux/marker.h>
19 #include <asm/io_trapped.h>
20 #include <asm/system.h>
21 #include <asm/mmu_context.h>
22 #include <asm/tlbflush.h>
23
24 /*
25  * Handle a page fault at @address (@writeaccess non-zero for a write).
26  * Addresses >= TASK_SIZE only sync page-table entries from swapper_pg_dir;
27  * others go through find_vma()/handle_mm_fault(), else signal, fixup or oops.
28  */
29 asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
30                                         unsigned long writeaccess,
31                                         unsigned long address)
32 {
33         struct task_struct *tsk;
34         struct mm_struct *mm;
35         struct vm_area_struct * vma;
36         int si_code;            /* si_code reported with SIGSEGV */
37         int fault;              /* handle_mm_fault() result bits */
38         siginfo_t info;         /* filled in just before force_sig_info() */
39
40         /*
41          * We don't bother with any notifier callbacks here, as they are
42          * all handled through the __do_page_fault() fast-path.
43          */
44
45         tsk = current;
46         si_code = SEGV_MAPERR;  /* default; becomes SEGV_ACCERR at good_area */
47
48         if (unlikely(address >= TASK_SIZE)) {
49                 /*
50                  * Synchronize this task's top level page-table
51                  * with the 'reference' page table.
52                  *
53                  * Do _not_ use "tsk" here. We might be inside
54                  * an interrupt in the middle of a task switch..
55                  */
56                 int offset = pgd_index(address);
57                 pgd_t *pgd, *pgd_k;
58                 pud_t *pud, *pud_k;
59                 pmd_t *pmd, *pmd_k;
60                 /* pgd: entry in the table from get_TTB(); pgd_k: same slot in swapper_pg_dir. */
61                 pgd = get_TTB() + offset;
62                 pgd_k = swapper_pg_dir + offset;
63                 /* Absent here but present in the reference table: copy it and return. */
64                 if (!pgd_present(*pgd)) {
65                         if (!pgd_present(*pgd_k))
66                                 goto bad_area_nosemaphore;
67                         set_pgd(pgd, *pgd_k);
68                         return;
69                 }
70
71                 pud = pud_offset(pgd, address);
72                 pud_k = pud_offset(pgd_k, address);
73
74                 if (!pud_present(*pud)) {
75                         if (!pud_present(*pud_k))
76                                 goto bad_area_nosemaphore;
77                         set_pud(pud, *pud_k);
78                         return;
79                 }
80                 /* The pmd is copied only when ours is absent and the reference one is present. */
81                 pmd = pmd_offset(pud, address);
82                 pmd_k = pmd_offset(pud_k, address);
83                 if (pmd_present(*pmd) || !pmd_present(*pmd_k))
84                         goto bad_area_nosemaphore;
85                 set_pmd(pmd, *pmd_k);
86
87                 return;
88         }
89
90         /* Only enable interrupts if they were on before the fault */
91         if ((regs->sr & SR_IMASK) != SR_IMASK) {
92                 trace_hardirqs_on();
93                 local_irq_enable();
94         }
95
96         mm = tsk->mm;
97
98         /*
99          * If we're in an interrupt or have no user
100          * context, we must not take the fault..
101          */
102         if (in_atomic() || !mm)
103                 goto no_context;
104         /* mmap_sem is held for reading until one of the up_read() exits below. */
105         down_read(&mm->mmap_sem);
106         /* An address below vm_start needs VM_GROWSDOWN and a successful expand_stack(). */
107         vma = find_vma(mm, address);
108         if (!vma)
109                 goto bad_area;
110         if (vma->vm_start <= address)
111                 goto good_area;
112         if (!(vma->vm_flags & VM_GROWSDOWN))
113                 goto bad_area;
114         if (expand_stack(vma, address))
115                 goto bad_area;
116 /*
117  * Ok, we have a good vm_area for this memory access, so
118  * we can handle it..
119  */
120 good_area:
121         si_code = SEGV_ACCERR;  /* bad_area exits from here on are vm_flags mismatches */
122         if (writeaccess) {
123                 if (!(vma->vm_flags & VM_WRITE))
124                         goto bad_area;
125         } else {
126                 if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
127                         goto bad_area;
128         }
129
130         /*
131          * If for any reason at all we couldn't handle the fault,
132          * make sure we exit gracefully rather than endlessly redo
133          * the fault.
134          */
135 survive:
136         fault = handle_mm_fault(mm, vma, address, writeaccess ? FAULT_FLAG_WRITE : 0);
137         if (unlikely(fault & VM_FAULT_ERROR)) {
138                 if (fault & VM_FAULT_OOM)
139                         goto out_of_memory;
140                 else if (fault & VM_FAULT_SIGBUS)
141                         goto do_sigbus;
142                 BUG();          /* a VM_FAULT_ERROR bit other than OOM or SIGBUS */
143         }
144         if (fault & VM_FAULT_MAJOR)
145                 tsk->maj_flt++;
146         else
147                 tsk->min_flt++;
148
149         up_read(&mm->mmap_sem);
150         return;
151
152 /*
153  * Something tried to access memory that isn't in our memory map..
154  * Fix it, but check if it's kernel or user first..
155  */
156 bad_area:
157         up_read(&mm->mmap_sem);
158         /* Entered directly by paths that never took mmap_sem. */
159 bad_area_nosemaphore:
160         if (user_mode(regs)) {
161                 info.si_signo = SIGSEGV;
162                 info.si_errno = 0;
163                 info.si_code = si_code;
164                 info.si_addr = (void *) address;
165                 force_sig_info(SIGSEGV, &info, tsk);
166                 return;
167         }
168         /* Kernel-mode fault: try an exception fixup, then trapped I/O, then oops. */
169 no_context:
170         /* Are we prepared to handle this kernel fault?  */
171         if (fixup_exception(regs))
172                 return;
173
174         if (handle_trapped_io(regs, address))
175                 return;
176 /*
177  * Oops. The kernel tried to access some bad page. We'll have to
178  * terminate things with extreme prejudice.
179  *
180  */
181
182         bust_spinlocks(1);
183
184         if (oops_may_print()) {
185                 unsigned long page;
186
187                 if (address < PAGE_SIZE)
188                         printk(KERN_ALERT "Unable to handle kernel NULL "
189                                           "pointer dereference");
190                 else
191                         printk(KERN_ALERT "Unable to handle kernel paging "
192                                           "request");
193                 printk(" at virtual address %08lx\n", address);
194                 printk(KERN_ALERT "pc = %08lx\n", regs->pc);
195                 page = (unsigned long)get_TTB();        /* dump the pde, then the pte, for the address */
196                 if (page) {
197                         page = ((__typeof__(page) *)page)[address >> PGDIR_SHIFT];
198                         printk(KERN_ALERT "*pde = %08lx\n", page);
199                         if (page & _PAGE_PRESENT) {
200                                 page &= PAGE_MASK;
201                                 address &= 0x003ff000;  /* NOTE(review): mask presumably assumes 4K pages and 1024 ptes per table - confirm */
202                                 page = ((__typeof__(page) *)
203                                                 __va(page))[address >>
204                                                             PAGE_SHIFT];
205                                 printk(KERN_ALERT "*pte = %08lx\n", page);
206                         }
207                 }
208         }
209
210         die("Oops", regs, writeaccess);
211         bust_spinlocks(0);
212         do_exit(SIGKILL);
213
214 /*
215  * We ran out of memory, or some other thing happened to us that made
216  * us unable to handle the page fault gracefully.
217  */
218 out_of_memory:
219         up_read(&mm->mmap_sem);
220         if (is_global_init(current)) {  /* init is not killed: yield, retake mmap_sem, retry */
221                 yield();
222                 down_read(&mm->mmap_sem);
223                 goto survive;
224         }
225         printk("VM: killing process %s\n", tsk->comm);
226         if (user_mode(regs))
227                 do_group_exit(SIGKILL);
228         goto no_context;
229
230 do_sigbus:
231         up_read(&mm->mmap_sem);
232
233         /*
234          * Send a sigbus, regardless of whether we were in kernel
235          * or user mode.
236          */
237         info.si_signo = SIGBUS;
238         info.si_errno = 0;
239         info.si_code = BUS_ADRERR;
240         info.si_addr = (void *)address;
241         force_sig_info(SIGBUS, &info, tsk);
242
243         /* Kernel mode? Handle exceptions or die */
244         if (!user_mode(regs))
245                 goto no_context;
246 }
247
248 static inline int notify_page_fault(struct pt_regs *regs, int trap)
249 {
250         /* Give a running kprobe first refusal on kernel-mode faults. */
251 #ifdef CONFIG_KPROBES
252         int claimed;
253
254         if (user_mode(regs))
255                 return 0;
256         preempt_disable();
257         claimed = kprobe_running() && kprobe_fault_handler(regs, trap);
258         preempt_enable();
259         return claimed;         /* 1 only if the kprobe handler took the fault */
260 #endif
261         return 0;               /* reached only without CONFIG_KPROBES */
262 }
263
264 /*
265  * Called with interrupts disabled.  Returns 0 if a kprobe claimed the fault or
266  * the PTE was updated and handed to update_mmu_cache(); 1 on every other exit. */
267 asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
268                                          unsigned long writeaccess,
269                                          unsigned long address)
270 {
271         pgd_t *pgd;
272         pud_t *pud;
273         pmd_t *pmd;
274         pte_t *pte;
275         pte_t entry;            /* local copy of *pte, modified before being written back */
276         int ret = 0;            /* 0 is what a kprobe-claimed fault returns */
277
278         if (notify_page_fault(regs, lookup_exception_vector()))
279                 goto out;
280
281         ret = 1;                /* every early exit below returns 1 */
282
283         /*
284          * We don't take page faults for P1, P2, and parts of P4, these
285          * are always mapped, whether it be due to legacy behaviour in
286          * 29-bit mode, or due to PMB configuration in 32-bit mode.
287          */
288         if (address >= P3SEG && address < P3_ADDR_MAX) {
289                 pgd = pgd_offset_k(address);
290         } else {
291                 if (unlikely(address >= TASK_SIZE || !current->mm))     /* no fast path here */
292                         goto out;
293
294                 pgd = pgd_offset(current->mm, address);
295         }
296         /* Walk pud -> pmd -> pte, bailing out as soon as a level fails its check. */
297         pud = pud_offset(pgd, address);
298         if (pud_none_or_clear_bad(pud))
299                 goto out;
300         pmd = pmd_offset(pud, address);
301         if (pmd_none_or_clear_bad(pmd))
302                 goto out;
303         pte = pte_offset_kernel(pmd, address);
304         entry = *pte;
305         if (unlikely(pte_none(entry) || pte_not_present(entry)))
306                 goto out;
307         if (unlikely(writeaccess && !pte_write(entry)))
308                 goto out;
309         /* Mark the local copy young, and dirty as well for a write access. */
310         if (writeaccess)
311                 entry = pte_mkdirty(entry);
312         entry = pte_mkyoung(entry);
313
314 #if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP)
315         /*
316          * ITLB is not affected by "ldtlb" instruction.
317          * So, we need to flush the entry by ourselves.
318          */
319         local_flush_tlb_one(get_asid(), address & PAGE_MASK);
320 #endif
321
322         set_pte(pte, entry);    /* write the updated entry back to the page table */
323         update_mmu_cache(NULL, address, entry);
324
325         ret = 0;
326 out:
327         return ret;
328 }