xen/privcmd: print SIGBUS faults
[pandora-kernel.git] drivers/xen/xenfs/privcmd.c
/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/features.h>
#include <xen/page.h>

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif

struct remap_data {
        unsigned long mfn;
        unsigned domid;
        pgprot_t prot;
};

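/*
 * Callback for apply_to_page_range(): install the next machine frame
 * from the remap_data cursor into this pte, marking it as owned by the
 * foreign domain remap_data->domid.
 */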
static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
                                 unsigned long addr, void *data)
{
        struct remap_data *rmd = data;
        pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));

        xen_set_domain_pte(ptep, pte, rmd->domid);

        return 0;
}

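/*
 * Map a contiguous range of machine frames belonging to @domid into
 * the given vma starting at @addr.  The PTEs are written with
 * _PAGE_IOMAP set and the vma is marked VM_IO | VM_RESERVED |
 * VM_PFNMAP, so the range is treated as a raw PFN mapping with no
 * struct page backing.
 */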
int remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr,
                           unsigned long mfn, unsigned long size,
                           pgprot_t prot, unsigned domid)
{
        struct remap_data rmd;
        int err;

        prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);

        vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;

        rmd.mfn = mfn;
        rmd.prot = prot;
        rmd.domid = domid;

        err = apply_to_page_range(vma->vm_mm, addr, size,
                                  remap_area_mfn_pte_fn, &rmd);

        return err;
}

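/*
 * IOCTL_PRIVCMD_HYPERCALL: copy a hypercall descriptor from userspace
 * and issue it on the caller's behalf via privcmd_call(), passing up
 * to five arguments through to the hypervisor.
 */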
static long privcmd_ioctl_hypercall(void __user *udata)
{
        struct privcmd_hypercall hypercall;
        long ret;

        if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
                return -EFAULT;

        ret = privcmd_call(hypercall.op,
                           hypercall.arg[0], hypercall.arg[1],
                           hypercall.arg[2], hypercall.arg[3],
                           hypercall.arg[4]);

        return ret;
}

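/* Free every page gathered onto the list and reinitialise the list head. */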
static void free_page_list(struct list_head *pages)
{
        struct page *p, *n;

        list_for_each_entry_safe(p, n, pages, lru)
                __free_page(p);

        INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
                        unsigned nelem, size_t size,
                        void __user *data)
{
        unsigned pageidx;
        void *pagedata;
        int ret;

        if (size > PAGE_SIZE)
                return 0;

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* quiet, gcc */
        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page = alloc_page(GFP_KERNEL);

                        ret = -ENOMEM;
                        if (page == NULL)
                                goto fail;

                        pagedata = page_address(page);

                        list_add_tail(&page->lru, pagelist);
                        pageidx = 0;
                }

                ret = -EFAULT;
                if (copy_from_user(pagedata + pageidx, data, size))
                        goto fail;

                data += size;
                pageidx += size;
        }

        ret = 0;

fail:
        return ret;
}

/*
 * Call function "fn" on each element of the array, which has been
 * fragmented over a list of pages by gather_array().
 */
static int traverse_pages(unsigned nelem, size_t size,
                          struct list_head *pos,
                          int (*fn)(void *data, void *state),
                          void *state)
{
        void *pagedata;
        unsigned pageidx;
        int ret = 0;            /* in case nelem is zero */

        BUG_ON(size > PAGE_SIZE);

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* hush, gcc */

        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page;
                        pos = pos->next;
                        page = list_entry(pos, struct page, lru);
                        pagedata = page_address(page);
                        pageidx = 0;
                }

                ret = (*fn)(pagedata + pageidx, state);
                if (ret)
                        break;
                pageidx += size;
        }

        return ret;
}

struct mmap_mfn_state {
        unsigned long va;
        struct vm_area_struct *vma;
        domid_t domain;
};

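/*
 * Per-chunk callback for IOCTL_PRIVCMD_MMAP: validate one
 * privcmd_mmap_entry against the target vma, remap its machine frames
 * and advance the running virtual address in mmap_mfn_state.
 */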
static int mmap_mfn_range(void *data, void *state)
{
        struct privcmd_mmap_entry *msg = data;
        struct mmap_mfn_state *st = state;
        struct vm_area_struct *vma = st->vma;
        int rc;

        /* Do not allow range to wrap the address space. */
        if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
            ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
                return -EINVAL;

        /* Range chunks must be contiguous in va space. */
        if ((msg->va != st->va) ||
            ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
                return -EINVAL;

        rc = remap_domain_mfn_range(vma,
                                    msg->va & PAGE_MASK,
                                    msg->mfn,
                                    msg->npages << PAGE_SHIFT,
                                    vma->vm_page_prot,
                                    st->domain);
        if (rc < 0)
                return rc;

        st->va += msg->npages << PAGE_SHIFT;

        return 0;
}

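/*
 * IOCTL_PRIVCMD_MMAP: gather the array of privcmd_mmap_entry structs
 * from userspace, check that the first entry starts at the beginning
 * of a vma that has not been populated before, and then remap each
 * chunk in turn under mmap_sem.
 */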
static long privcmd_ioctl_mmap(void __user *udata)
{
        struct privcmd_mmap mmapcmd;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        int rc;
        LIST_HEAD(pagelist);
        struct mmap_mfn_state state;

        if (!xen_initial_domain())
                return -EPERM;

        if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
                return -EFAULT;

        rc = gather_array(&pagelist,
                          mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                          mmapcmd.entry);

        if (rc || list_empty(&pagelist))
                goto out;

        down_write(&mm->mmap_sem);

        {
                struct page *page = list_first_entry(&pagelist,
                                                     struct page, lru);
                struct privcmd_mmap_entry *msg = page_address(page);

                vma = find_vma(mm, msg->va);
                rc = -EINVAL;

                if (!vma || (msg->va != vma->vm_start) ||
                    !privcmd_enforce_singleshot_mapping(vma))
                        goto out_up;
        }

        state.va = vma->vm_start;
        state.vma = vma;
        state.domain = mmapcmd.dom;

        rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                            &pagelist,
                            mmap_mfn_range, &state);

out_up:
        up_write(&mm->mmap_sem);

out:
        free_page_list(&pagelist);

        return rc;
}

struct mmap_batch_state {
        domid_t domain;
        unsigned long va;
        struct vm_area_struct *vma;
        int err;

        xen_pfn_t __user *user;
};

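/*
 * Per-frame callback for IOCTL_PRIVCMD_MMAPBATCH: map one machine
 * frame at the current virtual address.  On failure the entry in the
 * mfn array is tagged with 0xf0000000 so userspace can see which
 * frames could not be mapped, and the error count is bumped.
 */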
static int mmap_batch_fn(void *data, void *state)
{
        xen_pfn_t *mfnp = data;
        struct mmap_batch_state *st = state;

        if (remap_domain_mfn_range(st->vma, st->va & PAGE_MASK,
                                   *mfnp, PAGE_SIZE,
                                   st->vma->vm_page_prot, st->domain) < 0) {
                *mfnp |= 0xf0000000U;
                st->err++;
        }
        st->va += PAGE_SIZE;

        return 0;
}

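/*
 * Second pass over the gathered array: copy the (possibly
 * error-tagged) mfn entries back to the userspace array.
 */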
static int mmap_return_errors(void *data, void *state)
{
        xen_pfn_t *mfnp = data;
        struct mmap_batch_state *st = state;

        return put_user(*mfnp, st->user++);
}

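/*
 * IOCTL_PRIVCMD_MMAPBATCH: map an array of machine frames into a
 * single vma, one page each.  Individual mapping failures do not abort
 * the batch; the failing entries are tagged, written back to
 * userspace, and the number of failures is returned.
 */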
static long privcmd_ioctl_mmap_batch(void __user *udata)
{
        int ret;
        struct privcmd_mmapbatch m;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long nr_pages;
        LIST_HEAD(pagelist);
        struct mmap_batch_state state;

        if (!xen_initial_domain())
                return -EPERM;

        if (copy_from_user(&m, udata, sizeof(m)))
                return -EFAULT;

        nr_pages = m.num;
        if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
                return -EINVAL;

        ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
                           m.arr);

        if (ret || list_empty(&pagelist))
                goto out;

        down_write(&mm->mmap_sem);

        vma = find_vma(mm, m.addr);
        ret = -EINVAL;
        if (!vma ||
            (m.addr != vma->vm_start) ||
            ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
            !privcmd_enforce_singleshot_mapping(vma)) {
                up_write(&mm->mmap_sem);
                goto out;
        }

        state.domain = m.dom;
        state.vma = vma;
        state.va = m.addr;
        state.err = 0;

        ret = traverse_pages(m.num, sizeof(xen_pfn_t),
                             &pagelist, mmap_batch_fn, &state);

        up_write(&mm->mmap_sem);

        if (state.err > 0) {
                ret = state.err;

                /* Write the tagged frames back over the user's mfn array. */
                state.user = m.arr;
                traverse_pages(m.num, sizeof(xen_pfn_t),
                               &pagelist,
                               mmap_return_errors, &state);
        }

out:
        free_page_list(&pagelist);

        return ret;
}

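/* Dispatch the privcmd ioctls to the handlers above. */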
static long privcmd_ioctl(struct file *file,
                          unsigned int cmd, unsigned long data)
{
        int ret = -ENOSYS;
        void __user *udata = (void __user *) data;

        switch (cmd) {
        case IOCTL_PRIVCMD_HYPERCALL:
                ret = privcmd_ioctl_hypercall(udata);
                break;

        case IOCTL_PRIVCMD_MMAP:
                ret = privcmd_ioctl_mmap(udata);
                break;

        case IOCTL_PRIVCMD_MMAPBATCH:
                ret = privcmd_ioctl_mmap_batch(udata);
                break;

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

#ifndef HAVE_ARCH_PRIVCMD_MMAP
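/*
 * Mappings are installed in a privcmd vma only by the ioctl paths
 * above, never faulted in on demand, so any fault here means userspace
 * touched an address that was never populated.  Log the details and
 * deliver SIGBUS.
 */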
static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
               vma, vma->vm_start, vma->vm_end,
               vmf->pgoff, vmf->virtual_address);

        return VM_FAULT_SIGBUS;
}

static struct vm_operations_struct privcmd_vm_ops = {
        .fault = privcmd_fault
};

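/*
 * mmap() of the privcmd node only prepares an empty, non-copyable vma;
 * the actual mappings are installed later through the mmap ioctls.
 */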
static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
        /* Unsupported for auto-translate guests. */
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return -ENOSYS;

        /* DONTCOPY is essential for Xen as copy_page_range is broken. */
        vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
        vma->vm_ops = &privcmd_vm_ops;
        vma->vm_private_data = NULL;

        return 0;
}

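/*
 * The vma's private data is used as an "already populated" flag: the
 * xchg() ensures that only the first caller may install mappings in a
 * given privcmd vma.
 */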
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
        return (xchg(&vma->vm_private_data, (void *)1) == NULL);
}
#endif

const struct file_operations privcmd_file_ops = {
        .unlocked_ioctl = privcmd_ioctl,
        .mmap = privcmd_mmap,
};