drivers/infiniband/core/uverbs_mem.c
/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $
 */

#include <linux/mm.h>
#include <linux/dma-mapping.h>

#include "uverbs.h"

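/*
 * Deferred work item used to subtract a pinned-page count from an
 * mm's locked_vm once it is safe to take mmap_sem (see
 * ib_umem_release_on_close() below).
 */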
struct ib_umem_account_work {
        struct work_struct work;
        struct mm_struct  *mm;
        unsigned long      diff;
};

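/*
 * Undo the work done by ib_umem_get() for one region: unmap each
 * chunk's scatterlist from the device, mark the pages dirty if the
 * region was writable (and @dirty is set), drop the page references
 * and free the chunk structures.
 */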
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
        struct ib_umem_chunk *chunk, *tmp;
        int i;

        list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
                dma_unmap_sg(dev->dma_device, chunk->page_list,
                             chunk->nents, DMA_BIDIRECTIONAL);
                for (i = 0; i < chunk->nents; ++i) {
                        if (umem->writable && dirty)
                                set_page_dirty_lock(chunk->page_list[i].page);
                        put_page(chunk->page_list[i].page);
                }

                kfree(chunk);
        }
}

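/*
 * ib_umem_get() - pin a userspace memory region and build DMA-mapped
 * scatterlists for it.
 *
 * The region described by @addr/@size is charged against the caller's
 * RLIMIT_MEMLOCK, pinned with get_user_pages() and split into chunks
 * of at most IB_UMEM_MAX_PAGE_CHUNK scatterlist entries, each mapped
 * with dma_map_sg().  Returns 0 on success or a negative errno.
 *
 * A rough usage sketch (hypothetical caller, e.g. a driver's
 * reg_user_mr method; error handling omitted and the access-flag
 * mapping is illustrative only, not taken from this file):
 *
 *      err = ib_umem_get(pd->device, &umem, (void *) start, length,
 *                        !!(access_flags & IB_ACCESS_LOCAL_WRITE));
 *      ...
 *      ib_umem_release(pd->device, &umem);
 */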
int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
                void *addr, size_t size, int write)
{
        struct page **page_list;
        struct ib_umem_chunk *chunk;
        unsigned long locked;
        unsigned long lock_limit;
        unsigned long cur_base;
        unsigned long npages;
        int ret = 0;
        int off;
        int i;

        if (!can_do_mlock())
                return -EPERM;

        page_list = (struct page **) __get_free_page(GFP_KERNEL);
        if (!page_list)
                return -ENOMEM;

        mem->user_base = (unsigned long) addr;
        mem->length    = size;
        mem->offset    = (unsigned long) addr & ~PAGE_MASK;
        mem->page_size = PAGE_SIZE;
        mem->writable  = write;

        INIT_LIST_HEAD(&mem->chunk_list);

        npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT;

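        /*
         * Charge the pages against RLIMIT_MEMLOCK before pinning
         * anything; mmap_sem protects locked_vm.  CAP_IPC_LOCK lets
         * the caller exceed the limit.
         */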
        down_write(&current->mm->mmap_sem);

        locked     = npages + current->mm->locked_vm;
        lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;

        if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
                ret = -ENOMEM;
                goto out;
        }

        cur_base = (unsigned long) addr & PAGE_MASK;

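        /*
         * Pin the region a batch at a time: each get_user_pages()
         * call fills at most one page's worth of struct page
         * pointers, which are then carved into scatterlist chunks of
         * up to IB_UMEM_MAX_PAGE_CHUNK entries and DMA mapped.
         */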
        while (npages) {
                ret = get_user_pages(current, current->mm, cur_base,
                                     min_t(int, npages,
                                           PAGE_SIZE / sizeof (struct page *)),
                                     1, !write, page_list, NULL);

                if (ret < 0)
                        goto out;

                cur_base += ret * PAGE_SIZE;
                npages   -= ret;

                off = 0;

                while (ret) {
                        chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
                                        min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
                                        GFP_KERNEL);
                        if (!chunk) {
                                ret = -ENOMEM;
                                goto out;
                        }

                        chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
                        for (i = 0; i < chunk->nents; ++i) {
                                chunk->page_list[i].page   = page_list[i + off];
                                chunk->page_list[i].offset = 0;
                                chunk->page_list[i].length = PAGE_SIZE;
                        }

                        chunk->nmap = dma_map_sg(dev->dma_device,
                                                 &chunk->page_list[0],
                                                 chunk->nents,
                                                 DMA_BIDIRECTIONAL);
                        if (chunk->nmap <= 0) {
                                for (i = 0; i < chunk->nents; ++i)
                                        put_page(chunk->page_list[i].page);
                                kfree(chunk);

                                ret = -ENOMEM;
                                goto out;
                        }

                        ret -= chunk->nents;
                        off += chunk->nents;
                        list_add_tail(&chunk->list, &mem->chunk_list);
                }

                ret = 0;
        }

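        /*
         * On failure release whatever was already pinned and mapped;
         * on success commit the new locked_vm count before dropping
         * mmap_sem.
         */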
out:
        if (ret < 0)
                __ib_umem_release(dev, mem, 0);
        else
                current->mm->locked_vm = locked;

        up_write(&current->mm->mmap_sem);
        free_page((unsigned long) page_list);

        return ret;
}

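/*
 * ib_umem_release() - release a pinned region and subtract its pages
 * from the caller's locked_vm under mmap_sem.
 */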
void ib_umem_release(struct ib_device *dev, struct ib_umem *umem)
{
        __ib_umem_release(dev, umem, 1);

        down_write(&current->mm->mmap_sem);
        current->mm->locked_vm -=
                PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
        up_write(&current->mm->mmap_sem);
}

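/*
 * Work handler for the deferred accounting set up by
 * ib_umem_release_on_close(): subtract the pinned-page count from the
 * saved mm's locked_vm, then drop the mm reference taken with
 * get_task_mm().
 */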
static void ib_umem_account(void *work_ptr)
{
        struct ib_umem_account_work *work = work_ptr;

        down_write(&work->mm->mmap_sem);
        work->mm->locked_vm -= work->diff;
        up_write(&work->mm->mmap_sem);
        mmput(work->mm);
        kfree(work);
}

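/*
 * ib_umem_release_on_close() - release a pinned region from a file
 * release (close) path, where mmap_sem may already be held, so the
 * locked_vm accounting is deferred to the system workqueue.
 */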
void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem)
{
        struct ib_umem_account_work *work;
        struct mm_struct *mm;

        __ib_umem_release(dev, umem, 1);

        mm = get_task_mm(current);
        if (!mm)
                return;

        /*
         * We may be called with the mm's mmap_sem already held.  This
         * can happen when a userspace munmap() is the call that drops
         * the last reference to our file and calls our release
         * method.  If there are memory regions to destroy, we'll end
         * up here and not be able to take the mmap_sem.  Therefore we
         * defer the locked_vm accounting to the system workqueue.
         */

        work = kmalloc(sizeof *work, GFP_KERNEL);
        if (!work)
                return;

        INIT_WORK(&work->work, ib_umem_account, work);
        work->mm   = mm;
        work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;

        schedule_work(&work->work);
}