umem.c

/*
 * Copyright (c) 2005 Topspin Communications. All rights reserved.
 * Copyright (c) 2005 Cisco Systems. All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/sched.h>
#include <linux/export.h>
#include <linux/hugetlb.h>
#include <linux/dma-attrs.h>
#include <linux/slab.h>

#include "uverbs.h"

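/*
 * IB_UMEM_MAX_PAGE_CHUNK is the number of scatterlist entries that fit
 * in one page alongside the ib_umem_chunk header: the space left after
 * the header divided by the size of one page_list[] element. Purely as
 * an illustration, with 4 KiB pages, a 24-byte header and 32-byte
 * scatterlist entries that would be (4096 - 24) / 32 = 127 entries; the
 * real value depends on PAGE_SIZE and the scatterlist layout of the
 * build target.
 */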
#define IB_UMEM_MAX_PAGE_CHUNK \
        ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \
         ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \
          (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))

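/*
 * Unmap and release every page referenced by a umem. Each chunk's
 * scatterlist is DMA-unmapped, the underlying pages are marked dirty
 * (when the region is writable and @dirty is set) and unpinned, and
 * the chunk itself is freed.
 */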
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
        struct ib_umem_chunk *chunk, *tmp;
        int i;

        list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
                ib_dma_unmap_sg(dev, chunk->page_list,
                                chunk->nents, DMA_BIDIRECTIONAL);
                for (i = 0; i < chunk->nents; ++i) {
                        struct page *page = sg_page(&chunk->page_list[i]);

                        if (umem->writable && dirty)
                                set_page_dirty_lock(page);
                        put_page(page);
                }

                kfree(chunk);
        }
}

/**
 * ib_umem_get - Pin and DMA map userspace memory.
 * @context: userspace context to pin memory for
 * @addr: userspace virtual address to start at
 * @size: length of region to pin
 * @access: IB_ACCESS_xxx flags for memory being pinned
 * @dmasync: flush in-flight DMA when the memory region is written
 */
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
                            size_t size, int access, int dmasync)
{
        struct ib_umem *umem;
        struct page **page_list;
        struct vm_area_struct **vma_list;
        struct ib_umem_chunk *chunk;
        unsigned long locked;
        unsigned long lock_limit;
        unsigned long cur_base;
        unsigned long npages;
        int ret;
        int off;
        int i;
        DEFINE_DMA_ATTRS(attrs);

        if (dmasync)
                dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);

        if (!can_do_mlock())
                return ERR_PTR(-EPERM);

        umem = kmalloc(sizeof *umem, GFP_KERNEL);
        if (!umem)
                return ERR_PTR(-ENOMEM);

        umem->context   = context;
        umem->length    = size;
        umem->offset    = addr & ~PAGE_MASK;
        umem->page_size = PAGE_SIZE;
        /*
         * We ask for writable memory if any access flags other than
         * "remote read" are set. "Local write" and "remote write"
         * obviously require write access. "Remote atomic" can do
         * things like fetch and add, which will modify memory, and
         * "MW bind" can change permissions by binding a window.
         */
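        /*
         * For example, access == IB_ACCESS_REMOTE_READ (or access == 0)
         * leaves writable clear, while IB_ACCESS_LOCAL_WRITE |
         * IB_ACCESS_REMOTE_READ sets it.
         */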
        umem->writable  = !!(access & ~IB_ACCESS_REMOTE_READ);

        /* We assume the memory is from hugetlb until proved otherwise */
        umem->hugetlb   = 1;

        INIT_LIST_HEAD(&umem->chunk_list);

        page_list = (struct page **) __get_free_page(GFP_KERNEL);
        if (!page_list) {
                kfree(umem);
                return ERR_PTR(-ENOMEM);
        }

        /*
         * if we can't alloc the vma_list, it's not so bad;
         * just assume the memory is not hugetlb memory
         */
        vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL);
        if (!vma_list)
                umem->hugetlb = 0;

        npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;

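        /*
         * Charge the pages being pinned against the process's pinned
         * page count and refuse the request if it would exceed
         * RLIMIT_MEMLOCK, unless the caller has CAP_IPC_LOCK.
         */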
        down_write(&current->mm->mmap_sem);

        locked     = npages + current->mm->pinned_vm;
        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

        if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
                ret = -ENOMEM;
                goto out;
        }

        cur_base = addr & PAGE_MASK;

        ret = 0;
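        /*
         * Pin the user pages in batches of at most one page worth of
         * struct page pointers, then pack each batch into scatterlist
         * chunks of up to IB_UMEM_MAX_PAGE_CHUNK entries and DMA map
         * them for the device.
         */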
        while (npages) {
                ret = get_user_pages(current, current->mm, cur_base,
                                     min_t(unsigned long, npages,
                                           PAGE_SIZE / sizeof (struct page *)),
                                     1, !umem->writable, page_list, vma_list);

                if (ret < 0)
                        goto out;

                cur_base += ret * PAGE_SIZE;
                npages   -= ret;

                off = 0;

                while (ret) {
                        chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
                                        min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
                                        GFP_KERNEL);
                        if (!chunk) {
                                ret = -ENOMEM;
                                goto out;
                        }

                        chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
                        sg_init_table(chunk->page_list, chunk->nents);
                        for (i = 0; i < chunk->nents; ++i) {
                                if (vma_list &&
                                    !is_vm_hugetlb_page(vma_list[i + off]))
                                        umem->hugetlb = 0;
                                sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0);
                        }

                        chunk->nmap = ib_dma_map_sg_attrs(context->device,
                                                          &chunk->page_list[0],
                                                          chunk->nents,
                                                          DMA_BIDIRECTIONAL,
                                                          &attrs);
                        if (chunk->nmap <= 0) {
                                for (i = 0; i < chunk->nents; ++i)
                                        put_page(sg_page(&chunk->page_list[i]));
                                kfree(chunk);

                                ret = -ENOMEM;
                                goto out;
                        }

                        ret -= chunk->nents;
                        off += chunk->nents;
                        list_add_tail(&chunk->list, &umem->chunk_list);
                }

                ret = 0;
        }

out:
        if (ret < 0) {
                __ib_umem_release(context->device, umem, 0);
                kfree(umem);
        } else
                current->mm->pinned_vm = locked;

        up_write(&current->mm->mmap_sem);
        if (vma_list)
                free_page((unsigned long) vma_list);
        free_page((unsigned long) page_list);

        return ret < 0 ? ERR_PTR(ret) : umem;
}
EXPORT_SYMBOL(ib_umem_get);

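/*
 * Deferred-accounting work item: when ib_umem_release() cannot take
 * mmap_sem itself, this runs from the ib_wq workqueue to subtract the
 * pinned page count, drop the mm reference and free the umem.
 */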
static void ib_umem_account(struct work_struct *work)
{
        struct ib_umem *umem = container_of(work, struct ib_umem, work);

        down_write(&umem->mm->mmap_sem);
        umem->mm->pinned_vm -= umem->diff;
        up_write(&umem->mm->mmap_sem);
        mmput(umem->mm);
        kfree(umem);
}

/**
 * ib_umem_release - release memory pinned with ib_umem_get
 * @umem: umem struct to release
 */
void ib_umem_release(struct ib_umem *umem)
{
        struct ib_ucontext *context = umem->context;
        struct mm_struct *mm;
        unsigned long diff;

        __ib_umem_release(umem->context->device, umem, 1);

        mm = get_task_mm(current);
        if (!mm) {
                kfree(umem);
                return;
        }

        diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;

        /*
         * We may be called with the mm's mmap_sem already held. This
         * can happen when a userspace munmap() is the call that drops
         * the last reference to our file and calls our release
         * method. If there are memory regions to destroy, we'll end
         * up here and not be able to take the mmap_sem. In that case
         * we defer the vm_locked accounting to the system workqueue.
         */
        if (context->closing) {
                if (!down_write_trylock(&mm->mmap_sem)) {
                        INIT_WORK(&umem->work, ib_umem_account);
                        umem->mm   = mm;
                        umem->diff = diff;

                        queue_work(ib_wq, &umem->work);
                        return;
                }
        } else
                down_write(&mm->mmap_sem);

        current->mm->pinned_vm -= diff;
        up_write(&mm->mmap_sem);
        mmput(mm);
        kfree(umem);
}
EXPORT_SYMBOL(ib_umem_release);

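/*
 * Return the number of pages of size umem->page_size spanned by the
 * DMA-mapped region, computed from the mapped scatterlist lengths.
 */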
int ib_umem_page_count(struct ib_umem *umem)
{
        struct ib_umem_chunk *chunk;
        int shift;
        int i;
        int n;

        shift = ilog2(umem->page_size);

        n = 0;
        list_for_each_entry(chunk, &umem->chunk_list, list)
                for (i = 0; i < chunk->nmap; ++i)
                        n += sg_dma_len(&chunk->page_list[i]) >> shift;

        return n;
}
EXPORT_SYMBOL(ib_umem_page_count);
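
/*
 * Illustrative usage sketch (not from this file; names such as my_mr and
 * my_build_page_table() are hypothetical, and error handling is trimmed):
 * a verbs driver's memory-registration path typically pins the buffer with
 * ib_umem_get(), sizes its translation tables with ib_umem_page_count()
 * while walking the chunk scatterlists, and drops the pin again with
 * ib_umem_release().
 *
 *      struct ib_umem *umem;
 *      int npages, ret;
 *
 *      umem = ib_umem_get(pd->uobject->context, start, length,
 *                         access_flags, 0);
 *      if (IS_ERR(umem))
 *              return ERR_CAST(umem);
 *
 *      npages = ib_umem_page_count(umem);
 *      ret = my_build_page_table(my_mr, umem, npages);
 *      if (ret) {
 *              ib_umem_release(umem);
 *              return ERR_PTR(ret);
 *      }
 */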