Linux Kernel 3.7.1
privcmd.c
/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/miscdevice.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>

#include "privcmd.h"

MODULE_LICENSE("GPL");

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif

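/*
 * IOCTL_PRIVCMD_HYPERCALL: forward a hypercall from userspace to the
 * hypervisor unmodified; the op number and its five arguments are
 * copied in and handed to privcmd_call().
 */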
static long privcmd_ioctl_hypercall(void __user *udata)
{
	struct privcmd_hypercall hypercall;
	long ret;

	if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
		return -EFAULT;

	ret = privcmd_call(hypercall.op,
			   hypercall.arg[0], hypercall.arg[1],
			   hypercall.arg[2], hypercall.arg[3],
			   hypercall.arg[4]);

	return ret;
}

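/* Release every page on a list built up by gather_array() below. */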
static void free_page_list(struct list_head *pages)
{
	struct page *p, *n;

	list_for_each_entry_safe(p, n, pages, lru)
		__free_page(p);

	INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data. If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
			unsigned nelem, size_t size,
			const void __user *data)
{
	unsigned pageidx;
	void *pagedata;
	int ret;

	if (size > PAGE_SIZE)
		return 0;

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* quiet, gcc */
	while (nelem--) {
		if (pageidx > PAGE_SIZE-size) {
			struct page *page = alloc_page(GFP_KERNEL);

			ret = -ENOMEM;
			if (page == NULL)
				goto fail;

			pagedata = page_address(page);

			list_add_tail(&page->lru, pagelist);
			pageidx = 0;
		}

		ret = -EFAULT;
		if (copy_from_user(pagedata + pageidx, data, size))
			goto fail;

		data += size;
		pageidx += size;
	}

	ret = 0;

fail:
	return ret;
}

/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
			  struct list_head *pos,
			  int (*fn)(void *data, void *state),
			  void *state)
{
	void *pagedata;
	unsigned pageidx;
	int ret = 0;

	BUG_ON(size > PAGE_SIZE);

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* hush, gcc */

	while (nelem--) {
		if (pageidx > PAGE_SIZE-size) {
			struct page *page;
			pos = pos->next;
			page = list_entry(pos, struct page, lru);
			pagedata = page_address(page);
			pageidx = 0;
		}

		ret = (*fn)(pagedata + pageidx, state);
		if (ret)
			break;
		pageidx += size;
	}

	return ret;
}

struct mmap_mfn_state {
	unsigned long va;
	struct vm_area_struct *vma;
	domid_t domain;
};

static int mmap_mfn_range(void *data, void *state)
{
	struct privcmd_mmap_entry *msg = data;
	struct mmap_mfn_state *st = state;
	struct vm_area_struct *vma = st->vma;
	int rc;

	/* Do not allow range to wrap the address space. */
	if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
	    ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
		return -EINVAL;

	/* Range chunks must be contiguous in va space. */
	if ((msg->va != st->va) ||
	    ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
		return -EINVAL;

	rc = xen_remap_domain_mfn_range(vma,
					msg->va & PAGE_MASK,
					msg->mfn, msg->npages,
					vma->vm_page_prot,
					st->domain);
	if (rc < 0)
		return rc;

	st->va += msg->npages << PAGE_SHIFT;

	return 0;
}

static long privcmd_ioctl_mmap(void __user *udata)
{
	struct privcmd_mmap mmapcmd;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc;
	LIST_HEAD(pagelist);
	struct mmap_mfn_state state;

	if (!xen_initial_domain())
		return -EPERM;

	if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
		return -EFAULT;

	rc = gather_array(&pagelist,
			  mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			  mmapcmd.entry);

	if (rc || list_empty(&pagelist))
		goto out;

	down_write(&mm->mmap_sem);

	{
		struct page *page = list_first_entry(&pagelist,
						     struct page, lru);
		struct privcmd_mmap_entry *msg = page_address(page);

		vma = find_vma(mm, msg->va);
		rc = -EINVAL;

		if (!vma || (msg->va != vma->vm_start) ||
		    !privcmd_enforce_singleshot_mapping(vma))
			goto out_up;
	}

	state.va = vma->vm_start;
	state.vma = vma;
	state.domain = mmapcmd.dom;

	rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			    &pagelist,
			    mmap_mfn_range, &state);


out_up:
	up_write(&mm->mmap_sem);

out:
	free_page_list(&pagelist);

	return rc;
}

struct mmap_batch_state {
	domid_t domain;
	unsigned long va;
	struct vm_area_struct *vma;
	/* A tristate:
	 *      0 for no errors
	 *      1 if at least one error has happened (and no
	 *          -ENOENT errors have happened)
	 *      -ENOENT if at least 1 -ENOENT has happened.
	 */
	int global_error;
	/* An array for individual errors */
	int *err;

	/* User-space mfn array to store errors in the second pass for V1. */
	xen_pfn_t __user *user_mfn;
};

static int mmap_batch_fn(void *data, void *state)
{
	xen_pfn_t *mfnp = data;
	struct mmap_batch_state *st = state;
	int ret;

	ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
					 st->vma->vm_page_prot, st->domain);

	/* Store error code for second pass. */
	*(st->err++) = ret;

	/* And see if it affects the global_error. */
	if (ret < 0) {
		if (ret == -ENOENT)
			st->global_error = -ENOENT;
		else {
			/* Record that at least one error has happened. */
			if (st->global_error == 0)
				st->global_error = 1;
		}
	}
	st->va += PAGE_SIZE;

	return 0;
}

static int mmap_return_errors_v1(void *data, void *state)
{
	xen_pfn_t *mfnp = data;
	struct mmap_batch_state *st = state;
	int err = *(st->err++);

	/*
	 * V1 encodes the error codes in the 32bit top nibble of the
	 * mfn (with its known limitations vis-a-vis 64 bit callers).
	 */
	*mfnp |= (err == -ENOENT) ?
				PRIVCMD_MMAPBATCH_PAGED_ERROR :
				PRIVCMD_MMAPBATCH_MFN_ERROR;
	return __put_user(*mfnp, st->user_mfn++);
}

static struct vm_operations_struct privcmd_vm_ops;

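/*
 * Handles both the V1 and V2 flavours of IOCTL_PRIVCMD_MMAPBATCH.  The
 * frames are mapped in a first pass (mmap_batch_fn); per-frame errors
 * are then reported back in a second pass, either encoded into the top
 * bits of the user's mfn array (V1) or copied to the separate m.err
 * array (V2).
 */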
static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
	int ret;
	struct privcmd_mmapbatch_v2 m;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long nr_pages;
	LIST_HEAD(pagelist);
	int *err_array = NULL;
	struct mmap_batch_state state;

	if (!xen_initial_domain())
		return -EPERM;

	switch (version) {
	case 1:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
			return -EFAULT;
		/* Returns per-frame error in m.arr. */
		m.err = NULL;
		if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
			return -EFAULT;
		break;
	case 2:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
			return -EFAULT;
		/* Returns per-frame error code in m.err. */
		if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
			return -EFAULT;
		break;
	default:
		return -EINVAL;
	}

	nr_pages = m.num;
	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
		return -EINVAL;

	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

	if (ret)
		goto out;
	if (list_empty(&pagelist)) {
		ret = -EINVAL;
		goto out;
	}

	err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
	if (err_array == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	down_write(&mm->mmap_sem);

	vma = find_vma(mm, m.addr);
	if (!vma ||
	    vma->vm_ops != &privcmd_vm_ops ||
	    (m.addr != vma->vm_start) ||
	    ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
	    !privcmd_enforce_singleshot_mapping(vma)) {
		up_write(&mm->mmap_sem);
		ret = -EINVAL;
		goto out;
	}

	state.domain = m.dom;
	state.vma = vma;
	state.va = m.addr;
	state.global_error = 0;
	state.err = err_array;

	/* mmap_batch_fn guarantees ret == 0 */
	BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
			      &pagelist, mmap_batch_fn, &state));

	up_write(&mm->mmap_sem);

	if (version == 1) {
		if (state.global_error) {
			/* Write back errors in second pass. */
			state.user_mfn = (xen_pfn_t *)m.arr;
			state.err = err_array;
			ret = traverse_pages(m.num, sizeof(xen_pfn_t),
					     &pagelist, mmap_return_errors_v1, &state);
		} else
			ret = 0;

	} else if (version == 2) {
		ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
		if (ret)
			ret = -EFAULT;
	}

	/* If we have not had any EFAULT-like global errors then set the global
	 * error to -ENOENT if necessary. */
	if ((ret == 0) && (state.global_error == -ENOENT))
		ret = -ENOENT;

out:
	kfree(err_array);
	free_page_list(&pagelist);

	return ret;
}

static long privcmd_ioctl(struct file *file,
			  unsigned int cmd, unsigned long data)
{
	int ret = -ENOSYS;
	void __user *udata = (void __user *) data;

	switch (cmd) {
	case IOCTL_PRIVCMD_HYPERCALL:
		ret = privcmd_ioctl_hypercall(udata);
		break;

	case IOCTL_PRIVCMD_MMAP:
		ret = privcmd_ioctl_mmap(udata);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH:
		ret = privcmd_ioctl_mmap_batch(udata, 1);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH_V2:
		ret = privcmd_ioctl_mmap_batch(udata, 2);
		break;

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

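/*
 * privcmd VMAs are populated only by the ioctls above, never on demand;
 * a fault therefore means userspace touched an address that was never
 * mapped, and gets SIGBUS.
 */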
static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
	       vma, vma->vm_start, vma->vm_end,
	       vmf->pgoff, vmf->virtual_address);

	return VM_FAULT_SIGBUS;
}

static struct vm_operations_struct privcmd_vm_ops = {
	.fault = privcmd_fault
};

static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
	 * how to recreate these mappings */
	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
			 VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &privcmd_vm_ops;
	vma->vm_private_data = NULL;

	return 0;
}

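/*
 * Atomically claim the VMA: xchg() returns the previous value of
 * vm_private_data, so this succeeds exactly once per VMA and any
 * repeated mapping attempt on the same range is rejected.
 */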
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
	return (xchg(&vma->vm_private_data, (void *)1) == NULL);
}

const struct file_operations xen_privcmd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = privcmd_ioctl,
	.mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);

static struct miscdevice privcmd_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "xen/privcmd",
	.fops = &xen_privcmd_fops,
};

static int __init privcmd_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	err = misc_register(&privcmd_dev);
	if (err != 0) {
		printk(KERN_ERR "Could not register Xen privcmd device\n");
		return err;
	}
	return 0;
}

static void __exit privcmd_exit(void)
{
	misc_deregister(&privcmd_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);
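
For context, a minimal sketch of how a domain-0 userspace program might drive this driver: open the misc device registered above (it appears as /dev/xen/privcmd) and issue IOCTL_PRIVCMD_HYPERCALL. This example is not part of the file; it assumes the exported <xen/privcmd.h> UAPI header is installed, and uses the harmless __HYPERVISOR_xen_version hypercall (op 17, command XENVER_version = 0) as a smoke test.

/* Hypothetical userspace sketch, assuming <xen/privcmd.h> is available. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <xen/privcmd.h>	/* IOCTL_PRIVCMD_HYPERCALL, struct privcmd_hypercall */

int main(void)
{
	struct privcmd_hypercall call;
	int fd, ret;

	fd = open("/dev/xen/privcmd", O_RDWR);	/* misc device "xen/privcmd" */
	if (fd < 0) {
		perror("open /dev/xen/privcmd");
		return 1;
	}

	memset(&call, 0, sizeof(call));
	call.op = 17;		/* __HYPERVISOR_xen_version */
	call.arg[0] = 0;	/* XENVER_version: returns major << 16 | minor */

	ret = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &call);
	if (ret < 0)
		perror("IOCTL_PRIVCMD_HYPERCALL");
	else
		printf("Xen version %d.%d\n", ret >> 16, ret & 0xffff);

	close(fd);
	return 0;
}

Note that privcmd_ioctl_hypercall() performs no validation of the hypercall itself; the hypervisor rejects operations the calling domain is not privileged to make, which is why the driver only checks xen_initial_domain() for the mapping ioctls.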