Linux Kernel 3.7.1
kernel/user_namespace.c
/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, version 2 of the
 * License.
 */

#include <linux/export.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/user_namespace.h>
#include <linux/highuid.h>
#include <linux/cred.h>
#include <linux/securebits.h>
#include <linux/keyctl.h>
#include <linux/key-type.h>
#include <keys/user-type.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/projid.h>

static struct kmem_cache *user_ns_cachep __read_mostly;

static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
                                struct uid_gid_map *map);
/*
 * Create a new user namespace, deriving the creator from the user in the
 * passed credentials, and replacing that user with the new root user for the
 * new namespace.
 *
 * This is called by copy_creds(), which will finish setting the target task's
 * credentials.
 */
int create_user_ns(struct cred *new)
{
        struct user_namespace *ns, *parent_ns = new->user_ns;
        kuid_t owner = new->euid;
        kgid_t group = new->egid;

        /* The creator needs a mapping in the parent user namespace
         * or else we won't be able to reasonably tell userspace who
         * created a user_namespace.
         */
        if (!kuid_has_mapping(parent_ns, owner) ||
            !kgid_has_mapping(parent_ns, group))
                return -EPERM;

        ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
        if (!ns)
                return -ENOMEM;

        kref_init(&ns->kref);
        ns->parent = parent_ns;
        ns->owner = owner;
        ns->group = group;

        /* Start with the same capabilities as init but useless for doing
         * anything as the capabilities are bound to the new user namespace.
         */
        new->securebits = SECUREBITS_DEFAULT;
        new->cap_inheritable = CAP_EMPTY_SET;
        new->cap_permitted = CAP_FULL_SET;
        new->cap_effective = CAP_FULL_SET;
        new->cap_bset = CAP_FULL_SET;
#ifdef CONFIG_KEYS
        key_put(new->request_key_auth);
        new->request_key_auth = NULL;
#endif
        /* tgcred will be cleared in our caller because CLONE_THREAD won't be set */

        /* The reference that new->user_ns held on parent_ns now lives in
         * ns->parent; new->user_ns takes the initial reference to ns.
         */
        new->user_ns = ns;

        return 0;
}
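
/*
 * Example (hypothetical userspace demo, not part of this file): the path
 * above runs when a task is cloned with CLONE_NEWUSER. Note that on 3.7
 * creating a user namespace still requires privilege (CAP_SYS_ADMIN,
 * CAP_SETUID and CAP_SETGID); unshare(2) support for CLONE_NEWUSER came
 * in a later release.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

static char child_stack[64 * 1024];

static int child(void *arg)
{
        /* No uid_map has been written yet, so ids appear as the
         * overflow uid (typically 65534). */
        printf("euid inside new userns: %d\n", (int)geteuid());
        return 0;
}

int main(void)
{
        pid_t pid = clone(child, child_stack + sizeof(child_stack),
                          CLONE_NEWUSER | SIGCHLD, NULL);
        if (pid < 0) {
                perror("clone(CLONE_NEWUSER)");
                return 1;
        }
        waitpid(pid, NULL, 0);
        return 0;
}
/* --- end example --- */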

void free_user_ns(struct kref *kref)
{
        struct user_namespace *parent, *ns =
                container_of(kref, struct user_namespace, kref);

        parent = ns->parent;
        kmem_cache_free(user_ns_cachep, ns);
        put_user_ns(parent);
}
EXPORT_SYMBOL(free_user_ns);

static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
{
        unsigned idx, extents;
        u32 first, last, id2;

        id2 = id + count - 1;

        /* Find the matching extent */
        extents = map->nr_extents;
        smp_read_barrier_depends();
        for (idx = 0; idx < extents; idx++) {
                first = map->extent[idx].first;
                last = first + map->extent[idx].count - 1;
                if (id >= first && id <= last &&
                    (id2 >= first && id2 <= last))
                        break;
        }
        /* Map the id or note failure */
        if (idx < extents)
                id = (id - first) + map->extent[idx].lower_first;
        else
                id = (u32) -1;

        return id;
}

static u32 map_id_down(struct uid_gid_map *map, u32 id)
{
        unsigned idx, extents;
        u32 first, last;

        /* Find the matching extent */
        extents = map->nr_extents;
        smp_read_barrier_depends();
        for (idx = 0; idx < extents; idx++) {
                first = map->extent[idx].first;
                last = first + map->extent[idx].count - 1;
                if (id >= first && id <= last)
                        break;
        }
        /* Map the id or note failure */
        if (idx < extents)
                id = (id - first) + map->extent[idx].lower_first;
        else
                id = (u32) -1;

        return id;
}

static u32 map_id_up(struct uid_gid_map *map, u32 id)
{
        unsigned idx, extents;
        u32 first, last;

        /* Find the matching extent */
        extents = map->nr_extents;
        smp_read_barrier_depends();
        for (idx = 0; idx < extents; idx++) {
                first = map->extent[idx].lower_first;
                last = first + map->extent[idx].count - 1;
                if (id >= first && id <= last)
                        break;
        }
        /* Map the id or note failure */
        if (idx < extents)
                id = (id - first) + map->extent[idx].first;
        else
                id = (u32) -1;

        return id;
}
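
/*
 * Example (standalone sketch with hypothetical values, not part of this
 * file): each uid_gid_extent maps the namespace-local range
 * [first, first + count) onto [lower_first, lower_first + count).
 * map_id_down() translates toward the lower/global ids; map_id_up() is
 * its inverse. A userspace re-implementation of the downward lookup:
 */
#include <stdint.h>
#include <stdio.h>

struct demo_extent { uint32_t first, lower_first, count; };

static uint32_t demo_map_down(const struct demo_extent *e, unsigned int n,
                              uint32_t id)
{
        unsigned int i;

        for (i = 0; i < n; i++) {
                uint32_t last = e[i].first + e[i].count - 1;

                if (id >= e[i].first && id <= last)
                        return (id - e[i].first) + e[i].lower_first;
        }
        return (uint32_t)-1; /* unmapped: the kernel returns (u32)-1 too */
}

int main(void)
{
        /* Corresponds to a map line of "0 100000 65536" */
        struct demo_extent map[] = { { 0, 100000, 65536 } };

        printf("%u\n", demo_map_down(map, 1, 0));     /* 100000 */
        printf("%u\n", demo_map_down(map, 1, 1000));  /* 101000 */
        printf("%u\n", demo_map_down(map, 1, 70000)); /* 4294967295, unmapped */
        return 0;
}
/* --- end example --- */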

kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
{
        /* Map the uid to a global kernel uid */
        return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
}
EXPORT_SYMBOL(make_kuid);

uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
{
        /* Map the uid from a global kernel uid */
        return map_id_up(&targ->uid_map, __kuid_val(kuid));
}
EXPORT_SYMBOL(from_kuid);

uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
{
        uid_t uid;
        uid = from_kuid(targ, kuid);

        if (uid == (uid_t) -1)
                uid = overflowuid;
        return uid;
}
EXPORT_SYMBOL(from_kuid_munged);

kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
{
        /* Map the gid to a global kernel gid */
        return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
}
EXPORT_SYMBOL(make_kgid);

gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
{
        /* Map the gid from a global kernel gid */
        return map_id_up(&targ->gid_map, __kgid_val(kgid));
}
EXPORT_SYMBOL(from_kgid);

gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
{
        gid_t gid;
        gid = from_kgid(targ, kgid);

        if (gid == (gid_t) -1)
                gid = overflowgid;
        return gid;
}
EXPORT_SYMBOL(from_kgid_munged);

kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
{
        /* Map the project id to a global kernel projid */
        return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
}
EXPORT_SYMBOL(make_kprojid);

projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
{
        /* Map the project id from a global kernel projid */
        return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
}
EXPORT_SYMBOL(from_kprojid);

projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
{
        projid_t projid;
        projid = from_kprojid(targ, kprojid);

        if (projid == (projid_t) -1)
                projid = OVERFLOW_PROJID;
        return projid;
}
EXPORT_SYMBOL(from_kprojid_munged);
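
/*
 * Example (hedged in-kernel usage sketch with illustrative names, not
 * from this file): the helpers above follow one pattern. make_k*()
 * turns a namespace-local id into a kernel-internal value, from_k*()
 * converts back, and the *_munged() variants substitute the overflow
 * id so the result is always safe to hand to userspace.
 */
static uid_t example_uid_for_userspace(struct user_namespace *user_ns,
                                       uid_t raw_uid)
{
        /* Treat the raw id as global, i.e. relative to init_user_ns. */
        kuid_t kuid = make_kuid(&init_user_ns, raw_uid);

        /* from_kuid() would return (uid_t)-1 when user_ns has no mapping;
         * the munged variant substitutes overflowuid (usually 65534). */
        return from_kuid_munged(user_ns, kuid);
}
/* --- end example --- */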

static int uid_m_show(struct seq_file *seq, void *v)
{
        struct user_namespace *ns = seq->private;
        struct uid_gid_extent *extent = v;
        struct user_namespace *lower_ns;
        uid_t lower;

        lower_ns = current_user_ns();
        if ((lower_ns == ns) && lower_ns->parent)
                lower_ns = lower_ns->parent;

        lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));

        seq_printf(seq, "%10u %10u %10u\n",
                extent->first,
                lower,
                extent->count);

        return 0;
}

static int gid_m_show(struct seq_file *seq, void *v)
{
        struct user_namespace *ns = seq->private;
        struct uid_gid_extent *extent = v;
        struct user_namespace *lower_ns;
        gid_t lower;

        lower_ns = current_user_ns();
        if ((lower_ns == ns) && lower_ns->parent)
                lower_ns = lower_ns->parent;

        lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));

        seq_printf(seq, "%10u %10u %10u\n",
                extent->first,
                lower,
                extent->count);

        return 0;
}

static int projid_m_show(struct seq_file *seq, void *v)
{
        struct user_namespace *ns = seq->private;
        struct uid_gid_extent *extent = v;
        struct user_namespace *lower_ns;
        projid_t lower;

        lower_ns = seq_user_ns(seq);
        if ((lower_ns == ns) && lower_ns->parent)
                lower_ns = lower_ns->parent;

        lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first));

        seq_printf(seq, "%10u %10u %10u\n",
                extent->first,
                lower,
                extent->count);

        return 0;
}

static void *m_start(struct seq_file *seq, loff_t *ppos, struct uid_gid_map *map)
{
        struct uid_gid_extent *extent = NULL;
        loff_t pos = *ppos;

        if (pos < map->nr_extents)
                extent = &map->extent[pos];

        return extent;
}

static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
{
        struct user_namespace *ns = seq->private;

        return m_start(seq, ppos, &ns->uid_map);
}

static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
{
        struct user_namespace *ns = seq->private;

        return m_start(seq, ppos, &ns->gid_map);
}

static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
{
        struct user_namespace *ns = seq->private;

        return m_start(seq, ppos, &ns->projid_map);
}

static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
{
        (*pos)++;
        return seq->op->start(seq, pos);
}

static void m_stop(struct seq_file *seq, void *v)
{
        return;
}

const struct seq_operations proc_uid_seq_operations = {
        .start = uid_m_start,
        .stop = m_stop,
        .next = m_next,
        .show = uid_m_show,
};

const struct seq_operations proc_gid_seq_operations = {
        .start = gid_m_start,
        .stop = m_stop,
        .next = m_next,
        .show = gid_m_show,
};

const struct seq_operations proc_projid_seq_operations = {
        .start = projid_m_start,
        .stop = m_stop,
        .next = m_next,
        .show = projid_m_show,
};
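
/*
 * Example (hypothetical userspace reader, not part of this file): these
 * seq_operations back /proc/<pid>/{uid,gid,projid}_map, where each
 * extent is printed as a "first lower count" triple by the *_m_show()
 * helpers above.
 */
#include <stdio.h>

int main(void)
{
        unsigned int first, lower, count;
        FILE *f = fopen("/proc/self/uid_map", "r");

        if (!f) {
                perror("fopen");
                return 1;
        }
        /* Matches the "%10u %10u %10u\n" format used by uid_m_show() */
        while (fscanf(f, "%u %u %u", &first, &lower, &count) == 3)
                printf("ns uids %u..%u map to outer uids %u..%u\n",
                       first, first + count - 1,
                       lower, lower + count - 1);
        fclose(f);
        return 0;
}
/* --- end example --- */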

static DEFINE_MUTEX(id_map_mutex);

static ssize_t map_write(struct file *file, const char __user *buf,
                         size_t count, loff_t *ppos,
                         int cap_setid,
                         struct uid_gid_map *map,
                         struct uid_gid_map *parent_map)
{
        struct seq_file *seq = file->private_data;
        struct user_namespace *ns = seq->private;
        struct uid_gid_map new_map;
        unsigned idx;
        struct uid_gid_extent *extent, *last = NULL;
        unsigned long page = 0;
        char *kbuf, *pos, *next_line;
        ssize_t ret = -EINVAL;

        /*
         * The id_map_mutex serializes all writes to any given map.
         *
         * Any map is only ever written once.
         *
         * An id map fits within 1 cache line on most architectures.
         *
         * On read nothing needs to be done unless you are on an
         * architecture with a crazy cache coherency model like Alpha.
         *
         * There is a one time data dependency between reading the
         * count of the extents and the values of the extents.  The
         * desired behavior is to see the values of the extents that
         * were written before the count of the extents.
         *
         * To achieve this, smp_wmb() is used to guarantee the write
         * order, and smp_read_barrier_depends() guarantees that we
         * don't have crazy architectures returning stale data.
         */
        mutex_lock(&id_map_mutex);

        ret = -EPERM;
        /* Only allow one successful write to the map */
        if (map->nr_extents != 0)
                goto out;

        /* Require the appropriate privilege (CAP_SETUID or CAP_SETGID)
         * over the user namespace in order to set the id mapping.
         */
        if (cap_valid(cap_setid) && !ns_capable(ns, cap_setid))
                goto out;

        /* Get a buffer */
        ret = -ENOMEM;
        page = __get_free_page(GFP_TEMPORARY);
        kbuf = (char *) page;
        if (!page)
                goto out;

        /* Only allow <= page size writes at the beginning of the file */
        ret = -EINVAL;
        if ((*ppos != 0) || (count >= PAGE_SIZE))
                goto out;

        /* Slurp in the user data */
        ret = -EFAULT;
        if (copy_from_user(kbuf, buf, count))
                goto out;
        kbuf[count] = '\0';

        /* Parse the user data */
        ret = -EINVAL;
        pos = kbuf;
        new_map.nr_extents = 0;
        for (; pos; pos = next_line) {
                extent = &new_map.extent[new_map.nr_extents];

                /* Find the end of line and ensure I don't look past it */
                next_line = strchr(pos, '\n');
                if (next_line) {
                        *next_line = '\0';
                        next_line++;
                        if (*next_line == '\0')
                                next_line = NULL;
                }

                pos = skip_spaces(pos);
                extent->first = simple_strtoul(pos, &pos, 10);
                if (!isspace(*pos))
                        goto out;

                pos = skip_spaces(pos);
                extent->lower_first = simple_strtoul(pos, &pos, 10);
                if (!isspace(*pos))
                        goto out;

                pos = skip_spaces(pos);
                extent->count = simple_strtoul(pos, &pos, 10);
                if (*pos && !isspace(*pos))
                        goto out;

                /* Verify there is no trailing junk on the line */
                pos = skip_spaces(pos);
                if (*pos != '\0')
                        goto out;

                /* Verify we have been given valid starting values */
                if ((extent->first == (u32) -1) ||
                    (extent->lower_first == (u32) -1))
                        goto out;

                /* Verify count is not zero and does not cause the extent to wrap */
                if ((extent->first + extent->count) <= extent->first)
                        goto out;
                if ((extent->lower_first + extent->count) <= extent->lower_first)
                        goto out;

                /* For now only accept extents that are strictly in order */
                if (last &&
                    (((last->first + last->count) > extent->first) ||
                     ((last->lower_first + last->count) > extent->lower_first)))
                        goto out;

                new_map.nr_extents++;
                last = extent;

                /* Fail if the file contains too many extents */
                if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
                    (next_line != NULL))
                        goto out;
        }
        /* Be very certain the new map actually exists */
        if (new_map.nr_extents == 0)
                goto out;

        ret = -EPERM;
        /* Validate that the caller is allowed to use the ids being mapped to. */
        if (!new_idmap_permitted(ns, cap_setid, &new_map))
                goto out;

        /* Map the lower ids from the parent user namespace to the
         * kernel global id space.
         */
        for (idx = 0; idx < new_map.nr_extents; idx++) {
                u32 lower_first;
                extent = &new_map.extent[idx];

                lower_first = map_id_range_down(parent_map,
                                                extent->lower_first,
                                                extent->count);

                /* Fail if we can not map the specified extent to
                 * the kernel global id space.
                 */
                if (lower_first == (u32) -1)
                        goto out;

                extent->lower_first = lower_first;
        }

        /* Install the map */
        memcpy(map->extent, new_map.extent,
               new_map.nr_extents * sizeof(new_map.extent[0]));
        smp_wmb();
        map->nr_extents = new_map.nr_extents;

        *ppos = count;
        ret = count;
out:
        mutex_unlock(&id_map_mutex);
        if (page)
                free_page(page);
        return ret;
}
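
/*
 * Example (hypothetical userspace helper, not part of this file):
 * map_write() accepts exactly one successful write of at most one page,
 * with one "first lower_first count" line per extent, extents in
 * strictly increasing order, and at most UID_GID_MAP_MAX_EXTENTS
 * entries. The whole map must therefore be installed in a single
 * write(2) starting at offset 0.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

static int write_uid_map(pid_t pid, const char *map)
{
        char path[64];
        int fd, ret = -1;
        size_t len = strlen(map);

        snprintf(path, sizeof(path), "/proc/%d/uid_map", (int)pid);
        fd = open(path, O_WRONLY);
        if (fd < 0)
                return -1;
        if (write(fd, map, len) == (ssize_t)len)
                ret = 0;
        close(fd);
        return ret;
}

/* Usage: write_uid_map(child_pid, "0 100000 65536\n"); maps uids
 * 0..65535 in the child's namespace to 100000..165535 outside. */
/* --- end example --- */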

ssize_t proc_uid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos)
{
        struct seq_file *seq = file->private_data;
        struct user_namespace *ns = seq->private;

        if (!ns->parent)
                return -EPERM;

        return map_write(file, buf, size, ppos, CAP_SETUID,
                         &ns->uid_map, &ns->parent->uid_map);
}

ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos)
{
        struct seq_file *seq = file->private_data;
        struct user_namespace *ns = seq->private;

        if (!ns->parent)
                return -EPERM;

        return map_write(file, buf, size, ppos, CAP_SETGID,
                         &ns->gid_map, &ns->parent->gid_map);
}

ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos)
{
        struct seq_file *seq = file->private_data;
        struct user_namespace *ns = seq->private;
        struct user_namespace *seq_ns = seq_user_ns(seq);

        if (!ns->parent)
                return -EPERM;

        if ((seq_ns != ns) && (seq_ns != ns->parent))
                return -EPERM;

        /* Anyone can set any valid project id; no capability needed */
        return map_write(file, buf, size, ppos, -1,
                         &ns->projid_map, &ns->parent->projid_map);
}

static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
                                struct uid_gid_map *new_map)
{
        /* Allow anyone to set a mapping that doesn't require privilege */
        if (!cap_valid(cap_setid))
                return true;

        /* Allow the specified ids if we have the appropriate capability
         * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
         */
        if (ns_capable(ns->parent, cap_setid))
                return true;

        return false;
}

static __init int user_namespaces_init(void)
{
        user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
        return 0;
}
module_init(user_namespaces_init);