Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
fcntl.c
Go to the documentation of this file.
1 /*
2  * linux/fs/fcntl.c
3  *
4  * Copyright (C) 1991, 1992 Linus Torvalds
5  */
6 
7 #include <linux/syscalls.h>
8 #include <linux/init.h>
9 #include <linux/mm.h>
10 #include <linux/fs.h>
11 #include <linux/file.h>
12 #include <linux/fdtable.h>
13 #include <linux/capability.h>
14 #include <linux/dnotify.h>
15 #include <linux/slab.h>
16 #include <linux/module.h>
17 #include <linux/pipe_fs_i.h>
18 #include <linux/security.h>
19 #include <linux/ptrace.h>
20 #include <linux/signal.h>
21 #include <linux/rcupdate.h>
22 #include <linux/pid_namespace.h>
23 #include <linux/user_namespace.h>
24 
25 #include <asm/poll.h>
26 #include <asm/siginfo.h>
27 #include <asm/uaccess.h>
28 
29 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
30 
31 static int setfl(int fd, struct file * filp, unsigned long arg)
32 {
33  struct inode * inode = filp->f_path.dentry->d_inode;
34  int error = 0;
35 
36  /*
37  * O_APPEND cannot be cleared if the file is marked as append-only
38  * and the file is open for write.
39  */
40  if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
41  return -EPERM;
42 
43  /* O_NOATIME can only be set by the owner or superuser */
44  if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
45  if (!inode_owner_or_capable(inode))
46  return -EPERM;
47 
48  /* required for strict SunOS emulation */
49  if (O_NONBLOCK != O_NDELAY)
50  if (arg & O_NDELAY)
51  arg |= O_NONBLOCK;
52 
53  if (arg & O_DIRECT) {
54  if (!filp->f_mapping || !filp->f_mapping->a_ops ||
55  !filp->f_mapping->a_ops->direct_IO)
56  return -EINVAL;
57  }
58 
59  if (filp->f_op && filp->f_op->check_flags)
60  error = filp->f_op->check_flags(arg);
61  if (error)
62  return error;
63 
64  /*
65  * ->fasync() is responsible for setting the FASYNC bit.
66  */
67  if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op &&
68  filp->f_op->fasync) {
69  error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
70  if (error < 0)
71  goto out;
72  if (error > 0)
73  error = 0;
74  }
75  spin_lock(&filp->f_lock);
76  filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
77  spin_unlock(&filp->f_lock);
78 
79  out:
80  return error;
81 }
82 
83 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
84  int force)
85 {
86  write_lock_irq(&filp->f_owner.lock);
87  if (force || !filp->f_owner.pid) {
88  put_pid(filp->f_owner.pid);
89  filp->f_owner.pid = get_pid(pid);
90  filp->f_owner.pid_type = type;
91 
92  if (pid) {
93  const struct cred *cred = current_cred();
94  filp->f_owner.uid = cred->uid;
95  filp->f_owner.euid = cred->euid;
96  }
97  }
98  write_unlock_irq(&filp->f_owner.lock);
99 }
100 
101 int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
102  int force)
103 {
104  int err;
105 
106  err = security_file_set_fowner(filp);
107  if (err)
108  return err;
109 
110  f_modown(filp, pid, type, force);
111  return 0;
112 }
114 
115 int f_setown(struct file *filp, unsigned long arg, int force)
116 {
117  enum pid_type type;
118  struct pid *pid;
119  int who = arg;
120  int result;
121  type = PIDTYPE_PID;
122  if (who < 0) {
123  type = PIDTYPE_PGID;
124  who = -who;
125  }
126  rcu_read_lock();
127  pid = find_vpid(who);
128  result = __f_setown(filp, pid, type, force);
129  rcu_read_unlock();
130  return result;
131 }
133 
134 void f_delown(struct file *filp)
135 {
136  f_modown(filp, NULL, PIDTYPE_PID, 1);
137 }
138 
139 pid_t f_getown(struct file *filp)
140 {
141  pid_t pid;
142  read_lock(&filp->f_owner.lock);
143  pid = pid_vnr(filp->f_owner.pid);
144  if (filp->f_owner.pid_type == PIDTYPE_PGID)
145  pid = -pid;
146  read_unlock(&filp->f_owner.lock);
147  return pid;
148 }
149 
150 static int f_setown_ex(struct file *filp, unsigned long arg)
151 {
152  struct f_owner_ex __user *owner_p = (void __user *)arg;
153  struct f_owner_ex owner;
154  struct pid *pid;
155  int type;
156  int ret;
157 
158  ret = copy_from_user(&owner, owner_p, sizeof(owner));
159  if (ret)
160  return -EFAULT;
161 
162  switch (owner.type) {
163  case F_OWNER_TID:
164  type = PIDTYPE_MAX;
165  break;
166 
167  case F_OWNER_PID:
168  type = PIDTYPE_PID;
169  break;
170 
171  case F_OWNER_PGRP:
172  type = PIDTYPE_PGID;
173  break;
174 
175  default:
176  return -EINVAL;
177  }
178 
179  rcu_read_lock();
180  pid = find_vpid(owner.pid);
181  if (owner.pid && !pid)
182  ret = -ESRCH;
183  else
184  ret = __f_setown(filp, pid, type, 1);
185  rcu_read_unlock();
186 
187  return ret;
188 }
189 
190 static int f_getown_ex(struct file *filp, unsigned long arg)
191 {
192  struct f_owner_ex __user *owner_p = (void __user *)arg;
193  struct f_owner_ex owner;
194  int ret = 0;
195 
196  read_lock(&filp->f_owner.lock);
197  owner.pid = pid_vnr(filp->f_owner.pid);
198  switch (filp->f_owner.pid_type) {
199  case PIDTYPE_MAX:
200  owner.type = F_OWNER_TID;
201  break;
202 
203  case PIDTYPE_PID:
204  owner.type = F_OWNER_PID;
205  break;
206 
207  case PIDTYPE_PGID:
208  owner.type = F_OWNER_PGRP;
209  break;
210 
211  default:
212  WARN_ON(1);
213  ret = -EINVAL;
214  break;
215  }
216  read_unlock(&filp->f_owner.lock);
217 
218  if (!ret) {
219  ret = copy_to_user(owner_p, &owner, sizeof(owner));
220  if (ret)
221  ret = -EFAULT;
222  }
223  return ret;
224 }
225 
226 #ifdef CONFIG_CHECKPOINT_RESTORE
227 static int f_getowner_uids(struct file *filp, unsigned long arg)
228 {
229  struct user_namespace *user_ns = current_user_ns();
230  uid_t __user *dst = (void __user *)arg;
231  uid_t src[2];
232  int err;
233 
234  read_lock(&filp->f_owner.lock);
235  src[0] = from_kuid(user_ns, filp->f_owner.uid);
236  src[1] = from_kuid(user_ns, filp->f_owner.euid);
237  read_unlock(&filp->f_owner.lock);
238 
239  err = put_user(src[0], &dst[0]);
240  err |= put_user(src[1], &dst[1]);
241 
242  return err;
243 }
244 #else
245 static int f_getowner_uids(struct file *filp, unsigned long arg)
246 {
247  return -EINVAL;
248 }
249 #endif
250 
251 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
252  struct file *filp)
253 {
254  long err = -EINVAL;
255 
256  switch (cmd) {
257  case F_DUPFD:
258  err = f_dupfd(arg, filp, 0);
259  break;
260  case F_DUPFD_CLOEXEC:
261  err = f_dupfd(arg, filp, O_CLOEXEC);
262  break;
263  case F_GETFD:
264  err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
265  break;
266  case F_SETFD:
267  err = 0;
268  set_close_on_exec(fd, arg & FD_CLOEXEC);
269  break;
270  case F_GETFL:
271  err = filp->f_flags;
272  break;
273  case F_SETFL:
274  err = setfl(fd, filp, arg);
275  break;
276  case F_GETLK:
277  err = fcntl_getlk(filp, (struct flock __user *) arg);
278  break;
279  case F_SETLK:
280  case F_SETLKW:
281  err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
282  break;
283  case F_GETOWN:
284  /*
285  * XXX If f_owner is a process group, the
286  * negative return value will get converted
287  * into an error. Oops. If we keep the
288  * current syscall conventions, the only way
289  * to fix this will be in libc.
290  */
291  err = f_getown(filp);
293  break;
294  case F_SETOWN:
295  err = f_setown(filp, arg, 1);
296  break;
297  case F_GETOWN_EX:
298  err = f_getown_ex(filp, arg);
299  break;
300  case F_SETOWN_EX:
301  err = f_setown_ex(filp, arg);
302  break;
303  case F_GETOWNER_UIDS:
304  err = f_getowner_uids(filp, arg);
305  break;
306  case F_GETSIG:
307  err = filp->f_owner.signum;
308  break;
309  case F_SETSIG:
310  /* arg == 0 restores default behaviour. */
311  if (!valid_signal(arg)) {
312  break;
313  }
314  err = 0;
315  filp->f_owner.signum = arg;
316  break;
317  case F_GETLEASE:
318  err = fcntl_getlease(filp);
319  break;
320  case F_SETLEASE:
321  err = fcntl_setlease(fd, filp, arg);
322  break;
323  case F_NOTIFY:
324  err = fcntl_dirnotify(fd, filp, arg);
325  break;
326  case F_SETPIPE_SZ:
327  case F_GETPIPE_SZ:
328  err = pipe_fcntl(filp, cmd, arg);
329  break;
330  default:
331  break;
332  }
333  return err;
334 }
335 
336 static int check_fcntl_cmd(unsigned cmd)
337 {
338  switch (cmd) {
339  case F_DUPFD:
340  case F_DUPFD_CLOEXEC:
341  case F_GETFD:
342  case F_SETFD:
343  case F_GETFL:
344  return 1;
345  }
346  return 0;
347 }
348 
349 SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
350 {
351  struct fd f = fdget_raw(fd);
352  long err = -EBADF;
353 
354  if (!f.file)
355  goto out;
356 
357  if (unlikely(f.file->f_mode & FMODE_PATH)) {
358  if (!check_fcntl_cmd(cmd))
359  goto out1;
360  }
361 
362  err = security_file_fcntl(f.file, cmd, arg);
363  if (!err)
364  err = do_fcntl(fd, cmd, arg, f.file);
365 
366 out1:
367  fdput(f);
368 out:
369  return err;
370 }
371 
#if BITS_PER_LONG == 32
/*
 * fcntl64(2) entry point, built only when long is 32 bits wide.
 *
 * Performs the same descriptor, FMODE_PATH and security checks as
 * fcntl(), handles the three 64-bit lock commands itself and passes
 * every other command on to do_fcntl().
 */
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		unsigned long, arg)
{
	struct fd f = fdget_raw(fd);
	long err;

	if (!f.file)
		return -EBADF;

	err = -EBADF;
	if (unlikely(f.file->f_mode & FMODE_PATH) && !check_fcntl_cmd(cmd))
		goto out_put;

	err = security_file_fcntl(f.file, cmd, arg);
	if (err)
		goto out_put;

	if (cmd == F_GETLK64)
		err = fcntl_getlk64(f.file, (struct flock64 __user *) arg);
	else if (cmd == F_SETLK64 || cmd == F_SETLKW64)
		err = fcntl_setlk64(fd, f.file, cmd,
				(struct flock64 __user *) arg);
	else
		err = do_fcntl(fd, cmd, arg, f.file);

out_put:
	fdput(f);
	return err;
}
#endif
410 
411 /* Table to convert sigio signal codes into poll band bitmaps */
412 
413 static const long band_table[NSIGPOLL] = {
414  POLLIN | POLLRDNORM, /* POLL_IN */
415  POLLOUT | POLLWRNORM | POLLWRBAND, /* POLL_OUT */
416  POLLIN | POLLRDNORM | POLLMSG, /* POLL_MSG */
417  POLLERR, /* POLL_ERR */
418  POLLPRI | POLLRDBAND, /* POLL_PRI */
419  POLLHUP | POLLERR /* POLL_HUP */
420 };
421 
422 static inline int sigio_perm(struct task_struct *p,
423  struct fown_struct *fown, int sig)
424 {
425  const struct cred *cred;
426  int ret;
427 
428  rcu_read_lock();
429  cred = __task_cred(p);
430  ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
431  uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
432  uid_eq(fown->uid, cred->suid) || uid_eq(fown->uid, cred->uid)) &&
433  !security_file_send_sigiotask(p, fown, sig));
434  rcu_read_unlock();
435  return ret;
436 }
437 
438 static void send_sigio_to_task(struct task_struct *p,
439  struct fown_struct *fown,
440  int fd, int reason, int group)
441 {
442  /*
443  * F_SETSIG can change ->signum lockless in parallel, make
444  * sure we read it once and use the same value throughout.
445  */
446  int signum = ACCESS_ONCE(fown->signum);
447 
448  if (!sigio_perm(p, fown, signum))
449  return;
450 
451  switch (signum) {
452  siginfo_t si;
453  default:
454  /* Queue a rt signal with the appropriate fd as its
455  value. We use SI_SIGIO as the source, not
456  SI_KERNEL, since kernel signals always get
457  delivered even if we can't queue. Failure to
458  queue in this case _should_ be reported; we fall
459  back to SIGIO in that case. --sct */
460  si.si_signo = signum;
461  si.si_errno = 0;
462  si.si_code = reason;
463  /* Make sure we are called with one of the POLL_*
464  reasons, otherwise we could leak kernel stack into
465  userspace. */
466  BUG_ON((reason & __SI_MASK) != __SI_POLL);
467  if (reason - POLL_IN >= NSIGPOLL)
468  si.si_band = ~0L;
469  else
470  si.si_band = band_table[reason - POLL_IN];
471  si.si_fd = fd;
472  if (!do_send_sig_info(signum, &si, p, group))
473  break;
474  /* fall-through: fall back on the old plain SIGIO signal */
475  case 0:
477  }
478 }
479 
480 void send_sigio(struct fown_struct *fown, int fd, int band)
481 {
482  struct task_struct *p;
483  enum pid_type type;
484  struct pid *pid;
485  int group = 1;
486 
487  read_lock(&fown->lock);
488 
489  type = fown->pid_type;
490  if (type == PIDTYPE_MAX) {
491  group = 0;
492  type = PIDTYPE_PID;
493  }
494 
495  pid = fown->pid;
496  if (!pid)
497  goto out_unlock_fown;
498 
500  do_each_pid_task(pid, type, p) {
501  send_sigio_to_task(p, fown, fd, band, group);
502  } while_each_pid_task(pid, type, p);
504  out_unlock_fown:
505  read_unlock(&fown->lock);
506 }
507 
508 static void send_sigurg_to_task(struct task_struct *p,
509  struct fown_struct *fown, int group)
510 {
511  if (sigio_perm(p, fown, SIGURG))
513 }
514 
515 int send_sigurg(struct fown_struct *fown)
516 {
517  struct task_struct *p;
518  enum pid_type type;
519  struct pid *pid;
520  int group = 1;
521  int ret = 0;
522 
523  read_lock(&fown->lock);
524 
525  type = fown->pid_type;
526  if (type == PIDTYPE_MAX) {
527  group = 0;
528  type = PIDTYPE_PID;
529  }
530 
531  pid = fown->pid;
532  if (!pid)
533  goto out_unlock_fown;
534 
535  ret = 1;
536 
538  do_each_pid_task(pid, type, p) {
539  send_sigurg_to_task(p, fown, group);
540  } while_each_pid_task(pid, type, p);
542  out_unlock_fown:
543  read_unlock(&fown->lock);
544  return ret;
545 }
546 
547 static DEFINE_SPINLOCK(fasync_lock);
548 static struct kmem_cache *fasync_cache __read_mostly;
549 
550 static void fasync_free_rcu(struct rcu_head *head)
551 {
552  kmem_cache_free(fasync_cache,
553  container_of(head, struct fasync_struct, fa_rcu));
554 }
555 
556 /*
557  * Remove a fasync entry. If successfully removed, return
558  * positive and clear the FASYNC flag. If no entry exists,
559  * do nothing and return 0.
560  *
561  * NOTE! It is very important that the FASYNC flag always
562  * match the state "is the filp on a fasync list".
563  *
564  */
565 int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
566 {
567  struct fasync_struct *fa, **fp;
568  int result = 0;
569 
570  spin_lock(&filp->f_lock);
571  spin_lock(&fasync_lock);
572  for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
573  if (fa->fa_file != filp)
574  continue;
575 
576  spin_lock_irq(&fa->fa_lock);
577  fa->fa_file = NULL;
578  spin_unlock_irq(&fa->fa_lock);
579 
580  *fp = fa->fa_next;
581  call_rcu(&fa->fa_rcu, fasync_free_rcu);
582  filp->f_flags &= ~FASYNC;
583  result = 1;
584  break;
585  }
586  spin_unlock(&fasync_lock);
587  spin_unlock(&filp->f_lock);
588  return result;
589 }
590 
592 {
593  return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
594 }
595 
596 /*
597  * NOTE! This can be used only for unused fasync entries:
598  * entries that actually got inserted on the fasync list
599  * need to be released by rcu - see fasync_remove_entry.
600  */
601 void fasync_free(struct fasync_struct *new)
602 {
603  kmem_cache_free(fasync_cache, new);
604 }
605 
606 /*
607  * Insert a new entry into the fasync list. Return the pointer to the
608  * old one if we didn't use the new one.
609  *
610  * NOTE! It is very important that the FASYNC flag always
611  * match the state "is the filp on a fasync list".
612  */
613 struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
614 {
615  struct fasync_struct *fa, **fp;
616 
617  spin_lock(&filp->f_lock);
618  spin_lock(&fasync_lock);
619  for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
620  if (fa->fa_file != filp)
621  continue;
622 
623  spin_lock_irq(&fa->fa_lock);
624  fa->fa_fd = fd;
625  spin_unlock_irq(&fa->fa_lock);
626  goto out;
627  }
628 
629  spin_lock_init(&new->fa_lock);
630  new->magic = FASYNC_MAGIC;
631  new->fa_file = filp;
632  new->fa_fd = fd;
633  new->fa_next = *fapp;
634  rcu_assign_pointer(*fapp, new);
635  filp->f_flags |= FASYNC;
636 
637 out:
638  spin_unlock(&fasync_lock);
639  spin_unlock(&filp->f_lock);
640  return fa;
641 }
642 
643 /*
644  * Add a fasync entry. Return negative on error, positive if
645  * added, and zero if did nothing but change an existing one.
646  */
647 static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
648 {
649  struct fasync_struct *new;
650 
651  new = fasync_alloc();
652  if (!new)
653  return -ENOMEM;
654 
655  /*
656  * fasync_insert_entry() returns the old (update) entry if
657  * it existed.
658  *
659  * So free the (unused) new entry and return 0 to let the
660  * caller know that we didn't add any new fasync entries.
661  */
662  if (fasync_insert_entry(fd, filp, fapp, new)) {
663  fasync_free(new);
664  return 0;
665  }
666 
667  return 1;
668 }
669 
/*
 * fasync_helper() is used by almost all character device drivers
 * to set up the fasync queue, and for regular files by the file
 * lease code. It returns negative on error, 0 if it did no changes
 * and positive if it added/deleted the entry.
 *
 * @fd:   descriptor number recorded when an entry is added
 * @filp: file being added to or removed from the list
 * @on:   non-zero to add @filp, zero to remove it
 * @fapp: head of the fasync list to operate on
 */
int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
	if (!on)
		return fasync_remove_entry(filp, fapp);
	return fasync_add_entry(fd, filp, fapp);
}
EXPORT_SYMBOL(fasync_helper);
682 
684 
685 /*
686  * rcu_read_lock() is held
687  */
688 static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
689 {
690  while (fa) {
691  struct fown_struct *fown;
692  unsigned long flags;
693 
694  if (fa->magic != FASYNC_MAGIC) {
695  printk(KERN_ERR "kill_fasync: bad magic number in "
696  "fasync_struct!\n");
697  return;
698  }
699  spin_lock_irqsave(&fa->fa_lock, flags);
700  if (fa->fa_file) {
701  fown = &fa->fa_file->f_owner;
702  /* Don't send SIGURG to processes which have not set a
703  queued signum: SIGURG has its own default signalling
704  mechanism. */
705  if (!(sig == SIGURG && fown->signum == 0))
706  send_sigio(fown, fa->fa_fd, band);
707  }
708  spin_unlock_irqrestore(&fa->fa_lock, flags);
709  fa = rcu_dereference(fa->fa_next);
710  }
711 }
712 
/*
 * kill_fasync - notify all files registered on a fasync list.
 * @fp:   head of the fasync list
 * @sig:  signal being raised
 * @band: POLL_* reason forwarded to the listeners
 */
void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* First a quick test without locking: usually
	 * the list is empty.
	 */
	if (*fp) {
		rcu_read_lock();
		kill_fasync_rcu(rcu_dereference(*fp), sig, band);
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL(kill_fasync);
725 
726 static int __init fcntl_init(void)
727 {
728  /*
729  * Please add new bits here to ensure allocation uniqueness.
730  * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
731  * is defined as O_NONBLOCK on some platforms and not on others.
732  */
733  BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
734  O_RDONLY | O_WRONLY | O_RDWR |
735  O_CREAT | O_EXCL | O_NOCTTY |
736  O_TRUNC | O_APPEND | /* O_NONBLOCK | */
737  __O_SYNC | O_DSYNC | FASYNC |
741  ));
742 
743  fasync_cache = kmem_cache_create("fasync_cache",
744  sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
745  return 0;
746 }
747 
748 module_init(fcntl_init)