Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
open.c
Go to the documentation of this file.
1 /*
2  * linux/fs/open.c
3  *
4  * Copyright (C) 1991, 1992 Linus Torvalds
5  */
6 
7 #include <linux/string.h>
8 #include <linux/mm.h>
9 #include <linux/file.h>
10 #include <linux/fdtable.h>
11 #include <linux/fsnotify.h>
12 #include <linux/module.h>
13 #include <linux/tty.h>
14 #include <linux/namei.h>
15 #include <linux/backing-dev.h>
16 #include <linux/capability.h>
17 #include <linux/securebits.h>
18 #include <linux/security.h>
19 #include <linux/mount.h>
20 #include <linux/fcntl.h>
21 #include <linux/slab.h>
22 #include <asm/uaccess.h>
23 #include <linux/fs.h>
24 #include <linux/personality.h>
25 #include <linux/pagemap.h>
26 #include <linux/syscalls.h>
27 #include <linux/rcupdate.h>
28 #include <linux/audit.h>
29 #include <linux/falloc.h>
30 #include <linux/fs_struct.h>
31 #include <linux/ima.h>
32 #include <linux/dnotify.h>
33 
34 #include "internal.h"
35 
36 int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
37  struct file *filp)
38 {
39  int ret;
40  struct iattr newattrs;
41 
42  /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
43  if (length < 0)
44  return -EINVAL;
45 
46  newattrs.ia_size = length;
47  newattrs.ia_valid = ATTR_SIZE | time_attrs;
48  if (filp) {
49  newattrs.ia_file = filp;
50  newattrs.ia_valid |= ATTR_FILE;
51  }
52 
53  /* Remove suid/sgid on truncate too */
54  ret = should_remove_suid(dentry);
55  if (ret)
56  newattrs.ia_valid |= ret | ATTR_FORCE;
57 
58  mutex_lock(&dentry->d_inode->i_mutex);
59  ret = notify_change(dentry, &newattrs);
60  mutex_unlock(&dentry->d_inode->i_mutex);
61  return ret;
62 }
63 
64 static long do_sys_truncate(const char __user *pathname, loff_t length)
65 {
66  struct path path;
67  struct inode *inode;
68  int error;
69 
70  error = -EINVAL;
71  if (length < 0) /* sorry, but loff_t says... */
72  goto out;
73 
74  error = user_path(pathname, &path);
75  if (error)
76  goto out;
77  inode = path.dentry->d_inode;
78 
79  /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
80  error = -EISDIR;
81  if (S_ISDIR(inode->i_mode))
82  goto dput_and_out;
83 
84  error = -EINVAL;
85  if (!S_ISREG(inode->i_mode))
86  goto dput_and_out;
87 
88  error = mnt_want_write(path.mnt);
89  if (error)
90  goto dput_and_out;
91 
92  error = inode_permission(inode, MAY_WRITE);
93  if (error)
94  goto mnt_drop_write_and_out;
95 
96  error = -EPERM;
97  if (IS_APPEND(inode))
98  goto mnt_drop_write_and_out;
99 
100  error = get_write_access(inode);
101  if (error)
102  goto mnt_drop_write_and_out;
103 
104  /*
105  * Make sure that there are no leases. get_write_access() protects
106  * against the truncate racing with a lease-granting setlease().
107  */
108  error = break_lease(inode, O_WRONLY);
109  if (error)
110  goto put_write_and_out;
111 
112  error = locks_verify_truncate(inode, NULL, length);
113  if (!error)
114  error = security_path_truncate(&path);
115  if (!error)
116  error = do_truncate(path.dentry, length, 0, NULL);
117 
118 put_write_and_out:
119  put_write_access(inode);
120 mnt_drop_write_and_out:
122 dput_and_out:
123  path_put(&path);
124 out:
125  return error;
126 }
127 
128 SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
129 {
130  return do_sys_truncate(path, length);
131 }
132 
133 static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
134 {
135  struct inode *inode;
136  struct dentry *dentry;
137  struct fd f;
138  int error;
139 
140  error = -EINVAL;
141  if (length < 0)
142  goto out;
143  error = -EBADF;
144  f = fdget(fd);
145  if (!f.file)
146  goto out;
147 
148  /* explicitly opened as large or we are on 64-bit box */
149  if (f.file->f_flags & O_LARGEFILE)
150  small = 0;
151 
152  dentry = f.file->f_path.dentry;
153  inode = dentry->d_inode;
154  error = -EINVAL;
155  if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
156  goto out_putf;
157 
158  error = -EINVAL;
159  /* Cannot ftruncate over 2^31 bytes without large file support */
160  if (small && length > MAX_NON_LFS)
161  goto out_putf;
162 
163  error = -EPERM;
164  if (IS_APPEND(inode))
165  goto out_putf;
166 
167  sb_start_write(inode->i_sb);
168  error = locks_verify_truncate(inode, f.file, length);
169  if (!error)
170  error = security_path_truncate(&f.file->f_path);
171  if (!error)
172  error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
173  sb_end_write(inode->i_sb);
174 out_putf:
175  fdput(f);
176 out:
177  return error;
178 }
179 
180 SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
181 {
182  long ret = do_sys_ftruncate(fd, length, 1);
183  /* avoid REGPARM breakage on x86: */
184  asmlinkage_protect(2, ret, fd, length);
185  return ret;
186 }
187 
188 /* LFS versions of truncate are only needed on 32 bit machines */
189 #if BITS_PER_LONG == 32
190 SYSCALL_DEFINE(truncate64)(const char __user * path, loff_t length)
191 {
192  return do_sys_truncate(path, length);
193 }
194 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
195 asmlinkage long SyS_truncate64(long path, loff_t length)
196 {
197  return SYSC_truncate64((const char __user *) path, length);
198 }
199 SYSCALL_ALIAS(sys_truncate64, SyS_truncate64);
200 #endif
201 
202 SYSCALL_DEFINE(ftruncate64)(unsigned int fd, loff_t length)
203 {
204  long ret = do_sys_ftruncate(fd, length, 0);
205  /* avoid REGPARM breakage on x86: */
206  asmlinkage_protect(2, ret, fd, length);
207  return ret;
208 }
209 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
210 asmlinkage long SyS_ftruncate64(long fd, loff_t length)
211 {
212  return SYSC_ftruncate64((unsigned int) fd, length);
213 }
214 SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64);
215 #endif
216 #endif /* BITS_PER_LONG == 32 */
217 
218 
219 int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
220 {
221  struct inode *inode = file->f_path.dentry->d_inode;
222  long ret;
223 
224  if (offset < 0 || len <= 0)
225  return -EINVAL;
226 
227  /* Return error if mode is not supported */
229  return -EOPNOTSUPP;
230 
231  /* Punch hole must have keep size set */
232  if ((mode & FALLOC_FL_PUNCH_HOLE) &&
233  !(mode & FALLOC_FL_KEEP_SIZE))
234  return -EOPNOTSUPP;
235 
236  if (!(file->f_mode & FMODE_WRITE))
237  return -EBADF;
238 
239  /* It's not possible punch hole on append only file */
240  if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
241  return -EPERM;
242 
243  if (IS_IMMUTABLE(inode))
244  return -EPERM;
245 
246  /*
247  * Revalidate the write permissions, in case security policy has
248  * changed since the files were opened.
249  */
250  ret = security_file_permission(file, MAY_WRITE);
251  if (ret)
252  return ret;
253 
254  if (S_ISFIFO(inode->i_mode))
255  return -ESPIPE;
256 
257  /*
258  * Let individual file system decide if it supports preallocation
259  * for directories or not.
260  */
261  if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
262  return -ENODEV;
263 
264  /* Check for wrap through zero too */
265  if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
266  return -EFBIG;
267 
268  if (!file->f_op->fallocate)
269  return -EOPNOTSUPP;
270 
271  sb_start_write(inode->i_sb);
272  ret = file->f_op->fallocate(file, mode, offset, len);
273  sb_end_write(inode->i_sb);
274  return ret;
275 }
276 
277 SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
278 {
279  struct fd f = fdget(fd);
280  int error = -EBADF;
281 
282  if (f.file) {
283  error = do_fallocate(f.file, mode, offset, len);
284  fdput(f);
285  }
286  return error;
287 }
288 
289 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
290 asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len)
291 {
292  return SYSC_fallocate((int)fd, (int)mode, offset, len);
293 }
294 SYSCALL_ALIAS(sys_fallocate, SyS_fallocate);
295 #endif
296 
297 /*
298  * access() needs to use the real uid/gid, not the effective uid/gid.
299  * We do this by temporarily clearing all FS-related capabilities and
300  * switching the fsuid/fsgid around to the real ones.
301  */
302 SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
303 {
304  const struct cred *old_cred;
305  struct cred *override_cred;
306  struct path path;
307  struct inode *inode;
308  int res;
309 
310  if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
311  return -EINVAL;
312 
313  override_cred = prepare_creds();
314  if (!override_cred)
315  return -ENOMEM;
316 
317  override_cred->fsuid = override_cred->uid;
318  override_cred->fsgid = override_cred->gid;
319 
321  /* Clear the capabilities if we switch to a non-root user */
322  kuid_t root_uid = make_kuid(override_cred->user_ns, 0);
323  if (!uid_eq(override_cred->uid, root_uid))
324  cap_clear(override_cred->cap_effective);
325  else
326  override_cred->cap_effective =
327  override_cred->cap_permitted;
328  }
329 
330  old_cred = override_creds(override_cred);
331 
332  res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
333  if (res)
334  goto out;
335 
336  inode = path.dentry->d_inode;
337 
338  if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
339  /*
340  * MAY_EXEC on regular files is denied if the fs is mounted
341  * with the "noexec" flag.
342  */
343  res = -EACCES;
344  if (path.mnt->mnt_flags & MNT_NOEXEC)
345  goto out_path_release;
346  }
347 
348  res = inode_permission(inode, mode | MAY_ACCESS);
349  /* SuS v2 requires we report a read only fs too */
350  if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
351  goto out_path_release;
352  /*
353  * This is a rare case where using __mnt_is_readonly()
354  * is OK without a mnt_want/drop_write() pair. Since
355  * no actual write to the fs is performed here, we do
356  * not need to telegraph to that to anyone.
357  *
358  * By doing this, we accept that this access is
359  * inherently racy and know that the fs may change
360  * state before we even see this result.
361  */
362  if (__mnt_is_readonly(path.mnt))
363  res = -EROFS;
364 
365 out_path_release:
366  path_put(&path);
367 out:
368  revert_creds(old_cred);
369  put_cred(override_cred);
370  return res;
371 }
372 
373 SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
374 {
375  return sys_faccessat(AT_FDCWD, filename, mode);
376 }
377 
378 SYSCALL_DEFINE1(chdir, const char __user *, filename)
379 {
380  struct path path;
381  int error;
382 
383  error = user_path_dir(filename, &path);
384  if (error)
385  goto out;
386 
387  error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
388  if (error)
389  goto dput_and_out;
390 
391  set_fs_pwd(current->fs, &path);
392 
393 dput_and_out:
394  path_put(&path);
395 out:
396  return error;
397 }
398 
399 SYSCALL_DEFINE1(fchdir, unsigned int, fd)
400 {
401  struct fd f = fdget_raw(fd);
402  struct inode *inode;
403  int error = -EBADF;
404 
405  error = -EBADF;
406  if (!f.file)
407  goto out;
408 
409  inode = f.file->f_path.dentry->d_inode;
410 
411  error = -ENOTDIR;
412  if (!S_ISDIR(inode->i_mode))
413  goto out_putf;
414 
415  error = inode_permission(inode, MAY_EXEC | MAY_CHDIR);
416  if (!error)
417  set_fs_pwd(current->fs, &f.file->f_path);
418 out_putf:
419  fdput(f);
420 out:
421  return error;
422 }
423 
424 SYSCALL_DEFINE1(chroot, const char __user *, filename)
425 {
426  struct path path;
427  int error;
428 
429  error = user_path_dir(filename, &path);
430  if (error)
431  goto out;
432 
433  error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
434  if (error)
435  goto dput_and_out;
436 
437  error = -EPERM;
438  if (!capable(CAP_SYS_CHROOT))
439  goto dput_and_out;
440  error = security_path_chroot(&path);
441  if (error)
442  goto dput_and_out;
443 
444  set_fs_root(current->fs, &path);
445  error = 0;
446 dput_and_out:
447  path_put(&path);
448 out:
449  return error;
450 }
451 
452 static int chmod_common(struct path *path, umode_t mode)
453 {
454  struct inode *inode = path->dentry->d_inode;
455  struct iattr newattrs;
456  int error;
457 
458  error = mnt_want_write(path->mnt);
459  if (error)
460  return error;
461  mutex_lock(&inode->i_mutex);
462  error = security_path_chmod(path, mode);
463  if (error)
464  goto out_unlock;
465  newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
466  newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
467  error = notify_change(path->dentry, &newattrs);
468 out_unlock:
469  mutex_unlock(&inode->i_mutex);
470  mnt_drop_write(path->mnt);
471  return error;
472 }
473 
474 SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
475 {
476  struct file * file;
477  int err = -EBADF;
478 
479  file = fget(fd);
480  if (file) {
481  audit_inode(NULL, file->f_path.dentry, 0);
482  err = chmod_common(&file->f_path, mode);
483  fput(file);
484  }
485  return err;
486 }
487 
488 SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode)
489 {
490  struct path path;
491  int error;
492 
493  error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
494  if (!error) {
495  error = chmod_common(&path, mode);
496  path_put(&path);
497  }
498  return error;
499 }
500 
501 SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
502 {
503  return sys_fchmodat(AT_FDCWD, filename, mode);
504 }
505 
506 static int chown_common(struct path *path, uid_t user, gid_t group)
507 {
508  struct inode *inode = path->dentry->d_inode;
509  int error;
510  struct iattr newattrs;
511  kuid_t uid;
512  kgid_t gid;
513 
514  uid = make_kuid(current_user_ns(), user);
515  gid = make_kgid(current_user_ns(), group);
516 
517  newattrs.ia_valid = ATTR_CTIME;
518  if (user != (uid_t) -1) {
519  if (!uid_valid(uid))
520  return -EINVAL;
521  newattrs.ia_valid |= ATTR_UID;
522  newattrs.ia_uid = uid;
523  }
524  if (group != (gid_t) -1) {
525  if (!gid_valid(gid))
526  return -EINVAL;
527  newattrs.ia_valid |= ATTR_GID;
528  newattrs.ia_gid = gid;
529  }
530  if (!S_ISDIR(inode->i_mode))
531  newattrs.ia_valid |=
533  mutex_lock(&inode->i_mutex);
534  error = security_path_chown(path, uid, gid);
535  if (!error)
536  error = notify_change(path->dentry, &newattrs);
537  mutex_unlock(&inode->i_mutex);
538 
539  return error;
540 }
541 
542 SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
543  gid_t, group, int, flag)
544 {
545  struct path path;
546  int error = -EINVAL;
547  int lookup_flags;
548 
549  if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
550  goto out;
551 
552  lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
553  if (flag & AT_EMPTY_PATH)
554  lookup_flags |= LOOKUP_EMPTY;
555  error = user_path_at(dfd, filename, lookup_flags, &path);
556  if (error)
557  goto out;
558  error = mnt_want_write(path.mnt);
559  if (error)
560  goto out_release;
561  error = chown_common(&path, user, group);
562  mnt_drop_write(path.mnt);
563 out_release:
564  path_put(&path);
565 out:
566  return error;
567 }
568 
569 SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
570 {
571  return sys_fchownat(AT_FDCWD, filename, user, group, 0);
572 }
573 
574 SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
575 {
576  return sys_fchownat(AT_FDCWD, filename, user, group,
578 }
579 
580 SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
581 {
582  struct fd f = fdget(fd);
583  int error = -EBADF;
584 
585  if (!f.file)
586  goto out;
587 
588  error = mnt_want_write_file(f.file);
589  if (error)
590  goto out_fput;
591  audit_inode(NULL, f.file->f_path.dentry, 0);
592  error = chown_common(&f.file->f_path, user, group);
594 out_fput:
595  fdput(f);
596 out:
597  return error;
598 }
599 
600 /*
601  * You have to be very careful that these write
602  * counts get cleaned up in error cases and
603  * upon __fput(). This should probably never
604  * be called outside of __dentry_open().
605  */
606 static inline int __get_file_write_access(struct inode *inode,
607  struct vfsmount *mnt)
608 {
609  int error;
610  error = get_write_access(inode);
611  if (error)
612  return error;
613  /*
614  * Do not take mount writer counts on
615  * special files since no writes to
616  * the mount itself will occur.
617  */
618  if (!special_file(inode->i_mode)) {
619  /*
620  * Balanced in __fput()
621  */
622  error = __mnt_want_write(mnt);
623  if (error)
624  put_write_access(inode);
625  }
626  return error;
627 }
628 
630 {
631  /* NB: we're sure to have correct a_ops only after f_op->open */
632  if (f->f_flags & O_DIRECT) {
633  if (!f->f_mapping->a_ops ||
634  ((!f->f_mapping->a_ops->direct_IO) &&
635  (!f->f_mapping->a_ops->get_xip_mem))) {
636  return -EINVAL;
637  }
638  }
639  return 0;
640 }
641 
642 static int do_dentry_open(struct file *f,
643  int (*open)(struct inode *, struct file *),
644  const struct cred *cred)
645 {
646  static const struct file_operations empty_fops = {};
647  struct inode *inode;
648  int error;
649 
650  f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
652 
653  if (unlikely(f->f_flags & O_PATH))
654  f->f_mode = FMODE_PATH;
655 
656  path_get(&f->f_path);
657  inode = f->f_path.dentry->d_inode;
658  if (f->f_mode & FMODE_WRITE) {
659  error = __get_file_write_access(inode, f->f_path.mnt);
660  if (error)
661  goto cleanup_file;
662  if (!special_file(inode->i_mode))
663  file_take_write(f);
664  }
665 
666  f->f_mapping = inode->i_mapping;
667  f->f_pos = 0;
668  file_sb_list_add(f, inode->i_sb);
669 
670  if (unlikely(f->f_mode & FMODE_PATH)) {
671  f->f_op = &empty_fops;
672  return 0;
673  }
674 
675  f->f_op = fops_get(inode->i_fop);
676 
677  error = security_file_open(f, cred);
678  if (error)
679  goto cleanup_all;
680 
681  error = break_lease(inode, f->f_flags);
682  if (error)
683  goto cleanup_all;
684 
685  if (!open && f->f_op)
686  open = f->f_op->open;
687  if (open) {
688  error = open(inode, f);
689  if (error)
690  goto cleanup_all;
691  }
692  if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
693  i_readcount_inc(inode);
694 
695  f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
696 
697  file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
698 
699  return 0;
700 
701 cleanup_all:
702  fops_put(f->f_op);
703  file_sb_list_del(f);
704  if (f->f_mode & FMODE_WRITE) {
705  put_write_access(inode);
706  if (!special_file(inode->i_mode)) {
707  /*
708  * We don't consider this a real
709  * mnt_want/drop_write() pair
710  * because it all happenend right
711  * here, so just reset the state.
712  */
713  file_reset_write(f);
714  __mnt_drop_write(f->f_path.mnt);
715  }
716  }
717 cleanup_file:
718  path_put(&f->f_path);
719  f->f_path.mnt = NULL;
720  f->f_path.dentry = NULL;
721  return error;
722 }
723 
735 int finish_open(struct file *file, struct dentry *dentry,
736  int (*open)(struct inode *, struct file *),
737  int *opened)
738 {
739  int error;
740  BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
741 
742  file->f_path.dentry = dentry;
743  error = do_dentry_open(file, open, current_cred());
744  if (!error)
745  *opened |= FILE_OPENED;
746 
747  return error;
748 }
750 
760 int finish_no_open(struct file *file, struct dentry *dentry)
761 {
762  file->f_path.dentry = dentry;
763  return 1;
764 }
766 
767 struct file *dentry_open(const struct path *path, int flags,
768  const struct cred *cred)
769 {
770  int error;
771  struct file *f;
772 
773  validate_creds(cred);
774 
775  /* We must always pass in a valid mount pointer. */
776  BUG_ON(!path->mnt);
777 
778  error = -ENFILE;
779  f = get_empty_filp();
780  if (f == NULL)
781  return ERR_PTR(error);
782 
783  f->f_flags = flags;
784  f->f_path = *path;
785  error = do_dentry_open(f, NULL, cred);
786  if (!error) {
787  error = open_check_o_direct(f);
788  if (error) {
789  fput(f);
790  f = ERR_PTR(error);
791  }
792  } else {
793  put_filp(f);
794  f = ERR_PTR(error);
795  }
796  return f;
797 }
799 
800 static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
801 {
802  int lookup_flags = 0;
803  int acc_mode;
804 
805  if (flags & O_CREAT)
806  op->mode = (mode & S_IALLUGO) | S_IFREG;
807  else
808  op->mode = 0;
809 
810  /* Must never be set by userspace */
811  flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC;
812 
813  /*
814  * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
815  * check for O_DSYNC if the need any syncing at all we enforce it's
816  * always set instead of having to deal with possibly weird behaviour
817  * for malicious applications setting only __O_SYNC.
818  */
819  if (flags & __O_SYNC)
820  flags |= O_DSYNC;
821 
822  /*
823  * If we have O_PATH in the open flag. Then we
824  * cannot have anything other than the below set of flags
825  */
826  if (flags & O_PATH) {
827  flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
828  acc_mode = 0;
829  } else {
830  acc_mode = MAY_OPEN | ACC_MODE(flags);
831  }
832 
833  op->open_flag = flags;
834 
835  /* O_TRUNC implies we need access checks for write permissions */
836  if (flags & O_TRUNC)
837  acc_mode |= MAY_WRITE;
838 
839  /* Allow the LSM permission hook to distinguish append
840  access from general write access. */
841  if (flags & O_APPEND)
842  acc_mode |= MAY_APPEND;
843 
844  op->acc_mode = acc_mode;
845 
846  op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
847 
848  if (flags & O_CREAT) {
849  op->intent |= LOOKUP_CREATE;
850  if (flags & O_EXCL)
851  op->intent |= LOOKUP_EXCL;
852  }
853 
854  if (flags & O_DIRECTORY)
855  lookup_flags |= LOOKUP_DIRECTORY;
856  if (!(flags & O_NOFOLLOW))
857  lookup_flags |= LOOKUP_FOLLOW;
858  return lookup_flags;
859 }
860 
872 struct file *file_open_name(struct filename *name, int flags, umode_t mode)
873 {
874  struct open_flags op;
875  int lookup = build_open_flags(flags, mode, &op);
876  return do_filp_open(AT_FDCWD, name, &op, lookup);
877 }
878 
890 struct file *filp_open(const char *filename, int flags, umode_t mode)
891 {
892  struct filename name = {.name = filename};
893  return file_open_name(&name, flags, mode);
894 }
896 
897 struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
898  const char *filename, int flags)
899 {
900  struct open_flags op;
901  int lookup = build_open_flags(flags, 0, &op);
902  if (flags & O_CREAT)
903  return ERR_PTR(-EINVAL);
904  if (!filename && (flags & O_DIRECTORY))
905  if (!dentry->d_inode->i_op->lookup)
906  return ERR_PTR(-ENOTDIR);
907  return do_file_open_root(dentry, mnt, filename, &op, lookup);
908 }
910 
911 long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
912 {
913  struct open_flags op;
914  int lookup = build_open_flags(flags, mode, &op);
915  struct filename *tmp = getname(filename);
916  int fd = PTR_ERR(tmp);
917 
918  if (!IS_ERR(tmp)) {
919  fd = get_unused_fd_flags(flags);
920  if (fd >= 0) {
921  struct file *f = do_filp_open(dfd, tmp, &op, lookup);
922  if (IS_ERR(f)) {
923  put_unused_fd(fd);
924  fd = PTR_ERR(f);
925  } else {
926  fsnotify_open(f);
927  fd_install(fd, f);
928  }
929  }
930  putname(tmp);
931  }
932  return fd;
933 }
934 
935 SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
936 {
937  long ret;
938 
939  if (force_o_largefile())
940  flags |= O_LARGEFILE;
941 
942  ret = do_sys_open(AT_FDCWD, filename, flags, mode);
943  /* avoid REGPARM breakage on x86: */
944  asmlinkage_protect(3, ret, filename, flags, mode);
945  return ret;
946 }
947 
948 SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
949  umode_t, mode)
950 {
951  long ret;
952 
953  if (force_o_largefile())
954  flags |= O_LARGEFILE;
955 
956  ret = do_sys_open(dfd, filename, flags, mode);
957  /* avoid REGPARM breakage on x86: */
958  asmlinkage_protect(4, ret, dfd, filename, flags, mode);
959  return ret;
960 }
961 
962 #ifndef __alpha__
963 
964 /*
965  * For backward compatibility? Maybe this should be moved
966  * into arch/i386 instead?
967  */
968 SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
969 {
970  return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
971 }
972 
973 #endif
974 
975 /*
976  * "id" is the POSIX thread ID. We use the
977  * files pointer for this..
978  */
979 int filp_close(struct file *filp, fl_owner_t id)
980 {
981  int retval = 0;
982 
983  if (!file_count(filp)) {
984  printk(KERN_ERR "VFS: Close: file count is 0\n");
985  return 0;
986  }
987 
988  if (filp->f_op && filp->f_op->flush)
989  retval = filp->f_op->flush(filp, id);
990 
991  if (likely(!(filp->f_mode & FMODE_PATH))) {
992  dnotify_flush(filp, id);
993  locks_remove_posix(filp, id);
994  }
995  fput(filp);
996  return retval;
997 }
998 
1000 
1001 /*
1002  * Careful here! We test whether the file pointer is NULL before
1003  * releasing the fd. This ensures that one clone task can't release
1004  * an fd while another clone is opening it.
1005  */
1006 SYSCALL_DEFINE1(close, unsigned int, fd)
1007 {
1008  int retval = __close_fd(current->files, fd);
1009 
1010  /* can't restart close syscall because file table entry was cleared */
1011  if (unlikely(retval == -ERESTARTSYS ||
1012  retval == -ERESTARTNOINTR ||
1013  retval == -ERESTARTNOHAND ||
1014  retval == -ERESTART_RESTARTBLOCK))
1015  retval = -EINTR;
1016 
1017  return retval;
1018 }
1020 
1021 /*
1022  * This routine simulates a hangup on the tty, to arrange that users
1023  * are given clean terminals at login time.
1024  */
1026 {
1027  if (capable(CAP_SYS_TTY_CONFIG)) {
1028  tty_vhangup_self();
1029  return 0;
1030  }
1031  return -EPERM;
1032 }
1033 
1034 /*
1035  * Called when an inode is about to be open.
1036  * We use this to disallow opening large files on 32bit systems if
1037  * the caller didn't specify O_LARGEFILE. On 64bit systems we force
1038  * on this flag in sys_open.
1039  */
1040 int generic_file_open(struct inode * inode, struct file * filp)
1041 {
1042  if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
1043  return -EOVERFLOW;
1044  return 0;
1045 }
1046 
1048 
1049 /*
1050  * This is used by subsystems that don't want seekable
1051  * file descriptors. The function is not supposed to ever fail, the only
1052  * reason it returns an 'int' and not 'void' is so that it can be plugged
1053  * directly into file_operations structure.
1054  */
1055 int nonseekable_open(struct inode *inode, struct file *filp)
1056 {
1057  filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1058  return 0;
1059 }
1060