Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
vfs.c
Go to the documentation of this file.
1 /*
2  * File operations used by nfsd. Some of these have been ripped from
3  * other parts of the kernel because they weren't exported, others
4  * are partial duplicates with added or changed functionality.
5  *
6  * Note that several functions dget() the dentry upon which they want
7  * to act, most notably those that create directory entries. Response
8  * dentry's are dput()'d if necessary in the release callback.
9  * So if you notice code paths that apparently fail to dput() the
10  * dentry, don't worry--they have been taken care of.
11  *
12  * Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de>
13  * Zerocopy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
14  */
15 
16 #include <linux/fs.h>
17 #include <linux/file.h>
18 #include <linux/splice.h>
19 #include <linux/fcntl.h>
20 #include <linux/namei.h>
21 #include <linux/delay.h>
22 #include <linux/fsnotify.h>
23 #include <linux/posix_acl_xattr.h>
24 #include <linux/xattr.h>
25 #include <linux/jhash.h>
26 #include <linux/ima.h>
27 #include <linux/slab.h>
28 #include <asm/uaccess.h>
29 #include <linux/exportfs.h>
30 #include <linux/writeback.h>
31 
32 #ifdef CONFIG_NFSD_V3
33 #include "xdr3.h"
34 #endif /* CONFIG_NFSD_V3 */
35 
36 #ifdef CONFIG_NFSD_V4
37 #include "acl.h"
38 #include "idmap.h"
39 #endif /* CONFIG_NFSD_V4 */
40 
41 #include "nfsd.h"
42 #include "vfs.h"
43 
44 #define NFSDDBG_FACILITY NFSDDBG_FILEOP
45 
46 
47 /*
48  * This is a cache of readahead params that help us choose the proper
49  * readahead strategy. Initially, we set all readahead parameters to 0
50  * and let the VFS handle things.
51  * If you increase the number of cached files very much, you'll need to
52  * add a hash table here.
53  */
54 struct raparms {
55  struct raparms *p_next;
56  unsigned int p_count;
59  int p_set;
61  unsigned int p_hindex;
62 };
63 
65  struct raparms *pb_head;
68 
69 #define RAPARM_HASH_BITS 4
70 #define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
71 #define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
72 static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
73 
74 /*
75  * Called from nfsd_lookup and encode_dirent. Check if we have crossed
76  * a mount point.
77  * Returns -EAGAIN or -ETIMEDOUT leaving *dpp and *expp unchanged,
78  * or nfs_ok having possibly changed *dpp and *expp
79  */
80 int
81 nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
82  struct svc_export **expp)
83 {
84  struct svc_export *exp = *expp, *exp2 = NULL;
85  struct dentry *dentry = *dpp;
86  struct path path = {.mnt = mntget(exp->ex_path.mnt),
87  .dentry = dget(dentry)};
88  int err = 0;
89 
90  err = follow_down(&path);
91  if (err < 0)
92  goto out;
93 
94  exp2 = rqst_exp_get_by_name(rqstp, &path);
95  if (IS_ERR(exp2)) {
96  err = PTR_ERR(exp2);
97  /*
98  * We normally allow NFS clients to continue
99  * "underneath" a mountpoint that is not exported.
100  * The exception is V4ROOT, where no traversal is ever
101  * allowed without an explicit export of the new
102  * directory.
103  */
104  if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT))
105  err = 0;
106  path_put(&path);
107  goto out;
108  }
109  if (nfsd_v4client(rqstp) ||
110  (exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
111  /* successfully crossed mount point */
112  /*
113  * This is subtle: path.dentry is *not* on path.mnt
114  * at this point. The only reason we are safe is that
115  * original mnt is pinned down by exp, so we should
116  * put path *before* putting exp
117  */
118  *dpp = path.dentry;
119  path.dentry = dentry;
120  *expp = exp2;
121  exp2 = exp;
122  }
123  path_put(&path);
124  exp_put(exp2);
125 out:
126  return err;
127 }
128 
129 static void follow_to_parent(struct path *path)
130 {
131  struct dentry *dp;
132 
133  while (path->dentry == path->mnt->mnt_root && follow_up(path))
134  ;
135  dp = dget_parent(path->dentry);
136  dput(path->dentry);
137  path->dentry = dp;
138 }
139 
140 static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, struct svc_export **exp, struct dentry **dentryp)
141 {
142  struct svc_export *exp2;
143  struct path path = {.mnt = mntget((*exp)->ex_path.mnt),
144  .dentry = dget(dparent)};
145 
146  follow_to_parent(&path);
147 
148  exp2 = rqst_exp_parent(rqstp, &path);
149  if (PTR_ERR(exp2) == -ENOENT) {
150  *dentryp = dget(dparent);
151  } else if (IS_ERR(exp2)) {
152  path_put(&path);
153  return PTR_ERR(exp2);
154  } else {
155  *dentryp = dget(path.dentry);
156  exp_put(*exp);
157  *exp = exp2;
158  }
159  path_put(&path);
160  return 0;
161 }
162 
163 /*
164  * For nfsd purposes, we treat V4ROOT exports as though there was an
165  * export at *every* directory.
166  */
167 int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
168 {
169  if (d_mountpoint(dentry))
170  return 1;
171  if (nfsd4_is_junction(dentry))
172  return 1;
173  if (!(exp->ex_flags & NFSEXP_V4ROOT))
174  return 0;
175  return dentry->d_inode != NULL;
176 }
177 
178 __be32
179 nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
180  const char *name, unsigned int len,
181  struct svc_export **exp_ret, struct dentry **dentry_ret)
182 {
183  struct svc_export *exp;
184  struct dentry *dparent;
185  struct dentry *dentry;
186  int host_err;
187 
188  dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
189 
190  dparent = fhp->fh_dentry;
191  exp = fhp->fh_export;
192  exp_get(exp);
193 
194  /* Lookup the name, but don't follow links */
195  if (isdotent(name, len)) {
196  if (len==1)
197  dentry = dget(dparent);
198  else if (dparent != exp->ex_path.dentry)
199  dentry = dget_parent(dparent);
200  else if (!EX_NOHIDE(exp) && !nfsd_v4client(rqstp))
201  dentry = dget(dparent); /* .. == . just like at / */
202  else {
203  /* checking mountpoint crossing is very different when stepping up */
204  host_err = nfsd_lookup_parent(rqstp, dparent, &exp, &dentry);
205  if (host_err)
206  goto out_nfserr;
207  }
208  } else {
209  fh_lock(fhp);
210  dentry = lookup_one_len(name, dparent, len);
211  host_err = PTR_ERR(dentry);
212  if (IS_ERR(dentry))
213  goto out_nfserr;
214  /*
215  * check if we have crossed a mount point ...
216  */
217  if (nfsd_mountpoint(dentry, exp)) {
218  if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
219  dput(dentry);
220  goto out_nfserr;
221  }
222  }
223  }
224  *dentry_ret = dentry;
225  *exp_ret = exp;
226  return 0;
227 
228 out_nfserr:
229  exp_put(exp);
230  return nfserrno(host_err);
231 }
232 
233 /*
234  * Look up one component of a pathname.
235  * N.B. After this call _both_ fhp and resfh need an fh_put
236  *
237  * If the lookup would cross a mountpoint, and the mounted filesystem
238  * is exported to the client with NFSEXP_NOHIDE, then the lookup is
239  * accepted as it stands and the mounted directory is
240  * returned. Otherwise the covered directory is returned.
241  * NOTE: this mountpoint crossing is not supported properly by all
242  * clients and is explicitly disallowed for NFSv3
243  * NeilBrown <[email protected]>
244  */
245 __be32
246 nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
247  unsigned int len, struct svc_fh *resfh)
248 {
249  struct svc_export *exp;
250  struct dentry *dentry;
251  __be32 err;
252 
253  err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
254  if (err)
255  return err;
256  err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
257  if (err)
258  return err;
259  err = check_nfsd_access(exp, rqstp);
260  if (err)
261  goto out;
262  /*
263  * Note: we compose the file handle now, but as the
264  * dentry may be negative, it may need to be updated.
265  */
266  err = fh_compose(resfh, exp, dentry, fhp);
267  if (!err && !dentry->d_inode)
268  err = nfserr_noent;
269 out:
270  dput(dentry);
271  exp_put(exp);
272  return err;
273 }
274 
275 static int nfsd_break_lease(struct inode *inode)
276 {
277  if (!S_ISREG(inode->i_mode))
278  return 0;
279  return break_lease(inode, O_WRONLY | O_NONBLOCK);
280 }
281 
282 /*
283  * Commit metadata changes to stable storage.
284  */
285 static int
286 commit_metadata(struct svc_fh *fhp)
287 {
288  struct inode *inode = fhp->fh_dentry->d_inode;
289  const struct export_operations *export_ops = inode->i_sb->s_export_op;
290 
291  if (!EX_ISSYNC(fhp->fh_export))
292  return 0;
293 
294  if (export_ops->commit_metadata)
295  return export_ops->commit_metadata(inode);
296  return sync_inode_metadata(inode, 1);
297 }
298 
299 /*
300  * Set various file attributes.
301  * N.B. After this call fhp needs an fh_put
302  */
303 __be32
304 nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
305  int check_guard, time_t guardtime)
306 {
307  struct dentry *dentry;
308  struct inode *inode;
309  int accmode = NFSD_MAY_SATTR;
310  umode_t ftype = 0;
311  __be32 err;
312  int host_err;
313  int size_change = 0;
314 
315  if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
317  if (iap->ia_valid & ATTR_SIZE)
318  ftype = S_IFREG;
319 
320  /* Get inode */
321  err = fh_verify(rqstp, fhp, ftype, accmode);
322  if (err)
323  goto out;
324 
325  dentry = fhp->fh_dentry;
326  inode = dentry->d_inode;
327 
328  /* Ignore any mode updates on symlinks */
329  if (S_ISLNK(inode->i_mode))
330  iap->ia_valid &= ~ATTR_MODE;
331 
332  if (!iap->ia_valid)
333  goto out;
334 
335  /*
336  * NFSv2 does not differentiate between "set-[ac]time-to-now"
337  * which only requires access, and "set-[ac]time-to-X" which
338  * requires ownership.
339  * So if it looks like it might be "set both to the same time which
340  * is close to now", and if inode_change_ok fails, then we
341  * convert to "set to now" instead of "set to explicit time"
342  *
343  * We only call inode_change_ok as the last test as technically
344  * it is not an interface that we should be using. It is only
345  * valid if the filesystem does not define it's own i_op->setattr.
346  */
347 #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
348 #define MAX_TOUCH_TIME_ERROR (30*60)
349  if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET &&
350  iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) {
351  /*
352  * Looks probable.
353  *
354  * Now just make sure time is in the right ballpark.
355  * Solaris, at least, doesn't seem to care what the time
356  * request is. We require it be within 30 minutes of now.
357  */
358  time_t delta = iap->ia_atime.tv_sec - get_seconds();
359  if (delta < 0)
360  delta = -delta;
361  if (delta < MAX_TOUCH_TIME_ERROR &&
362  inode_change_ok(inode, iap) != 0) {
363  /*
364  * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
365  * This will cause notify_change to set these times
366  * to "now"
367  */
368  iap->ia_valid &= ~BOTH_TIME_SET;
369  }
370  }
371 
372  /*
373  * The size case is special.
374  * It changes the file as well as the attributes.
375  */
376  if (iap->ia_valid & ATTR_SIZE) {
377  if (iap->ia_size < inode->i_size) {
378  err = nfsd_permission(rqstp, fhp->fh_export, dentry,
380  if (err)
381  goto out;
382  }
383 
384  host_err = get_write_access(inode);
385  if (host_err)
386  goto out_nfserr;
387 
388  size_change = 1;
389  host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
390  if (host_err) {
391  put_write_access(inode);
392  goto out_nfserr;
393  }
394  }
395 
396  /* sanitize the mode change */
397  if (iap->ia_valid & ATTR_MODE) {
398  iap->ia_mode &= S_IALLUGO;
399  iap->ia_mode |= (inode->i_mode & ~S_IALLUGO);
400  }
401 
402  /* Revoke setuid/setgid on chown */
403  if (!S_ISDIR(inode->i_mode) &&
404  (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) ||
405  ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) {
406  iap->ia_valid |= ATTR_KILL_PRIV;
407  if (iap->ia_valid & ATTR_MODE) {
408  /* we're setting mode too, just clear the s*id bits */
409  iap->ia_mode &= ~S_ISUID;
410  if (iap->ia_mode & S_IXGRP)
411  iap->ia_mode &= ~S_ISGID;
412  } else {
413  /* set ATTR_KILL_* bits and let VFS handle it */
415  }
416  }
417 
418  /* Change the attributes. */
419 
420  iap->ia_valid |= ATTR_CTIME;
421 
422  err = nfserr_notsync;
423  if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
424  host_err = nfsd_break_lease(inode);
425  if (host_err)
426  goto out_nfserr;
427  fh_lock(fhp);
428 
429  host_err = notify_change(dentry, iap);
430  err = nfserrno(host_err);
431  fh_unlock(fhp);
432  }
433  if (size_change)
434  put_write_access(inode);
435  if (!err)
436  commit_metadata(fhp);
437 out:
438  return err;
439 
440 out_nfserr:
441  err = nfserrno(host_err);
442  goto out;
443 }
444 
#if defined(CONFIG_NFSD_V2_ACL) || \
    defined(CONFIG_NFSD_V3_ACL) || \
    defined(CONFIG_NFSD_V4)
/*
 * Read extended attribute @key of @dentry into a freshly allocated buffer.
 *
 * Returns the attribute length and stores the buffer in *buf; the caller
 * must kfree() it.  Returns 0 or a negative errno when there is nothing
 * to hand back; *buf must not be used in that case (it is either left
 * untouched or has already been freed).
 */
static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
{
	ssize_t size;
	ssize_t got;

	/* First pass only asks how large the value is */
	size = vfs_getxattr(dentry, key, NULL, 0);
	if (size <= 0)
		return size;

	*buf = kmalloc(size, GFP_KERNEL);
	if (*buf == NULL)
		return -ENOMEM;

	got = vfs_getxattr(dentry, key, *buf, size);
	if (got < 0)
		kfree(*buf);
	return got;
}
#endif
467 
468 #if defined(CONFIG_NFSD_V4)
469 static int
470 set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
471 {
472  int len;
473  size_t buflen;
474  char *buf = NULL;
475  int error = 0;
476 
477  buflen = posix_acl_xattr_size(pacl->a_count);
478  buf = kmalloc(buflen, GFP_KERNEL);
479  error = -ENOMEM;
480  if (buf == NULL)
481  goto out;
482 
483  len = posix_acl_to_xattr(&init_user_ns, pacl, buf, buflen);
484  if (len < 0) {
485  error = len;
486  goto out;
487  }
488 
489  error = vfs_setxattr(dentry, key, buf, len, 0);
490 out:
491  kfree(buf);
492  return error;
493 }
494 
495 __be32
496 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
497  struct nfs4_acl *acl)
498 {
499  __be32 error;
500  int host_error;
501  struct dentry *dentry;
502  struct inode *inode;
503  struct posix_acl *pacl = NULL, *dpacl = NULL;
504  unsigned int flags = 0;
505 
506  /* Get inode */
507  error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR);
508  if (error)
509  return error;
510 
511  dentry = fhp->fh_dentry;
512  inode = dentry->d_inode;
513  if (S_ISDIR(inode->i_mode))
514  flags = NFS4_ACL_DIR;
515 
516  host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags);
517  if (host_error == -EINVAL) {
518  return nfserr_attrnotsupp;
519  } else if (host_error < 0)
520  goto out_nfserr;
521 
522  host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
523  if (host_error < 0)
524  goto out_release;
525 
526  if (S_ISDIR(inode->i_mode))
527  host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
528 
529 out_release:
530  posix_acl_release(pacl);
531  posix_acl_release(dpacl);
532 out_nfserr:
533  if (host_error == -EOPNOTSUPP)
534  return nfserr_attrnotsupp;
535  else
536  return nfserrno(host_error);
537 }
538 
539 static struct posix_acl *
540 _get_posix_acl(struct dentry *dentry, char *key)
541 {
542  void *buf = NULL;
543  struct posix_acl *pacl = NULL;
544  int buflen;
545 
546  buflen = nfsd_getxattr(dentry, key, &buf);
547  if (!buflen)
548  buflen = -ENODATA;
549  if (buflen <= 0)
550  return ERR_PTR(buflen);
551 
552  pacl = posix_acl_from_xattr(&init_user_ns, buf, buflen);
553  kfree(buf);
554  return pacl;
555 }
556 
557 int
558 nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl)
559 {
560  struct inode *inode = dentry->d_inode;
561  int error = 0;
562  struct posix_acl *pacl = NULL, *dpacl = NULL;
563  unsigned int flags = 0;
564 
565  pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS);
566  if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA)
567  pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
568  if (IS_ERR(pacl)) {
569  error = PTR_ERR(pacl);
570  pacl = NULL;
571  goto out;
572  }
573 
574  if (S_ISDIR(inode->i_mode)) {
575  dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT);
576  if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA)
577  dpacl = NULL;
578  else if (IS_ERR(dpacl)) {
579  error = PTR_ERR(dpacl);
580  dpacl = NULL;
581  goto out;
582  }
583  flags = NFS4_ACL_DIR;
584  }
585 
586  *acl = nfs4_acl_posix_to_nfsv4(pacl, dpacl, flags);
587  if (IS_ERR(*acl)) {
588  error = PTR_ERR(*acl);
589  *acl = NULL;
590  }
591  out:
592  posix_acl_release(pacl);
593  posix_acl_release(dpacl);
594  return error;
595 }
596 
597 /*
598  * NFS junction information is stored in an extended attribute.
599  */
600 #define NFSD_JUNCTION_XATTR_NAME XATTR_TRUSTED_PREFIX "junction.nfs"
601 
610 int nfsd4_is_junction(struct dentry *dentry)
611 {
612  struct inode *inode = dentry->d_inode;
613 
614  if (inode == NULL)
615  return 0;
616  if (inode->i_mode & S_IXUGO)
617  return 0;
618  if (!(inode->i_mode & S_ISVTX))
619  return 0;
620  if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0)
621  return 0;
622  return 1;
623 }
624 #endif /* defined(CONFIG_NFSD_V4) */
625 
626 #ifdef CONFIG_NFSD_V3
627 /*
628  * Check server access rights to a file system object
629  */
630 struct accessmap {
631  u32 access;
632  int how;
633 };
634 static struct accessmap nfs3_regaccess[] = {
639 
640  { 0, 0 }
641 };
642 
643 static struct accessmap nfs3_diraccess[] = {
649 
650  { 0, 0 }
651 };
652 
653 static struct accessmap nfs3_anyaccess[] = {
654  /* Some clients - Solaris 2.6 at least, make an access call
655  * to the server to check for access for things like /dev/null
656  * (which really, the server doesn't care about). So
657  * We provide simple access checking for them, looking
658  * mainly at mode bits, and we make sure to ignore read-only
659  * filesystem checks
660  */
665 
666  { 0, 0 }
667 };
668 
669 __be32
670 nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *supported)
671 {
672  struct accessmap *map;
673  struct svc_export *export;
674  struct dentry *dentry;
675  u32 query, result = 0, sresult = 0;
676  __be32 error;
677 
678  error = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
679  if (error)
680  goto out;
681 
682  export = fhp->fh_export;
683  dentry = fhp->fh_dentry;
684 
685  if (S_ISREG(dentry->d_inode->i_mode))
686  map = nfs3_regaccess;
687  else if (S_ISDIR(dentry->d_inode->i_mode))
688  map = nfs3_diraccess;
689  else
690  map = nfs3_anyaccess;
691 
692 
693  query = *access;
694  for (; map->access; map++) {
695  if (map->access & query) {
696  __be32 err2;
697 
698  sresult |= map->access;
699 
700  err2 = nfsd_permission(rqstp, export, dentry, map->how);
701  switch (err2) {
702  case nfs_ok:
703  result |= map->access;
704  break;
705 
706  /* the following error codes just mean the access was not allowed,
707  * rather than an error occurred */
708  case nfserr_rofs:
709  case nfserr_acces:
710  case nfserr_perm:
711  /* simply don't "or" in the access bit. */
712  break;
713  default:
714  error = err2;
715  goto out;
716  }
717  }
718  }
719  *access = result;
720  if (supported)
721  *supported = sresult;
722 
723  out:
724  return error;
725 }
726 #endif /* CONFIG_NFSD_V3 */
727 
728 static int nfsd_open_break_lease(struct inode *inode, int access)
729 {
730  unsigned int mode;
731 
732  if (access & NFSD_MAY_NOT_BREAK_LEASE)
733  return 0;
734  mode = (access & NFSD_MAY_WRITE) ? O_WRONLY : O_RDONLY;
735  return break_lease(inode, mode | O_NONBLOCK);
736 }
737 
738 /*
739  * Open an existing file or directory.
740  * The may_flags argument indicates the type of open (read/write/lock)
741  * and additional flags.
742  * N.B. After this call fhp needs an fh_put
743  */
744 __be32
745 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
746  int may_flags, struct file **filp)
747 {
748  struct path path;
749  struct inode *inode;
750  int flags = O_RDONLY|O_LARGEFILE;
751  __be32 err;
752  int host_err = 0;
753 
754  validate_process_creds();
755 
756  /*
757  * If we get here, then the client has already done an "open",
758  * and (hopefully) checked permission - so allow OWNER_OVERRIDE
759  * in case a chmod has now revoked permission.
760  *
761  * Arguably we should also allow the owner override for
762  * directories, but we never have and it doesn't seem to have
763  * caused anyone a problem. If we were to change this, note
764  * also that our filldir callbacks would need a variant of
765  * lookup_one_len that doesn't check permissions.
766  */
767  if (type == S_IFREG)
768  may_flags |= NFSD_MAY_OWNER_OVERRIDE;
769  err = fh_verify(rqstp, fhp, type, may_flags);
770  if (err)
771  goto out;
772 
773  path.mnt = fhp->fh_export->ex_path.mnt;
774  path.dentry = fhp->fh_dentry;
775  inode = path.dentry->d_inode;
776 
777  /* Disallow write access to files with the append-only bit set
778  * or any access when mandatory locking enabled
779  */
780  err = nfserr_perm;
781  if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE))
782  goto out;
783  /*
784  * We must ignore files (but only files) which might have mandatory
785  * locks on them because there is no way to know if the accesser has
786  * the lock.
787  */
788  if (S_ISREG((inode)->i_mode) && mandatory_lock(inode))
789  goto out;
790 
791  if (!inode->i_fop)
792  goto out;
793 
794  host_err = nfsd_open_break_lease(inode, may_flags);
795  if (host_err) /* NOMEM or WOULDBLOCK */
796  goto out_nfserr;
797 
798  if (may_flags & NFSD_MAY_WRITE) {
799  if (may_flags & NFSD_MAY_READ)
800  flags = O_RDWR|O_LARGEFILE;
801  else
802  flags = O_WRONLY|O_LARGEFILE;
803  }
804  *filp = dentry_open(&path, flags, current_cred());
805  if (IS_ERR(*filp))
806  host_err = PTR_ERR(*filp);
807  else {
808  host_err = ima_file_check(*filp, may_flags);
809 
810  if (may_flags & NFSD_MAY_64BIT_COOKIE)
811  (*filp)->f_mode |= FMODE_64BITHASH;
812  else
813  (*filp)->f_mode |= FMODE_32BITHASH;
814  }
815 
816 out_nfserr:
817  err = nfserrno(host_err);
818 out:
819  validate_process_creds();
820  return err;
821 }
822 
/*
 * Close a file obtained from nfsd_open() by dropping the reference
 * to it.
 */
void
nfsd_close(struct file *filp)
{
	fput(filp);
}
831 
832 /*
833  * Obtain the readahead parameters for the file
834  * specified by (dev, ino).
835  */
836 
837 static inline struct raparms *
838 nfsd_get_raparms(dev_t dev, ino_t ino)
839 {
840  struct raparms *ra, **rap, **frap = NULL;
841  int depth = 0;
842  unsigned int hash;
843  struct raparm_hbucket *rab;
844 
845  hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
846  rab = &raparm_hash[hash];
847 
848  spin_lock(&rab->pb_lock);
849  for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
850  if (ra->p_ino == ino && ra->p_dev == dev)
851  goto found;
852  depth++;
853  if (ra->p_count == 0)
854  frap = rap;
855  }
856  depth = nfsdstats.ra_size;
857  if (!frap) {
858  spin_unlock(&rab->pb_lock);
859  return NULL;
860  }
861  rap = frap;
862  ra = *frap;
863  ra->p_dev = dev;
864  ra->p_ino = ino;
865  ra->p_set = 0;
866  ra->p_hindex = hash;
867 found:
868  if (rap != &rab->pb_head) {
869  *rap = ra->p_next;
870  ra->p_next = rab->pb_head;
871  rab->pb_head = ra;
872  }
873  ra->p_count++;
874  nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
875  spin_unlock(&rab->pb_lock);
876  return ra;
877 }
878 
879 /*
880  * Grab and keep cached pages associated with a file in the svc_rqst
881  * so that they can be passed to the network sendmsg/sendpage routines
882  * directly. They will be released after the sending has completed.
883  */
884 static int
885 nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
886  struct splice_desc *sd)
887 {
888  struct svc_rqst *rqstp = sd->u.data;
889  struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
890  struct page *page = buf->page;
891  size_t size;
892 
893  size = sd->len;
894 
895  if (rqstp->rq_res.page_len == 0) {
896  get_page(page);
897  put_page(*pp);
898  *pp = page;
899  rqstp->rq_resused++;
900  rqstp->rq_res.page_base = buf->offset;
901  rqstp->rq_res.page_len = size;
902  } else if (page != pp[-1]) {
903  get_page(page);
904  if (*pp)
905  put_page(*pp);
906  *pp = page;
907  rqstp->rq_resused++;
908  rqstp->rq_res.page_len += size;
909  } else
910  rqstp->rq_res.page_len += size;
911 
912  return size;
913 }
914 
915 static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
916  struct splice_desc *sd)
917 {
918  return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
919 }
920 
921 static __be32
922 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
923  loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
924 {
925  mm_segment_t oldfs;
926  __be32 err;
927  int host_err;
928 
929  err = nfserr_perm;
930 
931  if (file->f_op->splice_read && rqstp->rq_splice_ok) {
932  struct splice_desc sd = {
933  .len = 0,
934  .total_len = *count,
935  .pos = offset,
936  .u.data = rqstp,
937  };
938 
939  rqstp->rq_resused = 1;
940  host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
941  } else {
942  oldfs = get_fs();
943  set_fs(KERNEL_DS);
944  host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
945  set_fs(oldfs);
946  }
947 
948  if (host_err >= 0) {
949  nfsdstats.io_read += host_err;
950  *count = host_err;
951  err = 0;
952  fsnotify_access(file);
953  } else
954  err = nfserrno(host_err);
955  return err;
956 }
957 
958 static void kill_suid(struct dentry *dentry)
959 {
960  struct iattr ia;
962 
963  mutex_lock(&dentry->d_inode->i_mutex);
964  notify_change(dentry, &ia);
965  mutex_unlock(&dentry->d_inode->i_mutex);
966 }
967 
968 /*
969  * Gathered writes: If another process is currently writing to the file,
970  * there's a high chance this is another nfsd (triggered by a bulk write
971  * from a client's biod). Rather than syncing the file with each write
972  * request, we sleep for 10 msec.
973  *
974  * I don't know if this roughly approximates C. Juszak's idea of
975  * gathered writes, but it's a nice and simple solution (IMHO), and it
976  * seems to work:-)
977  *
978  * Note: we do this only in the NFSv2 case, since v3 and higher have a
979  * better tool (separate unstable writes and commits) for solving this
980  * problem.
981  */
982 static int wait_for_concurrent_writes(struct file *file)
983 {
984  struct inode *inode = file->f_path.dentry->d_inode;
985  static ino_t last_ino;
986  static dev_t last_dev;
987  int err = 0;
988 
989  if (atomic_read(&inode->i_writecount) > 1
990  || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
991  dprintk("nfsd: write defer %d\n", task_pid_nr(current));
992  msleep(10);
993  dprintk("nfsd: write resume %d\n", task_pid_nr(current));
994  }
995 
996  if (inode->i_state & I_DIRTY) {
997  dprintk("nfsd: write sync %d\n", task_pid_nr(current));
998  err = vfs_fsync(file, 0);
999  }
1000  last_ino = inode->i_ino;
1001  last_dev = inode->i_sb->s_dev;
1002  return err;
1003 }
1004 
1005 static __be32
1006 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1007  loff_t offset, struct kvec *vec, int vlen,
1008  unsigned long *cnt, int *stablep)
1009 {
1010  struct svc_export *exp;
1011  struct dentry *dentry;
1012  struct inode *inode;
1013  mm_segment_t oldfs;
1014  __be32 err = 0;
1015  int host_err;
1016  int stable = *stablep;
1017  int use_wgather;
1018 
1019  dentry = file->f_path.dentry;
1020  inode = dentry->d_inode;
1021  exp = fhp->fh_export;
1022 
1023  /*
1024  * Request sync writes if
1025  * - the sync export option has been set, or
1026  * - the client requested O_SYNC behavior (NFSv3 feature).
1027  * - The file system doesn't support fsync().
1028  * When NFSv2 gathered writes have been configured for this volume,
1029  * flushing the data to disk is handled separately below.
1030  */
1031  use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
1032 
1033  if (!file->f_op->fsync) {/* COMMIT3 cannot work */
1034  stable = 2;
1035  *stablep = 2; /* FILE_SYNC */
1036  }
1037 
1038  if (!EX_ISSYNC(exp))
1039  stable = 0;
1040  if (stable && !use_wgather) {
1041  spin_lock(&file->f_lock);
1042  file->f_flags |= O_SYNC;
1043  spin_unlock(&file->f_lock);
1044  }
1045 
1046  /* Write the data. */
1047  oldfs = get_fs(); set_fs(KERNEL_DS);
1048  host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
1049  set_fs(oldfs);
1050  if (host_err < 0)
1051  goto out_nfserr;
1052  *cnt = host_err;
1053  nfsdstats.io_write += host_err;
1054  fsnotify_modify(file);
1055 
1056  /* clear setuid/setgid flag after write */
1057  if (inode->i_mode & (S_ISUID | S_ISGID))
1058  kill_suid(dentry);
1059 
1060  if (stable && use_wgather)
1061  host_err = wait_for_concurrent_writes(file);
1062 
1063 out_nfserr:
1064  dprintk("nfsd: write complete host_err=%d\n", host_err);
1065  if (host_err >= 0)
1066  err = 0;
1067  else
1068  err = nfserrno(host_err);
1069  return err;
1070 }
1071 
1072 /*
1073  * Read data from a file. count must contain the requested read count
1074  * on entry. On return, *count contains the number of bytes actually read.
1075  * N.B. After this call fhp needs an fh_put
1076  */
1077 __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
1078  loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
1079 {
1080  struct file *file;
1081  struct inode *inode;
1082  struct raparms *ra;
1083  __be32 err;
1084 
1085  err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
1086  if (err)
1087  return err;
1088 
1089  inode = file->f_path.dentry->d_inode;
1090 
1091  /* Get readahead parameters */
1092  ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
1093 
1094  if (ra && ra->p_set)
1095  file->f_ra = ra->p_ra;
1096 
1097  err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
1098 
1099  /* Write back readahead params */
1100  if (ra) {
1101  struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
1102  spin_lock(&rab->pb_lock);
1103  ra->p_ra = file->f_ra;
1104  ra->p_set = 1;
1105  ra->p_count--;
1106  spin_unlock(&rab->pb_lock);
1107  }
1108 
1109  nfsd_close(file);
1110  return err;
1111 }
1112 
1113 /* As above, but use the provided file descriptor. */
1114 __be32
1115 nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1116  loff_t offset, struct kvec *vec, int vlen,
1117  unsigned long *count)
1118 {
1119  __be32 err;
1120 
1121  if (file) {
1122  err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
1124  if (err)
1125  goto out;
1126  err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
1127  } else /* Note file may still be NULL in NFSv4 special stateid case: */
1128  err = nfsd_read(rqstp, fhp, offset, vec, vlen, count);
1129 out:
1130  return err;
1131 }
1132 
1133 /*
1134  * Write data to a file.
1135  * The stable flag requests synchronous writes.
1136  * N.B. After this call fhp needs an fh_put
1137  */
1138 __be32
1139 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1140  loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt,
1141  int *stablep)
1142 {
1143  __be32 err = 0;
1144 
1145  if (file) {
1146  err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
1148  if (err)
1149  goto out;
1150  err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
1151  stablep);
1152  } else {
1153  err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
1154  if (err)
1155  goto out;
1156 
1157  if (cnt)
1158  err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
1159  cnt, stablep);
1160  nfsd_close(file);
1161  }
1162 out:
1163  return err;
1164 }
1165 
#ifdef CONFIG_NFSD_V3
/*
 * Commit all pending writes to stable storage.
 *
 * Note: we only guarantee that data that lies within the range specified
 * by the 'offset' and 'count' parameters will be synced.
 *
 * Unfortunately we cannot lock the file to make sure we return full WCC
 * data to the client, as locking happens lower down in the filesystem.
 *
 * A @count of 0 means "from @offset to the end of the file".  Returns
 * nfserr_inval for a negative offset or a range that overflows, and
 * nfserr_notsupp if the filesystem reports -EINVAL for the sync.  The
 * sync itself is only issued for exports marked sync.
 */
__be32
nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
               loff_t offset, unsigned long count)
{
	struct file	*file;
	loff_t		end = LLONG_MAX;
	__be32		err = nfserr_inval;

	if (offset < 0)
		goto out;
	if (count != 0) {
		end = offset + (loff_t)count - 1;
		if (end < offset)
			goto out;
	}

	/* Committing data must not break leases held on the file */
	err = nfsd_open(rqstp, fhp, S_IFREG,
			NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
	if (err)
		goto out;
	if (EX_ISSYNC(fhp->fh_export)) {
		int err2 = vfs_fsync_range(file, offset, end, 0);

		if (err2 != -EINVAL)
			err = nfserrno(err2);
		else
			err = nfserr_notsupp;
	}

	nfsd_close(file);
out:
	return err;
}
#endif /* CONFIG_NFSD_V3 */
1210 
1211 static __be32
1212 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
1213  struct iattr *iap)
1214 {
1215  /*
1216  * Mode has already been set earlier in create:
1217  */
1218  iap->ia_valid &= ~ATTR_MODE;
1219  /*
1220  * Setting uid/gid works only for root. Irix appears to
1221  * send along the gid on create when it tries to implement
1222  * setgid directories via NFS:
1223  */
1224  if (current_fsuid() != 0)
1225  iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
1226  if (iap->ia_valid)
1227  return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
1228  return 0;
1229 }
1230 
1231 /* HPUX client sometimes creates a file in mode 000, and sets size to 0.
1232  * setting size to 0 may fail for some specific file systems by the permission
1233  * checking which requires WRITE permission but the mode is 000.
1234  * we ignore the resizing(to 0) on the just new created file, since the size is
1235  * 0 after file created.
1236  *
1237  * call this only after vfs_create() is called.
1238  * */
1239 static void
1240 nfsd_check_ignore_resizing(struct iattr *iap)
1241 {
1242  if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
1243  iap->ia_valid &= ~ATTR_SIZE;
1244 }
1245 
1246 /*
1247  * Create a file (regular, directory, device, fifo); UNIX sockets
1248  * not yet implemented.
1249  * If the response fh has been verified, the parent directory should
1250  * already be locked. Note that the parent directory is left locked.
1251  *
1252  * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
1253  */
1254 __be32
1255 nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1256  char *fname, int flen, struct iattr *iap,
1257  int type, dev_t rdev, struct svc_fh *resfhp)
1258 {
1259  struct dentry *dentry, *dchild = NULL;
1260  struct inode *dirp;
1261  __be32 err;
1262  __be32 err2;
1263  int host_err;
1264 
1265  err = nfserr_perm;
1266  if (!flen)
1267  goto out;
1268  err = nfserr_exist;
1269  if (isdotent(fname, flen))
1270  goto out;
1271 
1272  err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
1273  if (err)
1274  goto out;
1275 
1276  dentry = fhp->fh_dentry;
1277  dirp = dentry->d_inode;
1278 
1279  err = nfserr_notdir;
1280  if (!dirp->i_op->lookup)
1281  goto out;
1282  /*
1283  * Check whether the response file handle has been verified yet.
1284  * If it has, the parent directory should already be locked.
1285  */
1286  if (!resfhp->fh_dentry) {
1287  host_err = fh_want_write(fhp);
1288  if (host_err)
1289  goto out_nfserr;
1290 
1291  /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
1292  fh_lock_nested(fhp, I_MUTEX_PARENT);
1293  dchild = lookup_one_len(fname, dentry, flen);
1294  host_err = PTR_ERR(dchild);
1295  if (IS_ERR(dchild))
1296  goto out_nfserr;
1297  err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
1298  if (err)
1299  goto out;
1300  } else {
1301  /* called from nfsd_proc_create */
1302  dchild = dget(resfhp->fh_dentry);
1303  if (!fhp->fh_locked) {
1304  /* not actually possible */
1306  "nfsd_create: parent %s/%s not locked!\n",
1307  dentry->d_parent->d_name.name,
1308  dentry->d_name.name);
1309  err = nfserr_io;
1310  goto out;
1311  }
1312  }
1313  /*
1314  * Make sure the child dentry is still negative ...
1315  */
1316  err = nfserr_exist;
1317  if (dchild->d_inode) {
1318  dprintk("nfsd_create: dentry %s/%s not negative!\n",
1319  dentry->d_name.name, dchild->d_name.name);
1320  goto out;
1321  }
1322 
1323  if (!(iap->ia_valid & ATTR_MODE))
1324  iap->ia_mode = 0;
1325  iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;
1326 
1327  err = nfserr_inval;
1328  if (!S_ISREG(type) && !S_ISDIR(type) && !special_file(type)) {
1329  printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
1330  type);
1331  goto out;
1332  }
1333 
1334  /*
1335  * Get the dir op function pointer.
1336  */
1337  err = 0;
1338  host_err = 0;
1339  switch (type) {
1340  case S_IFREG:
1341  host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
1342  if (!host_err)
1343  nfsd_check_ignore_resizing(iap);
1344  break;
1345  case S_IFDIR:
1346  host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
1347  break;
1348  case S_IFCHR:
1349  case S_IFBLK:
1350  case S_IFIFO:
1351  case S_IFSOCK:
1352  host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
1353  break;
1354  }
1355  if (host_err < 0)
1356  goto out_nfserr;
1357 
1358  err = nfsd_create_setattr(rqstp, resfhp, iap);
1359 
1360  /*
1361  * nfsd_setattr already committed the child. Transactional filesystems
1362  * had a chance to commit changes for both parent and child
1363  * simultaneously making the following commit_metadata a noop.
1364  */
1365  err2 = nfserrno(commit_metadata(fhp));
1366  if (err2)
1367  err = err2;
1368  /*
1369  * Update the file handle to get the new inode info.
1370  */
1371  if (!err)
1372  err = fh_update(resfhp);
1373 out:
1374  if (dchild && !IS_ERR(dchild))
1375  dput(dchild);
1376  return err;
1377 
1378 out_nfserr:
1379  err = nfserrno(host_err);
1380  goto out;
1381 }
1382 
1383 #ifdef CONFIG_NFSD_V3
1384 
1385 static inline int nfsd_create_is_exclusive(int createmode)
1386 {
1387  return createmode == NFS3_CREATE_EXCLUSIVE
1388  || createmode == NFS4_CREATE_EXCLUSIVE4_1;
1389 }
1390 
1391 /*
1392  * NFSv3 and NFSv4 version of nfsd_create
1393  */
1394 __be32
1395 do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1396  char *fname, int flen, struct iattr *iap,
1397  struct svc_fh *resfhp, int createmode, u32 *verifier,
1398  bool *truncp, bool *created)
1399 {
1400  struct dentry *dentry, *dchild = NULL;
1401  struct inode *dirp;
1402  __be32 err;
1403  int host_err;
1404  __u32 v_mtime=0, v_atime=0;
1405 
1406  err = nfserr_perm;
1407  if (!flen)
1408  goto out;
1409  err = nfserr_exist;
1410  if (isdotent(fname, flen))
1411  goto out;
1412  if (!(iap->ia_valid & ATTR_MODE))
1413  iap->ia_mode = 0;
1414  err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
1415  if (err)
1416  goto out;
1417 
1418  dentry = fhp->fh_dentry;
1419  dirp = dentry->d_inode;
1420 
1421  /* Get all the sanity checks out of the way before
1422  * we lock the parent. */
1423  err = nfserr_notdir;
1424  if (!dirp->i_op->lookup)
1425  goto out;
1426 
1427  host_err = fh_want_write(fhp);
1428  if (host_err)
1429  goto out_nfserr;
1430 
1431  fh_lock_nested(fhp, I_MUTEX_PARENT);
1432 
1433  /*
1434  * Compose the response file handle.
1435  */
1436  dchild = lookup_one_len(fname, dentry, flen);
1437  host_err = PTR_ERR(dchild);
1438  if (IS_ERR(dchild))
1439  goto out_nfserr;
1440 
1441  /* If file doesn't exist, check for permissions to create one */
1442  if (!dchild->d_inode) {
1443  err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
1444  if (err)
1445  goto out;
1446  }
1447 
1448  err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
1449  if (err)
1450  goto out;
1451 
1452  if (nfsd_create_is_exclusive(createmode)) {
1453  /* solaris7 gets confused (bugid 4218508) if these have
1454  * the high bit set, so just clear the high bits. If this is
1455  * ever changed to use different attrs for storing the
1456  * verifier, then do_open_lookup() will also need to be fixed
1457  * accordingly.
1458  */
1459  v_mtime = verifier[0]&0x7fffffff;
1460  v_atime = verifier[1]&0x7fffffff;
1461  }
1462 
1463  if (dchild->d_inode) {
1464  err = 0;
1465 
1466  switch (createmode) {
1467  case NFS3_CREATE_UNCHECKED:
1468  if (! S_ISREG(dchild->d_inode->i_mode))
1469  goto out;
1470  else if (truncp) {
1471  /* in nfsv4, we need to treat this case a little
1472  * differently. we don't want to truncate the
1473  * file now; this would be wrong if the OPEN
1474  * fails for some other reason. furthermore,
1475  * if the size is nonzero, we should ignore it
1476  * according to spec!
1477  */
1478  *truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size;
1479  }
1480  else {
1481  iap->ia_valid &= ATTR_SIZE;
1482  goto set_attr;
1483  }
1484  break;
1485  case NFS3_CREATE_EXCLUSIVE:
1486  if ( dchild->d_inode->i_mtime.tv_sec == v_mtime
1487  && dchild->d_inode->i_atime.tv_sec == v_atime
1488  && dchild->d_inode->i_size == 0 )
1489  break;
1491  if ( dchild->d_inode->i_mtime.tv_sec == v_mtime
1492  && dchild->d_inode->i_atime.tv_sec == v_atime
1493  && dchild->d_inode->i_size == 0 )
1494  goto set_attr;
1495  /* fallthru */
1496  case NFS3_CREATE_GUARDED:
1497  err = nfserr_exist;
1498  }
1499  fh_drop_write(fhp);
1500  goto out;
1501  }
1502 
1503  host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
1504  if (host_err < 0) {
1505  fh_drop_write(fhp);
1506  goto out_nfserr;
1507  }
1508  if (created)
1509  *created = 1;
1510 
1511  nfsd_check_ignore_resizing(iap);
1512 
1513  if (nfsd_create_is_exclusive(createmode)) {
1514  /* Cram the verifier into atime/mtime */
1517  /* XXX someone who knows this better please fix it for nsec */
1518  iap->ia_mtime.tv_sec = v_mtime;
1519  iap->ia_atime.tv_sec = v_atime;
1520  iap->ia_mtime.tv_nsec = 0;
1521  iap->ia_atime.tv_nsec = 0;
1522  }
1523 
1524  set_attr:
1525  err = nfsd_create_setattr(rqstp, resfhp, iap);
1526 
1527  /*
1528  * nfsd_setattr already committed the child (and possibly also the parent).
1529  */
1530  if (!err)
1531  err = nfserrno(commit_metadata(fhp));
1532 
1533  /*
1534  * Update the filehandle to get the new inode info.
1535  */
1536  if (!err)
1537  err = fh_update(resfhp);
1538 
1539  out:
1540  fh_unlock(fhp);
1541  if (dchild && !IS_ERR(dchild))
1542  dput(dchild);
1543  fh_drop_write(fhp);
1544  return err;
1545 
1546  out_nfserr:
1547  err = nfserrno(host_err);
1548  goto out;
1549 }
1550 #endif /* CONFIG_NFSD_V3 */
1551 
1552 /*
1553  * Read a symlink. On entry, *lenp must contain the maximum path length that
1554  * fits into the buffer. On return, it contains the true length.
1555  * N.B. After this call fhp needs an fh_put
1556  */
1557 __be32
1558 nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
1559 {
1560  struct inode *inode;
1561  mm_segment_t oldfs;
1562  __be32 err;
1563  int host_err;
1564  struct path path;
1565 
1566  err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP);
1567  if (err)
1568  goto out;
1569 
1570  path.mnt = fhp->fh_export->ex_path.mnt;
1571  path.dentry = fhp->fh_dentry;
1572  inode = path.dentry->d_inode;
1573 
1574  err = nfserr_inval;
1575  if (!inode->i_op->readlink)
1576  goto out;
1577 
1578  touch_atime(&path);
1579  /* N.B. Why does this call need a get_fs()??
1580  * Remove the set_fs and watch the fireworks:-) --okir
1581  */
1582 
1583  oldfs = get_fs(); set_fs(KERNEL_DS);
1584  host_err = inode->i_op->readlink(path.dentry, (char __user *)buf, *lenp);
1585  set_fs(oldfs);
1586 
1587  if (host_err < 0)
1588  goto out_nfserr;
1589  *lenp = host_err;
1590  err = 0;
1591 out:
1592  return err;
1593 
1594 out_nfserr:
1595  err = nfserrno(host_err);
1596  goto out;
1597 }
1598 
1599 /*
1600  * Create a symlink and look up its inode
1601  * N.B. After this call _both_ fhp and resfhp need an fh_put
1602  */
1603 __be32
1604 nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1605  char *fname, int flen,
1606  char *path, int plen,
1607  struct svc_fh *resfhp,
1608  struct iattr *iap)
1609 {
1610  struct dentry *dentry, *dnew;
1611  __be32 err, cerr;
1612  int host_err;
1613 
1614  err = nfserr_noent;
1615  if (!flen || !plen)
1616  goto out;
1617  err = nfserr_exist;
1618  if (isdotent(fname, flen))
1619  goto out;
1620 
1621  err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
1622  if (err)
1623  goto out;
1624 
1625  host_err = fh_want_write(fhp);
1626  if (host_err)
1627  goto out_nfserr;
1628 
1629  fh_lock(fhp);
1630  dentry = fhp->fh_dentry;
1631  dnew = lookup_one_len(fname, dentry, flen);
1632  host_err = PTR_ERR(dnew);
1633  if (IS_ERR(dnew))
1634  goto out_nfserr;
1635 
1636  if (unlikely(path[plen] != 0)) {
1637  char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
1638  if (path_alloced == NULL)
1639  host_err = -ENOMEM;
1640  else {
1641  strncpy(path_alloced, path, plen);
1642  path_alloced[plen] = 0;
1643  host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced);
1644  kfree(path_alloced);
1645  }
1646  } else
1647  host_err = vfs_symlink(dentry->d_inode, dnew, path);
1648  err = nfserrno(host_err);
1649  if (!err)
1650  err = nfserrno(commit_metadata(fhp));
1651  fh_unlock(fhp);
1652 
1653  fh_drop_write(fhp);
1654 
1655  cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
1656  dput(dnew);
1657  if (err==0) err = cerr;
1658 out:
1659  return err;
1660 
1661 out_nfserr:
1662  err = nfserrno(host_err);
1663  goto out;
1664 }
1665 
1666 /*
1667  * Create a hardlink
1668  * N.B. After this call _both_ ffhp and tfhp need an fh_put
1669  */
1670 __be32
1671 nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1672  char *name, int len, struct svc_fh *tfhp)
1673 {
1674  struct dentry *ddir, *dnew, *dold;
1675  struct inode *dirp;
1676  __be32 err;
1677  int host_err;
1678 
1679  err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE);
1680  if (err)
1681  goto out;
1682  err = fh_verify(rqstp, tfhp, 0, NFSD_MAY_NOP);
1683  if (err)
1684  goto out;
1685  err = nfserr_isdir;
1686  if (S_ISDIR(tfhp->fh_dentry->d_inode->i_mode))
1687  goto out;
1688  err = nfserr_perm;
1689  if (!len)
1690  goto out;
1691  err = nfserr_exist;
1692  if (isdotent(name, len))
1693  goto out;
1694 
1695  host_err = fh_want_write(tfhp);
1696  if (host_err) {
1697  err = nfserrno(host_err);
1698  goto out;
1699  }
1700 
1701  fh_lock_nested(ffhp, I_MUTEX_PARENT);
1702  ddir = ffhp->fh_dentry;
1703  dirp = ddir->d_inode;
1704 
1705  dnew = lookup_one_len(name, ddir, len);
1706  host_err = PTR_ERR(dnew);
1707  if (IS_ERR(dnew))
1708  goto out_nfserr;
1709 
1710  dold = tfhp->fh_dentry;
1711 
1712  err = nfserr_noent;
1713  if (!dold->d_inode)
1714  goto out_dput;
1715  host_err = nfsd_break_lease(dold->d_inode);
1716  if (host_err) {
1717  err = nfserrno(host_err);
1718  goto out_dput;
1719  }
1720  host_err = vfs_link(dold, dirp, dnew);
1721  if (!host_err) {
1722  err = nfserrno(commit_metadata(ffhp));
1723  if (!err)
1724  err = nfserrno(commit_metadata(tfhp));
1725  } else {
1726  if (host_err == -EXDEV && rqstp->rq_vers == 2)
1727  err = nfserr_acces;
1728  else
1729  err = nfserrno(host_err);
1730  }
1731 out_dput:
1732  dput(dnew);
1733 out_unlock:
1734  fh_unlock(ffhp);
1735  fh_drop_write(tfhp);
1736 out:
1737  return err;
1738 
1739 out_nfserr:
1740  err = nfserrno(host_err);
1741  goto out_unlock;
1742 }
1743 
1744 /*
1745  * Rename a file
1746  * N.B. After this call _both_ ffhp and tfhp need an fh_put
1747  */
1748 __be32
1749 nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1750  struct svc_fh *tfhp, char *tname, int tlen)
1751 {
1752  struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap;
1753  struct inode *fdir, *tdir;
1754  __be32 err;
1755  int host_err;
1756 
1757  err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
1758  if (err)
1759  goto out;
1760  err = fh_verify(rqstp, tfhp, S_IFDIR, NFSD_MAY_CREATE);
1761  if (err)
1762  goto out;
1763 
1764  fdentry = ffhp->fh_dentry;
1765  fdir = fdentry->d_inode;
1766 
1767  tdentry = tfhp->fh_dentry;
1768  tdir = tdentry->d_inode;
1769 
1770  err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
1771  if (ffhp->fh_export != tfhp->fh_export)
1772  goto out;
1773 
1774  err = nfserr_perm;
1775  if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
1776  goto out;
1777 
1778  host_err = fh_want_write(ffhp);
1779  if (host_err) {
1780  err = nfserrno(host_err);
1781  goto out;
1782  }
1783 
1784  /* cannot use fh_lock as we need deadlock protective ordering
1785  * so do it by hand */
1786  trap = lock_rename(tdentry, fdentry);
1787  ffhp->fh_locked = tfhp->fh_locked = 1;
1788  fill_pre_wcc(ffhp);
1789  fill_pre_wcc(tfhp);
1790 
1791  odentry = lookup_one_len(fname, fdentry, flen);
1792  host_err = PTR_ERR(odentry);
1793  if (IS_ERR(odentry))
1794  goto out_nfserr;
1795 
1796  host_err = -ENOENT;
1797  if (!odentry->d_inode)
1798  goto out_dput_old;
1799  host_err = -EINVAL;
1800  if (odentry == trap)
1801  goto out_dput_old;
1802 
1803  ndentry = lookup_one_len(tname, tdentry, tlen);
1804  host_err = PTR_ERR(ndentry);
1805  if (IS_ERR(ndentry))
1806  goto out_dput_old;
1807  host_err = -ENOTEMPTY;
1808  if (ndentry == trap)
1809  goto out_dput_new;
1810 
1811  host_err = -EXDEV;
1812  if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
1813  goto out_dput_new;
1814 
1815  host_err = nfsd_break_lease(odentry->d_inode);
1816  if (host_err)
1817  goto out_dput_new;
1818  if (ndentry->d_inode) {
1819  host_err = nfsd_break_lease(ndentry->d_inode);
1820  if (host_err)
1821  goto out_dput_new;
1822  }
1823  host_err = vfs_rename(fdir, odentry, tdir, ndentry);
1824  if (!host_err) {
1825  host_err = commit_metadata(tfhp);
1826  if (!host_err)
1827  host_err = commit_metadata(ffhp);
1828  }
1829  out_dput_new:
1830  dput(ndentry);
1831  out_dput_old:
1832  dput(odentry);
1833  out_nfserr:
1834  err = nfserrno(host_err);
1835 
1836  /* we cannot reply on fh_unlock on the two filehandles,
1837  * as that would do the wrong thing if the two directories
1838  * were the same, so again we do it by hand
1839  */
1840  fill_post_wcc(ffhp);
1841  fill_post_wcc(tfhp);
1842  unlock_rename(tdentry, fdentry);
1843  ffhp->fh_locked = tfhp->fh_locked = 0;
1844  fh_drop_write(ffhp);
1845 
1846 out:
1847  return err;
1848 }
1849 
1850 /*
1851  * Unlink a file or directory
1852  * N.B. After this call fhp needs an fh_put
1853  */
1854 __be32
1855 nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1856  char *fname, int flen)
1857 {
1858  struct dentry *dentry, *rdentry;
1859  struct inode *dirp;
1860  __be32 err;
1861  int host_err;
1862 
1863  err = nfserr_acces;
1864  if (!flen || isdotent(fname, flen))
1865  goto out;
1866  err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_REMOVE);
1867  if (err)
1868  goto out;
1869 
1870  host_err = fh_want_write(fhp);
1871  if (host_err)
1872  goto out_nfserr;
1873 
1874  fh_lock_nested(fhp, I_MUTEX_PARENT);
1875  dentry = fhp->fh_dentry;
1876  dirp = dentry->d_inode;
1877 
1878  rdentry = lookup_one_len(fname, dentry, flen);
1879  host_err = PTR_ERR(rdentry);
1880  if (IS_ERR(rdentry))
1881  goto out_nfserr;
1882 
1883  if (!rdentry->d_inode) {
1884  dput(rdentry);
1885  err = nfserr_noent;
1886  goto out;
1887  }
1888 
1889  if (!type)
1890  type = rdentry->d_inode->i_mode & S_IFMT;
1891 
1892  host_err = nfsd_break_lease(rdentry->d_inode);
1893  if (host_err)
1894  goto out_put;
1895  if (type != S_IFDIR)
1896  host_err = vfs_unlink(dirp, rdentry);
1897  else
1898  host_err = vfs_rmdir(dirp, rdentry);
1899  if (!host_err)
1900  host_err = commit_metadata(fhp);
1901 out_put:
1902  dput(rdentry);
1903 
1904 out_nfserr:
1905  err = nfserrno(host_err);
1906 out:
1907  return err;
1908 }
1909 
1910 /*
1911  * We do this buffering because we must not call back into the file
1912  * system's ->lookup() method from the filldir callback. That may well
1913  * deadlock a number of file systems.
1914  *
1915  * This is based heavily on the implementation of same in XFS.
1916  */
1919  loff_t offset;
1920  int namlen;
1921  unsigned int d_type;
1922  char name[];
1923 };
1924 
1926  char *dirent;
1927  size_t used;
1928  int full;
1929 };
1930 
1931 static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen,
1932  loff_t offset, u64 ino, unsigned int d_type)
1933 {
1934  struct readdir_data *buf = __buf;
1935  struct buffered_dirent *de = (void *)(buf->dirent + buf->used);
1936  unsigned int reclen;
1937 
1938  reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64));
1939  if (buf->used + reclen > PAGE_SIZE) {
1940  buf->full = 1;
1941  return -EINVAL;
1942  }
1943 
1944  de->namlen = namlen;
1945  de->offset = offset;
1946  de->ino = ino;
1947  de->d_type = d_type;
1948  memcpy(de->name, name, namlen);
1949  buf->used += reclen;
1950 
1951  return 0;
1952 }
1953 
1954 static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
1955  struct readdir_cd *cdp, loff_t *offsetp)
1956 {
1957  struct readdir_data buf;
1958  struct buffered_dirent *de;
1959  int host_err;
1960  int size;
1961  loff_t offset;
1962 
1963  buf.dirent = (void *)__get_free_page(GFP_KERNEL);
1964  if (!buf.dirent)
1965  return nfserrno(-ENOMEM);
1966 
1967  offset = *offsetp;
1968 
1969  while (1) {
1970  struct inode *dir_inode = file->f_path.dentry->d_inode;
1971  unsigned int reclen;
1972 
1973  cdp->err = nfserr_eof; /* will be cleared on successful read */
1974  buf.used = 0;
1975  buf.full = 0;
1976 
1977  host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf);
1978  if (buf.full)
1979  host_err = 0;
1980 
1981  if (host_err < 0)
1982  break;
1983 
1984  size = buf.used;
1985 
1986  if (!size)
1987  break;
1988 
1989  /*
1990  * Various filldir functions may end up calling back into
1991  * lookup_one_len() and the file system's ->lookup() method.
1992  * These expect i_mutex to be held, as it would within readdir.
1993  */
1994  host_err = mutex_lock_killable(&dir_inode->i_mutex);
1995  if (host_err)
1996  break;
1997 
1998  de = (struct buffered_dirent *)buf.dirent;
1999  while (size > 0) {
2000  offset = de->offset;
2001 
2002  if (func(cdp, de->name, de->namlen, de->offset,
2003  de->ino, de->d_type))
2004  break;
2005 
2006  if (cdp->err != nfs_ok)
2007  break;
2008 
2009  reclen = ALIGN(sizeof(*de) + de->namlen,
2010  sizeof(u64));
2011  size -= reclen;
2012  de = (struct buffered_dirent *)((char *)de + reclen);
2013  }
2014  mutex_unlock(&dir_inode->i_mutex);
2015  if (size > 0) /* We bailed out early */
2016  break;
2017 
2018  offset = vfs_llseek(file, 0, SEEK_CUR);
2019  }
2020 
2021  free_page((unsigned long)(buf.dirent));
2022 
2023  if (host_err)
2024  return nfserrno(host_err);
2025 
2026  *offsetp = offset;
2027  return cdp->err;
2028 }
2029 
2030 /*
2031  * Read entries from a directory.
2032  * The NFSv3/4 verifier we ignore for now.
2033  */
2034 __be32
2035 nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
2036  struct readdir_cd *cdp, filldir_t func)
2037 {
2038  __be32 err;
2039  struct file *file;
2040  loff_t offset = *offsetp;
2041  int may_flags = NFSD_MAY_READ;
2042 
2043  /* NFSv2 only supports 32 bit cookies */
2044  if (rqstp->rq_vers > 2)
2045  may_flags |= NFSD_MAY_64BIT_COOKIE;
2046 
2047  err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file);
2048  if (err)
2049  goto out;
2050 
2051  offset = vfs_llseek(file, offset, SEEK_SET);
2052  if (offset < 0) {
2053  err = nfserrno((int)offset);
2054  goto out_close;
2055  }
2056 
2057  err = nfsd_buffered_readdir(file, func, cdp, offsetp);
2058 
2059  if (err == nfserr_eof || err == nfserr_toosmall)
2060  err = nfs_ok; /* can still be found in ->err */
2061 out_close:
2062  nfsd_close(file);
2063 out:
2064  return err;
2065 }
2066 
2067 /*
2068  * Get file system stats
2069  * N.B. After this call fhp needs an fh_put
2070  */
2071 __be32
2072 nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access)
2073 {
2074  __be32 err;
2075 
2076  err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access);
2077  if (!err) {
2078  struct path path = {
2079  .mnt = fhp->fh_export->ex_path.mnt,
2080  .dentry = fhp->fh_dentry,
2081  };
2082  if (vfs_statfs(&path, stat))
2083  err = nfserr_io;
2084  }
2085  return err;
2086 }
2087 
2088 static int exp_rdonly(struct svc_rqst *rqstp, struct svc_export *exp)
2089 {
2090  return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY;
2091 }
2092 
2093 /*
2094  * Check for a user's access permissions to this inode.
2095  */
2096 __be32
2097 nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
2098  struct dentry *dentry, int acc)
2099 {
2100  struct inode *inode = dentry->d_inode;
2101  int err;
2102 
2103  if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP)
2104  return 0;
2105 #if 0
2106  dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
2107  acc,
2108  (acc & NFSD_MAY_READ)? " read" : "",
2109  (acc & NFSD_MAY_WRITE)? " write" : "",
2110  (acc & NFSD_MAY_EXEC)? " exec" : "",
2111  (acc & NFSD_MAY_SATTR)? " sattr" : "",
2112  (acc & NFSD_MAY_TRUNC)? " trunc" : "",
2113  (acc & NFSD_MAY_LOCK)? " lock" : "",
2114  (acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "",
2115  inode->i_mode,
2116  IS_IMMUTABLE(inode)? " immut" : "",
2117  IS_APPEND(inode)? " append" : "",
2118  __mnt_is_readonly(exp->ex_path.mnt)? " ro" : "");
2119  dprintk(" owner %d/%d user %d/%d\n",
2120  inode->i_uid, inode->i_gid, current_fsuid(), current_fsgid());
2121 #endif
2122 
2123  /* Normally we reject any write/sattr etc access on a read-only file
2124  * system. But if it is IRIX doing check on write-access for a
2125  * device special file, we ignore rofs.
2126  */
2127  if (!(acc & NFSD_MAY_LOCAL_ACCESS))
2128  if (acc & (NFSD_MAY_WRITE | NFSD_MAY_SATTR | NFSD_MAY_TRUNC)) {
2129  if (exp_rdonly(rqstp, exp) ||
2130  __mnt_is_readonly(exp->ex_path.mnt))
2131  return nfserr_rofs;
2132  if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode))
2133  return nfserr_perm;
2134  }
2135  if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode))
2136  return nfserr_perm;
2137 
2138  if (acc & NFSD_MAY_LOCK) {
2139  /* If we cannot rely on authentication in NLM requests,
2140  * just allow locks, otherwise require read permission, or
2141  * ownership
2142  */
2143  if (exp->ex_flags & NFSEXP_NOAUTHNLM)
2144  return 0;
2145  else
2146  acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE;
2147  }
2148  /*
2149  * The file owner always gets access permission for accesses that
2150  * would normally be checked at open time. This is to make
2151  * file access work even when the client has done a fchmod(fd, 0).
2152  *
2153  * However, `cp foo bar' should fail nevertheless when bar is
2154  * readonly. A sensible way to do this might be to reject all
2155  * attempts to truncate a read-only file, because a creat() call
2156  * always implies file truncation.
2157  * ... but this isn't really fair. A process may reasonably call
2158  * ftruncate on an open file descriptor on a file with perm 000.
2159  * We must trust the client to do permission checking - using "ACCESS"
2160  * with NFSv3.
2161  */
2162  if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
2163  inode->i_uid == current_fsuid())
2164  return 0;
2165 
2166  /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
2167  err = inode_permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC));
2168 
2169  /* Allow read access to binaries even when mode 111 */
2170  if (err == -EACCES && S_ISREG(inode->i_mode) &&
2171  (acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) ||
2172  acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC)))
2173  err = inode_permission(inode, MAY_EXEC);
2174 
2175  return err? nfserrno(err) : 0;
2176 }
2177 
2178 void
2180 {
2181  struct raparms *raparm, *last_raparm;
2182  unsigned int i;
2183 
2184  dprintk("nfsd: freeing readahead buffers.\n");
2185 
2186  for (i = 0; i < RAPARM_HASH_SIZE; i++) {
2187  raparm = raparm_hash[i].pb_head;
2188  while(raparm) {
2189  last_raparm = raparm;
2190  raparm = raparm->p_next;
2191  kfree(last_raparm);
2192  }
2193  raparm_hash[i].pb_head = NULL;
2194  }
2195 }
2196 /*
2197  * Initialize readahead param cache
2198  */
2199 int
2200 nfsd_racache_init(int cache_size)
2201 {
2202  int i;
2203  int j = 0;
2204  int nperbucket;
2205  struct raparms **raparm = NULL;
2206 
2207 
2208  if (raparm_hash[0].pb_head)
2209  return 0;
2210  nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
2211  if (nperbucket < 2)
2212  nperbucket = 2;
2213  cache_size = nperbucket * RAPARM_HASH_SIZE;
2214 
2215  dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
2216 
2217  for (i = 0; i < RAPARM_HASH_SIZE; i++) {
2218  spin_lock_init(&raparm_hash[i].pb_lock);
2219 
2220  raparm = &raparm_hash[i].pb_head;
2221  for (j = 0; j < nperbucket; j++) {
2222  *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
2223  if (!*raparm)
2224  goto out_nomem;
2225  raparm = &(*raparm)->p_next;
2226  }
2227  *raparm = NULL;
2228  }
2229 
2230  nfsdstats.ra_size = cache_size;
2231  return 0;
2232 
2233 out_nomem:
2234  dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
2236  return -ENOMEM;
2237 }
2238 
2239 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
2240 struct posix_acl *
2241 nfsd_get_posix_acl(struct svc_fh *fhp, int type)
2242 {
2243  struct inode *inode = fhp->fh_dentry->d_inode;
2244  char *name;
2245  void *value = NULL;
2246  ssize_t size;
2247  struct posix_acl *acl;
2248 
2249  if (!IS_POSIXACL(inode))
2250  return ERR_PTR(-EOPNOTSUPP);
2251 
2252  switch (type) {
2253  case ACL_TYPE_ACCESS:
2254  name = POSIX_ACL_XATTR_ACCESS;
2255  break;
2256  case ACL_TYPE_DEFAULT:
2257  name = POSIX_ACL_XATTR_DEFAULT;
2258  break;
2259  default:
2260  return ERR_PTR(-EOPNOTSUPP);
2261  }
2262 
2263  size = nfsd_getxattr(fhp->fh_dentry, name, &value);
2264  if (size < 0)
2265  return ERR_PTR(size);
2266 
2267  acl = posix_acl_from_xattr(&init_user_ns, value, size);
2268  kfree(value);
2269  return acl;
2270 }
2271 
2272 int
2273 nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
2274 {
2275  struct inode *inode = fhp->fh_dentry->d_inode;
2276  char *name;
2277  void *value = NULL;
2278  size_t size;
2279  int error;
2280 
2281  if (!IS_POSIXACL(inode) ||
2282  !inode->i_op->setxattr || !inode->i_op->removexattr)
2283  return -EOPNOTSUPP;
2284  switch(type) {
2285  case ACL_TYPE_ACCESS:
2286  name = POSIX_ACL_XATTR_ACCESS;
2287  break;
2288  case ACL_TYPE_DEFAULT:
2289  name = POSIX_ACL_XATTR_DEFAULT;
2290  break;
2291  default:
2292  return -EOPNOTSUPP;
2293  }
2294 
2295  if (acl && acl->a_count) {
2296  size = posix_acl_xattr_size(acl->a_count);
2297  value = kmalloc(size, GFP_KERNEL);
2298  if (!value)
2299  return -ENOMEM;
2300  error = posix_acl_to_xattr(&init_user_ns, acl, value, size);
2301  if (error < 0)
2302  goto getout;
2303  size = error;
2304  } else
2305  size = 0;
2306 
2307  error = fh_want_write(fhp);
2308  if (error)
2309  goto getout;
2310  if (size)
2311  error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0);
2312  else {
2313  if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT)
2314  error = 0;
2315  else {
2316  error = vfs_removexattr(fhp->fh_dentry, name);
2317  if (error == -ENODATA)
2318  error = 0;
2319  }
2320  }
2321  fh_drop_write(fhp);
2322 
2323 getout:
2324  kfree(value);
2325  return error;
2326 }
2327 #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */