Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
read_write.c
Go to the documentation of this file.
1 /*
2  * linux/fs/read_write.c
3  *
4  * Copyright (C) 1991, 1992 Linus Torvalds
5  */
6 
7 #include <linux/slab.h>
8 #include <linux/stat.h>
9 #include <linux/fcntl.h>
10 #include <linux/file.h>
11 #include <linux/uio.h>
12 #include <linux/fsnotify.h>
13 #include <linux/security.h>
14 #include <linux/export.h>
15 #include <linux/syscalls.h>
16 #include <linux/pagemap.h>
17 #include <linux/splice.h>
18 #include "read_write.h"
19 
20 #include <asm/uaccess.h>
21 #include <asm/unistd.h>
22 
24  .llseek = generic_file_llseek,
25  .read = do_sync_read,
26  .aio_read = generic_file_aio_read,
28  .splice_read = generic_file_splice_read,
29 };
30 
31 EXPORT_SYMBOL(generic_ro_fops);
32 
33 static inline int unsigned_offsets(struct file *file)
34 {
35  return file->f_mode & FMODE_UNSIGNED_OFFSET;
36 }
37 
38 static loff_t lseek_execute(struct file *file, struct inode *inode,
39  loff_t offset, loff_t maxsize)
40 {
41  if (offset < 0 && !unsigned_offsets(file))
42  return -EINVAL;
43  if (offset > maxsize)
44  return -EINVAL;
45 
46  if (offset != file->f_pos) {
47  file->f_pos = offset;
48  file->f_version = 0;
49  }
50  return offset;
51 }
52 
69 loff_t
70 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
71  loff_t maxsize, loff_t eof)
72 {
73  struct inode *inode = file->f_mapping->host;
74 
75  switch (origin) {
76  case SEEK_END:
77  offset += eof;
78  break;
79  case SEEK_CUR:
80  /*
81  * Here we special-case the lseek(fd, 0, SEEK_CUR)
82  * position-querying operation. Avoid rewriting the "same"
83  * f_pos value back to the file because a concurrent read(),
84  * write() or lseek() might have altered it
85  */
86  if (offset == 0)
87  return file->f_pos;
88  /*
89  * f_lock protects against read/modify/write race with other
90  * SEEK_CURs. Note that parallel writes and reads behave
91  * like SEEK_SET.
92  */
93  spin_lock(&file->f_lock);
94  offset = lseek_execute(file, inode, file->f_pos + offset,
95  maxsize);
96  spin_unlock(&file->f_lock);
97  return offset;
98  case SEEK_DATA:
99  /*
100  * In the generic case the entire file is data, so as long as
101  * offset isn't at the end of the file then the offset is data.
102  */
103  if (offset >= eof)
104  return -ENXIO;
105  break;
106  case SEEK_HOLE:
107  /*
108  * There is a virtual hole at the end of the file, so as long as
109  * offset isn't i_size or larger, return i_size.
110  */
111  if (offset >= eof)
112  return -ENXIO;
113  offset = eof;
114  break;
115  }
116 
117  return lseek_execute(file, inode, offset, maxsize);
118 }
120 
131 loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
132 {
133  struct inode *inode = file->f_mapping->host;
134 
135  return generic_file_llseek_size(file, offset, origin,
136  inode->i_sb->s_maxbytes,
137  i_size_read(inode));
138 }
140 
152 loff_t noop_llseek(struct file *file, loff_t offset, int origin)
153 {
154  return file->f_pos;
155 }
157 
158 loff_t no_llseek(struct file *file, loff_t offset, int origin)
159 {
160  return -ESPIPE;
161 }
163 
164 loff_t default_llseek(struct file *file, loff_t offset, int origin)
165 {
166  struct inode *inode = file->f_path.dentry->d_inode;
167  loff_t retval;
168 
169  mutex_lock(&inode->i_mutex);
170  switch (origin) {
171  case SEEK_END:
172  offset += i_size_read(inode);
173  break;
174  case SEEK_CUR:
175  if (offset == 0) {
176  retval = file->f_pos;
177  goto out;
178  }
179  offset += file->f_pos;
180  break;
181  case SEEK_DATA:
182  /*
183  * In the generic case the entire file is data, so as
184  * long as offset isn't at the end of the file then the
185  * offset is data.
186  */
187  if (offset >= inode->i_size) {
188  retval = -ENXIO;
189  goto out;
190  }
191  break;
192  case SEEK_HOLE:
193  /*
194  * There is a virtual hole at the end of the file, so
195  * as long as offset isn't i_size or larger, return
196  * i_size.
197  */
198  if (offset >= inode->i_size) {
199  retval = -ENXIO;
200  goto out;
201  }
202  offset = inode->i_size;
203  break;
204  }
205  retval = -EINVAL;
206  if (offset >= 0 || unsigned_offsets(file)) {
207  if (offset != file->f_pos) {
208  file->f_pos = offset;
209  file->f_version = 0;
210  }
211  retval = offset;
212  }
213 out:
214  mutex_unlock(&inode->i_mutex);
215  return retval;
216 }
218 
219 loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
220 {
221  loff_t (*fn)(struct file *, loff_t, int);
222 
223  fn = no_llseek;
224  if (file->f_mode & FMODE_LSEEK) {
225  if (file->f_op && file->f_op->llseek)
226  fn = file->f_op->llseek;
227  }
228  return fn(file, offset, origin);
229 }
231 
232 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
233 {
234  off_t retval;
235  struct fd f = fdget(fd);
236  if (!f.file)
237  return -EBADF;
238 
239  retval = -EINVAL;
240  if (origin <= SEEK_MAX) {
241  loff_t res = vfs_llseek(f.file, offset, origin);
242  retval = res;
243  if (res != (loff_t)retval)
244  retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
245  }
246  fdput(f);
247  return retval;
248 }
249 
#ifdef __ARCH_WANT_SYS_LLSEEK
/*
 * llseek(2): seek using a 64-bit offset passed as two halves and store the
 * resulting position through @result.
 *
 * Returns 0 on success, -EBADF for an invalid descriptor, -EINVAL for an
 * unknown @origin, the (negative) error from vfs_llseek(), or -EFAULT when
 * the result cannot be copied to user space.
 */
SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
		unsigned long, offset_low, loff_t __user *, result,
		unsigned int, origin)
{
	struct fd f = fdget(fd);
	loff_t offset;
	int retval;

	if (!f.file)
		return -EBADF;

	if (origin > SEEK_MAX) {
		retval = -EINVAL;
		goto out_putf;
	}

	offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
			    origin);

	if (offset < 0)
		retval = (int)offset;
	else if (copy_to_user(result, &offset, sizeof(offset)))
		retval = -EFAULT;
	else
		retval = 0;

out_putf:
	fdput(f);
	return retval;
}
#endif
280 
281 
282 /*
283  * rw_verify_area doesn't like huge counts. We limit
284  * them to something that fits in "int" so that others
285  * won't have to do range checks all the time.
286  */
287 int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
288 {
289  struct inode *inode;
290  loff_t pos;
291  int retval = -EINVAL;
292 
293  inode = file->f_path.dentry->d_inode;
294  if (unlikely((ssize_t) count < 0))
295  return retval;
296  pos = *ppos;
297  if (unlikely(pos < 0)) {
298  if (!unsigned_offsets(file))
299  return retval;
300  if (count >= -pos) /* both values are in 0..LLONG_MAX */
301  return -EOVERFLOW;
302  } else if (unlikely((loff_t) (pos + count) < 0)) {
303  if (!unsigned_offsets(file))
304  return retval;
305  }
306 
307  if (unlikely(inode->i_flock && mandatory_lock(inode))) {
308  retval = locks_mandatory_area(
309  read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
310  inode, file, pos, count);
311  if (retval < 0)
312  return retval;
313  }
314  retval = security_file_permission(file,
315  read_write == READ ? MAY_READ : MAY_WRITE);
316  if (retval)
317  return retval;
318  return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
319 }
320 
321 static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
322 {
324  if (!kiocbIsKicked(iocb))
325  schedule();
326  else
327  kiocbClearKicked(iocb);
329 }
330 
331 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
332 {
333  struct iovec iov = { .iov_base = buf, .iov_len = len };
334  struct kiocb kiocb;
335  ssize_t ret;
336 
337  init_sync_kiocb(&kiocb, filp);
338  kiocb.ki_pos = *ppos;
339  kiocb.ki_left = len;
340  kiocb.ki_nbytes = len;
341 
342  for (;;) {
343  ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
344  if (ret != -EIOCBRETRY)
345  break;
346  wait_on_retry_sync_kiocb(&kiocb);
347  }
348 
349  if (-EIOCBQUEUED == ret)
350  ret = wait_on_sync_kiocb(&kiocb);
351  *ppos = kiocb.ki_pos;
352  return ret;
353 }
354 
356 
357 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
358 {
359  ssize_t ret;
360 
361  if (!(file->f_mode & FMODE_READ))
362  return -EBADF;
363  if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
364  return -EINVAL;
365  if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
366  return -EFAULT;
367 
368  ret = rw_verify_area(READ, file, pos, count);
369  if (ret >= 0) {
370  count = ret;
371  if (file->f_op->read)
372  ret = file->f_op->read(file, buf, count, pos);
373  else
374  ret = do_sync_read(file, buf, count, pos);
375  if (ret > 0) {
376  fsnotify_access(file);
377  add_rchar(current, ret);
378  }
379  inc_syscr(current);
380  }
381 
382  return ret;
383 }
384 
386 
387 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
388 {
389  struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
390  struct kiocb kiocb;
391  ssize_t ret;
392 
393  init_sync_kiocb(&kiocb, filp);
394  kiocb.ki_pos = *ppos;
395  kiocb.ki_left = len;
396  kiocb.ki_nbytes = len;
397 
398  for (;;) {
399  ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
400  if (ret != -EIOCBRETRY)
401  break;
402  wait_on_retry_sync_kiocb(&kiocb);
403  }
404 
405  if (-EIOCBQUEUED == ret)
406  ret = wait_on_sync_kiocb(&kiocb);
407  *ppos = kiocb.ki_pos;
408  return ret;
409 }
410 
412 
413 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
414 {
415  ssize_t ret;
416 
417  if (!(file->f_mode & FMODE_WRITE))
418  return -EBADF;
419  if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
420  return -EINVAL;
421  if (unlikely(!access_ok(VERIFY_READ, buf, count)))
422  return -EFAULT;
423 
424  ret = rw_verify_area(WRITE, file, pos, count);
425  if (ret >= 0) {
426  count = ret;
427  if (file->f_op->write)
428  ret = file->f_op->write(file, buf, count, pos);
429  else
430  ret = do_sync_write(file, buf, count, pos);
431  if (ret > 0) {
432  fsnotify_modify(file);
433  add_wchar(current, ret);
434  }
435  inc_syscw(current);
436  }
437 
438  return ret;
439 }
440 
442 
443 static inline loff_t file_pos_read(struct file *file)
444 {
445  return file->f_pos;
446 }
447 
448 static inline void file_pos_write(struct file *file, loff_t pos)
449 {
450  file->f_pos = pos;
451 }
452 
453 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
454 {
455  struct fd f = fdget(fd);
456  ssize_t ret = -EBADF;
457 
458  if (f.file) {
459  loff_t pos = file_pos_read(f.file);
460  ret = vfs_read(f.file, buf, count, &pos);
461  file_pos_write(f.file, pos);
462  fdput(f);
463  }
464  return ret;
465 }
466 
467 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
468  size_t, count)
469 {
470  struct fd f = fdget(fd);
471  ssize_t ret = -EBADF;
472 
473  if (f.file) {
474  loff_t pos = file_pos_read(f.file);
475  ret = vfs_write(f.file, buf, count, &pos);
476  file_pos_write(f.file, pos);
477  fdput(f);
478  }
479 
480  return ret;
481 }
482 
483 SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
484  size_t count, loff_t pos)
485 {
486  struct fd f;
487  ssize_t ret = -EBADF;
488 
489  if (pos < 0)
490  return -EINVAL;
491 
492  f = fdget(fd);
493  if (f.file) {
494  ret = -ESPIPE;
495  if (f.file->f_mode & FMODE_PREAD)
496  ret = vfs_read(f.file, buf, count, &pos);
497  fdput(f);
498  }
499 
500  return ret;
501 }
502 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
503 asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos)
504 {
505  return SYSC_pread64((unsigned int) fd, (char __user *) buf,
506  (size_t) count, pos);
507 }
508 SYSCALL_ALIAS(sys_pread64, SyS_pread64);
509 #endif
510 
511 SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
512  size_t count, loff_t pos)
513 {
514  struct fd f;
515  ssize_t ret = -EBADF;
516 
517  if (pos < 0)
518  return -EINVAL;
519 
520  f = fdget(fd);
521  if (f.file) {
522  ret = -ESPIPE;
523  if (f.file->f_mode & FMODE_PWRITE)
524  ret = vfs_write(f.file, buf, count, &pos);
525  fdput(f);
526  }
527 
528  return ret;
529 }
530 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
531 asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos)
532 {
533  return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf,
534  (size_t) count, pos);
535 }
536 SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64);
537 #endif
538 
/*
 * Reduce an iovec's length in-place so that it describes at most @to bytes.
 * The segment in which the running total reaches @to is trimmed; later
 * segments are left untouched.  Returns the resulting number of segments.
 */
unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
{
	unsigned long used;
	size_t total = 0;

	for (used = 0; used < nr_segs; used++) {
		struct iovec *cur = &iov[used];

		if (total + cur->iov_len >= to) {
			/* This segment reaches the limit: trim it and stop. */
			cur->iov_len = to - total;
			return used + 1;
		}
		total += cur->iov_len;
	}

	return used;
}
559 
560 ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
561  unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
562 {
563  struct kiocb kiocb;
564  ssize_t ret;
565 
566  init_sync_kiocb(&kiocb, filp);
567  kiocb.ki_pos = *ppos;
568  kiocb.ki_left = len;
569  kiocb.ki_nbytes = len;
570 
571  for (;;) {
572  ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
573  if (ret != -EIOCBRETRY)
574  break;
575  wait_on_retry_sync_kiocb(&kiocb);
576  }
577 
578  if (ret == -EIOCBQUEUED)
579  ret = wait_on_sync_kiocb(&kiocb);
580  *ppos = kiocb.ki_pos;
581  return ret;
582 }
583 
584 /* Do it by hand, with file-ops */
585 ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
586  unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
587 {
588  struct iovec *vector = iov;
589  ssize_t ret = 0;
590 
591  while (nr_segs > 0) {
592  void __user *base;
593  size_t len;
594  ssize_t nr;
595 
596  base = vector->iov_base;
597  len = vector->iov_len;
598  vector++;
599  nr_segs--;
600 
601  nr = fn(filp, base, len, ppos);
602 
603  if (nr < 0) {
604  if (!ret)
605  ret = nr;
606  break;
607  }
608  ret += nr;
609  if (nr != len)
610  break;
611  }
612 
613  return ret;
614 }
615 
616 /* A write operation does a read from user space and vice versa */
617 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
618 
619 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
620  unsigned long nr_segs, unsigned long fast_segs,
621  struct iovec *fast_pointer,
622  struct iovec **ret_pointer)
623 {
624  unsigned long seg;
625  ssize_t ret;
626  struct iovec *iov = fast_pointer;
627 
628  /*
629  * SuS says "The readv() function *may* fail if the iovcnt argument
630  * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
631  * traditionally returned zero for zero segments, so...
632  */
633  if (nr_segs == 0) {
634  ret = 0;
635  goto out;
636  }
637 
638  /*
639  * First get the "struct iovec" from user memory and
640  * verify all the pointers
641  */
642  if (nr_segs > UIO_MAXIOV) {
643  ret = -EINVAL;
644  goto out;
645  }
646  if (nr_segs > fast_segs) {
647  iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
648  if (iov == NULL) {
649  ret = -ENOMEM;
650  goto out;
651  }
652  }
653  if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
654  ret = -EFAULT;
655  goto out;
656  }
657 
658  /*
659  * According to the Single Unix Specification we should return EINVAL
660  * if an element length is < 0 when cast to ssize_t or if the
661  * total length would overflow the ssize_t return value of the
662  * system call.
663  *
664  * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
665  * overflow case.
666  */
667  ret = 0;
668  for (seg = 0; seg < nr_segs; seg++) {
669  void __user *buf = iov[seg].iov_base;
670  ssize_t len = (ssize_t)iov[seg].iov_len;
671 
672  /* see if we we're about to use an invalid len or if
673  * it's about to overflow ssize_t */
674  if (len < 0) {
675  ret = -EINVAL;
676  goto out;
677  }
678  if (type >= 0
679  && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
680  ret = -EFAULT;
681  goto out;
682  }
683  if (len > MAX_RW_COUNT - ret) {
684  len = MAX_RW_COUNT - ret;
685  iov[seg].iov_len = len;
686  }
687  ret += len;
688  }
689 out:
690  *ret_pointer = iov;
691  return ret;
692 }
693 
694 static ssize_t do_readv_writev(int type, struct file *file,
695  const struct iovec __user * uvector,
696  unsigned long nr_segs, loff_t *pos)
697 {
698  size_t tot_len;
699  struct iovec iovstack[UIO_FASTIOV];
700  struct iovec *iov = iovstack;
701  ssize_t ret;
702  io_fn_t fn;
703  iov_fn_t fnv;
704 
705  if (!file->f_op) {
706  ret = -EINVAL;
707  goto out;
708  }
709 
710  ret = rw_copy_check_uvector(type, uvector, nr_segs,
711  ARRAY_SIZE(iovstack), iovstack, &iov);
712  if (ret <= 0)
713  goto out;
714 
715  tot_len = ret;
716  ret = rw_verify_area(type, file, pos, tot_len);
717  if (ret < 0)
718  goto out;
719 
720  fnv = NULL;
721  if (type == READ) {
722  fn = file->f_op->read;
723  fnv = file->f_op->aio_read;
724  } else {
725  fn = (io_fn_t)file->f_op->write;
726  fnv = file->f_op->aio_write;
727  }
728 
729  if (fnv)
730  ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
731  pos, fnv);
732  else
733  ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
734 
735 out:
736  if (iov != iovstack)
737  kfree(iov);
738  if ((ret + (type == READ)) > 0) {
739  if (type == READ)
740  fsnotify_access(file);
741  else
742  fsnotify_modify(file);
743  }
744  return ret;
745 }
746 
747 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
748  unsigned long vlen, loff_t *pos)
749 {
750  if (!(file->f_mode & FMODE_READ))
751  return -EBADF;
752  if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
753  return -EINVAL;
754 
755  return do_readv_writev(READ, file, vec, vlen, pos);
756 }
757 
759 
760 ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
761  unsigned long vlen, loff_t *pos)
762 {
763  if (!(file->f_mode & FMODE_WRITE))
764  return -EBADF;
765  if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
766  return -EINVAL;
767 
768  return do_readv_writev(WRITE, file, vec, vlen, pos);
769 }
770 
772 
773 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
774  unsigned long, vlen)
775 {
776  struct fd f = fdget(fd);
777  ssize_t ret = -EBADF;
778 
779  if (f.file) {
780  loff_t pos = file_pos_read(f.file);
781  ret = vfs_readv(f.file, vec, vlen, &pos);
782  file_pos_write(f.file, pos);
783  fdput(f);
784  }
785 
786  if (ret > 0)
787  add_rchar(current, ret);
788  inc_syscr(current);
789  return ret;
790 }
791 
792 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
793  unsigned long, vlen)
794 {
795  struct fd f = fdget(fd);
796  ssize_t ret = -EBADF;
797 
798  if (f.file) {
799  loff_t pos = file_pos_read(f.file);
800  ret = vfs_writev(f.file, vec, vlen, &pos);
801  file_pos_write(f.file, pos);
802  fdput(f);
803  }
804 
805  if (ret > 0)
806  add_wchar(current, ret);
807  inc_syscw(current);
808  return ret;
809 }
810 
811 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
812 {
813 #define HALF_LONG_BITS (BITS_PER_LONG / 2)
814  return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
815 }
816 
817 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
818  unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
819 {
820  loff_t pos = pos_from_hilo(pos_h, pos_l);
821  struct fd f;
822  ssize_t ret = -EBADF;
823 
824  if (pos < 0)
825  return -EINVAL;
826 
827  f = fdget(fd);
828  if (f.file) {
829  ret = -ESPIPE;
830  if (f.file->f_mode & FMODE_PREAD)
831  ret = vfs_readv(f.file, vec, vlen, &pos);
832  fdput(f);
833  }
834 
835  if (ret > 0)
836  add_rchar(current, ret);
837  inc_syscr(current);
838  return ret;
839 }
840 
841 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
842  unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
843 {
844  loff_t pos = pos_from_hilo(pos_h, pos_l);
845  struct fd f;
846  ssize_t ret = -EBADF;
847 
848  if (pos < 0)
849  return -EINVAL;
850 
851  f = fdget(fd);
852  if (f.file) {
853  ret = -ESPIPE;
854  if (f.file->f_mode & FMODE_PWRITE)
855  ret = vfs_writev(f.file, vec, vlen, &pos);
856  fdput(f);
857  }
858 
859  if (ret > 0)
860  add_wchar(current, ret);
861  inc_syscw(current);
862  return ret;
863 }
864 
865 ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
866  loff_t max)
867 {
868  struct fd in, out;
869  struct inode *in_inode, *out_inode;
870  loff_t pos;
871  ssize_t retval;
872  int fl;
873 
874  /*
875  * Get input file, and verify that it is ok..
876  */
877  retval = -EBADF;
878  in = fdget(in_fd);
879  if (!in.file)
880  goto out;
881  if (!(in.file->f_mode & FMODE_READ))
882  goto fput_in;
883  retval = -ESPIPE;
884  if (!ppos)
885  ppos = &in.file->f_pos;
886  else
887  if (!(in.file->f_mode & FMODE_PREAD))
888  goto fput_in;
889  retval = rw_verify_area(READ, in.file, ppos, count);
890  if (retval < 0)
891  goto fput_in;
892  count = retval;
893 
894  /*
895  * Get output file, and verify that it is ok..
896  */
897  retval = -EBADF;
898  out = fdget(out_fd);
899  if (!out.file)
900  goto fput_in;
901  if (!(out.file->f_mode & FMODE_WRITE))
902  goto fput_out;
903  retval = -EINVAL;
904  in_inode = in.file->f_path.dentry->d_inode;
905  out_inode = out.file->f_path.dentry->d_inode;
906  retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count);
907  if (retval < 0)
908  goto fput_out;
909  count = retval;
910 
911  if (!max)
912  max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
913 
914  pos = *ppos;
915  if (unlikely(pos + count > max)) {
916  retval = -EOVERFLOW;
917  if (pos >= max)
918  goto fput_out;
919  count = max - pos;
920  }
921 
922  fl = 0;
923 #if 0
924  /*
925  * We need to debate whether we can enable this or not. The
926  * man page documents EAGAIN return for the output at least,
927  * and the application is arguably buggy if it doesn't expect
928  * EAGAIN on a non-blocking file descriptor.
929  */
930  if (in.file->f_flags & O_NONBLOCK)
931  fl = SPLICE_F_NONBLOCK;
932 #endif
933  retval = do_splice_direct(in.file, ppos, out.file, count, fl);
934 
935  if (retval > 0) {
936  add_rchar(current, retval);
937  add_wchar(current, retval);
938  }
939 
940  inc_syscr(current);
941  inc_syscw(current);
942  if (*ppos > max)
943  retval = -EOVERFLOW;
944 
945 fput_out:
946  fdput(out);
947 fput_in:
948  fdput(in);
949 out:
950  return retval;
951 }
952 
953 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
954 {
955  loff_t pos;
956  off_t off;
957  ssize_t ret;
958 
959  if (offset) {
960  if (unlikely(get_user(off, offset)))
961  return -EFAULT;
962  pos = off;
963  ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
964  if (unlikely(put_user(pos, offset)))
965  return -EFAULT;
966  return ret;
967  }
968 
969  return do_sendfile(out_fd, in_fd, NULL, count, 0);
970 }
971 
972 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
973 {
974  loff_t pos;
975  ssize_t ret;
976 
977  if (offset) {
978  if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
979  return -EFAULT;
980  ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
981  if (unlikely(put_user(pos, offset)))
982  return -EFAULT;
983  return ret;
984  }
985 
986  return do_sendfile(out_fd, in_fd, NULL, count, 0);
987 }