41 #include <linux/errno.h>
42 #include <linux/sched.h>
43 #include <linux/kernel.h>
47 #include <linux/slab.h>
49 #include <linux/module.h>
51 #include <linux/nfs_fs.h>
55 #include <asm/uaccess.h>
/*
 * Debug facility selector for this file, set to the VFS facility.
 * NOTE(review): presumably this is what the dprintk() calls in this file
 * are gated on via the sunrpc debug macros -- confirm against
 * <linux/sunrpc/debug.h>; the macro plumbing is not visible here.
 */
62 #define NFSDBG_FACILITY NFSDBG_VFS
/*
 * Two distinct non-zero state values (1 and 2).
 * NOTE(review): presumably these are the values stored in the direct
 * request's ->flags field (the write completion path tests
 * dreq->flags == 0 for the "nothing pending" case): DO_COMMIT appears to
 * mean a COMMIT must be sent, RESCHED_WRITES that the writes must be
 * reissued -- confirm against the assignments, which are not visible here.
 */
91 #define NFS_ODIRECT_DO_COMMIT (1)
92 #define NFS_ODIRECT_RESCHED_WRITES (2)
126 #ifndef CONFIG_NFS_SWAP
127 dprintk(
"NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
128 iocb->
ki_filp->f_path.dentry->d_name.name,
129 (
long long) pos, nr_segs);
138 rw ==
READ ?
true :
false);
140 rw ==
WRITE ?
true :
false);
144 static void nfs_direct_release_pages(
struct page **
pages,
unsigned int npages)
147 for (i = 0; i < npages; i++)
165 dreq = kmem_cache_zalloc(nfs_direct_cachep,
GFP_KERNEL);
169 kref_init(&dreq->
kref);
170 kref_get(&dreq->
kref);
179 static void nfs_direct_req_free(
struct kref *
kref)
192 kref_put(&dreq->
kref, nfs_direct_req_free);
215 result = dreq->
error;
217 result = dreq->
count;
237 nfs_direct_req_release(dreq);
240 static void nfs_direct_readpage_release(
struct nfs_page *
req)
242 dprintk(
"NFS: direct read done (%s/%lld %d@%lld)\n",
244 (
long long)NFS_FILEID(req->
wb_context->dentry->d_inode),
246 (
long long)req_offset(req));
252 unsigned long bytes = 0;
258 spin_lock(&dreq->
lock);
263 spin_unlock(&dreq->
lock);
265 while (!list_empty(&hdr->
pages)) {
273 zero_user_segment(page,
277 if (!PageCompound(page)) {
279 if (bytes < hdr->good_bytes)
285 nfs_list_remove_request(req);
286 nfs_direct_readpage_release(req);
290 nfs_direct_complete(dreq);
298 while (!list_empty(head)) {
299 req = nfs_list_entry(head->
next);
300 nfs_list_remove_request(req);
311 .error_cleanup = nfs_read_sync_pgio_error,
312 .init_hdr = nfs_direct_pgio_init,
313 .completion = nfs_direct_read_completion,
324 const struct iovec *iov,
325 loff_t
pos,
bool uio)
332 size_t rsize = NFS_SERVER(inode)->rsize;
347 npages = nfs_page_array_len(pgbase, bytes);
349 pagevec =
kmalloc(npages *
sizeof(
struct page *),
356 npages, 1, 0, pagevec,
NULL);
367 if ((
unsigned)result < npages) {
369 if (bytes <= pgbase) {
370 nfs_direct_release_pages(pagevec, result);
377 for (i = 0; i < npages; i++) {
379 unsigned int req_len =
min_t(
size_t, bytes, PAGE_SIZE - pgbase);
385 result = PTR_ERR(req);
404 nfs_direct_release_pages(pagevec, npages);
405 }
while (
count != 0 && result >= 0);
415 const struct iovec *iov,
416 unsigned long nr_segs,
417 loff_t pos,
bool uio)
421 size_t requested_bytes = 0;
424 NFS_PROTO(dreq->
inode)->read_pageio_init(&desc, dreq->
inode,
425 &nfs_direct_read_completion_ops);
429 for (seg = 0; seg < nr_segs; seg++) {
430 const struct iovec *vec = &iov[
seg];
431 result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
434 requested_bytes +=
result;
435 if ((
size_t)result < vec->
iov_len)
446 if (requested_bytes == 0) {
447 nfs_direct_req_release(dreq);
448 return result < 0 ? result : -
EIO;
452 nfs_direct_complete(dreq);
457 unsigned long nr_segs, loff_t pos,
bool uio)
460 struct inode *inode = iocb->
ki_filp->f_mapping->host;
464 dreq = nfs_direct_req_alloc();
473 result = PTR_ERR(l_ctx);
477 if (!is_sync_kiocb(iocb))
480 NFS_I(inode)->read_io += iov_length(iov, nr_segs);
481 result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
483 result = nfs_direct_wait(dreq);
485 nfs_direct_req_release(dreq);
490 static void nfs_inode_dio_write_done(
struct inode *inode)
496 #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
497 static void nfs_direct_write_reschedule(
struct nfs_direct_req *dreq)
506 pnfs_recover_commit_reqs(dreq->
inode, &reqs, &cinfo);
507 spin_lock(cinfo.lock);
509 spin_unlock(cinfo.lock);
515 &nfs_direct_write_completion_ops);
520 nfs_list_remove_request(req);
521 nfs_list_add_request(req, &failed);
522 spin_lock(cinfo.lock);
525 spin_unlock(cinfo.lock);
531 while (!list_empty(&failed)) {
532 req = nfs_list_entry(failed.next);
533 nfs_list_remove_request(req);
538 nfs_direct_write_complete(dreq, dreq->
inode);
550 dprintk(
"NFS: %5u commit failed with error %d.\n",
551 data->
task.tk_pid, status);
554 dprintk(
"NFS: %5u commit verify failed\n", data->
task.tk_pid);
558 dprintk(
"NFS: %5u commit returned %d\n", data->
task.tk_pid, status);
559 while (!list_empty(&data->
pages)) {
560 req = nfs_list_entry(data->
pages.next);
561 nfs_list_remove_request(req);
571 nfs_direct_write_complete(dreq, data->
inode);
574 static void nfs_direct_error_cleanup(
struct nfs_inode *nfsi)
580 .completion = nfs_direct_commit_complete,
581 .error_cleanup = nfs_direct_error_cleanup,
584 static void nfs_direct_commit_schedule(
struct nfs_direct_req *dreq)
594 nfs_direct_write_reschedule(dreq);
605 nfs_direct_commit_schedule(dreq);
608 nfs_direct_write_reschedule(dreq);
611 nfs_inode_dio_write_done(dreq->
inode);
612 nfs_direct_complete(dreq);
616 static void nfs_direct_write_complete(
struct nfs_direct_req *dreq,
struct inode *inode)
622 static void nfs_direct_write_schedule_work(
struct work_struct *work)
626 static void nfs_direct_write_complete(
struct nfs_direct_req *dreq,
struct inode *inode)
628 nfs_inode_dio_write_done(inode);
629 nfs_direct_complete(dreq);
645 const struct iovec *iov,
646 loff_t pos,
bool uio)
650 struct inode *inode = ctx->
dentry->d_inode;
653 size_t wsize = NFS_SERVER(inode)->wsize;
657 struct page **pagevec =
NULL;
668 npages = nfs_page_array_len(pgbase, bytes);
677 npages, 0, 0, pagevec,
NULL);
688 if ((
unsigned)result < npages) {
690 if (bytes <= pgbase) {
691 nfs_direct_release_pages(pagevec, result);
698 for (i = 0; i < npages; i++) {
700 unsigned int req_len =
min_t(
size_t, bytes, PAGE_SIZE - pgbase);
706 result = PTR_ERR(req);
709 nfs_lock_request(req);
726 nfs_direct_release_pages(pagevec, npages);
727 }
while (
count != 0 && result >= 0);
748 spin_lock(&dreq->
lock);
754 if (dreq->
error != 0)
764 else if (dreq->
flags == 0) {
778 spin_unlock(&dreq->
lock);
780 while (!list_empty(&hdr->
pages)) {
781 req = nfs_list_entry(hdr->
pages.next);
782 nfs_list_remove_request(req);
794 nfs_direct_write_complete(dreq, hdr->
inode);
798 static void nfs_write_sync_pgio_error(
struct list_head *head)
802 while (!list_empty(head)) {
803 req = nfs_list_entry(head->
next);
804 nfs_list_remove_request(req);
810 .error_cleanup = nfs_write_sync_pgio_error,
811 .init_hdr = nfs_direct_pgio_init,
812 .completion = nfs_direct_write_completion,
816 const struct iovec *iov,
817 unsigned long nr_segs,
818 loff_t pos,
bool uio)
821 struct inode *inode = dreq->
inode;
823 size_t requested_bytes = 0;
827 &nfs_direct_write_completion_ops);
832 NFS_I(dreq->
inode)->write_io += iov_length(iov, nr_segs);
833 for (seg = 0; seg < nr_segs; seg++) {
834 const struct iovec *vec = &iov[
seg];
835 result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
838 requested_bytes +=
result;
839 if ((
size_t)result < vec->
iov_len)
849 if (requested_bytes == 0) {
851 nfs_direct_req_release(dreq);
852 return result < 0 ? result : -
EIO;
856 nfs_direct_write_complete(dreq, dreq->
inode);
861 unsigned long nr_segs, loff_t pos,
862 size_t count,
bool uio)
865 struct inode *inode = iocb->
ki_filp->f_mapping->host;
869 dreq = nfs_direct_req_alloc();
878 result = PTR_ERR(l_ctx);
882 if (!is_sync_kiocb(iocb))
885 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
887 result = nfs_direct_wait(dreq);
889 nfs_direct_req_release(dreq);
916 unsigned long nr_segs, loff_t pos,
bool uio)
923 count = iov_length(iov, nr_segs);
926 dfprintk(FILE,
"NFS: direct read(%s/%s, %zd@%Ld)\n",
927 file->
f_path.dentry->d_parent->d_name.name,
928 file->
f_path.dentry->d_name.name,
929 count, (
long long) pos);
939 task_io_account_read(count);
941 retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio);
972 unsigned long nr_segs, loff_t pos,
bool uio)
979 count = iov_length(iov, nr_segs);
982 dfprintk(FILE,
"NFS: direct write(%s/%s, %zd@%Ld)\n",
983 file->
f_path.dentry->d_parent->d_name.name,
984 file->
f_path.dentry->d_name.name,
985 count, (
long long) pos);
1002 task_io_account_write(count);
1004 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio);
1006 struct inode *inode = mapping->
host;
1009 spin_lock(&inode->
i_lock);
1010 if (i_size_read(inode) < iocb->
ki_pos)
1011 i_size_write(inode, iocb->
ki_pos);
1012 spin_unlock(&inode->
i_lock);
1029 if (nfs_direct_cachep ==
NULL)