Linux Kernel 3.7.1
write.c
1 /*
2  * linux/fs/nfs/write.c
3  *
4  * Write file data over NFS.
5  *
6  * Copyright (C) 1996, 1997, Olaf Kirch <[email protected]>
7  */
8 
9 #include <linux/types.h>
10 #include <linux/slab.h>
11 #include <linux/mm.h>
12 #include <linux/pagemap.h>
13 #include <linux/file.h>
14 #include <linux/writeback.h>
15 #include <linux/swap.h>
16 #include <linux/migrate.h>
17 
18 #include <linux/sunrpc/clnt.h>
19 #include <linux/nfs_fs.h>
20 #include <linux/nfs_mount.h>
21 #include <linux/nfs_page.h>
22 #include <linux/backing-dev.h>
23 #include <linux/export.h>
24 
25 #include <asm/uaccess.h>
26 
27 #include "delegation.h"
28 #include "internal.h"
29 #include "iostat.h"
30 #include "nfs4_fs.h"
31 #include "fscache.h"
32 #include "pnfs.h"
33 
34 #define NFSDBG_FACILITY NFSDBG_PAGECACHE
35 
36 #define MIN_POOL_WRITE (32)
37 #define MIN_POOL_COMMIT (4)
38 
39 /*
40  * Local function declarations
41  */
42 static void nfs_redirty_request(struct nfs_page *req);
43 static const struct rpc_call_ops nfs_write_common_ops;
44 static const struct rpc_call_ops nfs_commit_ops;
45 static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
46 static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
47 
48 static struct kmem_cache *nfs_wdata_cachep;
49 static mempool_t *nfs_wdata_mempool;
50 static struct kmem_cache *nfs_cdata_cachep;
51 static mempool_t *nfs_commit_mempool;
52 
53 struct nfs_commit_data *nfs_commitdata_alloc(void)
54 {
55  struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOIO);
56 
57  if (p) {
58  memset(p, 0, sizeof(*p));
59  INIT_LIST_HEAD(&p->pages);
60  }
61  return p;
62 }
64 
65 void nfs_commit_free(struct nfs_commit_data *p)
66 {
67  mempool_free(p, nfs_commit_mempool);
68 }
70 
71 struct nfs_write_header *nfs_writehdr_alloc(void)
72 {
73  struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
74 
75  if (p) {
76  struct nfs_pgio_header *hdr = &p->header;
77 
78  memset(p, 0, sizeof(*p));
79  INIT_LIST_HEAD(&hdr->pages);
80  INIT_LIST_HEAD(&hdr->rpc_list);
81  spin_lock_init(&hdr->lock);
82  atomic_set(&hdr->refcnt, 0);
83  hdr->verf = &p->verf;
84  }
85  return p;
86 }
88 
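/*
 * Allocate a struct nfs_write_data able to hold @pagecount pages. The
 * nfs_write_data embedded in the write header is used if it is still
 * free; otherwise a new one is kzalloc()ed. On success the header's
 * refcount is bumped; returns NULL on allocation failure.
 */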
89 static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
90  unsigned int pagecount)
91 {
92  struct nfs_write_data *data, *prealloc;
93 
94  prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
95  if (prealloc->header == NULL)
96  data = prealloc;
97  else
98  data = kzalloc(sizeof(*data), GFP_KERNEL);
99  if (!data)
100  goto out;
101 
102  if (nfs_pgarray_set(&data->pages, pagecount)) {
103  data->header = hdr;
104  atomic_inc(&hdr->refcnt);
105  } else {
106  if (data != prealloc)
107  kfree(data);
108  data = NULL;
109  }
110 out:
111  return data;
112 }
113 
114 void nfs_writehdr_free(struct nfs_pgio_header *hdr)
115 {
116  struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
117  mempool_free(whdr, nfs_wdata_mempool);
118 }
120 
121 void nfs_writedata_release(struct nfs_write_data *wdata)
122 {
123  struct nfs_pgio_header *hdr = wdata->header;
124  struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
125 
126  put_nfs_open_context(wdata->args.context);
127  if (wdata->pages.pagevec != wdata->pages.page_array)
128  kfree(wdata->pages.pagevec);
129  if (wdata != &write_header->rpc_data)
130  kfree(wdata);
131  else
132  wdata->header = NULL;
133  if (atomic_dec_and_test(&hdr->refcnt))
134  hdr->completion_ops->completion(hdr);
135 }
137 
138 static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
139 {
140  ctx->error = error;
141  smp_wmb();
142  set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
143 }
144 
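/*
 * Look up the nfs_page request attached to @page. Ordinary page cache
 * pages keep it in page_private(); swapcache pages are found by walking
 * the inode's commit list. A kref is taken on any request returned.
 * The caller must hold the inode's i_lock.
 */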
145 static struct nfs_page *
146 nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
147 {
148  struct nfs_page *req = NULL;
149 
150  if (PagePrivate(page))
151  req = (struct nfs_page *)page_private(page);
152  else if (unlikely(PageSwapCache(page))) {
153  struct nfs_page *freq, *t;
154 
155  /* Linearly search the commit list for the correct req */
156  list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) {
157  if (freq->wb_page == page) {
158  req = freq;
159  break;
160  }
161  }
162  }
163 
164  if (req)
165  kref_get(&req->wb_kref);
166 
167  return req;
168 }
169 
170 static struct nfs_page *nfs_page_find_request(struct page *page)
171 {
172  struct inode *inode = page_file_mapping(page)->host;
173  struct nfs_page *req = NULL;
174 
175  spin_lock(&inode->i_lock);
176  req = nfs_page_find_request_locked(NFS_I(inode), page);
177  spin_unlock(&inode->i_lock);
178  return req;
179 }
180 
181 /* Adjust the file length if we're writing beyond the end */
182 static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
183 {
184  struct inode *inode = page_file_mapping(page)->host;
185  loff_t end, i_size;
186  pgoff_t end_index;
187 
188  spin_lock(&inode->i_lock);
189  i_size = i_size_read(inode);
190  end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
191  if (i_size > 0 && page_file_index(page) < end_index)
192  goto out;
193  end = page_file_offset(page) + ((loff_t)offset+count);
194  if (i_size >= end)
195  goto out;
196  i_size_write(inode, end);
197  nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
198 out:
199  spin_unlock(&inode->i_lock);
200 }
201 
202 /* A writeback failed: mark the page as bad, and invalidate the page cache */
203 static void nfs_set_pageerror(struct page *page)
204 {
205  SetPageError(page);
206  nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page));
207 }
208 
209 /* We can set the PG_uptodate flag if we see that a write request
210  * covers the full page.
211  */
212 static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
213 {
214  if (PageUptodate(page))
215  return;
216  if (base != 0)
217  return;
218  if (count != nfs_page_length(page))
219  return;
220  SetPageUptodate(page);
221 }
222 
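/*
 * Map the writeback_control flags onto an NFS flush priority: writes
 * issued for memory reclaim go out high priority and stable, kupdate
 * and background writeback go out low priority, and everything else
 * is conditionally stable.
 */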
223 static int wb_priority(struct writeback_control *wbc)
224 {
225  if (wbc->for_reclaim)
226  return FLUSH_HIGHPRI | FLUSH_STABLE;
227  if (wbc->for_kupdate || wbc->for_background)
228  return FLUSH_LOWPRI | FLUSH_COND_STABLE;
229  return FLUSH_COND_STABLE;
230 }
231 
232 /*
233  * NFS congestion control
234  */
235 
236 int nfs_congestion_kb;
237 
238 #define NFS_CONGESTION_ON_THRESH (nfs_congestion_kb >> (PAGE_SHIFT-10))
239 #define NFS_CONGESTION_OFF_THRESH \
240  (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
241 
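/*
 * Mark a page as under writeback and account it against the per-server
 * writeback counter. Once the counter crosses NFS_CONGESTION_ON_THRESH
 * the backing device is flagged as congested for async writes;
 * nfs_end_page_writeback() clears the congestion again when the count
 * drops below NFS_CONGESTION_OFF_THRESH.
 */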
242 static int nfs_set_page_writeback(struct page *page)
243 {
244  int ret = test_set_page_writeback(page);
245 
246  if (!ret) {
247  struct inode *inode = page_file_mapping(page)->host;
248  struct nfs_server *nfss = NFS_SERVER(inode);
249 
250  if (atomic_long_inc_return(&nfss->writeback) >
251  NFS_CONGESTION_ON_THRESH) {
252  set_bdi_congested(&nfss->backing_dev_info,
253  BLK_RW_ASYNC);
254  }
255  }
256  return ret;
257 }
258 
259 static void nfs_end_page_writeback(struct page *page)
260 {
261  struct inode *inode = page_file_mapping(page)->host;
262  struct nfs_server *nfss = NFS_SERVER(inode);
263 
264  end_page_writeback(page);
265  if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
266  clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
267 }
268 
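/*
 * Find the request attached to @page and lock it, waiting for any
 * previous holder to release it unless @nonblock is set, in which case
 * ERR_PTR(-EAGAIN) is returned. Returns NULL if the page has no
 * request attached.
 */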
269 static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
270 {
271  struct inode *inode = page_file_mapping(page)->host;
272  struct nfs_page *req;
273  int ret;
274 
275  spin_lock(&inode->i_lock);
276  for (;;) {
277  req = nfs_page_find_request_locked(NFS_I(inode), page);
278  if (req == NULL)
279  break;
280  if (nfs_lock_request(req))
281  break;
282  /* Note: If we hold the page lock, as is the case in nfs_writepage,
283  * then the call to nfs_lock_request() will always
284  * succeed provided that someone hasn't already marked the
285  * request as dirty (in which case we don't care).
286  */
287  spin_unlock(&inode->i_lock);
288  if (!nonblock)
289  ret = nfs_wait_on_request(req);
290  else
291  ret = -EAGAIN;
292  nfs_release_request(req);
293  if (ret != 0)
294  return ERR_PTR(ret);
295  spin_lock(&inode->i_lock);
296  }
297  spin_unlock(&inode->i_lock);
298  return req;
299 }
300 
301 /*
302  * Find an associated nfs write request, and prepare to flush it out
303  * May return an error if the user signalled nfs_wait_on_request().
304  */
305 static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
306  struct page *page, bool nonblock)
307 {
308  struct nfs_page *req;
309  int ret = 0;
310 
311  req = nfs_find_and_lock_request(page, nonblock);
312  if (!req)
313  goto out;
314  ret = PTR_ERR(req);
315  if (IS_ERR(req))
316  goto out;
317 
318  ret = nfs_set_page_writeback(page);
319  BUG_ON(ret != 0);
320  BUG_ON(test_bit(PG_CLEAN, &req->wb_flags));
321 
322  if (!nfs_pageio_add_request(pgio, req)) {
323  nfs_redirty_request(req);
324  ret = pgio->pg_error;
325  }
326 out:
327  return ret;
328 }
329 
330 static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
331 {
332  struct inode *inode = page_file_mapping(page)->host;
333  int ret;
334 
335  nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
336  nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
337 
338  nfs_pageio_cond_complete(pgio, page_file_index(page));
339  ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
340  if (ret == -EAGAIN) {
341  redirty_page_for_writepage(wbc, page);
342  ret = 0;
343  }
344  return ret;
345 }
346 
347 /*
348  * Write an mmapped page to the server.
349  */
350 static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
351 {
352  struct nfs_pageio_descriptor pgio;
353  int err;
354 
355  NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio,
356  page->mapping->host,
357  wb_priority(wbc),
358  &nfs_async_write_completion_ops);
359  err = nfs_do_writepage(page, wbc, &pgio);
360  nfs_pageio_complete(&pgio);
361  if (err < 0)
362  return err;
363  if (pgio.pg_error < 0)
364  return pgio.pg_error;
365  return 0;
366 }
367 
368 int nfs_writepage(struct page *page, struct writeback_control *wbc)
369 {
370  int ret;
371 
372  ret = nfs_writepage_locked(page, wbc);
373  unlock_page(page);
374  return ret;
375 }
376 
377 static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
378 {
379  int ret;
380 
381  ret = nfs_do_writepage(page, wbc, data);
382  unlock_page(page);
383  return ret;
384 }
385 
386 int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
387 {
388  struct inode *inode = mapping->host;
389  unsigned long *bitlock = &NFS_I(inode)->flags;
390  struct nfs_pageio_descriptor pgio;
391  int err;
392 
393  /* Stop dirtying of new pages while we sync */
394  err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
395  nfs_wait_bit_killable, TASK_KILLABLE);
396  if (err)
397  goto out_err;
398 
399  nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
400 
401  NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops);
402  err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
403  nfs_pageio_complete(&pgio);
404 
405  clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
406  smp_mb__after_clear_bit();
407  wake_up_bit(bitlock, NFS_INO_FLUSHING);
408 
409  if (err < 0)
410  goto out_err;
411  err = pgio.pg_error;
412  if (err < 0)
413  goto out_err;
414  return 0;
415 out_err:
416  return err;
417 }
418 
419 /*
420  * Insert a write request into an inode
421  */
422 static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
423 {
424  struct nfs_inode *nfsi = NFS_I(inode);
425 
426  /* Lock the request! */
427  nfs_lock_request(req);
428 
429  spin_lock(&inode->i_lock);
430  if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
431  inode->i_version++;
432  /*
433  * Swap-space should not get truncated. Hence no need to plug the race
434  * with invalidate/truncate.
435  */
436  if (likely(!PageSwapCache(req->wb_page))) {
437  set_bit(PG_MAPPED, &req->wb_flags);
438  SetPagePrivate(req->wb_page);
439  set_page_private(req->wb_page, (unsigned long)req);
440  }
441  nfsi->npages++;
442  kref_get(&req->wb_kref);
443  spin_unlock(&inode->i_lock);
444 }
445 
446 /*
447  * Remove a write request from an inode
448  */
449 static void nfs_inode_remove_request(struct nfs_page *req)
450 {
451  struct inode *inode = req->wb_context->dentry->d_inode;
452  struct nfs_inode *nfsi = NFS_I(inode);
453 
454  BUG_ON (!NFS_WBACK_BUSY(req));
455 
456  spin_lock(&inode->i_lock);
457  if (likely(!PageSwapCache(req->wb_page))) {
458  set_page_private(req->wb_page, 0);
459  ClearPagePrivate(req->wb_page);
460  clear_bit(PG_MAPPED, &req->wb_flags);
461  }
462  nfsi->npages--;
463  spin_unlock(&inode->i_lock);
464  nfs_release_request(req);
465 }
466 
467 static void
468 nfs_mark_request_dirty(struct nfs_page *req)
469 {
470  __set_page_dirty_nobuffers(req->wb_page);
471 }
472 
473 #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
474 
487 void
488 nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
489  struct nfs_commit_info *cinfo)
490 {
491  set_bit(PG_CLEAN, &(req)->wb_flags);
492  spin_lock(cinfo->lock);
493  nfs_list_add_request(req, dst);
494  cinfo->mds->ncommit++;
495  spin_unlock(cinfo->lock);
496  if (!cinfo->dreq) {
497  inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
498  inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info,
499  BDI_RECLAIMABLE);
500  __mark_inode_dirty(req->wb_context->dentry->d_inode,
501  I_DIRTY_DATASYNC);
502  }
503 }
505 
517 void
518 nfs_request_remove_commit_list(struct nfs_page *req,
519  struct nfs_commit_info *cinfo)
520 {
521  if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
522  return;
523  nfs_list_remove_request(req);
524  cinfo->mds->ncommit--;
525 }
527 
528 static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
529  struct inode *inode)
530 {
531  cinfo->lock = &inode->i_lock;
532  cinfo->mds = &NFS_I(inode)->commit_info;
533  cinfo->ds = pnfs_get_ds_info(inode);
534  cinfo->dreq = NULL;
535  cinfo->completion_ops = &nfs_commit_completion_ops;
536 }
537 
538 void nfs_init_cinfo(struct nfs_commit_info *cinfo,
539  struct inode *inode,
540  struct nfs_direct_req *dreq)
541 {
542  if (dreq)
543  nfs_init_cinfo_from_dreq(cinfo, dreq);
544  else
545  nfs_init_cinfo_from_inode(cinfo, inode);
546 }
548 
549 /*
550  * Add a request to the inode's commit list.
551  */
552 void
553 nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
554  struct nfs_commit_info *cinfo)
555 {
556  if (pnfs_mark_request_commit(req, lseg, cinfo))
557  return;
558  nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
559 }
560 
561 static void
562 nfs_clear_page_commit(struct page *page)
563 {
564  dec_zone_page_state(page, NR_UNSTABLE_NFS);
565  dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE);
566 }
567 
568 static void
569 nfs_clear_request_commit(struct nfs_page *req)
570 {
571  if (test_bit(PG_CLEAN, &req->wb_flags)) {
572  struct inode *inode = req->wb_context->dentry->d_inode;
573  struct nfs_commit_info cinfo;
574 
575  nfs_init_cinfo_from_inode(&cinfo, inode);
576  if (!pnfs_clear_request_commit(req, &cinfo)) {
577  spin_lock(cinfo.lock);
578  nfs_request_remove_commit_list(req, &cinfo);
579  spin_unlock(cinfo.lock);
580  }
581  nfs_clear_page_commit(req->wb_page);
582  }
583 }
584 
585 static inline
586 int nfs_write_need_commit(struct nfs_write_data *data)
587 {
588  if (data->verf.committed == NFS_DATA_SYNC)
589  return data->header->lseg == NULL;
590  return data->verf.committed != NFS_FILE_SYNC;
591 }
592 
593 #else
594 static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
595  struct inode *inode)
596 {
597 }
598 
599 void nfs_init_cinfo(struct nfs_commit_info *cinfo,
600  struct inode *inode,
601  struct nfs_direct_req *dreq)
602 {
603 }
604 
605 void
606 nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
607  struct nfs_commit_info *cinfo)
608 {
609 }
610 
611 static void
612 nfs_clear_request_commit(struct nfs_page *req)
613 {
614 }
615 
616 static inline
617 int nfs_write_need_commit(struct nfs_write_data *data)
618 {
619  return 0;
620 }
621 
622 #endif
623 
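/*
 * Completion callback for a write header: walk the completed requests,
 * flag the page and open context on error, redirty requests that must
 * be rescheduled, queue requests that still need a COMMIT (saving the
 * write verifier), and drop everything else from the inode before
 * ending page writeback and releasing each request.
 */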
624 static void nfs_write_completion(struct nfs_pgio_header *hdr)
625 {
626  struct nfs_commit_info cinfo;
627  unsigned long bytes = 0;
628 
629  if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
630  goto out;
631  nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
632  while (!list_empty(&hdr->pages)) {
633  struct nfs_page *req = nfs_list_entry(hdr->pages.next);
634 
635  bytes += req->wb_bytes;
636  nfs_list_remove_request(req);
637  if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
638  (hdr->good_bytes < bytes)) {
639  nfs_set_pageerror(req->wb_page);
640  nfs_context_set_write_error(req->wb_context, hdr->error);
641  goto remove_req;
642  }
643  if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
644  nfs_mark_request_dirty(req);
645  goto next;
646  }
647  if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
648  memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf));
649  nfs_mark_request_commit(req, hdr->lseg, &cinfo);
650  goto next;
651  }
652 remove_req:
653  nfs_inode_remove_request(req);
654 next:
655  nfs_unlock_request(req);
656  nfs_end_page_writeback(req->wb_page);
657  nfs_release_request(req);
658  }
659 out:
660  hdr->release(hdr);
661 }
662 
663 #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
664 static unsigned long
665 nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
666 {
667  return cinfo->mds->ncommit;
668 }
669 
670 /* cinfo->lock held by caller */
671 int
672 nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
673  struct nfs_commit_info *cinfo, int max)
674 {
675  struct nfs_page *req, *tmp;
676  int ret = 0;
677 
678  list_for_each_entry_safe(req, tmp, src, wb_list) {
679  if (!nfs_lock_request(req))
680  continue;
681  kref_get(&req->wb_kref);
682  if (cond_resched_lock(cinfo->lock))
683  list_safe_reset_next(req, tmp, wb_list);
684  nfs_request_remove_commit_list(req, cinfo);
685  nfs_list_add_request(req, dst);
686  ret++;
687  if ((ret == max) && !cinfo->dreq)
688  break;
689  }
690  return ret;
691 }
692 
693 /*
694  * nfs_scan_commit - Scan an inode for commit requests
695  * @inode: NFS inode to scan
696  * @dst: mds destination list
697  * @cinfo: mds and ds lists of reqs ready to commit
698  *
699  * Moves requests from the inode's 'commit' request list.
700  * The requests are *not* checked to ensure that they form a contiguous set.
701  */
702 int
703 nfs_scan_commit(struct inode *inode, struct list_head *dst,
704  struct nfs_commit_info *cinfo)
705 {
706  int ret = 0;
707 
708  spin_lock(cinfo->lock);
709  if (cinfo->mds->ncommit > 0) {
710  const int max = INT_MAX;
711 
712  ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
713  cinfo, max);
714  ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
715  }
716  spin_unlock(cinfo->lock);
717  return ret;
718 }
719 
720 #else
721 static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
722 {
723  return 0;
724 }
725 
726 int nfs_scan_commit(struct inode *inode, struct list_head *dst,
727  struct nfs_commit_info *cinfo)
728 {
729  return 0;
730 }
731 #endif
732 
733 /*
734  * Search for an existing write request, and attempt to update
735  * it to reflect a new dirty region on a given page.
736  *
737  * If the attempt fails, then the existing request is flushed out
738  * to disk.
739  */
740 static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
741  struct page *page,
742  unsigned int offset,
743  unsigned int bytes)
744 {
745  struct nfs_page *req;
746  unsigned int rqend;
747  unsigned int end;
748  int error;
749 
750  if (!PagePrivate(page))
751  return NULL;
752 
753  end = offset + bytes;
754  spin_lock(&inode->i_lock);
755 
756  for (;;) {
757  req = nfs_page_find_request_locked(NFS_I(inode), page);
758  if (req == NULL)
759  goto out_unlock;
760 
761  rqend = req->wb_offset + req->wb_bytes;
762  /*
763  * Tell the caller to flush out the request if
764  * the offsets are non-contiguous.
765  * Note: nfs_flush_incompatible() will already
766  * have flushed out requests having wrong owners.
767  */
768  if (offset > rqend
769  || end < req->wb_offset)
770  goto out_flushme;
771 
772  if (nfs_lock_request(req))
773  break;
774 
775  /* The request is locked, so wait and then retry */
776  spin_unlock(&inode->i_lock);
777  error = nfs_wait_on_request(req);
778  nfs_release_request(req);
779  if (error != 0)
780  goto out_err;
781  spin_lock(&inode->i_lock);
782  }
783 
784  /* Okay, the request matches. Update the region */
785  if (offset < req->wb_offset) {
786  req->wb_offset = offset;
787  req->wb_pgbase = offset;
788  }
789  if (end > rqend)
790  req->wb_bytes = end - req->wb_offset;
791  else
792  req->wb_bytes = rqend - req->wb_offset;
793 out_unlock:
794  spin_unlock(&inode->i_lock);
795  if (req)
796  nfs_clear_request_commit(req);
797  return req;
798 out_flushme:
799  spin_unlock(&inode->i_lock);
800  nfs_release_request(req);
801  error = nfs_wb_page(inode, page);
802 out_err:
803  return ERR_PTR(error);
804 }
805 
806 /*
807  * Try to update an existing write request, or create one if there is none.
808  *
809  * Note: Should always be called with the Page Lock held to prevent races
810  * if we have to add a new request. Also assumes that the caller has
811  * already called nfs_flush_incompatible() if necessary.
812  */
813 static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
814  struct page *page, unsigned int offset, unsigned int bytes)
815 {
816  struct inode *inode = page_file_mapping(page)->host;
817  struct nfs_page *req;
818 
819  req = nfs_try_to_update_request(inode, page, offset, bytes);
820  if (req != NULL)
821  goto out;
822  req = nfs_create_request(ctx, inode, page, offset, bytes);
823  if (IS_ERR(req))
824  goto out;
825  nfs_inode_add_request(inode, req);
826 out:
827  return req;
828 }
829 
830 static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
831  unsigned int offset, unsigned int count)
832 {
833  struct nfs_page *req;
834 
835  req = nfs_setup_write_request(ctx, page, offset, count);
836  if (IS_ERR(req))
837  return PTR_ERR(req);
838  /* Update file length */
839  nfs_grow_file(page, offset, count);
840  nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
841  nfs_mark_request_dirty(req);
842  nfs_unlock_and_release_request(req);
843  return 0;
844 }
845 
846 int nfs_flush_incompatible(struct file *file, struct page *page)
847 {
848  struct nfs_open_context *ctx = nfs_file_open_context(file);
849  struct nfs_lock_context *l_ctx;
850  struct nfs_page *req;
851  int do_flush, status;
852  /*
853  * Look for a request corresponding to this page. If there
854  * is one, and it belongs to another file, we flush it out
855  * before we try to copy anything into the page. Do this
856  * due to the lack of an ACCESS-type call in NFSv2.
857  * Also do the same if we find a request from an existing
858  * dropped page.
859  */
860  do {
861  req = nfs_page_find_request(page);
862  if (req == NULL)
863  return 0;
864  l_ctx = req->wb_lock_context;
865  do_flush = req->wb_page != page || req->wb_context != ctx;
866  if (l_ctx) {
867  do_flush |= l_ctx->lockowner.l_owner != current->files
868  || l_ctx->lockowner.l_pid != current->tgid;
869  }
870  nfs_release_request(req);
871  if (!do_flush)
872  return 0;
873  status = nfs_wb_page(page_file_mapping(page)->host, page);
874  } while (status == 0);
875  return status;
876 }
877 
878 /*
879  * If the page cache is marked as unsafe or invalid, then we can't rely on
880  * the PageUptodate() flag. In this case, we will need to turn off
881  * write optimisations that depend on the page contents being correct.
882  */
883 static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
884 {
885  if (nfs_have_delegated_attributes(inode))
886  goto out;
887  if (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE)
888  return false;
889 out:
890  return PageUptodate(page) != 0;
891 }
892 
893 /*
894  * Update and possibly write a cached page of an NFS file.
895  *
896  * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
897  * things with a page scheduled for an RPC call (e.g. invalidate it).
898  */
899 int nfs_updatepage(struct file *file, struct page *page,
900  unsigned int offset, unsigned int count)
901 {
902  struct nfs_open_context *ctx = nfs_file_open_context(file);
903  struct inode *inode = page_file_mapping(page)->host;
904  int status = 0;
905 
906  nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
907 
908  dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n",
909  file->f_path.dentry->d_parent->d_name.name,
910  file->f_path.dentry->d_name.name, count,
911  (long long)(page_file_offset(page) + offset));
912 
913  /* If we're not using byte range locks, and we know the page
914  * is up to date, it may be more efficient to extend the write
915  * to cover the entire page in order to avoid fragmentation
916  * inefficiencies.
917  */
918  if (nfs_write_pageuptodate(page, inode) &&
919  inode->i_flock == NULL &&
920  !(file->f_flags & O_DSYNC)) {
921  count = max(count + offset, nfs_page_length(page));
922  offset = 0;
923  }
924 
925  status = nfs_writepage_setup(ctx, page, offset, count);
926  if (status < 0)
927  nfs_set_pageerror(page);
928  else
929  __set_page_dirty_nobuffers(page);
930 
931  dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
932  status, (long long)i_size_read(inode));
933  return status;
934 }
935 
936 static int flush_task_priority(int how)
937 {
938  switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
939  case FLUSH_HIGHPRI:
940  return RPC_PRIORITY_HIGH;
941  case FLUSH_LOWPRI:
942  return RPC_PRIORITY_LOW;
943  }
944  return RPC_PRIORITY_NORMAL;
945 }
946 
947 int nfs_initiate_write(struct rpc_clnt *clnt,
948  struct nfs_write_data *data,
949  const struct rpc_call_ops *call_ops,
950  int how, int flags)
951 {
952  struct inode *inode = data->header->inode;
953  int priority = flush_task_priority(how);
954  struct rpc_task *task;
955  struct rpc_message msg = {
956  .rpc_argp = &data->args,
957  .rpc_resp = &data->res,
958  .rpc_cred = data->header->cred,
959  };
960  struct rpc_task_setup task_setup_data = {
961  .rpc_client = clnt,
962  .task = &data->task,
963  .rpc_message = &msg,
964  .callback_ops = call_ops,
965  .callback_data = data,
966  .workqueue = nfsiod_workqueue,
967  .flags = RPC_TASK_ASYNC | flags,
968  .priority = priority,
969  };
970  int ret = 0;
971 
972  /* Set up the initial task struct. */
973  NFS_PROTO(inode)->write_setup(data, &msg);
974 
975  dprintk("NFS: %5u initiated write call "
976  "(req %s/%lld, %u bytes @ offset %llu)\n",
977  data->task.tk_pid,
978  inode->i_sb->s_id,
979  (long long)NFS_FILEID(inode),
980  data->args.count,
981  (unsigned long long)data->args.offset);
982 
983  task = rpc_run_task(&task_setup_data);
984  if (IS_ERR(task)) {
985  ret = PTR_ERR(task);
986  goto out;
987  }
988  if (how & FLUSH_SYNC) {
989  ret = rpc_wait_for_completion_task(task);
990  if (ret == 0)
991  ret = task->tk_status;
992  }
993  rpc_put_task(task);
994 out:
995  return ret;
996 }
998 
999 /*
1000  * Set up the argument/result storage required for the RPC call.
1001  */
1002 static void nfs_write_rpcsetup(struct nfs_write_data *data,
1003  unsigned int count, unsigned int offset,
1004  int how, struct nfs_commit_info *cinfo)
1005 {
1006  struct nfs_page *req = data->header->req;
1007 
1008  /* Set up the RPC argument and reply structs
1009  * NB: take care not to mess about with data->commit et al. */
1010 
1011  data->args.fh = NFS_FH(data->header->inode);
1012  data->args.offset = req_offset(req) + offset;
1013  /* pnfs_set_layoutcommit needs this */
1014  data->mds_offset = data->args.offset;
1015  data->args.pgbase = req->wb_pgbase + offset;
1016  data->args.pages = data->pages.pagevec;
1017  data->args.count = count;
1018  data->args.context = get_nfs_open_context(req->wb_context);
1019  data->args.lock_context = req->wb_lock_context;
1020  data->args.stable = NFS_UNSTABLE;
1021  switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
1022  case 0:
1023  break;
1024  case FLUSH_COND_STABLE:
1025  if (nfs_reqs_to_commit(cinfo))
1026  break;
1027  default:
1028  data->args.stable = NFS_FILE_SYNC;
1029  }
1030 
1031  data->res.fattr = &data->fattr;
1032  data->res.count = count;
1033  data->res.verf = &data->verf;
1034  nfs_fattr_init(&data->fattr);
1035 }
1036 
1037 static int nfs_do_write(struct nfs_write_data *data,
1038  const struct rpc_call_ops *call_ops,
1039  int how)
1040 {
1041  struct inode *inode = data->header->inode;
1042 
1043  return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);
1044 }
1045 
1046 static int nfs_do_multiple_writes(struct list_head *head,
1047  const struct rpc_call_ops *call_ops,
1048  int how)
1049 {
1050  struct nfs_write_data *data;
1051  int ret = 0;
1052 
1053  while (!list_empty(head)) {
1054  int ret2;
1055 
1056  data = list_first_entry(head, struct nfs_write_data, list);
1057  list_del_init(&data->list);
1058 
1059  ret2 = nfs_do_write(data, call_ops, how);
1060  if (ret == 0)
1061  ret = ret2;
1062  }
1063  return ret;
1064 }
1065 
1066 /* If a nfs_flush_* function fails, it should remove reqs from @head and
1067  * call this on each, which will prepare them to be retried on next
1068  * writeback using standard nfs.
1069  */
1070 static void nfs_redirty_request(struct nfs_page *req)
1071 {
1072  nfs_mark_request_dirty(req);
1073  nfs_unlock_request(req);
1074  nfs_end_page_writeback(req->wb_page);
1075  nfs_release_request(req);
1076 }
1077 
1078 static void nfs_async_write_error(struct list_head *head)
1079 {
1080  struct nfs_page *req;
1081 
1082  while (!list_empty(head)) {
1083  req = nfs_list_entry(head->next);
1084  nfs_list_remove_request(req);
1085  nfs_redirty_request(req);
1086  }
1087 }
1088 
1089 static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
1090  .error_cleanup = nfs_async_write_error,
1091  .completion = nfs_write_completion,
1092 };
1093 
1094 static void nfs_flush_error(struct nfs_pageio_descriptor *desc,
1095  struct nfs_pgio_header *hdr)
1096 {
1097  set_bit(NFS_IOHDR_REDO, &hdr->flags);
1098  while (!list_empty(&hdr->rpc_list)) {
1099  struct nfs_write_data *data = list_first_entry(&hdr->rpc_list,
1100  struct nfs_write_data, list);
1101  list_del(&data->list);
1102  nfs_writedata_release(data);
1103  }
1104  desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1105 }
1106 
1107 /*
1108  * Generate multiple small requests to write out a single
1109  * contiguous dirty area on one page.
1110  */
1111 static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
1112  struct nfs_pgio_header *hdr)
1113 {
1114  struct nfs_page *req = hdr->req;
1115  struct page *page = req->wb_page;
1116  struct nfs_write_data *data;
1117  size_t wsize = desc->pg_bsize, nbytes;
1118  unsigned int offset;
1119  int requests = 0;
1120  struct nfs_commit_info cinfo;
1121 
1122  nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
1123 
1124  if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1125  (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
1126  desc->pg_count > wsize))
1127  desc->pg_ioflags &= ~FLUSH_COND_STABLE;
1128 
1129 
1130  offset = 0;
1131  nbytes = desc->pg_count;
1132  do {
1133  size_t len = min(nbytes, wsize);
1134 
1135  data = nfs_writedata_alloc(hdr, 1);
1136  if (!data) {
1137  nfs_flush_error(desc, hdr);
1138  return -ENOMEM;
1139  }
1140  data->pages.pagevec[0] = page;
1141  nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
1142  list_add(&data->list, &hdr->rpc_list);
1143  requests++;
1144  nbytes -= len;
1145  offset += len;
1146  } while (nbytes != 0);
1147  nfs_list_remove_request(req);
1148  nfs_list_add_request(req, &hdr->pages);
1149  desc->pg_rpc_callops = &nfs_write_common_ops;
1150  return 0;
1151 }
1152 
1153 /*
1154  * Create an RPC task for the given write request and kick it.
1155  * The page must have been locked by the caller.
1156  *
1157  * It may happen that the page we're passed is not marked dirty.
1158  * This is the case if nfs_updatepage detects a conflicting request
1159  * that has been written but not committed.
1160  */
1161 static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
1162  struct nfs_pgio_header *hdr)
1163 {
1164  struct nfs_page *req;
1165  struct page **pages;
1166  struct nfs_write_data *data;
1167  struct list_head *head = &desc->pg_list;
1168  struct nfs_commit_info cinfo;
1169 
1170  data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
1171  desc->pg_count));
1172  if (!data) {
1173  nfs_flush_error(desc, hdr);
1174  return -ENOMEM;
1175  }
1176 
1177  nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
1178  pages = data->pages.pagevec;
1179  while (!list_empty(head)) {
1180  req = nfs_list_entry(head->next);
1181  nfs_list_remove_request(req);
1182  nfs_list_add_request(req, &hdr->pages);
1183  *pages++ = req->wb_page;
1184  }
1185 
1186  if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1187  (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
1188  desc->pg_ioflags &= ~FLUSH_COND_STABLE;
1189 
1190  /* Set up the argument struct */
1191  nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
1192  list_add(&data->list, &hdr->rpc_list);
1193  desc->pg_rpc_callops = &nfs_write_common_ops;
1194  return 0;
1195 }
1196 
1197 static int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
1198  struct nfs_pgio_header *hdr)
1199 {
1200  if (desc->pg_bsize < PAGE_CACHE_SIZE)
1201  return nfs_flush_multi(desc, hdr);
1202  return nfs_flush_one(desc, hdr);
1203 }
1205 
1206 static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1207 {
1208  struct nfs_write_header *whdr;
1209  struct nfs_pgio_header *hdr;
1210  int ret;
1211 
1212  whdr = nfs_writehdr_alloc();
1213  if (!whdr) {
1214  desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1215  return -ENOMEM;
1216  }
1217  hdr = &whdr->header;
1218  nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
1219  atomic_inc(&hdr->refcnt);
1220  ret = nfs_generic_flush(desc, hdr);
1221  if (ret == 0)
1222  ret = nfs_do_multiple_writes(&hdr->rpc_list,
1223  desc->pg_rpc_callops,
1224  desc->pg_ioflags);
1225  if (atomic_dec_and_test(&hdr->refcnt))
1226  hdr->completion_ops->completion(hdr);
1227  return ret;
1228 }
1229 
1230 static const struct nfs_pageio_ops nfs_pageio_write_ops = {
1231  .pg_test = nfs_generic_pg_test,
1232  .pg_doio = nfs_generic_pg_writepages,
1233 };
1234 
1235 void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
1236  struct inode *inode, int ioflags,
1237  const struct nfs_pgio_completion_ops *compl_ops)
1238 {
1239  nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
1240  NFS_SERVER(inode)->wsize, ioflags);
1241 }
1242 EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
1243 
1244 void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
1245 {
1246  pgio->pg_ops = &nfs_pageio_write_ops;
1247  pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
1248 }
1250 
1251 
1252 void nfs_write_prepare(struct rpc_task *task, void *calldata)
1253 {
1254  struct nfs_write_data *data = calldata;
1255  NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
1256 }
1257 
1258 void nfs_commit_prepare(struct rpc_task *task, void *calldata)
1259 {
1260  struct nfs_commit_data *data = calldata;
1261 
1262  NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
1263 }
1264 
1265 /*
1266  * Handle a write reply that flushes a whole page.
1267  *
1268  * FIXME: There is an inherent race with invalidate_inode_pages and
1269  * writebacks since the page->count is kept > 1 for as long
1270  * as the page has a write request pending.
1271  */
1272 static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
1273 {
1274  struct nfs_write_data *data = calldata;
1275 
1276  nfs_writeback_done(task, data);
1277 }
1278 
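/*
 * rpc_release callback for WRITE: if the data still needs a commit,
 * record the write verifier in the shared header the first time
 * through; if a later RPC under the same header carries a different
 * verifier, mark the header NEED_RESCHED so its pages are redirtied
 * and rewritten instead of being committed against a stale verifier.
 */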
1279 static void nfs_writeback_release_common(void *calldata)
1280 {
1281  struct nfs_write_data *data = calldata;
1282  struct nfs_pgio_header *hdr = data->header;
1283  int status = data->task.tk_status;
1284 
1285  if ((status >= 0) && nfs_write_need_commit(data)) {
1286  spin_lock(&hdr->lock);
1287  if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
1288  ; /* Do nothing */
1289  else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
1290  memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf));
1291  else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf)))
1292  set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
1293  spin_unlock(&hdr->lock);
1294  }
1295  nfs_writedata_release(data);
1296 }
1297 
1298 static const struct rpc_call_ops nfs_write_common_ops = {
1299  .rpc_call_prepare = nfs_write_prepare,
1300  .rpc_call_done = nfs_writeback_done_common,
1301  .rpc_release = nfs_writeback_release_common,
1302 };
1303 
1304 
1305 /*
1306  * This function is called when the WRITE call is complete.
1307  */
1308 void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1309 {
1310  struct nfs_writeargs *argp = &data->args;
1311  struct nfs_writeres *resp = &data->res;
1312  struct inode *inode = data->header->inode;
1313  int status;
1314 
1315  dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
1316  task->tk_pid, task->tk_status);
1317 
1318  /*
1319  * ->write_done will attempt to use post-op attributes to detect
1320  * conflicting writes by other clients. A strict interpretation
1321  * of close-to-open would allow us to continue caching even if
1322  * another writer had changed the file, but some applications
1323  * depend on tighter cache coherency when writing.
1324  */
1325  status = NFS_PROTO(inode)->write_done(task, data);
1326  if (status != 0)
1327  return;
1328  nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
1329 
1330 #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
1331  if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
1332  /* We tried a write call, but the server did not
1333  * commit data to stable storage even though we
1334  * requested it.
1335  * Note: There is a known bug in Tru64 < 5.0 in which
1336  * the server reports NFS_DATA_SYNC, but performs
1337  * NFS_FILE_SYNC. We therefore implement this checking
1338  * as a dprintk() in order to avoid filling syslog.
1339  */
1340  static unsigned long complain;
1341 
1342  /* Note this will print the MDS for a DS write */
1343  if (time_before(complain, jiffies)) {
1344  dprintk("NFS: faulty NFS server %s:"
1345  " (committed = %d) != (stable = %d)\n",
1346  NFS_SERVER(inode)->nfs_client->cl_hostname,
1347  resp->verf->committed, argp->stable);
1348  complain = jiffies + 300 * HZ;
1349  }
1350  }
1351 #endif
1352  if (task->tk_status < 0)
1353  nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
1354  else if (resp->count < argp->count) {
1355  static unsigned long complain;
1356 
1357  /* This a short write! */
1358  nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
1359 
1360  /* Has the server at least made some progress? */
1361  if (resp->count == 0) {
1362  if (time_before(complain, jiffies)) {
1363  printk(KERN_WARNING
1364  "NFS: Server wrote zero bytes, expected %u.\n",
1365  argp->count);
1366  complain = jiffies + 300 * HZ;
1367  }
1368  nfs_set_pgio_error(data->header, -EIO, argp->offset);
1369  task->tk_status = -EIO;
1370  return;
1371  }
1372  /* Was this an NFSv2 write or an NFSv3 stable write? */
1373  if (resp->verf->committed != NFS_UNSTABLE) {
1374  /* Resend from where the server left off */
1375  data->mds_offset += resp->count;
1376  argp->offset += resp->count;
1377  argp->pgbase += resp->count;
1378  argp->count -= resp->count;
1379  } else {
1380  /* Resend as a stable write in order to avoid
1381  * headaches in the case of a server crash.
1382  */
1383  argp->stable = NFS_FILE_SYNC;
1384  }
1385  rpc_restart_call_prepare(task);
1386  }
1387 }
1388 
1389 
1390 #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
1391 static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1392 {
1393  int ret;
1394 
1395  if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
1396  return 1;
1397  if (!may_wait)
1398  return 0;
1399  ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
1400  NFS_INO_COMMIT,
1401  nfs_wait_bit_killable,
1402  TASK_KILLABLE);
1403  return (ret < 0) ? ret : 1;
1404 }
1405 
1406 static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1407 {
1408  clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1409  smp_mb__after_clear_bit();
1410  wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1411 }
1412 
1413 void nfs_commitdata_release(struct nfs_commit_data *data)
1414 {
1415  put_nfs_open_context(data->context);
1416  nfs_commit_free(data);
1417 }
1419 
1420 int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
1421  const struct rpc_call_ops *call_ops,
1422  int how, int flags)
1423 {
1424  struct rpc_task *task;
1425  int priority = flush_task_priority(how);
1426  struct rpc_message msg = {
1427  .rpc_argp = &data->args,
1428  .rpc_resp = &data->res,
1429  .rpc_cred = data->cred,
1430  };
1431  struct rpc_task_setup task_setup_data = {
1432  .task = &data->task,
1433  .rpc_client = clnt,
1434  .rpc_message = &msg,
1435  .callback_ops = call_ops,
1436  .callback_data = data,
1437  .workqueue = nfsiod_workqueue,
1438  .flags = RPC_TASK_ASYNC | flags,
1439  .priority = priority,
1440  };
1441  /* Set up the initial task struct. */
1442  NFS_PROTO(data->inode)->commit_setup(data, &msg);
1443 
1444  dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
1445 
1446  task = rpc_run_task(&task_setup_data);
1447  if (IS_ERR(task))
1448  return PTR_ERR(task);
1449  if (how & FLUSH_SYNC)
1450  rpc_wait_for_completion_task(task);
1451  rpc_put_task(task);
1452  return 0;
1453 }
1455 
1456 /*
1457  * Set up the argument/result storage required for the RPC call.
1458  */
1459 void nfs_init_commit(struct nfs_commit_data *data,
1460  struct list_head *head,
1461  struct pnfs_layout_segment *lseg,
1462  struct nfs_commit_info *cinfo)
1463 {
1464  struct nfs_page *first = nfs_list_entry(head->next);
1465  struct inode *inode = first->wb_context->dentry->d_inode;
1466 
1467  /* Set up the RPC argument and reply structs
1468  * NB: take care not to mess about with data->commit et al. */
1469 
1470  list_splice_init(head, &data->pages);
1471 
1472  data->inode = inode;
1473  data->cred = first->wb_context->cred;
1474  data->lseg = lseg; /* reference transferred */
1475  data->mds_ops = &nfs_commit_ops;
1476  data->completion_ops = cinfo->completion_ops;
1477  data->dreq = cinfo->dreq;
1478 
1479  data->args.fh = NFS_FH(data->inode);
1480  /* Note: we always request a commit of the entire inode */
1481  data->args.offset = 0;
1482  data->args.count = 0;
1483  data->context = get_nfs_open_context(first->wb_context);
1484  data->res.fattr = &data->fattr;
1485  data->res.verf = &data->verf;
1486  nfs_fattr_init(&data->fattr);
1487 }
1489 
1490 void nfs_retry_commit(struct list_head *page_list,
1491  struct pnfs_layout_segment *lseg,
1492  struct nfs_commit_info *cinfo)
1493 {
1494  struct nfs_page *req;
1495 
1496  while (!list_empty(page_list)) {
1497  req = nfs_list_entry(page_list->next);
1498  nfs_list_remove_request(req);
1499  nfs_mark_request_commit(req, lseg, cinfo);
1500  if (!cinfo->dreq) {
1501  dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1502  dec_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info,
1503  BDI_RECLAIMABLE);
1504  }
1505  nfs_unlock_and_release_request(req);
1506  }
1507 }
1509 
1510 /*
1511  * Commit dirty pages
1512  */
1513 static int
1514 nfs_commit_list(struct inode *inode, struct list_head *head, int how,
1515  struct nfs_commit_info *cinfo)
1516 {
1517  struct nfs_commit_data *data;
1518 
1519  data = nfs_commitdata_alloc();
1520 
1521  if (!data)
1522  goto out_bad;
1523 
1524  /* Set up the argument struct */
1525  nfs_init_commit(data, head, NULL, cinfo);
1526  atomic_inc(&cinfo->mds->rpcs_out);
1527  return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops,
1528  how, 0);
1529  out_bad:
1530  nfs_retry_commit(head, NULL, cinfo);
1531  cinfo->completion_ops->error_cleanup(NFS_I(inode));
1532  return -ENOMEM;
1533 }
1534 
1535 /*
1536  * COMMIT call returned
1537  */
1538 static void nfs_commit_done(struct rpc_task *task, void *calldata)
1539 {
1540  struct nfs_commit_data *data = calldata;
1541 
1542  dprintk("NFS: %5u nfs_commit_done (status %d)\n",
1543  task->tk_pid, task->tk_status);
1544 
1545  /* Call the NFS version-specific code */
1546  NFS_PROTO(data->inode)->commit_done(task, data);
1547 }
1548 
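/*
 * Process the requests covered by a completed COMMIT: on error the
 * open context is flagged and the request dropped; if the verifier
 * returned by the server matches the one saved at write time the
 * request is done, otherwise the page is marked dirty so it is written
 * again. The commit lock is released when the last outstanding COMMIT
 * RPC completes.
 */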
1549 static void nfs_commit_release_pages(struct nfs_commit_data *data)
1550 {
1551  struct nfs_page *req;
1552  int status = data->task.tk_status;
1553  struct nfs_commit_info cinfo;
1554 
1555  while (!list_empty(&data->pages)) {
1556  req = nfs_list_entry(data->pages.next);
1557  nfs_list_remove_request(req);
1558  nfs_clear_page_commit(req->wb_page);
1559 
1560  dprintk("NFS: commit (%s/%lld %d@%lld)",
1561  req->wb_context->dentry->d_sb->s_id,
1562  (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
1563  req->wb_bytes,
1564  (long long)req_offset(req));
1565  if (status < 0) {
1566  nfs_context_set_write_error(req->wb_context, status);
1567  nfs_inode_remove_request(req);
1568  dprintk(", error = %d\n", status);
1569  goto next;
1570  }
1571 
1572  /* Okay, COMMIT succeeded, apparently. Check the verifier
1573  * returned by the server against all stored verfs. */
1574  if (!memcmp(&req->wb_verf, &data->verf.verifier, sizeof(req->wb_verf))) {
1575  /* We have a match */
1576  nfs_inode_remove_request(req);
1577  dprintk(" OK\n");
1578  goto next;
1579  }
1580  /* We have a mismatch. Write the page again */
1581  dprintk(" mismatch\n");
1582  nfs_mark_request_dirty(req);
1584  next:
1585  nfs_unlock_and_release_request(req);
1586  }
1587  nfs_init_cinfo(&cinfo, data->inode, data->dreq);
1588  if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
1589  nfs_commit_clear_lock(NFS_I(data->inode));
1590 }
1591 
1592 static void nfs_commit_release(void *calldata)
1593 {
1594  struct nfs_commit_data *data = calldata;
1595 
1596  data->completion_ops->completion(data);
1597  nfs_commitdata_release(calldata);
1598 }
1599 
1600 static const struct rpc_call_ops nfs_commit_ops = {
1601  .rpc_call_prepare = nfs_commit_prepare,
1602  .rpc_call_done = nfs_commit_done,
1603  .rpc_release = nfs_commit_release,
1604 };
1605 
1606 static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
1607  .completion = nfs_commit_release_pages,
1608  .error_cleanup = nfs_commit_clear_lock,
1609 };
1610 
1611 int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
1612  int how, struct nfs_commit_info *cinfo)
1613 {
1614  int status;
1615 
1616  status = pnfs_commit_list(inode, head, how, cinfo);
1617  if (status == PNFS_NOT_ATTEMPTED)
1618  status = nfs_commit_list(inode, head, how, cinfo);
1619  return status;
1620 }
1621 
1622 int nfs_commit_inode(struct inode *inode, int how)
1623 {
1624  LIST_HEAD(head);
1625  struct nfs_commit_info cinfo;
1626  int may_wait = how & FLUSH_SYNC;
1627  int res;
1628 
1629  res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1630  if (res <= 0)
1631  goto out_mark_dirty;
1632  nfs_init_cinfo_from_inode(&cinfo, inode);
1633  res = nfs_scan_commit(inode, &head, &cinfo);
1634  if (res) {
1635  int error;
1636 
1637  error = nfs_generic_commit_list(inode, &head, how, &cinfo);
1638  if (error < 0)
1639  return error;
1640  if (!may_wait)
1641  goto out_mark_dirty;
1642  error = wait_on_bit(&NFS_I(inode)->flags,
1643  NFS_INO_COMMIT,
1644  nfs_wait_bit_killable,
1645  TASK_KILLABLE);
1646  if (error < 0)
1647  return error;
1648  } else
1649  nfs_commit_clear_lock(NFS_I(inode));
1650  return res;
1651  /* Note: If we exit without ensuring that the commit is complete,
1652  * we must mark the inode as dirty. Otherwise, future calls to
1653  * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
1654  * that the data is on the disk.
1655  */
1656 out_mark_dirty:
1657  __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
1658  return res;
1659 }
1660 
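/*
 * Called via nfs_write_inode(): issue a COMMIT for pages that were
 * written unstably. For WB_SYNC_NONE the commit is skipped unless more
 * than half of the inode's cached pages are waiting for a COMMIT, and
 * the COMMIT reply is not waited for.
 */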
1661 static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
1662 {
1663  struct nfs_inode *nfsi = NFS_I(inode);
1664  int flags = FLUSH_SYNC;
1665  int ret = 0;
1666 
1667  /* no commits means nothing needs to be done */
1668  if (!nfsi->commit_info.ncommit)
1669  return ret;
1670 
1671  if (wbc->sync_mode == WB_SYNC_NONE) {
1672  /* Don't commit yet if this is a non-blocking flush and there
1673  * are a lot of outstanding writes for this mapping.
1674  */
1675  if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1))
1676  goto out_mark_dirty;
1677 
1678  /* don't wait for the COMMIT response */
1679  flags = 0;
1680  }
1681 
1682  ret = nfs_commit_inode(inode, flags);
1683  if (ret >= 0) {
1684  if (wbc->sync_mode == WB_SYNC_NONE) {
1685  if (ret < wbc->nr_to_write)
1686  wbc->nr_to_write -= ret;
1687  else
1688  wbc->nr_to_write = 0;
1689  }
1690  return 0;
1691  }
1692 out_mark_dirty:
1693  __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
1694  return ret;
1695 }
1696 #else
1697 static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
1698 {
1699  return 0;
1700 }
1701 #endif
1702 
1703 int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1704 {
1705  return nfs_commit_unstable_pages(inode, wbc);
1706 }
1708 
1709 /*
1710  * flush the inode to disk.
1711  */
1712 int nfs_wb_all(struct inode *inode)
1713 {
1714  struct writeback_control wbc = {
1715  .sync_mode = WB_SYNC_ALL,
1716  .nr_to_write = LONG_MAX,
1717  .range_start = 0,
1718  .range_end = LLONG_MAX,
1719  };
1720 
1721  return sync_inode(inode, &wbc);
1722 }
1724 
1725 int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1726 {
1727  struct nfs_page *req;
1728  int ret = 0;
1729 
1730  BUG_ON(!PageLocked(page));
1731  for (;;) {
1732  wait_on_page_writeback(page);
1733  req = nfs_page_find_request(page);
1734  if (req == NULL)
1735  break;
1736  if (nfs_lock_request(req)) {
1737  nfs_clear_request_commit(req);
1738  nfs_inode_remove_request(req);
1739  /*
1740  * In case nfs_inode_remove_request has marked the
1741  * page as being dirty
1742  */
1743  cancel_dirty_page(page, PAGE_CACHE_SIZE);
1744  nfs_unlock_and_release_request(req);
1745  break;
1746  }
1747  ret = nfs_wait_on_request(req);
1748  nfs_release_request(req);
1749  if (ret < 0)
1750  break;
1751  }
1752  return ret;
1753 }
1754 
1755 /*
1756  * Write back all requests on one page - we do this before reading it.
1757  */
1758 int nfs_wb_page(struct inode *inode, struct page *page)
1759 {
1760  loff_t range_start = page_file_offset(page);
1761  loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
1762  struct writeback_control wbc = {
1763  .sync_mode = WB_SYNC_ALL,
1764  .nr_to_write = 0,
1765  .range_start = range_start,
1766  .range_end = range_end,
1767  };
1768  int ret;
1769 
1770  for (;;) {
1771  wait_on_page_writeback(page);
1772  if (clear_page_dirty_for_io(page)) {
1773  ret = nfs_writepage_locked(page, &wbc);
1774  if (ret < 0)
1775  goto out_error;
1776  continue;
1777  }
1778  if (!PagePrivate(page))
1779  break;
1780  ret = nfs_commit_inode(inode, FLUSH_SYNC);
1781  if (ret < 0)
1782  goto out_error;
1783  }
1784  return 0;
1785 out_error:
1786  return ret;
1787 }
1788 
1789 #ifdef CONFIG_MIGRATION
1790 int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1791  struct page *page, enum migrate_mode mode)
1792 {
1793  /*
1794  * If PagePrivate is set, then the page is currently associated with
1795  * an in-progress read or write request. Don't try to migrate it.
1796  *
1797  * FIXME: we could do this in principle, but we'll need a way to ensure
1798  * that we can safely release the inode reference while holding
1799  * the page lock.
1800  */
1801  if (PagePrivate(page))
1802  return -EBUSY;
1803 
1804  nfs_fscache_release_page(page, GFP_KERNEL);
1805 
1806  return migrate_page(mapping, newpage, page, mode);
1807 }
1808 #endif
1809 
1810 int __init nfs_init_writepagecache(void)
1811 {
1812  nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1813  sizeof(struct nfs_write_header),
1814  0, SLAB_HWCACHE_ALIGN,
1815  NULL);
1816  if (nfs_wdata_cachep == NULL)
1817  return -ENOMEM;
1818 
1819  nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
1820  nfs_wdata_cachep);
1821  if (nfs_wdata_mempool == NULL)
1822  goto out_destroy_write_cache;
1823 
1824  nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
1825  sizeof(struct nfs_commit_data),
1826  0, SLAB_HWCACHE_ALIGN,
1827  NULL);
1828  if (nfs_cdata_cachep == NULL)
1829  goto out_destroy_write_mempool;
1830 
1831  nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
1832  nfs_wdata_cachep);
1833  if (nfs_commit_mempool == NULL)
1834  goto out_destroy_commit_cache;
1835 
1836  /*
1837  * NFS congestion size, scale with available memory.
1838  *
1839  * 64MB: 8192k
1840  * 128MB: 11585k
1841  * 256MB: 16384k
1842  * 512MB: 23170k
1843  * 1GB: 32768k
1844  * 2GB: 46340k
1845  * 4GB: 65536k
1846  * 8GB: 92681k
1847  * 16GB: 131072k
1848  *
1849  * This allows larger machines to have larger/more transfers.
1850  * Limit the default to 256M
1851  */
1852  nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
1853  if (nfs_congestion_kb > 256*1024)
1854  nfs_congestion_kb = 256*1024;
1855 
1856  return 0;
1857 
1858 out_destroy_commit_cache:
1859  kmem_cache_destroy(nfs_cdata_cachep);
1860 out_destroy_write_mempool:
1861  mempool_destroy(nfs_wdata_mempool);
1862 out_destroy_write_cache:
1863  kmem_cache_destroy(nfs_wdata_cachep);
1864  return -ENOMEM;
1865 }
1866 
1867 void nfs_destroy_writepagecache(void)
1868 {
1869  mempool_destroy(nfs_commit_mempool);
1870  kmem_cache_destroy(nfs_cdata_cachep);
1871  mempool_destroy(nfs_wdata_mempool);
1872  kmem_cache_destroy(nfs_wdata_cachep);
1873 }
1874