Linux Kernel 3.7.1
aops.c
1 
24 #include <linux/errno.h>
25 #include <linux/fs.h>
26 #include <linux/gfp.h>
27 #include <linux/mm.h>
28 #include <linux/pagemap.h>
29 #include <linux/swap.h>
30 #include <linux/buffer_head.h>
31 #include <linux/writeback.h>
32 #include <linux/bit_spinlock.h>
33 
34 #include "aops.h"
35 #include "attrib.h"
36 #include "debug.h"
37 #include "inode.h"
38 #include "mft.h"
39 #include "runlist.h"
40 #include "types.h"
41 #include "ntfs.h"
42 
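/**
 * ntfs_end_buffer_async_read - async io completion for reading attributes
 * @bh:        buffer head on which io is completed
 * @uptodate:  whether @bh is now uptodate or not
 *
 * Asynchronous i/o completion handler for reading pages belonging to the
 * attribute address space of an inode.  Buffers extending beyond the
 * initialized size are zeroed, and once all buffers of the page have
 * completed, the post read mst fixups are applied (for mst protected
 * attributes) before the page is marked uptodate and unlocked.
 */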
59 static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
60 {
61  unsigned long flags;
62  struct buffer_head *first, *tmp;
63  struct page *page;
64  struct inode *vi;
65  ntfs_inode *ni;
66  int page_uptodate = 1;
67 
68  page = bh->b_page;
69  vi = page->mapping->host;
70  ni = NTFS_I(vi);
71 
72  if (likely(uptodate)) {
73  loff_t i_size;
74  s64 file_ofs, init_size;
75 
76  set_buffer_uptodate(bh);
77 
78  file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
79  bh_offset(bh);
80  read_lock_irqsave(&ni->size_lock, flags);
81  init_size = ni->initialized_size;
82  i_size = i_size_read(vi);
83  read_unlock_irqrestore(&ni->size_lock, flags);
84  if (unlikely(init_size > i_size)) {
85  /* Race with shrinking truncate. */
86  init_size = i_size;
87  }
88  /* Check for the current buffer head overflowing. */
89  if (unlikely(file_ofs + bh->b_size > init_size)) {
90  int ofs;
91  void *kaddr;
92 
93  ofs = 0;
94  if (file_ofs < init_size)
95  ofs = init_size - file_ofs;
96  local_irq_save(flags);
97  kaddr = kmap_atomic(page);
98  memset(kaddr + bh_offset(bh) + ofs, 0,
99  bh->b_size - ofs);
100  flush_dcache_page(page);
101  kunmap_atomic(kaddr);
102  local_irq_restore(flags);
103  }
104  } else {
105  clear_buffer_uptodate(bh);
106  SetPageError(page);
107  ntfs_error(ni->vol->sb, "Buffer I/O error, logical block "
108  "0x%llx.", (unsigned long long)bh->b_blocknr);
109  }
110  first = page_buffers(page);
111  local_irq_save(flags);
112  bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
113  clear_buffer_async_read(bh);
114  unlock_buffer(bh);
115  tmp = bh;
116  do {
117  if (!buffer_uptodate(tmp))
118  page_uptodate = 0;
119  if (buffer_async_read(tmp)) {
120  if (likely(buffer_locked(tmp)))
121  goto still_busy;
122  /* Async buffers must be locked. */
123  BUG();
124  }
125  tmp = tmp->b_this_page;
126  } while (tmp != bh);
127  bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
128  local_irq_restore(flags);
129  /*
130  * If none of the buffers had errors then we can set the page uptodate,
131  * but we first have to perform the post read mst fixups, if the
 132  * attribute is mst protected, i.e. if NInoMstProtected(ni) is true.
133  * Note we ignore fixup errors as those are detected when
134  * map_mft_record() is called which gives us per record granularity
135  * rather than per page granularity.
136  */
137  if (!NInoMstProtected(ni)) {
138  if (likely(page_uptodate && !PageError(page)))
139  SetPageUptodate(page);
140  } else {
141  u8 *kaddr;
142  unsigned int i, recs;
143  u32 rec_size;
144 
145  rec_size = ni->itype.index.block_size;
146  recs = PAGE_CACHE_SIZE / rec_size;
147  /* Should have been verified before we got here... */
148  BUG_ON(!recs);
149  local_irq_save(flags);
150  kaddr = kmap_atomic(page);
151  for (i = 0; i < recs; i++)
152  post_read_mst_fixup((NTFS_RECORD*)(kaddr +
153  i * rec_size), rec_size);
154  kunmap_atomic(kaddr);
155  local_irq_restore(flags);
156  flush_dcache_page(page);
157  if (likely(page_uptodate && !PageError(page)))
158  SetPageUptodate(page);
159  }
160  unlock_page(page);
161  return;
162 still_busy:
163  bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
164  local_irq_restore(flags);
165  return;
166 }
167 
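/**
 * ntfs_read_block - fill a @page of an address space with data
 * @page:  page cache page to fill with data
 *
 * Fill the page @page of the address space belonging to @page->mapping->host
 * by mapping the buffers of the page to their on-disk blocks via the runlist
 * and submitting asynchronous reads.  Holes and blocks beyond the
 * initialized size are zeroed instead of being read from disk.
 *
 * Return 0 on success and -errno on error.
 */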
185 static int ntfs_read_block(struct page *page)
186 {
187  loff_t i_size;
188  VCN vcn;
189  LCN lcn;
190  s64 init_size;
191  struct inode *vi;
192  ntfs_inode *ni;
193  ntfs_volume *vol;
194  runlist_element *rl;
195  struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
196  sector_t iblock, lblock, zblock;
197  unsigned long flags;
198  unsigned int blocksize, vcn_ofs;
199  int i, nr;
200  unsigned char blocksize_bits;
201 
202  vi = page->mapping->host;
203  ni = NTFS_I(vi);
204  vol = ni->vol;
205 
206  /* $MFT/$DATA must have its complete runlist in memory at all times. */
207  BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));
208 
209  blocksize = vol->sb->s_blocksize;
210  blocksize_bits = vol->sb->s_blocksize_bits;
211 
212  if (!page_has_buffers(page)) {
213  create_empty_buffers(page, blocksize, 0);
214  if (unlikely(!page_has_buffers(page))) {
215  unlock_page(page);
216  return -ENOMEM;
217  }
218  }
219  bh = head = page_buffers(page);
220  BUG_ON(!bh);
221 
222  /*
223  * We may be racing with truncate. To avoid some of the problems we
224  * now take a snapshot of the various sizes and use those for the whole
225  * of the function. In case of an extending truncate it just means we
226  * may leave some buffers unmapped which are now allocated. This is
227  * not a problem since these buffers will just get mapped when a write
228  * occurs. In case of a shrinking truncate, we will detect this later
229  * on due to the runlist being incomplete and if the page is being
230  * fully truncated, truncate will throw it away as soon as we unlock
231  * it so no need to worry what we do with it.
232  */
233  iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
234  read_lock_irqsave(&ni->size_lock, flags);
235  lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
236  init_size = ni->initialized_size;
237  i_size = i_size_read(vi);
238  read_unlock_irqrestore(&ni->size_lock, flags);
239  if (unlikely(init_size > i_size)) {
240  /* Race with shrinking truncate. */
241  init_size = i_size;
242  }
243  zblock = (init_size + blocksize - 1) >> blocksize_bits;
244 
245  /* Loop through all the buffers in the page. */
246  rl = NULL;
247  nr = i = 0;
248  do {
249  int err = 0;
250 
251  if (unlikely(buffer_uptodate(bh)))
252  continue;
253  if (unlikely(buffer_mapped(bh))) {
254  arr[nr++] = bh;
255  continue;
256  }
257  bh->b_bdev = vol->sb->s_bdev;
258  /* Is the block within the allowed limits? */
259  if (iblock < lblock) {
260  bool is_retry = false;
261 
262  /* Convert iblock into corresponding vcn and offset. */
263  vcn = (VCN)iblock << blocksize_bits >>
264  vol->cluster_size_bits;
265  vcn_ofs = ((VCN)iblock << blocksize_bits) &
266  vol->cluster_size_mask;
267  if (!rl) {
268 lock_retry_remap:
269  down_read(&ni->runlist.lock);
270  rl = ni->runlist.rl;
271  }
272  if (likely(rl != NULL)) {
273  /* Seek to element containing target vcn. */
274  while (rl->length && rl[1].vcn <= vcn)
275  rl++;
276  lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
277  } else
278  lcn = LCN_RL_NOT_MAPPED;
279  /* Successful remap. */
280  if (lcn >= 0) {
281  /* Setup buffer head to correct block. */
282  bh->b_blocknr = ((lcn << vol->cluster_size_bits)
283  + vcn_ofs) >> blocksize_bits;
284  set_buffer_mapped(bh);
285  /* Only read initialized data blocks. */
286  if (iblock < zblock) {
287  arr[nr++] = bh;
288  continue;
289  }
290  /* Fully non-initialized data block, zero it. */
291  goto handle_zblock;
292  }
293  /* It is a hole, need to zero it. */
294  if (lcn == LCN_HOLE)
295  goto handle_hole;
296  /* If first try and runlist unmapped, map and retry. */
297  if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
298  is_retry = true;
299  /*
300  * Attempt to map runlist, dropping lock for
301  * the duration.
302  */
303  up_read(&ni->runlist.lock);
304  err = ntfs_map_runlist(ni, vcn);
305  if (likely(!err))
306  goto lock_retry_remap;
307  rl = NULL;
308  } else if (!rl)
309  up_read(&ni->runlist.lock);
310  /*
311  * If buffer is outside the runlist, treat it as a
312  * hole. This can happen due to concurrent truncate
313  * for example.
314  */
315  if (err == -ENOENT || lcn == LCN_ENOENT) {
316  err = 0;
317  goto handle_hole;
318  }
319  /* Hard error, zero out region. */
320  if (!err)
321  err = -EIO;
322  bh->b_blocknr = -1;
323  SetPageError(page);
324  ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
325  "attribute type 0x%x, vcn 0x%llx, "
326  "offset 0x%x because its location on "
327  "disk could not be determined%s "
328  "(error code %i).", ni->mft_no,
329  ni->type, (unsigned long long)vcn,
330  vcn_ofs, is_retry ? " even after "
331  "retrying" : "", err);
332  }
333  /*
334  * Either iblock was outside lblock limits or
335  * ntfs_rl_vcn_to_lcn() returned error. Just zero that portion
336  * of the page and set the buffer uptodate.
337  */
338 handle_hole:
339  bh->b_blocknr = -1UL;
340  clear_buffer_mapped(bh);
341 handle_zblock:
342  zero_user(page, i * blocksize, blocksize);
343  if (likely(!err))
344  set_buffer_uptodate(bh);
345  } while (i++, iblock++, (bh = bh->b_this_page) != head);
346 
347  /* Release the lock if we took it. */
348  if (rl)
349  up_read(&ni->runlist.lock);
350 
351  /* Check we have at least one buffer ready for i/o. */
352  if (nr) {
353  struct buffer_head *tbh;
354 
355  /* Lock the buffers. */
356  for (i = 0; i < nr; i++) {
357  tbh = arr[i];
358  lock_buffer(tbh);
359  tbh->b_end_io = ntfs_end_buffer_async_read;
360  set_buffer_async_read(tbh);
361  }
362  /* Finally, start i/o on the buffers. */
363  for (i = 0; i < nr; i++) {
364  tbh = arr[i];
365  if (likely(!buffer_uptodate(tbh)))
366  submit_bh(READ, tbh);
367  else
368  ntfs_end_buffer_async_read(tbh, 1);
369  }
370  return 0;
371  }
372  /* No i/o was scheduled on any of the buffers. */
373  if (likely(!PageError(page)))
374  SetPageUptodate(page);
375  else /* Signal synchronous i/o error. */
376  nr = -EIO;
377  unlock_page(page);
378  return nr;
379 }
380 
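/**
 * ntfs_readpage - fill a @page of a @file with data from the device
 * @file:  open file to which the page @page belongs or NULL
 * @page:  page cache page to fill with data
 *
 * Resident attributes are copied straight out of the mft record into the
 * page; non-resident attributes are read via ntfs_read_block() and
 * compressed data streams via ntfs_read_compressed_block().  Access to
 * encrypted attributes is denied.
 *
 * Return 0 on success and -errno on error.
 */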
399 static int ntfs_readpage(struct file *file, struct page *page)
400 {
401  loff_t i_size;
402  struct inode *vi;
403  ntfs_inode *ni, *base_ni;
404  u8 *addr;
 405  ntfs_attr_search_ctx *ctx;
 406  MFT_RECORD *mrec;
407  unsigned long flags;
408  u32 attr_len;
409  int err = 0;
410 
411 retry_readpage:
412  BUG_ON(!PageLocked(page));
413  vi = page->mapping->host;
414  i_size = i_size_read(vi);
415  /* Is the page fully outside i_size? (truncate in progress) */
416  if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
417  PAGE_CACHE_SHIFT)) {
418  zero_user(page, 0, PAGE_CACHE_SIZE);
419  ntfs_debug("Read outside i_size - truncated?");
420  goto done;
421  }
422  /*
423  * This can potentially happen because we clear PageUptodate() during
424  * ntfs_writepage() of MstProtected() attributes.
425  */
426  if (PageUptodate(page)) {
427  unlock_page(page);
428  return 0;
429  }
430  ni = NTFS_I(vi);
431  /*
432  * Only $DATA attributes can be encrypted and only unnamed $DATA
433  * attributes can be compressed. Index root can have the flags set but
434  * this means to create compressed/encrypted files, not that the
435  * attribute is compressed/encrypted. Note we need to check for
436  * AT_INDEX_ALLOCATION since this is the type of both directory and
437  * index inodes.
438  */
439  if (ni->type != AT_INDEX_ALLOCATION) {
440  /* If attribute is encrypted, deny access, just like NT4. */
441  if (NInoEncrypted(ni)) {
442  BUG_ON(ni->type != AT_DATA);
443  err = -EACCES;
444  goto err_out;
445  }
446  /* Compressed data streams are handled in compress.c. */
447  if (NInoNonResident(ni) && NInoCompressed(ni)) {
448  BUG_ON(ni->type != AT_DATA);
449  BUG_ON(ni->name_len);
450  return ntfs_read_compressed_block(page);
451  }
452  }
453  /* NInoNonResident() == NInoIndexAllocPresent() */
454  if (NInoNonResident(ni)) {
455  /* Normal, non-resident data stream. */
456  return ntfs_read_block(page);
457  }
458  /*
459  * Attribute is resident, implying it is not compressed or encrypted.
460  * This also means the attribute is smaller than an mft record and
461  * hence smaller than a page, so can simply zero out any pages with
462  * index above 0. Note the attribute can actually be marked compressed
463  * but if it is resident the actual data is not compressed so we are
464  * ok to ignore the compressed flag here.
465  */
466  if (unlikely(page->index > 0)) {
467  zero_user(page, 0, PAGE_CACHE_SIZE);
468  goto done;
469  }
470  if (!NInoAttr(ni))
471  base_ni = ni;
472  else
473  base_ni = ni->ext.base_ntfs_ino;
474  /* Map, pin, and lock the mft record. */
475  mrec = map_mft_record(base_ni);
476  if (IS_ERR(mrec)) {
477  err = PTR_ERR(mrec);
478  goto err_out;
479  }
480  /*
481  * If a parallel write made the attribute non-resident, drop the mft
482  * record and retry the readpage.
483  */
484  if (unlikely(NInoNonResident(ni))) {
485  unmap_mft_record(base_ni);
486  goto retry_readpage;
487  }
488  ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
489  if (unlikely(!ctx)) {
490  err = -ENOMEM;
491  goto unm_err_out;
492  }
493  err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
494  CASE_SENSITIVE, 0, NULL, 0, ctx);
495  if (unlikely(err))
496  goto put_unm_err_out;
497  attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
498  read_lock_irqsave(&ni->size_lock, flags);
499  if (unlikely(attr_len > ni->initialized_size))
500  attr_len = ni->initialized_size;
501  i_size = i_size_read(vi);
502  read_unlock_irqrestore(&ni->size_lock, flags);
503  if (unlikely(attr_len > i_size)) {
504  /* Race with shrinking truncate. */
505  attr_len = i_size;
506  }
507  addr = kmap_atomic(page);
508  /* Copy the data to the page. */
509  memcpy(addr, (u8*)ctx->attr +
510  le16_to_cpu(ctx->attr->data.resident.value_offset),
511  attr_len);
512  /* Zero the remainder of the page. */
513  memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
514  flush_dcache_page(page);
515  kunmap_atomic(addr);
 516 put_unm_err_out:
 517  ntfs_attr_put_search_ctx(ctx);
518 unm_err_out:
519  unmap_mft_record(base_ni);
520 done:
521  SetPageUptodate(page);
522 err_out:
523  unlock_page(page);
524  return err;
525 }
526 
527 #ifdef NTFS_RW
528 
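/**
 * ntfs_write_block - write a @page to the backing store
 * @page:  page cache page to write out
 * @wbc:   writeback control structure
 *
 * Write out a dirty page belonging to a non-resident, non-mst protected
 * attribute.  Dirty buffers are mapped to their on-disk locations via the
 * runlist and submitted for asynchronous write i/o.  Writing beyond the
 * initialized size and instantiating sparse (hole) regions are not
 * supported yet; such buffers are logged and skipped.
 *
 * Return 0 on success and -errno on error.
 */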
551 static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
552 {
553  VCN vcn;
554  LCN lcn;
 555  s64 initialized_size;
 556  loff_t i_size;
557  sector_t block, dblock, iblock;
558  struct inode *vi;
559  ntfs_inode *ni;
560  ntfs_volume *vol;
561  runlist_element *rl;
562  struct buffer_head *bh, *head;
563  unsigned long flags;
564  unsigned int blocksize, vcn_ofs;
565  int err;
566  bool need_end_writeback;
567  unsigned char blocksize_bits;
568 
569  vi = page->mapping->host;
570  ni = NTFS_I(vi);
571  vol = ni->vol;
572 
573  ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
574  "0x%lx.", ni->mft_no, ni->type, page->index);
575 
576  BUG_ON(!NInoNonResident(ni));
577  BUG_ON(NInoMstProtected(ni));
578  blocksize = vol->sb->s_blocksize;
579  blocksize_bits = vol->sb->s_blocksize_bits;
580  if (!page_has_buffers(page)) {
581  BUG_ON(!PageUptodate(page));
582  create_empty_buffers(page, blocksize,
583  (1 << BH_Uptodate) | (1 << BH_Dirty));
584  if (unlikely(!page_has_buffers(page))) {
585  ntfs_warning(vol->sb, "Error allocating page "
586  "buffers. Redirtying page so we try "
587  "again later.");
588  /*
589  * Put the page back on mapping->dirty_pages, but leave
590  * its buffers' dirty state as-is.
591  */
592  redirty_page_for_writepage(wbc, page);
593  unlock_page(page);
594  return 0;
595  }
596  }
597  bh = head = page_buffers(page);
598  BUG_ON(!bh);
599 
600  /* NOTE: Different naming scheme to ntfs_read_block()! */
601 
602  /* The first block in the page. */
603  block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
604 
605  read_lock_irqsave(&ni->size_lock, flags);
606  i_size = i_size_read(vi);
607  initialized_size = ni->initialized_size;
608  read_unlock_irqrestore(&ni->size_lock, flags);
609 
610  /* The first out of bounds block for the data size. */
611  dblock = (i_size + blocksize - 1) >> blocksize_bits;
612 
613  /* The last (fully or partially) initialized block. */
614  iblock = initialized_size >> blocksize_bits;
615 
616  /*
617  * Be very careful. We have no exclusion from __set_page_dirty_buffers
618  * here, and the (potentially unmapped) buffers may become dirty at
619  * any time. If a buffer becomes dirty here after we've inspected it
620  * then we just miss that fact, and the page stays dirty.
621  *
622  * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
623  * handle that here by just cleaning them.
624  */
625 
626  /*
627  * Loop through all the buffers in the page, mapping all the dirty
628  * buffers to disk addresses and handling any aliases from the
629  * underlying block device's mapping.
630  */
631  rl = NULL;
632  err = 0;
633  do {
634  bool is_retry = false;
635 
636  if (unlikely(block >= dblock)) {
637  /*
638  * Mapped buffers outside i_size will occur, because
639  * this page can be outside i_size when there is a
640  * truncate in progress. The contents of such buffers
641  * were zeroed by ntfs_writepage().
642  *
643  * FIXME: What about the small race window where
644  * ntfs_writepage() has not done any clearing because
645  * the page was within i_size but before we get here,
646  * vmtruncate() modifies i_size?
647  */
648  clear_buffer_dirty(bh);
649  set_buffer_uptodate(bh);
650  continue;
651  }
652 
653  /* Clean buffers are not written out, so no need to map them. */
654  if (!buffer_dirty(bh))
655  continue;
656 
657  /* Make sure we have enough initialized size. */
658  if (unlikely((block >= iblock) &&
659  (initialized_size < i_size))) {
660  /*
661  * If this page is fully outside initialized size, zero
662  * out all pages between the current initialized size
663  * and the current page. Just use ntfs_readpage() to do
664  * the zeroing transparently.
665  */
666  if (block > iblock) {
667  // TODO:
668  // For each page do:
669  // - read_cache_page()
670  // Again for each page do:
671  // - wait_on_page_locked()
672  // - Check (PageUptodate(page) &&
673  // !PageError(page))
674  // Update initialized size in the attribute and
675  // in the inode.
676  // Again, for each page do:
677  // __set_page_dirty_buffers();
678  // page_cache_release()
679  // We don't need to wait on the writes.
680  // Update iblock.
681  }
682  /*
683  * The current page straddles initialized size. Zero
684  * all non-uptodate buffers and set them uptodate (and
685  * dirty?). Note, there aren't any non-uptodate buffers
686  * if the page is uptodate.
687  * FIXME: For an uptodate page, the buffers may need to
688  * be written out because they were not initialized on
689  * disk before.
690  */
691  if (!PageUptodate(page)) {
692  // TODO:
693  // Zero any non-uptodate buffers up to i_size.
694  // Set them uptodate and dirty.
695  }
696  // TODO:
697  // Update initialized size in the attribute and in the
698  // inode (up to i_size).
699  // Update iblock.
700  // FIXME: This is inefficient. Try to batch the two
701  // size changes to happen in one go.
702  ntfs_error(vol->sb, "Writing beyond initialized size "
703  "is not supported yet. Sorry.");
704  err = -EOPNOTSUPP;
705  break;
706  // Do NOT set_buffer_new() BUT DO clear buffer range
707  // outside write request range.
708  // set_buffer_uptodate() on complete buffers as well as
709  // set_buffer_dirty().
710  }
711 
712  /* No need to map buffers that are already mapped. */
713  if (buffer_mapped(bh))
714  continue;
715 
716  /* Unmapped, dirty buffer. Need to map it. */
717  bh->b_bdev = vol->sb->s_bdev;
718 
719  /* Convert block into corresponding vcn and offset. */
720  vcn = (VCN)block << blocksize_bits;
721  vcn_ofs = vcn & vol->cluster_size_mask;
722  vcn >>= vol->cluster_size_bits;
723  if (!rl) {
724 lock_retry_remap:
725  down_read(&ni->runlist.lock);
726  rl = ni->runlist.rl;
727  }
728  if (likely(rl != NULL)) {
729  /* Seek to element containing target vcn. */
730  while (rl->length && rl[1].vcn <= vcn)
731  rl++;
732  lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
733  } else
734  lcn = LCN_RL_NOT_MAPPED;
735  /* Successful remap. */
736  if (lcn >= 0) {
737  /* Setup buffer head to point to correct block. */
738  bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
739  vcn_ofs) >> blocksize_bits;
740  set_buffer_mapped(bh);
741  continue;
742  }
743  /* It is a hole, need to instantiate it. */
744  if (lcn == LCN_HOLE) {
745  u8 *kaddr;
746  unsigned long *bpos, *bend;
747 
748  /* Check if the buffer is zero. */
749  kaddr = kmap_atomic(page);
750  bpos = (unsigned long *)(kaddr + bh_offset(bh));
751  bend = (unsigned long *)((u8*)bpos + blocksize);
752  do {
753  if (unlikely(*bpos))
754  break;
755  } while (likely(++bpos < bend));
756  kunmap_atomic(kaddr);
757  if (bpos == bend) {
758  /*
759  * Buffer is zero and sparse, no need to write
760  * it.
761  */
762  bh->b_blocknr = -1;
763  clear_buffer_dirty(bh);
764  continue;
765  }
766  // TODO: Instantiate the hole.
767  // clear_buffer_new(bh);
768  // unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
769  ntfs_error(vol->sb, "Writing into sparse regions is "
770  "not supported yet. Sorry.");
771  err = -EOPNOTSUPP;
772  break;
773  }
774  /* If first try and runlist unmapped, map and retry. */
775  if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
776  is_retry = true;
777  /*
778  * Attempt to map runlist, dropping lock for
779  * the duration.
780  */
781  up_read(&ni->runlist.lock);
782  err = ntfs_map_runlist(ni, vcn);
783  if (likely(!err))
784  goto lock_retry_remap;
785  rl = NULL;
786  } else if (!rl)
787  up_read(&ni->runlist.lock);
788  /*
789  * If buffer is outside the runlist, truncate has cut it out
790  * of the runlist. Just clean and clear the buffer and set it
791  * uptodate so it can get discarded by the VM.
792  */
793  if (err == -ENOENT || lcn == LCN_ENOENT) {
794  bh->b_blocknr = -1;
795  clear_buffer_dirty(bh);
796  zero_user(page, bh_offset(bh), blocksize);
797  set_buffer_uptodate(bh);
798  err = 0;
799  continue;
800  }
801  /* Failed to map the buffer, even after retrying. */
802  if (!err)
803  err = -EIO;
804  bh->b_blocknr = -1;
805  ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
806  "attribute type 0x%x, vcn 0x%llx, offset 0x%x "
807  "because its location on disk could not be "
808  "determined%s (error code %i).", ni->mft_no,
809  ni->type, (unsigned long long)vcn,
810  vcn_ofs, is_retry ? " even after "
811  "retrying" : "", err);
812  break;
813  } while (block++, (bh = bh->b_this_page) != head);
814 
815  /* Release the lock if we took it. */
816  if (rl)
817  up_read(&ni->runlist.lock);
818 
819  /* For the error case, need to reset bh to the beginning. */
820  bh = head;
821 
822  /* Just an optimization, so ->readpage() is not called later. */
823  if (unlikely(!PageUptodate(page))) {
824  int uptodate = 1;
825  do {
826  if (!buffer_uptodate(bh)) {
827  uptodate = 0;
828  bh = head;
829  break;
830  }
831  } while ((bh = bh->b_this_page) != head);
832  if (uptodate)
833  SetPageUptodate(page);
834  }
835 
836  /* Setup all mapped, dirty buffers for async write i/o. */
837  do {
838  if (buffer_mapped(bh) && buffer_dirty(bh)) {
839  lock_buffer(bh);
840  if (test_clear_buffer_dirty(bh)) {
 841  BUG_ON(!buffer_uptodate(bh));
 842  mark_buffer_async_write(bh);
843  } else
844  unlock_buffer(bh);
845  } else if (unlikely(err)) {
846  /*
847  * For the error case. The buffer may have been set
848  * dirty during attachment to a dirty page.
849  */
850  if (err != -ENOMEM)
851  clear_buffer_dirty(bh);
852  }
853  } while ((bh = bh->b_this_page) != head);
854 
855  if (unlikely(err)) {
856  // TODO: Remove the -EOPNOTSUPP check later on...
857  if (unlikely(err == -EOPNOTSUPP))
858  err = 0;
859  else if (err == -ENOMEM) {
860  ntfs_warning(vol->sb, "Error allocating memory. "
861  "Redirtying page so we try again "
862  "later.");
863  /*
864  * Put the page back on mapping->dirty_pages, but
 865  * leave its buffers' dirty state as-is.
866  */
867  redirty_page_for_writepage(wbc, page);
868  err = 0;
869  } else
870  SetPageError(page);
871  }
872 
873  BUG_ON(PageWriteback(page));
874  set_page_writeback(page); /* Keeps try_to_free_buffers() away. */
875 
876  /* Submit the prepared buffers for i/o. */
877  need_end_writeback = true;
878  do {
879  struct buffer_head *next = bh->b_this_page;
880  if (buffer_async_write(bh)) {
881  submit_bh(WRITE, bh);
882  need_end_writeback = false;
883  }
884  bh = next;
885  } while (bh != head);
886  unlock_page(page);
887 
888  /* If no i/o was started, need to end_page_writeback(). */
889  if (unlikely(need_end_writeback))
890  end_page_writeback(page);
891 
892  ntfs_debug("Done.");
893  return err;
894 }
895 
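/**
 * ntfs_write_mst_block - write a @page of a mst protected attribute
 * @page:  page cache page to write out
 * @wbc:   writeback control structure
 *
 * Write out the dirty ntfs records (mft records or index records) contained
 * in @page.  The mst fixups are applied before the records are written out
 * synchronously and removed again afterwards.  When writing mft records,
 * records that are mirrored in $MFTMirr are synchronized with the mirror.
 *
 * Return 0 on success and -errno on error.
 */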
920 static int ntfs_write_mst_block(struct page *page,
921  struct writeback_control *wbc)
922 {
923  sector_t block, dblock, rec_block;
924  struct inode *vi = page->mapping->host;
925  ntfs_inode *ni = NTFS_I(vi);
926  ntfs_volume *vol = ni->vol;
927  u8 *kaddr;
928  unsigned int rec_size = ni->itype.index.block_size;
929  ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
930  struct buffer_head *bh, *head, *tbh, *rec_start_bh;
931  struct buffer_head *bhs[MAX_BUF_PER_PAGE];
932  runlist_element *rl;
933  int i, nr_locked_nis, nr_recs, nr_bhs, max_bhs, bhs_per_rec, err, err2;
934  unsigned bh_size, rec_size_bits;
935  bool sync, is_mft, page_is_dirty, rec_is_dirty;
936  unsigned char bh_size_bits;
937 
938  ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
939  "0x%lx.", vi->i_ino, ni->type, page->index);
940  BUG_ON(!NInoNonResident(ni));
941  BUG_ON(!NInoMstProtected(ni));
942  is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino);
943  /*
944  * NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page
945  * in its page cache were to be marked dirty. However this should
946  * never happen with the current driver and considering we do not
947  * handle this case here we do want to BUG(), at least for now.
948  */
949  BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) ||
950  (NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
951  bh_size = vol->sb->s_blocksize;
952  bh_size_bits = vol->sb->s_blocksize_bits;
953  max_bhs = PAGE_CACHE_SIZE / bh_size;
954  BUG_ON(!max_bhs);
955  BUG_ON(max_bhs > MAX_BUF_PER_PAGE);
956 
957  /* Were we called for sync purposes? */
958  sync = (wbc->sync_mode == WB_SYNC_ALL);
959 
960  /* Make sure we have mapped buffers. */
961  bh = head = page_buffers(page);
962  BUG_ON(!bh);
963 
964  rec_size_bits = ni->itype.index.block_size_bits;
965  BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits));
966  bhs_per_rec = rec_size >> bh_size_bits;
967  BUG_ON(!bhs_per_rec);
968 
969  /* The first block in the page. */
970  rec_block = block = (sector_t)page->index <<
971  (PAGE_CACHE_SHIFT - bh_size_bits);
972 
973  /* The first out of bounds block for the data size. */
974  dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;
975 
976  rl = NULL;
977  err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
978  page_is_dirty = rec_is_dirty = false;
979  rec_start_bh = NULL;
980  do {
981  bool is_retry = false;
982 
983  if (likely(block < rec_block)) {
984  if (unlikely(block >= dblock)) {
985  clear_buffer_dirty(bh);
986  set_buffer_uptodate(bh);
987  continue;
988  }
989  /*
990  * This block is not the first one in the record. We
991  * ignore the buffer's dirty state because we could
992  * have raced with a parallel mark_ntfs_record_dirty().
993  */
994  if (!rec_is_dirty)
995  continue;
996  if (unlikely(err2)) {
997  if (err2 != -ENOMEM)
998  clear_buffer_dirty(bh);
999  continue;
1000  }
1001  } else /* if (block == rec_block) */ {
1002  BUG_ON(block > rec_block);
1003  /* This block is the first one in the record. */
1004  rec_block += bhs_per_rec;
1005  err2 = 0;
1006  if (unlikely(block >= dblock)) {
1007  clear_buffer_dirty(bh);
1008  continue;
1009  }
1010  if (!buffer_dirty(bh)) {
1011  /* Clean records are not written out. */
1012  rec_is_dirty = false;
1013  continue;
1014  }
1015  rec_is_dirty = true;
1016  rec_start_bh = bh;
1017  }
1018  /* Need to map the buffer if it is not mapped already. */
1019  if (unlikely(!buffer_mapped(bh))) {
1020  VCN vcn;
1021  LCN lcn;
1022  unsigned int vcn_ofs;
1023 
1024  bh->b_bdev = vol->sb->s_bdev;
1025  /* Obtain the vcn and offset of the current block. */
1026  vcn = (VCN)block << bh_size_bits;
1027  vcn_ofs = vcn & vol->cluster_size_mask;
1028  vcn >>= vol->cluster_size_bits;
1029  if (!rl) {
1030 lock_retry_remap:
1031  down_read(&ni->runlist.lock);
1032  rl = ni->runlist.rl;
1033  }
1034  if (likely(rl != NULL)) {
1035  /* Seek to element containing target vcn. */
1036  while (rl->length && rl[1].vcn <= vcn)
1037  rl++;
1038  lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
1039  } else
1040  lcn = LCN_RL_NOT_MAPPED;
1041  /* Successful remap. */
1042  if (likely(lcn >= 0)) {
1043  /* Setup buffer head to correct block. */
1044  bh->b_blocknr = ((lcn <<
1045  vol->cluster_size_bits) +
1046  vcn_ofs) >> bh_size_bits;
1047  set_buffer_mapped(bh);
1048  } else {
1049  /*
1050  * Remap failed. Retry to map the runlist once
1051  * unless we are working on $MFT which always
1052  * has the whole of its runlist in memory.
1053  */
1054  if (!is_mft && !is_retry &&
1055  lcn == LCN_RL_NOT_MAPPED) {
1056  is_retry = true;
1057  /*
1058  * Attempt to map runlist, dropping
1059  * lock for the duration.
1060  */
1061  up_read(&ni->runlist.lock);
1062  err2 = ntfs_map_runlist(ni, vcn);
1063  if (likely(!err2))
1064  goto lock_retry_remap;
1065  if (err2 == -ENOMEM)
1066  page_is_dirty = true;
1067  lcn = err2;
1068  } else {
1069  err2 = -EIO;
1070  if (!rl)
1071  up_read(&ni->runlist.lock);
1072  }
1073  /* Hard error. Abort writing this record. */
1074  if (!err || err == -ENOMEM)
1075  err = err2;
1076  bh->b_blocknr = -1;
1077  ntfs_error(vol->sb, "Cannot write ntfs record "
1078  "0x%llx (inode 0x%lx, "
1079  "attribute type 0x%x) because "
1080  "its location on disk could "
1081  "not be determined (error "
1082  "code %lli).",
1083  (long long)block <<
1084  bh_size_bits >>
1085  vol->mft_record_size_bits,
1086  ni->mft_no, ni->type,
1087  (long long)lcn);
1088  /*
1089  * If this is not the first buffer, remove the
1090  * buffers in this record from the list of
1091  * buffers to write and clear their dirty bit
1092  * if not error -ENOMEM.
1093  */
1094  if (rec_start_bh != bh) {
1095  while (bhs[--nr_bhs] != rec_start_bh)
1096  ;
1097  if (err2 != -ENOMEM) {
1098  do {
1099  clear_buffer_dirty(
1100  rec_start_bh);
1101  } while ((rec_start_bh =
1102  rec_start_bh->
1103  b_this_page) !=
1104  bh);
1105  }
1106  }
1107  continue;
1108  }
1109  }
1110  BUG_ON(!buffer_uptodate(bh));
1111  BUG_ON(nr_bhs >= max_bhs);
1112  bhs[nr_bhs++] = bh;
1113  } while (block++, (bh = bh->b_this_page) != head);
1114  if (unlikely(rl))
1115  up_read(&ni->runlist.lock);
1116  /* If there were no dirty buffers, we are done. */
1117  if (!nr_bhs)
1118  goto done;
1119  /* Map the page so we can access its contents. */
1120  kaddr = kmap(page);
1121  /* Clear the page uptodate flag whilst the mst fixups are applied. */
1122  BUG_ON(!PageUptodate(page));
1123  ClearPageUptodate(page);
1124  for (i = 0; i < nr_bhs; i++) {
1125  unsigned int ofs;
1126 
1127  /* Skip buffers which are not at the beginning of records. */
1128  if (i % bhs_per_rec)
1129  continue;
1130  tbh = bhs[i];
1131  ofs = bh_offset(tbh);
1132  if (is_mft) {
1133  ntfs_inode *tni;
1134  unsigned long mft_no;
1135 
1136  /* Get the mft record number. */
1137  mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
1138  >> rec_size_bits;
1139  /* Check whether to write this mft record. */
1140  tni = NULL;
1141  if (!ntfs_may_write_mft_record(vol, mft_no,
1142  (MFT_RECORD*)(kaddr + ofs), &tni)) {
1143  /*
1144  * The record should not be written. This
1145  * means we need to redirty the page before
1146  * returning.
1147  */
1148  page_is_dirty = true;
1149  /*
1150  * Remove the buffers in this mft record from
1151  * the list of buffers to write.
1152  */
1153  do {
1154  bhs[i] = NULL;
1155  } while (++i % bhs_per_rec);
1156  continue;
1157  }
1158  /*
1159  * The record should be written. If a locked ntfs
1160  * inode was returned, add it to the array of locked
1161  * ntfs inodes.
1162  */
1163  if (tni)
1164  locked_nis[nr_locked_nis++] = tni;
1165  }
1166  /* Apply the mst protection fixups. */
1167  err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
1168  rec_size);
1169  if (unlikely(err2)) {
1170  if (!err || err == -ENOMEM)
1171  err = -EIO;
1172  ntfs_error(vol->sb, "Failed to apply mst fixups "
1173  "(inode 0x%lx, attribute type 0x%x, "
1174  "page index 0x%lx, page offset 0x%x)!"
1175  " Unmount and run chkdsk.", vi->i_ino,
1176  ni->type, page->index, ofs);
1177  /*
1178  * Mark all the buffers in this record clean as we do
1179  * not want to write corrupt data to disk.
1180  */
1181  do {
1182  clear_buffer_dirty(bhs[i]);
1183  bhs[i] = NULL;
1184  } while (++i % bhs_per_rec);
1185  continue;
1186  }
1187  nr_recs++;
1188  }
1189  /* If no records are to be written out, we are done. */
1190  if (!nr_recs)
1191  goto unm_done;
1192  flush_dcache_page(page);
1193  /* Lock buffers and start synchronous write i/o on them. */
1194  for (i = 0; i < nr_bhs; i++) {
1195  tbh = bhs[i];
1196  if (!tbh)
1197  continue;
1198  if (!trylock_buffer(tbh))
1199  BUG();
1200  /* The buffer dirty state is now irrelevant, just clean it. */
1201  clear_buffer_dirty(tbh);
1202  BUG_ON(!buffer_uptodate(tbh));
1203  BUG_ON(!buffer_mapped(tbh));
1204  get_bh(tbh);
1205  tbh->b_end_io = end_buffer_write_sync;
1206  submit_bh(WRITE, tbh);
1207  }
1208  /* Synchronize the mft mirror now if not @sync. */
1209  if (is_mft && !sync)
1210  goto do_mirror;
1211 do_wait:
1212  /* Wait on i/o completion of buffers. */
1213  for (i = 0; i < nr_bhs; i++) {
1214  tbh = bhs[i];
1215  if (!tbh)
1216  continue;
1217  wait_on_buffer(tbh);
1218  if (unlikely(!buffer_uptodate(tbh))) {
1219  ntfs_error(vol->sb, "I/O error while writing ntfs "
1220  "record buffer (inode 0x%lx, "
1221  "attribute type 0x%x, page index "
1222  "0x%lx, page offset 0x%lx)! Unmount "
1223  "and run chkdsk.", vi->i_ino, ni->type,
1224  page->index, bh_offset(tbh));
1225  if (!err || err == -ENOMEM)
1226  err = -EIO;
1227  /*
1228  * Set the buffer uptodate so the page and buffer
1229  * states do not become out of sync.
1230  */
1231  set_buffer_uptodate(tbh);
1232  }
1233  }
1234  /* If @sync, now synchronize the mft mirror. */
1235  if (is_mft && sync) {
1236 do_mirror:
1237  for (i = 0; i < nr_bhs; i++) {
1238  unsigned long mft_no;
1239  unsigned int ofs;
1240 
1241  /*
1242  * Skip buffers which are not at the beginning of
1243  * records.
1244  */
1245  if (i % bhs_per_rec)
1246  continue;
1247  tbh = bhs[i];
1248  /* Skip removed buffers (and hence records). */
1249  if (!tbh)
1250  continue;
1251  ofs = bh_offset(tbh);
1252  /* Get the mft record number. */
1253  mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
1254  >> rec_size_bits;
1255  if (mft_no < vol->mftmirr_size)
1256  ntfs_sync_mft_mirror(vol, mft_no,
1257  (MFT_RECORD*)(kaddr + ofs),
1258  sync);
1259  }
1260  if (!sync)
1261  goto do_wait;
1262  }
1263  /* Remove the mst protection fixups again. */
1264  for (i = 0; i < nr_bhs; i++) {
1265  if (!(i % bhs_per_rec)) {
1266  tbh = bhs[i];
1267  if (!tbh)
1268  continue;
1269  post_write_mst_fixup((NTFS_RECORD*)(kaddr +
1270  bh_offset(tbh)));
1271  }
1272  }
1273  flush_dcache_page(page);
1274 unm_done:
1275  /* Unlock any locked inodes. */
1276  while (nr_locked_nis-- > 0) {
1277  ntfs_inode *tni, *base_tni;
1278 
1279  tni = locked_nis[nr_locked_nis];
1280  /* Get the base inode. */
1281  mutex_lock(&tni->extent_lock);
1282  if (tni->nr_extents >= 0)
1283  base_tni = tni;
1284  else {
1285  base_tni = tni->ext.base_ntfs_ino;
1286  BUG_ON(!base_tni);
1287  }
1288  mutex_unlock(&tni->extent_lock);
1289  ntfs_debug("Unlocking %s inode 0x%lx.",
1290  tni == base_tni ? "base" : "extent",
1291  tni->mft_no);
1292  mutex_unlock(&tni->mrec_lock);
1293  atomic_dec(&tni->count);
1294  iput(VFS_I(base_tni));
1295  }
1296  SetPageUptodate(page);
1297  kunmap(page);
1298 done:
1299  if (unlikely(err && err != -ENOMEM)) {
1300  /*
1301  * Set page error if there is only one ntfs record in the page.
 1302  * Otherwise we would lose per-record granularity.
1303  */
1304  if (ni->itype.index.block_size == PAGE_CACHE_SIZE)
1305  SetPageError(page);
1306  NVolSetErrors(vol);
1307  }
1308  if (page_is_dirty) {
1309  ntfs_debug("Page still contains one or more dirty ntfs "
1310  "records. Redirtying the page starting at "
1311  "record 0x%lx.", page->index <<
1312  (PAGE_CACHE_SHIFT - rec_size_bits));
1313  redirty_page_for_writepage(wbc, page);
1314  unlock_page(page);
1315  } else {
1316  /*
1317  * Keep the VM happy. This must be done otherwise the
1318  * radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
1319  * the page is clean.
1320  */
1321  BUG_ON(PageWriteback(page));
1322  set_page_writeback(page);
1323  unlock_page(page);
1324  end_page_writeback(page);
1325  }
1326  if (likely(!err))
1327  ntfs_debug("Done.");
1328  return err;
1329 }
1330 
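/**
 * ntfs_writepage - write a @page to the backing store
 * @page:  page cache page to write out
 * @wbc:   writeback control structure
 *
 * Non-resident attributes are written out via ntfs_write_block() or, if mst
 * protected, via ntfs_write_mst_block().  Resident attributes are copied
 * from the page back into the mft record, which is then marked dirty.
 * Writing to encrypted, compressed, and sparse attributes is not (yet)
 * supported.
 *
 * Return 0 on success and -errno on error.
 */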
1354 static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
1355 {
1356  loff_t i_size;
1357  struct inode *vi = page->mapping->host;
1358  ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
1359  char *addr;
1360  ntfs_attr_search_ctx *ctx = NULL;
1361  MFT_RECORD *m = NULL;
1362  u32 attr_len;
1363  int err;
1364 
1365 retry_writepage:
1366  BUG_ON(!PageLocked(page));
1367  i_size = i_size_read(vi);
1368  /* Is the page fully outside i_size? (truncate in progress) */
1369  if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
1370  PAGE_CACHE_SHIFT)) {
1371  /*
1372  * The page may have dirty, unmapped buffers. Make them
1373  * freeable here, so the page does not leak.
1374  */
1375  block_invalidatepage(page, 0);
1376  unlock_page(page);
1377  ntfs_debug("Write outside i_size - truncated?");
1378  return 0;
1379  }
1380  /*
1381  * Only $DATA attributes can be encrypted and only unnamed $DATA
1382  * attributes can be compressed. Index root can have the flags set but
1383  * this means to create compressed/encrypted files, not that the
1384  * attribute is compressed/encrypted. Note we need to check for
1385  * AT_INDEX_ALLOCATION since this is the type of both directory and
1386  * index inodes.
1387  */
1388  if (ni->type != AT_INDEX_ALLOCATION) {
1389  /* If file is encrypted, deny access, just like NT4. */
1390  if (NInoEncrypted(ni)) {
1391  unlock_page(page);
1392  BUG_ON(ni->type != AT_DATA);
1393  ntfs_debug("Denying write access to encrypted file.");
1394  return -EACCES;
1395  }
1396  /* Compressed data streams are handled in compress.c. */
1397  if (NInoNonResident(ni) && NInoCompressed(ni)) {
1398  BUG_ON(ni->type != AT_DATA);
1399  BUG_ON(ni->name_len);
1400  // TODO: Implement and replace this with
1401  // return ntfs_write_compressed_block(page);
1402  unlock_page(page);
1403  ntfs_error(vi->i_sb, "Writing to compressed files is "
1404  "not supported yet. Sorry.");
1405  return -EOPNOTSUPP;
1406  }
1407  // TODO: Implement and remove this check.
1408  if (NInoNonResident(ni) && NInoSparse(ni)) {
1409  unlock_page(page);
1410  ntfs_error(vi->i_sb, "Writing to sparse files is not "
1411  "supported yet. Sorry.");
1412  return -EOPNOTSUPP;
1413  }
1414  }
1415  /* NInoNonResident() == NInoIndexAllocPresent() */
1416  if (NInoNonResident(ni)) {
1417  /* We have to zero every time due to mmap-at-end-of-file. */
1418  if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
1419  /* The page straddles i_size. */
1420  unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
1421  zero_user_segment(page, ofs, PAGE_CACHE_SIZE);
1422  }
1423  /* Handle mst protected attributes. */
1424  if (NInoMstProtected(ni))
1425  return ntfs_write_mst_block(page, wbc);
1426  /* Normal, non-resident data stream. */
1427  return ntfs_write_block(page, wbc);
1428  }
1429  /*
1430  * Attribute is resident, implying it is not compressed, encrypted, or
1431  * mst protected. This also means the attribute is smaller than an mft
1432  * record and hence smaller than a page, so can simply return error on
1433  * any pages with index above 0. Note the attribute can actually be
1434  * marked compressed but if it is resident the actual data is not
1435  * compressed so we are ok to ignore the compressed flag here.
1436  */
1437  BUG_ON(page_has_buffers(page));
1438  BUG_ON(!PageUptodate(page));
1439  if (unlikely(page->index > 0)) {
1440  ntfs_error(vi->i_sb, "BUG()! page->index (0x%lx) > 0. "
1441  "Aborting write.", page->index);
1442  BUG_ON(PageWriteback(page));
1443  set_page_writeback(page);
1444  unlock_page(page);
1445  end_page_writeback(page);
1446  return -EIO;
1447  }
1448  if (!NInoAttr(ni))
1449  base_ni = ni;
1450  else
1451  base_ni = ni->ext.base_ntfs_ino;
1452  /* Map, pin, and lock the mft record. */
1453  m = map_mft_record(base_ni);
1454  if (IS_ERR(m)) {
1455  err = PTR_ERR(m);
1456  m = NULL;
1457  ctx = NULL;
1458  goto err_out;
1459  }
1460  /*
1461  * If a parallel write made the attribute non-resident, drop the mft
1462  * record and retry the writepage.
1463  */
1464  if (unlikely(NInoNonResident(ni))) {
1465  unmap_mft_record(base_ni);
1466  goto retry_writepage;
1467  }
1468  ctx = ntfs_attr_get_search_ctx(base_ni, m);
1469  if (unlikely(!ctx)) {
1470  err = -ENOMEM;
1471  goto err_out;
1472  }
1473  err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
1474  CASE_SENSITIVE, 0, NULL, 0, ctx);
1475  if (unlikely(err))
1476  goto err_out;
1477  /*
1478  * Keep the VM happy. This must be done otherwise the radix-tree tag
1479  * PAGECACHE_TAG_DIRTY remains set even though the page is clean.
1480  */
1481  BUG_ON(PageWriteback(page));
1482  set_page_writeback(page);
1483  unlock_page(page);
1484  attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
1485  i_size = i_size_read(vi);
1486  if (unlikely(attr_len > i_size)) {
1487  /* Race with shrinking truncate or a failed truncate. */
1488  attr_len = i_size;
1489  /*
1490  * If the truncate failed, fix it up now. If a concurrent
1491  * truncate, we do its job, so it does not have to do anything.
1492  */
1493  err = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr,
1494  attr_len);
1495  /* Shrinking cannot fail. */
1496  BUG_ON(err);
1497  }
1498  addr = kmap_atomic(page);
1499  /* Copy the data from the page to the mft record. */
1500  memcpy((u8*)ctx->attr +
1501  le16_to_cpu(ctx->attr->data.resident.value_offset),
1502  addr, attr_len);
1503  /* Zero out of bounds area in the page cache page. */
1504  memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
1505  kunmap_atomic(addr);
1506  flush_dcache_page(page);
1507  flush_dcache_mft_record_page(ctx->ntfs_ino);
1508  /* We are done with the page. */
1509  end_page_writeback(page);
1510  /* Finally, mark the mft record dirty, so it gets written back. */
1511  mark_mft_record_dirty(ctx->ntfs_ino);
 1512  ntfs_attr_put_search_ctx(ctx);
 1513  unmap_mft_record(base_ni);
1514  return 0;
1515 err_out:
1516  if (err == -ENOMEM) {
1517  ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
1518  "page so we try again later.");
1519  /*
1520  * Put the page back on mapping->dirty_pages, but leave its
1521  * buffers' dirty state as-is.
1522  */
1523  redirty_page_for_writepage(wbc, page);
1524  err = 0;
1525  } else {
1526  ntfs_error(vi->i_sb, "Resident attribute write failed with "
1527  "error %i.", err);
1528  SetPageError(page);
1529  NVolSetErrors(ni->vol);
1530  }
1531  unlock_page(page);
 1532  if (ctx)
 1533  ntfs_attr_put_search_ctx(ctx);
1534  if (m)
1535  unmap_mft_record(base_ni);
1536  return err;
1537 }
1538 
1539 #endif /* NTFS_RW */
1540 
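/**
 * ntfs_aops - address space operations for normal inodes and attributes
 *
 * Note these are not used for mst protected inodes and attributes.
 */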
 1544 const struct address_space_operations ntfs_aops = {
 1545  .readpage = ntfs_readpage, /* Fill page with data. */
1546 #ifdef NTFS_RW
1547  .writepage = ntfs_writepage, /* Write dirty page to disk. */
1548 #endif /* NTFS_RW */
1549  .migratepage = buffer_migrate_page, /* Move a page cache page from
1550  one physical page to an
1551  other. */
1552  .error_remove_page = generic_error_remove_page,
1553 };
1554 
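/**
 * ntfs_mst_aops - address space operations for mst protected inodes and
 *                 attributes
 */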
 1559 const struct address_space_operations ntfs_mst_aops = {
 1560  .readpage = ntfs_readpage, /* Fill page with data. */
1561 #ifdef NTFS_RW
1562  .writepage = ntfs_writepage, /* Write dirty page to disk. */
1563  .set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty
1564  without touching the buffers
1565  belonging to the page. */
1566 #endif /* NTFS_RW */
1567  .migratepage = buffer_migrate_page, /* Move a page cache page from
1568  one physical page to an
1569  other. */
1570  .error_remove_page = generic_error_remove_page,
1571 };
1572 
1573 #ifdef NTFS_RW
1574 
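/**
 * mark_ntfs_record_dirty - mark an ntfs record dirty
 * @page:  page containing the ntfs record to mark dirty
 * @ofs:   byte offset within @page at which the ntfs record begins
 *
 * Set the buffers covering the ntfs record at offset @ofs in @page dirty,
 * creating the buffer heads for the page if they do not exist yet, and mark
 * the page itself dirty with __set_page_dirty_nobuffers() so the dirty
 * state of the remaining buffers is left untouched.
 */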
1590 void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
1591  struct address_space *mapping = page->mapping;
1592  ntfs_inode *ni = NTFS_I(mapping->host);
1593  struct buffer_head *bh, *head, *buffers_to_free = NULL;
1594  unsigned int end, bh_size, bh_ofs;
1595 
1596  BUG_ON(!PageUptodate(page));
1597  end = ofs + ni->itype.index.block_size;
1598  bh_size = VFS_I(ni)->i_sb->s_blocksize;
1599  spin_lock(&mapping->private_lock);
1600  if (unlikely(!page_has_buffers(page))) {
1601  spin_unlock(&mapping->private_lock);
1602  bh = head = alloc_page_buffers(page, bh_size, 1);
1603  spin_lock(&mapping->private_lock);
1604  if (likely(!page_has_buffers(page))) {
1605  struct buffer_head *tail;
1606 
1607  do {
1608  set_buffer_uptodate(bh);
1609  tail = bh;
1610  bh = bh->b_this_page;
1611  } while (bh);
1612  tail->b_this_page = head;
1613  attach_page_buffers(page, head);
1614  } else
1615  buffers_to_free = bh;
1616  }
1617  bh = head = page_buffers(page);
1618  BUG_ON(!bh);
1619  do {
1620  bh_ofs = bh_offset(bh);
1621  if (bh_ofs + bh_size <= ofs)
1622  continue;
1623  if (unlikely(bh_ofs >= end))
1624  break;
1625  set_buffer_dirty(bh);
1626  } while ((bh = bh->b_this_page) != head);
1627  spin_unlock(&mapping->private_lock);
 1628  __set_page_dirty_nobuffers(page);
 1629  if (unlikely(buffers_to_free)) {
1630  do {
1631  bh = buffers_to_free->b_this_page;
1632  free_buffer_head(buffers_to_free);
1633  buffers_to_free = bh;
1634  } while (buffers_to_free);
1635  }
1636 }
1637 
1638 #endif /* NTFS_RW */