/*
 * segment.c - NILFS segment constructor.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Written by Ryusuke Konishi <[email protected]>
 *
 */

#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/bio.h>
#include <linux/completion.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/crc32.h>
#include <linux/pagevec.h>
#include <linux/slab.h>
#include "nilfs.h"
#include "btnode.h"
#include "page.h"
#include "segment.h"
#include "sufile.h"
#include "cpfile.h"
#include "ifile.h"
#include "segbuf.h"

/*
 * Segment constructor
 */
#define SC_N_INODEVEC	16	/* Size of locally allocated inode vector */

#define SC_MAX_SEGDELTA	64	/* Upper limit of the number of segments
				   appended in collection retry loop */

/* Construction mode */
enum {
	SC_LSEG_SR = 1,	/* Make a logical segment having a super root */
	SC_LSEG_DSYNC,	/* Flush data blocks of a given file and make
			   a logical segment without a super root */
	SC_FLUSH_FILE,	/* Flush data files, leads to segment writes without
			   creating a checkpoint */
	SC_FLUSH_DAT,	/* Flush DAT file.  This also creates segments without
			   a checkpoint */
};

/* Stage numbers of dirty block collection */
enum {
	NILFS_ST_INIT = 0,
	NILFS_ST_GC,		/* Collecting dirty blocks for GC */
	NILFS_ST_FILE,
	NILFS_ST_IFILE,
	NILFS_ST_CPFILE,
	NILFS_ST_SUFILE,
	NILFS_ST_DAT,
	NILFS_ST_SR,		/* Super root */
	NILFS_ST_DSYNC,		/* Data sync blocks */
	NILFS_ST_DONE,
};

/* State flags of collection */
#define NILFS_CF_NODE		0x0001	/* Collecting node blocks */
#define NILFS_CF_IFILE_STARTED	0x0002	/* IFILE stage has started */
#define NILFS_CF_SUFREED	0x0004	/* segment usages have been freed */
#define NILFS_CF_HISTORY_MASK	(NILFS_CF_IFILE_STARTED | NILFS_CF_SUFREED)

/* Operations depending on the construction mode and file type */
struct nilfs_sc_operations {
	int (*collect_data)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	int (*collect_node)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	int (*collect_bmap)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	void (*write_data_binfo)(struct nilfs_sc_info *,
				 struct nilfs_segsum_pointer *,
				 union nilfs_binfo *);
	void (*write_node_binfo)(struct nilfs_sc_info *,
				 struct nilfs_segsum_pointer *,
				 union nilfs_binfo *);
};

/*
 * Other definitions
 */
static void nilfs_segctor_start_timer(struct nilfs_sc_info *);
static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int);
static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *);
static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int);

#define nilfs_cnt32_gt(a, b)   \
	(typecheck(__u32, a) && typecheck(__u32, b) && \
	 ((__s32)(b) - (__s32)(a) < 0))
#define nilfs_cnt32_ge(a, b)   \
	(typecheck(__u32, a) && typecheck(__u32, b) && \
	 ((__s32)(a) - (__s32)(b) >= 0))
#define nilfs_cnt32_lt(a, b)	nilfs_cnt32_gt(b, a)
#define nilfs_cnt32_le(a, b)	nilfs_cnt32_ge(b, a)

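/*
 * Illustrative note (not in the original source): these helpers compare
 * sequence counters modulo 2^32, so they stay correct after the counter
 * wraps.  For example, nilfs_cnt32_gt(1, 0xfffffffe) is true because
 * (__s32)(0xfffffffe) - (__s32)(1) is negative, i.e. 1 counts as "newer"
 * than 0xfffffffe once the 32-bit request counter has wrapped around.
 */
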
static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti)
{
	struct nilfs_transaction_info *cur_ti = current->journal_info;
	void *save = NULL;

	if (cur_ti) {
		if (cur_ti->ti_magic == NILFS_TI_MAGIC)
			return ++cur_ti->ti_count;
		else {
			/*
			 * If journal_info field is occupied by other FS,
			 * it is saved and will be restored on
			 * nilfs_transaction_commit().
			 */
			printk(KERN_WARNING
			       "NILFS warning: journal info from a different "
			       "FS\n");
			save = current->journal_info;
		}
	}
	if (!ti) {
		ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS);
		if (!ti)
			return -ENOMEM;
		ti->ti_flags = NILFS_TI_DYNAMIC_ALLOC;
	} else {
		ti->ti_flags = 0;
	}
	ti->ti_count = 0;
	ti->ti_save = save;
	ti->ti_magic = NILFS_TI_MAGIC;
	current->journal_info = ti;
	return 0;
}

/**
 * nilfs_transaction_begin - start indivisible file operations.
 * @sb: super block
 * @ti: nilfs_transaction_info
 * @vacancy_check: flags for vacancy rate checks
 *
 * nilfs_transaction_begin() acquires a reader/writer semaphore, called
 * the segment semaphore, to make a segment construction and write tasks
 * exclusive.  The function is used with nilfs_transaction_commit() in pairs.
 */
int nilfs_transaction_begin(struct super_block *sb,
			    struct nilfs_transaction_info *ti,
			    int vacancy_check)
{
	struct the_nilfs *nilfs;
	int ret = nilfs_prepare_segment_lock(ti);

	if (unlikely(ret < 0))
		return ret;
	if (ret > 0)
		return 0;

	sb_start_intwrite(sb);

	nilfs = sb->s_fs_info;
	down_read(&nilfs->ns_segctor_sem);
	if (vacancy_check && nilfs_near_disk_full(nilfs)) {
		up_read(&nilfs->ns_segctor_sem);
		ret = -ENOSPC;
		goto failed;
	}
	return 0;

 failed:
	ti = current->journal_info;
	current->journal_info = ti->ti_save;
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
	sb_end_intwrite(sb);
	return ret;
}

int nilfs_transaction_commit(struct super_block *sb)
{
	struct nilfs_transaction_info *ti = current->journal_info;
	struct the_nilfs *nilfs = sb->s_fs_info;
	int err = 0;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	ti->ti_flags |= NILFS_TI_COMMIT;
	if (ti->ti_count > 0) {
		ti->ti_count--;
		return 0;
	}
	if (nilfs->ns_writer) {
		struct nilfs_sc_info *sci = nilfs->ns_writer;

		if (ti->ti_flags & NILFS_TI_COMMIT)
			nilfs_segctor_start_timer(sci);
		if (atomic_read(&nilfs->ns_ndirtyblks) > sci->sc_watermark)
			nilfs_segctor_do_flush(sci, 0);
	}
	up_read(&nilfs->ns_segctor_sem);
	current->journal_info = ti->ti_save;

	if (ti->ti_flags & NILFS_TI_SYNC)
		err = nilfs_construct_segment(sb);
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
	sb_end_intwrite(sb);
	return err;
}

void nilfs_transaction_abort(struct super_block *sb)
{
	struct nilfs_transaction_info *ti = current->journal_info;
	struct the_nilfs *nilfs = sb->s_fs_info;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	if (ti->ti_count > 0) {
		ti->ti_count--;
		return;
	}
	up_read(&nilfs->ns_segctor_sem);

	current->journal_info = ti->ti_save;
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
	sb_end_intwrite(sb);
}
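
/*
 * Illustrative sketch (not in the original source): a typical caller
 * brackets an indivisible file operation with the transaction API above.
 * nilfs_example_update() and nilfs_do_update() are hypothetical names
 * used only for this example.
 */
#if 0
static int nilfs_example_update(struct super_block *sb, struct inode *inode)
{
	struct nilfs_transaction_info ti;
	int err;

	/* Pass vacancy_check != 0 so -ENOSPC is returned near disk full */
	err = nilfs_transaction_begin(sb, &ti, 1);
	if (unlikely(err))
		return err;

	err = nilfs_do_update(inode);	/* hypothetical file operation */
	if (err)
		nilfs_transaction_abort(sb);
	else
		err = nilfs_transaction_commit(sb); /* may start the timer */
	return err;
}
#endif
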
void nilfs_relax_pressure_in_lock(struct super_block *sb)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_sc_info *sci = nilfs->ns_writer;

	if (!sci || !sci->sc_flush_request)
		return;

	set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
	up_read(&nilfs->ns_segctor_sem);

	down_write(&nilfs->ns_segctor_sem);
	if (sci->sc_flush_request &&
	    test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) {
		struct nilfs_transaction_info *ti = current->journal_info;

		ti->ti_flags |= NILFS_TI_WRITER;
		nilfs_segctor_do_immediate_flush(sci);
		ti->ti_flags &= ~NILFS_TI_WRITER;
	}
	downgrade_write(&nilfs->ns_segctor_sem);
}

static void nilfs_transaction_lock(struct super_block *sb,
				   struct nilfs_transaction_info *ti,
				   int gcflag)
{
	struct nilfs_transaction_info *cur_ti = current->journal_info;
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_sc_info *sci = nilfs->ns_writer;

	WARN_ON(cur_ti);
	ti->ti_flags = NILFS_TI_WRITER;
	ti->ti_count = 0;
	ti->ti_save = cur_ti;
	ti->ti_magic = NILFS_TI_MAGIC;
	INIT_LIST_HEAD(&ti->ti_garbage);
	current->journal_info = ti;

	for (;;) {
		down_write(&nilfs->ns_segctor_sem);
		if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags))
			break;

		nilfs_segctor_do_immediate_flush(sci);

		up_write(&nilfs->ns_segctor_sem);
		yield();
	}
	if (gcflag)
		ti->ti_flags |= NILFS_TI_GC;
}

static void nilfs_transaction_unlock(struct super_block *sb)
{
	struct nilfs_transaction_info *ti = current->journal_info;
	struct the_nilfs *nilfs = sb->s_fs_info;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	BUG_ON(ti->ti_count > 0);

	up_write(&nilfs->ns_segctor_sem);
	current->journal_info = ti->ti_save;
	if (!list_empty(&ti->ti_garbage))
		nilfs_dispose_list(nilfs, &ti->ti_garbage, 0);
}

static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
					    struct nilfs_segsum_pointer *ssp,
					    unsigned bytes)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	unsigned blocksize = sci->sc_super->s_blocksize;
	void *p;

	if (unlikely(ssp->offset + bytes > blocksize)) {
		ssp->offset = 0;
		BUG_ON(NILFS_SEGBUF_BH_IS_LAST(ssp->bh,
					       &segbuf->sb_segsum_buffers));
		ssp->bh = NILFS_SEGBUF_NEXT_BH(ssp->bh);
	}
	p = ssp->bh->b_data + ssp->offset;
	ssp->offset += bytes;
	return p;
}

/**
 * nilfs_segctor_reset_segment_buffer - reset the current segment buffer
 * @sci: nilfs_sc_info
 */
static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	struct buffer_head *sumbh;
	unsigned sumbytes;
	unsigned flags = 0;
	int err;

	if (nilfs_doing_gc())
		flags = NILFS_SS_GC;
	err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime, sci->sc_cno);
	if (unlikely(err))
		return err;

	sumbh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
	sumbytes = segbuf->sb_sum.sumbytes;
	sci->sc_finfo_ptr.bh = sumbh;  sci->sc_finfo_ptr.offset = sumbytes;
	sci->sc_binfo_ptr.bh = sumbh;  sci->sc_binfo_ptr.offset = sumbytes;
	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
	return 0;
}

static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
{
	sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
	if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs))
		return -E2BIG; /* The current segment is filled up
				  (internal code) */
	sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
	return nilfs_segctor_reset_segment_buffer(sci);
}

static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	int err;

	if (segbuf->sb_sum.nblocks >= segbuf->sb_rest_blocks) {
		err = nilfs_segctor_feed_segment(sci);
		if (err)
			return err;
		segbuf = sci->sc_curseg;
	}
	err = nilfs_segbuf_extend_payload(segbuf, &segbuf->sb_super_root);
	if (likely(!err))
		segbuf->sb_sum.flags |= NILFS_SS_SR;
	return err;
}

/*
 * Functions for making segment summary and payloads
 */
static int nilfs_segctor_segsum_block_required(
	struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp,
	unsigned binfo_size)
{
	unsigned blocksize = sci->sc_super->s_blocksize;
	/* finfo and binfo are small enough compared with the blocksize */

	return ssp->offset + binfo_size +
		(!sci->sc_blk_cnt ? sizeof(struct nilfs_finfo) : 0) >
		blocksize;
}
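
/*
 * Illustrative note (not in the original source): with a 4096-byte block,
 * ssp->offset == 4088 and binfo_size == sizeof(__le64) == 8, a follow-up
 * entry still fits (4088 + 8 == 4096 is not greater than the blocksize);
 * the first entry of a file additionally needs sizeof(struct nilfs_finfo)
 * and would therefore require a new segment summary block.
 */
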
static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
				      struct inode *inode)
{
	sci->sc_curseg->sb_sum.nfinfo++;
	sci->sc_binfo_ptr = sci->sc_finfo_ptr;
	nilfs_segctor_map_segsum_entry(
		sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));

	if (NILFS_I(inode)->i_root &&
	    !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
		set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
	/* skip finfo */
}

static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci,
				    struct inode *inode)
{
	struct nilfs_finfo *finfo;
	struct nilfs_inode_info *ii;
	struct nilfs_segment_buffer *segbuf;
	__u64 cno;

	if (sci->sc_blk_cnt == 0)
		return;

	ii = NILFS_I(inode);

	if (test_bit(NILFS_I_GCINODE, &ii->i_state))
		cno = ii->i_cno;
	else if (NILFS_ROOT_METADATA_FILE(inode->i_ino))
		cno = 0;
	else
		cno = sci->sc_cno;

	finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr,
					       sizeof(*finfo));
	finfo->fi_ino = cpu_to_le64(inode->i_ino);
	finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt);
	finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt);
	finfo->fi_cno = cpu_to_le64(cno);

	segbuf = sci->sc_curseg;
	segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset +
		sci->sc_super->s_blocksize * (segbuf->sb_sum.nsumblk - 1);
	sci->sc_finfo_ptr = sci->sc_binfo_ptr;
	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
}

static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
					struct buffer_head *bh,
					struct inode *inode,
					unsigned binfo_size)
{
	struct nilfs_segment_buffer *segbuf;
	int required, err = 0;

 retry:
	segbuf = sci->sc_curseg;
	required = nilfs_segctor_segsum_block_required(
		sci, &sci->sc_binfo_ptr, binfo_size);
	if (segbuf->sb_sum.nblocks + required + 1 > segbuf->sb_rest_blocks) {
		nilfs_segctor_end_finfo(sci, inode);
		err = nilfs_segctor_feed_segment(sci);
		if (err)
			return err;
		goto retry;
	}
	if (unlikely(required)) {
		err = nilfs_segbuf_extend_segsum(segbuf);
		if (unlikely(err))
			goto failed;
	}
	if (sci->sc_blk_cnt == 0)
		nilfs_segctor_begin_finfo(sci, inode);

	nilfs_segctor_map_segsum_entry(sci, &sci->sc_binfo_ptr, binfo_size);
	/* Substitution to vblocknr is delayed until update_blocknr() */
	nilfs_segbuf_add_file_buffer(segbuf, bh);
	sci->sc_blk_cnt++;
 failed:
	return err;
}

/*
 * Callback functions that enumerate, mark, and collect dirty blocks
 */
static int nilfs_collect_file_data(struct nilfs_sc_info *sci,
				   struct buffer_head *bh, struct inode *inode)
{
	int err;

	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
	if (err < 0)
		return err;

	err = nilfs_segctor_add_file_block(sci, bh, inode,
					   sizeof(struct nilfs_binfo_v));
	if (!err)
		sci->sc_datablk_cnt++;
	return err;
}

static int nilfs_collect_file_node(struct nilfs_sc_info *sci,
				   struct buffer_head *bh,
				   struct inode *inode)
{
	return nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
}

static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci,
				   struct buffer_head *bh,
				   struct inode *inode)
{
	WARN_ON(!buffer_dirty(bh));
	return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
}

static void nilfs_write_file_data_binfo(struct nilfs_sc_info *sci,
					struct nilfs_segsum_pointer *ssp,
					union nilfs_binfo *binfo)
{
	struct nilfs_binfo_v *binfo_v = nilfs_segctor_map_segsum_entry(
		sci, ssp, sizeof(*binfo_v));
	*binfo_v = binfo->bi_v;
}

static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci,
					struct nilfs_segsum_pointer *ssp,
					union nilfs_binfo *binfo)
{
	__le64 *vblocknr = nilfs_segctor_map_segsum_entry(
		sci, ssp, sizeof(*vblocknr));
	*vblocknr = binfo->bi_v.bi_vblocknr;
}

static struct nilfs_sc_operations nilfs_sc_file_ops = {
	.collect_data = nilfs_collect_file_data,
	.collect_node = nilfs_collect_file_node,
	.collect_bmap = nilfs_collect_file_bmap,
	.write_data_binfo = nilfs_write_file_data_binfo,
	.write_node_binfo = nilfs_write_file_node_binfo,
};

static int nilfs_collect_dat_data(struct nilfs_sc_info *sci,
				  struct buffer_head *bh, struct inode *inode)
{
	int err;

	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
	if (err < 0)
		return err;

	err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
	if (!err)
		sci->sc_datablk_cnt++;
	return err;
}

static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci,
				  struct buffer_head *bh, struct inode *inode)
{
	WARN_ON(!buffer_dirty(bh));
	return nilfs_segctor_add_file_block(sci, bh, inode,
					    sizeof(struct nilfs_binfo_dat));
}

static void nilfs_write_dat_data_binfo(struct nilfs_sc_info *sci,
				       struct nilfs_segsum_pointer *ssp,
				       union nilfs_binfo *binfo)
{
	__le64 *blkoff = nilfs_segctor_map_segsum_entry(sci, ssp,
							sizeof(*blkoff));
	*blkoff = binfo->bi_dat.bi_blkoff;
}

static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci,
				       struct nilfs_segsum_pointer *ssp,
				       union nilfs_binfo *binfo)
{
	struct nilfs_binfo_dat *binfo_dat =
		nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*binfo_dat));
	*binfo_dat = binfo->bi_dat;
}

static struct nilfs_sc_operations nilfs_sc_dat_ops = {
	.collect_data = nilfs_collect_dat_data,
	.collect_node = nilfs_collect_file_node,
	.collect_bmap = nilfs_collect_dat_bmap,
	.write_data_binfo = nilfs_write_dat_data_binfo,
	.write_node_binfo = nilfs_write_dat_node_binfo,
};

static struct nilfs_sc_operations nilfs_sc_dsync_ops = {
	.collect_data = nilfs_collect_file_data,
	.collect_node = NULL,
	.collect_bmap = NULL,
	.write_data_binfo = nilfs_write_file_data_binfo,
	.write_node_binfo = NULL,
};
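
/*
 * Illustrative note (not in the original source): one of the three
 * operation tables above is chosen per file in
 * nilfs_segctor_update_payload_blocknr() later in this file:
 * nilfs_sc_dsync_ops for SC_LSEG_DSYNC mode, nilfs_sc_dat_ops for the
 * DAT inode (ino == NILFS_DAT_INO), and nilfs_sc_file_ops for ordinary
 * file blocks.
 */
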
static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
					      struct list_head *listp,
					      size_t nlimit,
					      loff_t start, loff_t end)
{
	struct address_space *mapping = inode->i_mapping;
	struct pagevec pvec;
	pgoff_t index = 0, last = ULONG_MAX;
	size_t ndirties = 0;
	int i;

	if (unlikely(start != 0 || end != LLONG_MAX)) {
		/*
		 * A valid range is given for sync-ing data pages. The
		 * range is rounded to per-page; extra dirty buffers
		 * may be included if blocksize < pagesize.
		 */
		index = start >> PAGE_SHIFT;
		last = end >> PAGE_SHIFT;
	}
	pagevec_init(&pvec, 0);
 repeat:
	if (unlikely(index > last) ||
	    !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				min_t(pgoff_t, last - index,
				      PAGEVEC_SIZE - 1) + 1))
		return ndirties;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct buffer_head *bh, *head;
		struct page *page = pvec.pages[i];

		if (unlikely(page->index > last))
			break;

		lock_page(page);
		if (!page_has_buffers(page))
			create_empty_buffers(page, 1 << inode->i_blkbits, 0);
		unlock_page(page);

		bh = head = page_buffers(page);
		do {
			if (!buffer_dirty(bh))
				continue;
			get_bh(bh);
			list_add_tail(&bh->b_assoc_buffers, listp);
			ndirties++;
			if (unlikely(ndirties >= nlimit)) {
				pagevec_release(&pvec);
				cond_resched();
				return ndirties;
			}
		} while (bh = bh->b_this_page, bh != head);
	}
	pagevec_release(&pvec);
	cond_resched();
	goto repeat;
}

static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
					    struct list_head *listp)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct address_space *mapping = &ii->i_btnode_cache;
	struct pagevec pvec;
	struct buffer_head *bh, *head;
	unsigned int i;
	pgoff_t index = 0;

	pagevec_init(&pvec, 0);

	while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				  PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			bh = head = page_buffers(pvec.pages[i]);
			do {
				if (buffer_dirty(bh)) {
					get_bh(bh);
					list_add_tail(&bh->b_assoc_buffers,
						      listp);
				}
				bh = bh->b_this_page;
			} while (bh != head);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
}

static void nilfs_dispose_list(struct the_nilfs *nilfs,
			       struct list_head *head, int force)
{
	struct nilfs_inode_info *ii, *n;
	struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii;
	unsigned nv = 0;

	while (!list_empty(head)) {
		spin_lock(&nilfs->ns_inode_lock);
		list_for_each_entry_safe(ii, n, head, i_dirty) {
			list_del_init(&ii->i_dirty);
			if (force) {
				if (unlikely(ii->i_bh)) {
					brelse(ii->i_bh);
					ii->i_bh = NULL;
				}
			} else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
				set_bit(NILFS_I_QUEUED, &ii->i_state);
				list_add_tail(&ii->i_dirty,
					      &nilfs->ns_dirty_files);
				continue;
			}
			ivec[nv++] = ii;
			if (nv == SC_N_INODEVEC)
				break;
		}
		spin_unlock(&nilfs->ns_inode_lock);

		for (pii = ivec; nv > 0; pii++, nv--)
			iput(&(*pii)->vfs_inode);
	}
}

static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs,
				     struct nilfs_root *root)
{
	int ret = 0;

	if (nilfs_mdt_fetch_dirty(root->ifile))
		ret++;
	if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile))
		ret++;
	if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile))
		ret++;
	if ((ret || nilfs_doing_gc()) && nilfs_mdt_fetch_dirty(nilfs->ns_dat))
		ret++;
	return ret;
}

static int nilfs_segctor_clean(struct nilfs_sc_info *sci)
{
	return list_empty(&sci->sc_dirty_files) &&
		!test_bit(NILFS_SC_DIRTY, &sci->sc_flags) &&
		sci->sc_nfreesegs == 0 &&
		(!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes));
}

static int nilfs_segctor_confirm(struct nilfs_sc_info *sci)
{
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
	int ret = 0;

	if (nilfs_test_metadata_dirty(nilfs, sci->sc_root))
		set_bit(NILFS_SC_DIRTY, &sci->sc_flags);

	spin_lock(&nilfs->ns_inode_lock);
	if (list_empty(&nilfs->ns_dirty_files) && nilfs_segctor_clean(sci))
		ret++;

	spin_unlock(&nilfs->ns_inode_lock);
	return ret;
}

static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
{
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;

	nilfs_mdt_clear_dirty(sci->sc_root->ifile);
	nilfs_mdt_clear_dirty(nilfs->ns_cpfile);
	nilfs_mdt_clear_dirty(nilfs->ns_sufile);
	nilfs_mdt_clear_dirty(nilfs->ns_dat);
}

static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
{
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
	struct buffer_head *bh_cp;
	struct nilfs_checkpoint *raw_cp;
	int err;

	/* XXX: this interface will be changed */
	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1,
					  &raw_cp, &bh_cp);
	if (likely(!err)) {
		/*
		 * The following code is duplicated with cpfile.  But, it is
		 * needed to collect the checkpoint even if it was not newly
		 * created.
		 */
		mark_buffer_dirty(bh_cp);
		nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
		nilfs_cpfile_put_checkpoint(
			nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
	} else
		WARN_ON(err == -EINVAL || err == -ENOENT);

	return err;
}

static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
{
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
	struct buffer_head *bh_cp;
	struct nilfs_checkpoint *raw_cp;
	int err;

	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0,
					  &raw_cp, &bh_cp);
	if (unlikely(err)) {
		WARN_ON(err == -EINVAL || err == -ENOENT);
		goto failed_ibh;
	}
	raw_cp->cp_snapshot_list.ssl_next = 0;
	raw_cp->cp_snapshot_list.ssl_prev = 0;
	raw_cp->cp_inodes_count =
		cpu_to_le64(atomic_read(&sci->sc_root->inodes_count));
	raw_cp->cp_blocks_count =
		cpu_to_le64(atomic_read(&sci->sc_root->blocks_count));
	raw_cp->cp_nblk_inc =
		cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc);
	raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime);
	raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno);

	if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
		nilfs_checkpoint_clear_minor(raw_cp);
	else
		nilfs_checkpoint_set_minor(raw_cp);

	nilfs_write_inode_common(sci->sc_root->ifile,
				 &raw_cp->cp_ifile_inode, 1);
	nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
	return 0;

 failed_ibh:
	return err;
}

static void nilfs_fill_in_file_bmap(struct inode *ifile,
				    struct nilfs_inode_info *ii)
{
	struct buffer_head *ibh;
	struct nilfs_inode *raw_inode;

	if (test_bit(NILFS_I_BMAP, &ii->i_state)) {
		ibh = ii->i_bh;
		BUG_ON(!ibh);
		raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino,
						  ibh);
		nilfs_bmap_write(ii->i_bmap, raw_inode);
		nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh);
	}
}

static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci)
{
	struct nilfs_inode_info *ii;

	list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) {
		nilfs_fill_in_file_bmap(sci->sc_root->ifile, ii);
		set_bit(NILFS_I_COLLECTED, &ii->i_state);
	}
}

static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
					     struct the_nilfs *nilfs)
{
	struct buffer_head *bh_sr;
	struct nilfs_super_root *raw_sr;
	unsigned isz, srsz;

	bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
	raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
	isz = nilfs->ns_inode_size;
	srsz = NILFS_SR_BYTES(isz);

	raw_sr->sr_bytes = cpu_to_le16(srsz);
	raw_sr->sr_nongc_ctime
		= cpu_to_le64(nilfs_doing_gc() ?
			      nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
	raw_sr->sr_flags = 0;

	nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr +
				 NILFS_SR_DAT_OFFSET(isz), 1);
	nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr +
				 NILFS_SR_CPFILE_OFFSET(isz), 1);
	nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
				 NILFS_SR_SUFILE_OFFSET(isz), 1);
	memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
}

static void nilfs_redirty_inodes(struct list_head *head)
{
	struct nilfs_inode_info *ii;

	list_for_each_entry(ii, head, i_dirty) {
		if (test_bit(NILFS_I_COLLECTED, &ii->i_state))
			clear_bit(NILFS_I_COLLECTED, &ii->i_state);
	}
}

static void nilfs_drop_collected_inodes(struct list_head *head)
{
	struct nilfs_inode_info *ii;

	list_for_each_entry(ii, head, i_dirty) {
		if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state))
			continue;

		clear_bit(NILFS_I_INODE_DIRTY, &ii->i_state);
		set_bit(NILFS_I_UPDATED, &ii->i_state);
	}
}

static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
				       struct inode *inode,
				       struct list_head *listp,
				       int (*collect)(struct nilfs_sc_info *,
						      struct buffer_head *,
						      struct inode *))
{
	struct buffer_head *bh, *n;
	int err = 0;

	if (collect) {
		list_for_each_entry_safe(bh, n, listp, b_assoc_buffers) {
			list_del_init(&bh->b_assoc_buffers);
			err = collect(sci, bh, inode);
			brelse(bh);
			if (unlikely(err))
				goto dispose_buffers;
		}
		return 0;
	}

 dispose_buffers:
	while (!list_empty(listp)) {
		bh = list_first_entry(listp, struct buffer_head,
				      b_assoc_buffers);
		list_del_init(&bh->b_assoc_buffers);
		brelse(bh);
	}
	return err;
}

static size_t nilfs_segctor_buffer_rest(struct nilfs_sc_info *sci)
{
	/* Remaining number of blocks within segment buffer */
	return sci->sc_segbuf_nblocks -
		(sci->sc_nblk_this_inc + sci->sc_curseg->sb_sum.nblocks);
}

static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci,
				   struct inode *inode,
				   struct nilfs_sc_operations *sc_ops)
{
	LIST_HEAD(data_buffers);
	LIST_HEAD(node_buffers);
	int err;

	if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
		size_t n, rest = nilfs_segctor_buffer_rest(sci);

		n = nilfs_lookup_dirty_data_buffers(
			inode, &data_buffers, rest + 1, 0, LLONG_MAX);
		if (n > rest) {
			err = nilfs_segctor_apply_buffers(
				sci, inode, &data_buffers,
				sc_ops->collect_data);
			BUG_ON(!err); /* always receive -E2BIG or true error */
			goto break_or_fail;
		}
	}
	nilfs_lookup_dirty_node_buffers(inode, &node_buffers);

	if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
		err = nilfs_segctor_apply_buffers(
			sci, inode, &data_buffers, sc_ops->collect_data);
		if (unlikely(err)) {
			/* dispose node list */
			nilfs_segctor_apply_buffers(
				sci, inode, &node_buffers, NULL);
			goto break_or_fail;
		}
		sci->sc_stage.flags |= NILFS_CF_NODE;
	}
	/* Collect node */
	err = nilfs_segctor_apply_buffers(
		sci, inode, &node_buffers, sc_ops->collect_node);
	if (unlikely(err))
		goto break_or_fail;

	nilfs_bmap_lookup_dirty_buffers(NILFS_I(inode)->i_bmap, &node_buffers);
	err = nilfs_segctor_apply_buffers(
		sci, inode, &node_buffers, sc_ops->collect_bmap);
	if (unlikely(err))
		goto break_or_fail;

	nilfs_segctor_end_finfo(sci, inode);
	sci->sc_stage.flags &= ~NILFS_CF_NODE;

 break_or_fail:
	return err;
}

static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci,
					 struct inode *inode)
{
	LIST_HEAD(data_buffers);
	size_t n, rest = nilfs_segctor_buffer_rest(sci);
	int err;

	n = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, rest + 1,
					    sci->sc_dsync_start,
					    sci->sc_dsync_end);

	err = nilfs_segctor_apply_buffers(sci, inode, &data_buffers,
					  nilfs_collect_file_data);
	if (!err) {
		nilfs_segctor_end_finfo(sci, inode);
		BUG_ON(n > rest);
		/* always receive -E2BIG or true error if n > rest */
	}
	return err;
}

static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
{
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
	struct list_head *head;
	struct nilfs_inode_info *ii;
	size_t ndone;
	int err = 0;

	switch (sci->sc_stage.scnt) {
	case NILFS_ST_INIT:
		/* Pre-processes */
		sci->sc_stage.flags = 0;

		if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) {
			sci->sc_nblk_inc = 0;
			sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN;
			if (mode == SC_LSEG_DSYNC) {
				sci->sc_stage.scnt = NILFS_ST_DSYNC;
				goto dsync_mode;
			}
		}

		sci->sc_stage.dirty_file_ptr = NULL;
		sci->sc_stage.gc_inode_ptr = NULL;
		if (mode == SC_FLUSH_DAT) {
			sci->sc_stage.scnt = NILFS_ST_DAT;
			goto dat_stage;
		}
		sci->sc_stage.scnt++;  /* Fall through */
	case NILFS_ST_GC:
		if (nilfs_doing_gc()) {
			head = &sci->sc_gc_inodes;
			ii = list_prepare_entry(sci->sc_stage.gc_inode_ptr,
						head, i_dirty);
			list_for_each_entry_continue(ii, head, i_dirty) {
				err = nilfs_segctor_scan_file(
					sci, &ii->vfs_inode,
					&nilfs_sc_file_ops);
				if (unlikely(err)) {
					sci->sc_stage.gc_inode_ptr = list_entry(
						ii->i_dirty.prev,
						struct nilfs_inode_info,
						i_dirty);
					goto break_or_fail;
				}
				set_bit(NILFS_I_COLLECTED, &ii->i_state);
			}
			sci->sc_stage.gc_inode_ptr = NULL;
		}
		sci->sc_stage.scnt++;  /* Fall through */
	case NILFS_ST_FILE:
		head = &sci->sc_dirty_files;
		ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head,
					i_dirty);
		list_for_each_entry_continue(ii, head, i_dirty) {
			clear_bit(NILFS_I_DIRTY, &ii->i_state);

			err = nilfs_segctor_scan_file(sci, &ii->vfs_inode,
						      &nilfs_sc_file_ops);
			if (unlikely(err)) {
				sci->sc_stage.dirty_file_ptr =
					list_entry(ii->i_dirty.prev,
						   struct nilfs_inode_info,
						   i_dirty);
				goto break_or_fail;
			}
			/* sci->sc_stage.dirty_file_ptr = NILFS_I(inode); */
			/* XXX: required ? */
		}
		sci->sc_stage.dirty_file_ptr = NULL;
		if (mode == SC_FLUSH_FILE) {
			sci->sc_stage.scnt = NILFS_ST_DONE;
			return 0;
		}
		sci->sc_stage.scnt++;
		sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED;
		/* Fall through */
	case NILFS_ST_IFILE:
		err = nilfs_segctor_scan_file(sci, sci->sc_root->ifile,
					      &nilfs_sc_file_ops);
		if (unlikely(err))
			break;
		sci->sc_stage.scnt++;
		/* Creating a checkpoint */
		err = nilfs_segctor_create_checkpoint(sci);
		if (unlikely(err))
			break;
		/* Fall through */
	case NILFS_ST_CPFILE:
		err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile,
					      &nilfs_sc_file_ops);
		if (unlikely(err))
			break;
		sci->sc_stage.scnt++;  /* Fall through */
	case NILFS_ST_SUFILE:
		err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs,
					 sci->sc_nfreesegs, &ndone);
		if (unlikely(err)) {
			nilfs_sufile_cancel_freev(nilfs->ns_sufile,
						  sci->sc_freesegs, ndone,
						  NULL);
			break;
		}
		sci->sc_stage.flags |= NILFS_CF_SUFREED;

		err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile,
					      &nilfs_sc_file_ops);
		if (unlikely(err))
			break;
		sci->sc_stage.scnt++;  /* Fall through */
	case NILFS_ST_DAT:
 dat_stage:
		err = nilfs_segctor_scan_file(sci, nilfs->ns_dat,
					      &nilfs_sc_dat_ops);
		if (unlikely(err))
			break;
		if (mode == SC_FLUSH_DAT) {
			sci->sc_stage.scnt = NILFS_ST_DONE;
			return 0;
		}
		sci->sc_stage.scnt++;  /* Fall through */
	case NILFS_ST_SR:
		if (mode == SC_LSEG_SR) {
			/* Appending a super root */
			err = nilfs_segctor_add_super_root(sci);
			if (unlikely(err))
				break;
		}
		/* End of a logical segment */
		sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
		sci->sc_stage.scnt = NILFS_ST_DONE;
		return 0;
	case NILFS_ST_DSYNC:
 dsync_mode:
		sci->sc_curseg->sb_sum.flags |= NILFS_SS_SYNDT;
		ii = sci->sc_dsync_inode;
		if (!test_bit(NILFS_I_BUSY, &ii->i_state))
			break;

		err = nilfs_segctor_scan_file_dsync(sci, &ii->vfs_inode);
		if (unlikely(err))
			break;
		sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
		sci->sc_stage.scnt = NILFS_ST_DONE;
		return 0;
	case NILFS_ST_DONE:
		return 0;
	default:
		BUG();
	}

 break_or_fail:
	return err;
}
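
/*
 * Illustrative note (not in the original source): the switch above walks
 * the collection stages in order
 *
 *   INIT -> GC -> FILE -> IFILE -> CPFILE -> SUFILE -> DAT -> SR -> DONE
 *
 * falling through between cases, with shortcuts: SC_LSEG_DSYNC jumps from
 * INIT to DSYNC, SC_FLUSH_DAT jumps from INIT to DAT, and SC_FLUSH_FILE
 * stops after FILE.  On -E2BIG, the gc_inode_ptr/dirty_file_ptr fields let
 * a retry resume collection exactly where it left off.
 */
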
/**
 * nilfs_segctor_begin_construction - setup segment buffer to make a new log
 * @sci: nilfs_sc_info
 * @nilfs: nilfs object
 */
static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci,
					    struct the_nilfs *nilfs)
{
	struct nilfs_segment_buffer *segbuf, *prev;
	__u64 nextnum;
	int err, alloc = 0;

	segbuf = nilfs_segbuf_new(sci->sc_super);
	if (unlikely(!segbuf))
		return -ENOMEM;

	if (list_empty(&sci->sc_write_logs)) {
		nilfs_segbuf_map(segbuf, nilfs->ns_segnum,
				 nilfs->ns_pseg_offset, nilfs);
		if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
			nilfs_shift_to_next_segment(nilfs);
			nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs);
		}

		segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq;
		nextnum = nilfs->ns_nextnum;

		if (nilfs->ns_segnum == nilfs->ns_nextnum)
			/* Start from the head of a new full segment */
			alloc++;
	} else {
		/* Continue logs */
		prev = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
		nilfs_segbuf_map_cont(segbuf, prev);
		segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq;
		nextnum = prev->sb_nextnum;

		if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
			nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
			segbuf->sb_sum.seg_seq++;
			alloc++;
		}
	}

	err = nilfs_sufile_mark_dirty(nilfs->ns_sufile, segbuf->sb_segnum);
	if (err)
		goto failed;

	if (alloc) {
		err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum);
		if (err)
			goto failed;
	}
	nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs);

	BUG_ON(!list_empty(&sci->sc_segbufs));
	list_add_tail(&segbuf->sb_list, &sci->sc_segbufs);
	sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks;
	return 0;

 failed:
	nilfs_segbuf_free(segbuf);
	return err;
}

static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
					 struct the_nilfs *nilfs, int nadd)
{
	struct nilfs_segment_buffer *segbuf, *prev;
	struct inode *sufile = nilfs->ns_sufile;
	__u64 nextnextnum;
	LIST_HEAD(list);
	int err, ret, i;

	prev = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
	/*
	 * Since the segment specified with nextnum might be allocated during
	 * the previous construction, the buffer including its segusage may
	 * not be dirty.  The following call ensures that the buffer is dirty
	 * and will pin the buffer on memory until the sufile is written.
	 */
	err = nilfs_sufile_mark_dirty(sufile, prev->sb_nextnum);
	if (unlikely(err))
		return err;

	for (i = 0; i < nadd; i++) {
		/* extend segment info */
		err = -ENOMEM;
		segbuf = nilfs_segbuf_new(sci->sc_super);
		if (unlikely(!segbuf))
			goto failed;

		/* map this buffer to region of segment on-disk */
		nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
		sci->sc_segbuf_nblocks += segbuf->sb_rest_blocks;

		/* allocate the next next full segment */
		err = nilfs_sufile_alloc(sufile, &nextnextnum);
		if (unlikely(err))
			goto failed_segbuf;

		segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq + 1;
		nilfs_segbuf_set_next_segnum(segbuf, nextnextnum, nilfs);

		list_add_tail(&segbuf->sb_list, &list);
		prev = segbuf;
	}
	list_splice_tail(&list, &sci->sc_segbufs);
	return 0;

 failed_segbuf:
	nilfs_segbuf_free(segbuf);
 failed:
	list_for_each_entry(segbuf, &list, sb_list) {
		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
		WARN_ON(ret); /* never fails */
	}
	nilfs_destroy_logs(&list);
	return err;
}

static void nilfs_free_incomplete_logs(struct list_head *logs,
				       struct the_nilfs *nilfs)
{
	struct nilfs_segment_buffer *segbuf, *prev;
	struct inode *sufile = nilfs->ns_sufile;
	int ret;

	segbuf = NILFS_FIRST_SEGBUF(logs);
	if (nilfs->ns_nextnum != segbuf->sb_nextnum) {
		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
		WARN_ON(ret); /* never fails */
	}
	if (atomic_read(&segbuf->sb_err)) {
		/* Case 1: The first segment failed */
		if (segbuf->sb_pseg_start != segbuf->sb_fseg_start)
			/*
			 * Case 1a: Partial segment appended into an existing
			 * segment
			 */
			nilfs_terminate_segment(nilfs, segbuf->sb_fseg_start,
						segbuf->sb_fseg_end);
		else /* Case 1b: New full segment */
			set_nilfs_discontinued(nilfs);
	}

	prev = segbuf;
	list_for_each_entry_continue(segbuf, logs, sb_list) {
		if (prev->sb_nextnum != segbuf->sb_nextnum) {
			ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
			WARN_ON(ret); /* never fails */
		}
		if (atomic_read(&segbuf->sb_err) &&
		    segbuf->sb_segnum != nilfs->ns_nextnum)
			/* Case 2: extended segment (!= next) failed */
			nilfs_sufile_set_error(sufile, segbuf->sb_segnum);
		prev = segbuf;
	}
}

static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci,
					  struct inode *sufile)
{
	struct nilfs_segment_buffer *segbuf;
	unsigned long live_blocks;
	int ret;

	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
		live_blocks = segbuf->sb_sum.nblocks +
			(segbuf->sb_pseg_start - segbuf->sb_fseg_start);
		ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
						     live_blocks,
						     sci->sc_seg_ctime);
		WARN_ON(ret); /* always succeeds because the segusage is dirty */
	}
}

static void nilfs_cancel_segusage(struct list_head *logs, struct inode *sufile)
{
	struct nilfs_segment_buffer *segbuf;
	int ret;

	segbuf = NILFS_FIRST_SEGBUF(logs);
	ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
					     segbuf->sb_pseg_start -
					     segbuf->sb_fseg_start, 0);
	WARN_ON(ret); /* always succeeds because the segusage is dirty */

	list_for_each_entry_continue(segbuf, logs, sb_list) {
		ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
						     0, 0);
		WARN_ON(ret); /* always succeeds */
	}
}

static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci,
					    struct nilfs_segment_buffer *last,
					    struct inode *sufile)
{
	struct nilfs_segment_buffer *segbuf = last;
	int ret;

	list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
		sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks;
		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
		WARN_ON(ret);
	}
	nilfs_truncate_logs(&sci->sc_segbufs, last);
}


static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
				 struct the_nilfs *nilfs, int mode)
{
	struct nilfs_cstage prev_stage = sci->sc_stage;
	int err, nadd = 1;

	/* Collection retry loop */
	for (;;) {
		sci->sc_nblk_this_inc = 0;
		sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);

		err = nilfs_segctor_reset_segment_buffer(sci);
		if (unlikely(err))
			goto failed;

		err = nilfs_segctor_collect_blocks(sci, mode);
		sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
		if (!err)
			break;

		if (unlikely(err != -E2BIG))
			goto failed;

		/* The current segment is filled up */
		if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE)
			break;

		nilfs_clear_logs(&sci->sc_segbufs);

		err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
		if (unlikely(err))
			return err;

		if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
			err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
							sci->sc_freesegs,
							sci->sc_nfreesegs,
							NULL);
			WARN_ON(err); /* do not happen */
		}
		nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
		sci->sc_stage = prev_stage;
	}
	nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
	return 0;

 failed:
	return err;
}

static void nilfs_list_replace_buffer(struct buffer_head *old_bh,
				      struct buffer_head *new_bh)
{
	BUG_ON(!list_empty(&new_bh->b_assoc_buffers));

	list_replace_init(&old_bh->b_assoc_buffers, &new_bh->b_assoc_buffers);
	/* The caller must release old_bh */
}

static int
nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
				     struct nilfs_segment_buffer *segbuf,
				     int mode)
{
	struct inode *inode = NULL;
	sector_t blocknr;
	unsigned long nfinfo = segbuf->sb_sum.nfinfo;
	unsigned long nblocks = 0, ndatablk = 0;
	struct nilfs_sc_operations *sc_op = NULL;
	struct nilfs_segsum_pointer ssp;
	struct nilfs_finfo *finfo = NULL;
	union nilfs_binfo binfo;
	struct buffer_head *bh, *bh_org;
	ino_t ino = 0;
	int err = 0;

	if (!nfinfo)
		goto out;

	blocknr = segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk;
	ssp.bh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
	ssp.offset = sizeof(struct nilfs_segment_summary);

	list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
		if (bh == segbuf->sb_super_root)
			break;
		if (!finfo) {
			finfo =	nilfs_segctor_map_segsum_entry(
				sci, &ssp, sizeof(*finfo));
			ino = le64_to_cpu(finfo->fi_ino);
			nblocks = le32_to_cpu(finfo->fi_nblocks);
			ndatablk = le32_to_cpu(finfo->fi_ndatablk);

			inode = bh->b_page->mapping->host;

			if (mode == SC_LSEG_DSYNC)
				sc_op = &nilfs_sc_dsync_ops;
			else if (ino == NILFS_DAT_INO)
				sc_op = &nilfs_sc_dat_ops;
			else /* file blocks */
				sc_op = &nilfs_sc_file_ops;
		}
		bh_org = bh;
		get_bh(bh_org);
		err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr,
					&binfo);
		if (bh != bh_org)
			nilfs_list_replace_buffer(bh_org, bh);
		brelse(bh_org);
		if (unlikely(err))
			goto failed_bmap;

		if (ndatablk > 0)
			sc_op->write_data_binfo(sci, &ssp, &binfo);
		else
			sc_op->write_node_binfo(sci, &ssp, &binfo);

		blocknr++;
		if (--nblocks == 0) {
			finfo = NULL;
			if (--nfinfo == 0)
				break;
		} else if (ndatablk > 0)
			ndatablk--;
	}
 out:
	return 0;

 failed_bmap:
	return err;
}

static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
{
	struct nilfs_segment_buffer *segbuf;
	int err;

	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
		err = nilfs_segctor_update_payload_blocknr(sci, segbuf, mode);
		if (unlikely(err))
			return err;
		nilfs_segbuf_fill_in_segsum(segbuf);
	}
	return 0;
}

static void nilfs_begin_page_io(struct page *page)
{
	if (!page || PageWriteback(page))
		/*
		 * For split b-tree node pages, this function may be called
		 * twice.  We ignore the 2nd or later calls by this check.
		 */
		return;

	lock_page(page);
	clear_page_dirty_for_io(page);
	set_page_writeback(page);
	unlock_page(page);
}

static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf;
	struct page *bd_page = NULL, *fs_page = NULL;

	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
		struct buffer_head *bh;

		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
				    b_assoc_buffers) {
			if (bh->b_page != bd_page) {
				if (bd_page) {
					lock_page(bd_page);
					clear_page_dirty_for_io(bd_page);
					set_page_writeback(bd_page);
					unlock_page(bd_page);
				}
				bd_page = bh->b_page;
			}
		}

		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
				    b_assoc_buffers) {
			if (bh == segbuf->sb_super_root) {
				if (bh->b_page != bd_page) {
					lock_page(bd_page);
					clear_page_dirty_for_io(bd_page);
					set_page_writeback(bd_page);
					unlock_page(bd_page);
					bd_page = bh->b_page;
				}
				break;
			}
			if (bh->b_page != fs_page) {
				nilfs_begin_page_io(fs_page);
				fs_page = bh->b_page;
			}
		}
	}
	if (bd_page) {
		lock_page(bd_page);
		clear_page_dirty_for_io(bd_page);
		set_page_writeback(bd_page);
		unlock_page(bd_page);
	}
	nilfs_begin_page_io(fs_page);
}

static int nilfs_segctor_write(struct nilfs_sc_info *sci,
			       struct the_nilfs *nilfs)
{
	int ret;

	ret = nilfs_write_logs(&sci->sc_segbufs, nilfs);
	list_splice_tail_init(&sci->sc_segbufs, &sci->sc_write_logs);
	return ret;
}

static void nilfs_end_page_io(struct page *page, int err)
{
	if (!page)
		return;

	if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) {
		/*
		 * For b-tree node pages, this function may be called twice
		 * or more because they might be split in a segment.
		 */
		if (PageDirty(page)) {
			/*
			 * For pages holding split b-tree node buffers, dirty
			 * flag on the buffers may be cleared discretely.
			 * In that case, the page is once redirtied for
			 * remaining buffers, and it must be cancelled if
			 * all the buffers get cleaned later.
			 */
			lock_page(page);
			if (nilfs_page_buffers_clean(page))
				__nilfs_clear_page_dirty(page);
			unlock_page(page);
		}
		return;
	}

	if (!err) {
		if (!nilfs_page_buffers_clean(page))
			__set_page_dirty_nobuffers(page);
		ClearPageError(page);
	} else {
		__set_page_dirty_nobuffers(page);
		SetPageError(page);
	}

	end_page_writeback(page);
}

static void nilfs_abort_logs(struct list_head *logs, int err)
{
	struct nilfs_segment_buffer *segbuf;
	struct page *bd_page = NULL, *fs_page = NULL;
	struct buffer_head *bh;

	if (list_empty(logs))
		return;

	list_for_each_entry(segbuf, logs, sb_list) {
		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
				    b_assoc_buffers) {
			if (bh->b_page != bd_page) {
				if (bd_page)
					end_page_writeback(bd_page);
				bd_page = bh->b_page;
			}
		}

		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
				    b_assoc_buffers) {
			if (bh == segbuf->sb_super_root) {
				if (bh->b_page != bd_page) {
					end_page_writeback(bd_page);
					bd_page = bh->b_page;
				}
				break;
			}
			if (bh->b_page != fs_page) {
				nilfs_end_page_io(fs_page, err);
				fs_page = bh->b_page;
			}
		}
	}
	if (bd_page)
		end_page_writeback(bd_page);

	nilfs_end_page_io(fs_page, err);
}

static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
					     struct the_nilfs *nilfs, int err)
{
	LIST_HEAD(logs);
	int ret;

	list_splice_tail_init(&sci->sc_write_logs, &logs);
	ret = nilfs_wait_on_logs(&logs);
	nilfs_abort_logs(&logs, ret ? : err);

	list_splice_tail_init(&sci->sc_segbufs, &logs);
	nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
	nilfs_free_incomplete_logs(&logs, nilfs);

	if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
		ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
						sci->sc_freesegs,
						sci->sc_nfreesegs,
						NULL);
		WARN_ON(ret); /* do not happen */
	}

	nilfs_destroy_logs(&logs);
}

static void nilfs_set_next_segment(struct the_nilfs *nilfs,
				   struct nilfs_segment_buffer *segbuf)
{
	nilfs->ns_segnum = segbuf->sb_segnum;
	nilfs->ns_nextnum = segbuf->sb_nextnum;
	nilfs->ns_pseg_offset = segbuf->sb_pseg_start - segbuf->sb_fseg_start
		+ segbuf->sb_sum.nblocks;
	nilfs->ns_seg_seq = segbuf->sb_sum.seg_seq;
	nilfs->ns_ctime = segbuf->sb_sum.ctime;
}

static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf;
	struct page *bd_page = NULL, *fs_page = NULL;
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
	int update_sr = false;

	list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) {
		struct buffer_head *bh;

		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
				    b_assoc_buffers) {
			set_buffer_uptodate(bh);
			clear_buffer_dirty(bh);
			if (bh->b_page != bd_page) {
				if (bd_page)
					end_page_writeback(bd_page);
				bd_page = bh->b_page;
			}
		}
		/*
		 * We assume that the buffers which belong to the same page
		 * continue over the buffer list.
		 * Under this assumption, the last BHs of pages is
		 * identifiable by the discontinuity of bh->b_page
		 * (page != fs_page).
		 *
		 * For B-tree node blocks, however, this assumption is not
		 * guaranteed.  The cleanup code of B-tree node pages needs
		 * special care.
		 */
		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
				    b_assoc_buffers) {
			set_buffer_uptodate(bh);
			clear_buffer_dirty(bh);
			clear_buffer_delay(bh);
			clear_buffer_nilfs_volatile(bh);
			clear_buffer_nilfs_redirected(bh);
			if (bh == segbuf->sb_super_root) {
				if (bh->b_page != bd_page) {
					end_page_writeback(bd_page);
					bd_page = bh->b_page;
				}
				update_sr = true;
				break;
			}
			if (bh->b_page != fs_page) {
				nilfs_end_page_io(fs_page, 0);
				fs_page = bh->b_page;
			}
		}

		if (!nilfs_segbuf_simplex(segbuf)) {
			if (segbuf->sb_sum.flags & NILFS_SS_LOGBGN) {
				set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
				sci->sc_lseg_stime = jiffies;
			}
			if (segbuf->sb_sum.flags & NILFS_SS_LOGEND)
				clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
		}
	}
	/*
	 * Since pages may continue over multiple segment buffers,
	 * end of the last page must be checked outside of the loop.
	 */
	if (bd_page)
		end_page_writeback(bd_page);

	nilfs_end_page_io(fs_page, 0);

	nilfs_drop_collected_inodes(&sci->sc_dirty_files);

	if (nilfs_doing_gc())
		nilfs_drop_collected_inodes(&sci->sc_gc_inodes);
	else
		nilfs->ns_nongc_ctime = sci->sc_seg_ctime;

	sci->sc_nblk_inc += sci->sc_nblk_this_inc;

	segbuf = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
	nilfs_set_next_segment(nilfs, segbuf);

	if (update_sr) {
		nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start,
				       segbuf->sb_sum.seg_seq, nilfs->ns_cno++);

		clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
		clear_bit(NILFS_SC_DIRTY, &sci->sc_flags);
		set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
		nilfs_segctor_clear_metadata_dirty(sci);
	} else
		clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
}

static int nilfs_segctor_wait(struct nilfs_sc_info *sci)
{
	int ret;

	ret = nilfs_wait_on_logs(&sci->sc_write_logs);
	if (!ret) {
		nilfs_segctor_complete_write(sci);
		nilfs_destroy_logs(&sci->sc_write_logs);
	}
	return ret;
}

static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
					     struct the_nilfs *nilfs)
{
	struct nilfs_inode_info *ii, *n;
	struct inode *ifile = sci->sc_root->ifile;

	spin_lock(&nilfs->ns_inode_lock);
 retry:
	list_for_each_entry_safe(ii, n, &nilfs->ns_dirty_files, i_dirty) {
		if (!ii->i_bh) {
			struct buffer_head *ibh;
			int err;

			spin_unlock(&nilfs->ns_inode_lock);
			err = nilfs_ifile_get_inode_block(
				ifile, ii->vfs_inode.i_ino, &ibh);
			if (unlikely(err)) {
				nilfs_warning(sci->sc_super, __func__,
					      "failed to get inode block.\n");
				return err;
			}
			mark_buffer_dirty(ibh);
			nilfs_mdt_mark_dirty(ifile);
			spin_lock(&nilfs->ns_inode_lock);
			if (likely(!ii->i_bh))
				ii->i_bh = ibh;
			else
				brelse(ibh);
			goto retry;
		}

		clear_bit(NILFS_I_QUEUED, &ii->i_state);
		set_bit(NILFS_I_BUSY, &ii->i_state);
		list_move_tail(&ii->i_dirty, &sci->sc_dirty_files);
	}
	spin_unlock(&nilfs->ns_inode_lock);

	return 0;
}

static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
					     struct the_nilfs *nilfs)
{
	struct nilfs_transaction_info *ti = current->journal_info;
	struct nilfs_inode_info *ii, *n;

	spin_lock(&nilfs->ns_inode_lock);
	list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) {
		if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) ||
		    test_bit(NILFS_I_DIRTY, &ii->i_state))
			continue;

		clear_bit(NILFS_I_BUSY, &ii->i_state);
		brelse(ii->i_bh);
		ii->i_bh = NULL;
		list_move_tail(&ii->i_dirty, &ti->ti_garbage);
	}
	spin_unlock(&nilfs->ns_inode_lock);
}

/*
 * Main procedure of segment constructor
 */
static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
{
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
	int err;

	sci->sc_stage.scnt = NILFS_ST_INIT;
	sci->sc_cno = nilfs->ns_cno;

	err = nilfs_segctor_collect_dirty_files(sci, nilfs);
	if (unlikely(err))
		goto out;

	if (nilfs_test_metadata_dirty(nilfs, sci->sc_root))
		set_bit(NILFS_SC_DIRTY, &sci->sc_flags);

	if (nilfs_segctor_clean(sci))
		goto out;

	do {
		sci->sc_stage.flags &= ~NILFS_CF_HISTORY_MASK;

		err = nilfs_segctor_begin_construction(sci, nilfs);
		if (unlikely(err))
			goto out;

		/* Update time stamp */
		sci->sc_seg_ctime = get_seconds();

		err = nilfs_segctor_collect(sci, nilfs, mode);
		if (unlikely(err))
			goto failed;

		/* Avoid empty segment */
		if (sci->sc_stage.scnt == NILFS_ST_DONE &&
		    nilfs_segbuf_empty(sci->sc_curseg)) {
			nilfs_segctor_abort_construction(sci, nilfs, 1);
			goto out;
		}

		err = nilfs_segctor_assign(sci, mode);
		if (unlikely(err))
			goto failed;

		if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
			nilfs_segctor_fill_in_file_bmap(sci);

		if (mode == SC_LSEG_SR &&
		    sci->sc_stage.scnt >= NILFS_ST_CPFILE) {
			err = nilfs_segctor_fill_in_checkpoint(sci);
			if (unlikely(err))
				goto failed_to_write;

			nilfs_segctor_fill_in_super_root(sci, nilfs);
		}
		nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);

		/* Write partial segments */
		nilfs_segctor_prepare_write(sci);

		nilfs_add_checksums_on_logs(&sci->sc_segbufs,
					    nilfs->ns_crc_seed);

		err = nilfs_segctor_write(sci, nilfs);
		if (unlikely(err))
			goto failed_to_write;

		if (sci->sc_stage.scnt == NILFS_ST_DONE ||
		    nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) {
			/*
			 * At this point, we avoid double buffering
			 * for blocksize < pagesize because page dirty
			 * flag is turned off during write and dirty
			 * buffers are not properly collected for
			 * pages crossing over segments.
			 */
			err = nilfs_segctor_wait(sci);
			if (err)
				goto failed_to_write;
		}
	} while (sci->sc_stage.scnt != NILFS_ST_DONE);

 out:
	nilfs_segctor_drop_written_files(sci, nilfs);
	return err;

 failed_to_write:
	if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
		nilfs_redirty_inodes(&sci->sc_dirty_files);

 failed:
	if (nilfs_doing_gc())
		nilfs_redirty_inodes(&sci->sc_gc_inodes);
	nilfs_segctor_abort_construction(sci, nilfs, err);
	goto out;
}

/**
 * nilfs_segctor_start_timer - set timer of background write
 * @sci: nilfs_sc_info
 *
 * If the timer has already been set, it ignores the new request.
 * This function MUST be called within a section locking the segment
 * semaphore.
 */
static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
{
	spin_lock(&sci->sc_state_lock);
	if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
		sci->sc_timer.expires = jiffies + sci->sc_interval;
		add_timer(&sci->sc_timer);
		sci->sc_state |= NILFS_SEGCTOR_COMMIT;
	}
	spin_unlock(&sci->sc_state_lock);
}

static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn)
{
	spin_lock(&sci->sc_state_lock);
	if (!(sci->sc_flush_request & (1 << bn))) {
		unsigned long prev_req = sci->sc_flush_request;

		sci->sc_flush_request |= (1 << bn);
		if (!prev_req)
			wake_up(&sci->sc_wait_daemon);
	}
	spin_unlock(&sci->sc_state_lock);
}

2046 void nilfs_flush_segment(struct super_block *sb, ino_t ino)
2047 {
2048  struct the_nilfs *nilfs = sb->s_fs_info;
2049  struct nilfs_sc_info *sci = nilfs->ns_writer;
2050 
2051  if (!sci || nilfs_doing_construction())
2052  return;
2053  nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0);
2054  /* assign bit 0 to data files */
2055 }
2056 
2057 struct nilfs_segctor_wait_request {
2058  wait_queue_t wq;
2059  __u32 seq;
2060  int err;
2061  atomic_t done;
2062 };
2063 
2064 static int nilfs_segctor_sync(struct nilfs_sc_info *sci)
2065 {
2066  struct nilfs_segctor_wait_request wait_req;
2067  int err = 0;
2068 
2069  spin_lock(&sci->sc_state_lock);
2070  init_wait(&wait_req.wq);
2071  wait_req.err = 0;
2072  atomic_set(&wait_req.done, 0);
2073  wait_req.seq = ++sci->sc_seq_request;
2074  spin_unlock(&sci->sc_state_lock);
2075 
2076  init_waitqueue_entry(&wait_req.wq, current);
2077  add_wait_queue(&sci->sc_wait_request, &wait_req.wq);
2078  set_current_state(TASK_INTERRUPTIBLE);
2079  wake_up(&sci->sc_wait_daemon);
2080 
2081  for (;;) {
2082  if (atomic_read(&wait_req.done)) {
2083  err = wait_req.err;
2084  break;
2085  }
2086  if (!signal_pending(current)) {
2087  schedule();
2088  continue;
2089  }
2090  err = -ERESTARTSYS;
2091  break;
2092  }
2093  finish_wait(&sci->sc_wait_request, &wait_req.wq);
2094  return err;
2095 }
2096 
2097 static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
2098 {
2099  struct nilfs_segctor_wait_request *wrq, *n;
2100  unsigned long flags;
2101 
2102  spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
2103  list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list,
2104  wq.task_list) {
2105  if (!atomic_read(&wrq->done) &&
2106  nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
2107  wrq->err = err;
2108  atomic_set(&wrq->done, 1);
2109  }
2110  if (atomic_read(&wrq->done)) {
2111  wrq->wq.func(&wrq->wq,
2112  TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
2113  0, NULL);
2114  }
2115  }
2116  spin_unlock_irqrestore(&sci->sc_wait_request.lock, flags);
2117 }
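nilfs_cnt32_ge(), defined earlier in this file, compares the 32-bit request/done sequence counters so that the test above stays correct when the counters wrap around. The underlying idiom, sketched here with a hypothetical name:

/*
 * Illustration of wraparound-safe sequence comparison: interpreting
 * the difference of two 32-bit counters as signed keeps "a >= b"
 * valid across the 2^32 boundary, as long as the counters stay
 * within 2^31 of each other.
 */
static inline int sketch_seq32_ge(__u32 a, __u32 b)
{
        return (__s32)(a - b) >= 0;
}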
2118 
2136 int nilfs_construct_segment(struct super_block *sb)
2137 {
2138  struct the_nilfs *nilfs = sb->s_fs_info;
2139  struct nilfs_sc_info *sci = nilfs->ns_writer;
2140  struct nilfs_transaction_info *ti;
2141  int err;
2142 
2143  if (!sci)
2144  return -EROFS;
2145 
2146  /* A call inside transactions causes a deadlock. */
2147  BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC);
2148 
2149  err = nilfs_segctor_sync(sci);
2150  return err;
2151 }
2152 
2173 int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
2174  loff_t start, loff_t end)
2175 {
2176  struct the_nilfs *nilfs = sb->s_fs_info;
2177  struct nilfs_sc_info *sci = nilfs->ns_writer;
2178  struct nilfs_inode_info *ii;
2179  struct nilfs_transaction_info ti;
2180  int err = 0;
2181 
2182  if (!sci)
2183  return -EROFS;
2184 
2185  nilfs_transaction_lock(sb, &ti, 0);
2186 
2187  ii = NILFS_I(inode);
2188  if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) ||
2189  nilfs_test_opt(nilfs, STRICT_ORDER) ||
2190  test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
2191  nilfs_discontinued(nilfs)) {
2192  nilfs_transaction_unlock(sb);
2193  err = nilfs_segctor_sync(sci);
2194  return err;
2195  }
2196 
2197  spin_lock(&nilfs->ns_inode_lock);
2198  if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
2199  !test_bit(NILFS_I_BUSY, &ii->i_state)) {
2200  spin_unlock(&nilfs->ns_inode_lock);
2201  nilfs_transaction_unlock(sb);
2202  return 0;
2203  }
2204  spin_unlock(&nilfs->ns_inode_lock);
2205  sci->sc_dsync_inode = ii;
2206  sci->sc_dsync_start = start;
2207  sci->sc_dsync_end = end;
2208 
2209  err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC);
2210 
2211  nilfs_transaction_unlock(sb);
2212  return err;
2213 }
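For context, this function backs the data-sync path: the caller hands in the inode and the byte range that must reach stable storage, and only data blocks are logged, without creating a checkpoint. A simplified, hypothetical call site (the real fsync handler in fs/nilfs2/file.c does more work around this call):

/* Hypothetical caller sketch: flush one inode's data range to disk. */
static int sketch_datasync(struct inode *inode, loff_t start, loff_t end)
{
        return nilfs_construct_dsync_segment(inode->i_sb, inode, start, end);
}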
2214 
2215 #define FLUSH_FILE_BIT (0x1) /* data file only */
2216 #define FLUSH_DAT_BIT (1 << NILFS_DAT_INO) /* DAT only */
2217 
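These two masks mirror the bit numbering chosen in nilfs_flush_segment() above: all ordinary data files share bit 0, while a metadata file is tracked under its own inode number, the DAT being the case the daemon treats specially. A hedged sketch of the mapping, using a hypothetical helper name:

/*
 * Illustration only: how an inode number is folded into the
 * sc_flush_request bitmap.  NILFS_MDT_INODE() is true for reserved
 * metadata inodes; every regular file collapses onto bit 0.
 */
static unsigned long sketch_flush_bit(struct super_block *sb, ino_t ino)
{
        int bn = NILFS_MDT_INODE(sb, ino) ? ino : 0;  /* bit number */

        return 1UL << bn;  /* data files -> FLUSH_FILE_BIT, DAT -> FLUSH_DAT_BIT */
}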
2222 static void nilfs_segctor_accept(struct nilfs_sc_info *sci)
2223 {
2224  spin_lock(&sci->sc_state_lock);
2225  sci->sc_seq_accepted = sci->sc_seq_request;
2226  spin_unlock(&sci->sc_state_lock);
2227  del_timer_sync(&sci->sc_timer);
2228 }
2229 
2236 static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
2237 {
2238  /* Clear requests (even when the construction failed) */
2239  spin_lock(&sci->sc_state_lock);
2240 
2241  if (mode == SC_LSEG_SR) {
2242  sci->sc_state &= ~NILFS_SEGCTOR_COMMIT;
2243  sci->sc_seq_done = sci->sc_seq_accepted;
2244  nilfs_segctor_wakeup(sci, err);
2245  sci->sc_flush_request = 0;
2246  } else {
2247  if (mode == SC_FLUSH_FILE)
2249  else if (mode == SC_FLUSH_DAT)
2251 
2252  /* re-enable timer if checkpoint creation was not done */
2253  if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
2254  time_before(jiffies, sci->sc_timer.expires))
2255  add_timer(&sci->sc_timer);
2256  }
2257  spin_unlock(&sci->sc_state_lock);
2258 }
2259 
2265 static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
2266 {
2267  struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2268  struct nilfs_super_block **sbp;
2269  int err = 0;
2270 
2271  nilfs_segctor_accept(sci);
2272 
2273  if (nilfs_discontinued(nilfs))
2274  mode = SC_LSEG_SR;
2275  if (!nilfs_segctor_confirm(sci))
2276  err = nilfs_segctor_do_construct(sci, mode);
2277 
2278  if (likely(!err)) {
2279  if (mode != SC_FLUSH_DAT)
2280  atomic_set(&nilfs->ns_ndirtyblks, 0);
2281  if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) &&
2282  nilfs_discontinued(nilfs)) {
2283  down_write(&nilfs->ns_sem);
2284  err = -EIO;
2285  sbp = nilfs_prepare_super(sci->sc_super,
2286  nilfs_sb_will_flip(nilfs));
2287  if (likely(sbp)) {
2288  nilfs_set_log_cursor(sbp[0], nilfs);
2289  err = nilfs_commit_super(sci->sc_super,
2290  NILFS_SB_COMMIT);
2291  }
2292  up_write(&nilfs->ns_sem);
2293  }
2294  }
2295 
2296  nilfs_segctor_notify(sci, mode, err);
2297  return err;
2298 }
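Stripped of the superblock commit and the clean-state shortcut, the accept/notify pairing above reduces to the following shape (a simplified restatement, not a function from this file):

/* Simplified outline of one serviced request (illustration only). */
static int sketch_service_request(struct nilfs_sc_info *sci, int mode)
{
        int err;

        nilfs_segctor_accept(sci);            /* snapshot sc_seq_request, stop timer */
        err = nilfs_segctor_do_construct(sci, mode);
        nilfs_segctor_notify(sci, mode, err); /* publish sc_seq_done, wake waiters */
        return err;
}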
2299 
2300 static void nilfs_construction_timeout(unsigned long data)
2301 {
2302  struct task_struct *p = (struct task_struct *)data;
2303  wake_up_process(p);
2304 }
2305 
2306 static void
2307 nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
2308 {
2309  struct nilfs_inode_info *ii, *n;
2310 
2311  list_for_each_entry_safe(ii, n, head, i_dirty) {
2312  if (!test_bit(NILFS_I_UPDATED, &ii->i_state))
2313  continue;
2314  list_del_init(&ii->i_dirty);
2315  truncate_inode_pages(&ii->vfs_inode.i_data, 0);
2316  nilfs_btnode_cache_clear(&ii->i_btnode_cache);
2317  iput(&ii->vfs_inode);
2318  }
2319 }
2320 
2321 int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2322  void **kbufs)
2323 {
2324  struct the_nilfs *nilfs = sb->s_fs_info;
2325  struct nilfs_sc_info *sci = nilfs->ns_writer;
2326  struct nilfs_transaction_info ti;
2327  int err;
2328 
2329  if (unlikely(!sci))
2330  return -EROFS;
2331 
2332  nilfs_transaction_lock(sb, &ti, 1);
2333 
2334  err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat);
2335  if (unlikely(err))
2336  goto out_unlock;
2337 
2338  err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs);
2339  if (unlikely(err)) {
2340  nilfs_mdt_restore_from_shadow_map(nilfs->ns_dat);
2341  goto out_unlock;
2342  }
2343 
2344  sci->sc_freesegs = kbufs[4];
2345  sci->sc_nfreesegs = argv[4].v_nmembs;
2346  list_splice_tail_init(&nilfs->ns_gc_inodes, &sci->sc_gc_inodes);
2347 
2348  for (;;) {
2349  err = nilfs_segctor_construct(sci, SC_LSEG_SR);
2350  nilfs_remove_written_gcinodes(nilfs, &sci->sc_gc_inodes);
2351 
2352  if (likely(!err))
2353  break;
2354 
2355  nilfs_warning(sb, __func__,
2356  "segment construction failed. (err=%d)", err);
2357  set_current_state(TASK_INTERRUPTIBLE);
2358  schedule_timeout(sci->sc_interval);
2359  }
2360  if (nilfs_test_opt(nilfs, DISCARD)) {
2361  int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs,
2362  sci->sc_nfreesegs);
2363  if (ret) {
2364  printk(KERN_WARNING
2365  "NILFS warning: error %d on discard request, "
2366  "turning discards off for the device\n", ret);
2367  nilfs_clear_opt(nilfs, DISCARD);
2368  }
2369  }
2370 
2371  out_unlock:
2372  sci->sc_freesegs = NULL;
2373  sci->sc_nfreesegs = 0;
2374  nilfs_mdt_clear_shadow_map(nilfs->ns_dat);
2375  nilfs_transaction_unlock(sb);
2376  return err;
2377 }
2378 
2379 static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
2380 {
2381  struct nilfs_transaction_info ti;
2382 
2383  nilfs_transaction_lock(sci->sc_super, &ti, 0);
2384  nilfs_segctor_construct(sci, mode);
2385 
2386  /*
2387  * An unclosed segment should be retried; we do this using sc_timer.
2388  * A timeout of sc_timer invokes a complete construction, which
2389  * closes the current logical segment.
2390  */
2391  if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags))
2392  nilfs_segctor_start_timer(sci);
2393 
2394  nilfs_transaction_unlock(sci->sc_super);
2395 }
2396 
2397 static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
2398 {
2399  int mode = 0;
2400  int err;
2401 
2402  spin_lock(&sci->sc_state_lock);
2403  mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ?
2404  SC_FLUSH_DAT : SC_FLUSH_FILE;
2405  spin_unlock(&sci->sc_state_lock);
2406 
2407  if (mode) {
2408  err = nilfs_segctor_do_construct(sci, mode);
2409 
2410  spin_lock(&sci->sc_state_lock);
2411  sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ?
2412  ~FLUSH_FILE_BIT : ~FLUSH_DAT_BIT;
2413  spin_unlock(&sci->sc_state_lock);
2414  }
2415  clear_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
2416 }
2417 
2418 static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci)
2419 {
2420  if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
2421  time_before(jiffies, sci->sc_lseg_stime + sci->sc_mjcp_freq)) {
2422  if (!(sci->sc_flush_request & ~FLUSH_FILE_BIT))
2423  return SC_FLUSH_FILE;
2424  else if (!(sci->sc_flush_request & ~FLUSH_DAT_BIT))
2425  return SC_FLUSH_DAT;
2426  }
2427  return SC_LSEG_SR;
2428 }
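In other words: a pure data-file request is serviced with SC_FLUSH_FILE, a pure DAT request with SC_FLUSH_DAT, and anything mixed, or a stale unclosed segment, escalates to a full SC_LSEG_SR construction. A restatement under the assumption that no stale unclosed segment exists (illustration only):

/* Mode chosen for a pending request mask, assuming any unclosed
 * segment is still young (illustration only). */
static int sketch_flush_mode(unsigned long flush_request)
{
        if (!(flush_request & ~FLUSH_FILE_BIT))
                return SC_FLUSH_FILE;  /* only data-file requests pending */
        if (!(flush_request & ~FLUSH_DAT_BIT))
                return SC_FLUSH_DAT;   /* only the DAT request pending */
        return SC_LSEG_SR;             /* mixed: full segment with super root */
}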
2429 
2437 static int nilfs_segctor_thread(void *arg)
2438 {
2439  struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
2440  struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2441  int timeout = 0;
2442 
2443  sci->sc_timer.data = (unsigned long)current;
2444  sci->sc_timer.function = nilfs_construction_timeout;
2445 
2446  /* start sync. */
2447  sci->sc_task = current;
2448  wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */
2449  printk(KERN_INFO
2450  "segctord starting. Construction interval = %lu seconds, "
2451  "CP frequency < %lu seconds\n",
2452  sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ);
2453 
2454  spin_lock(&sci->sc_state_lock);
2455  loop:
2456  for (;;) {
2457  int mode;
2458 
2459  if (sci->sc_state & NILFS_SEGCTOR_QUIT)
2460  goto end_thread;
2461 
2462  if (timeout || sci->sc_seq_request != sci->sc_seq_done)
2463  mode = SC_LSEG_SR;
2464  else if (!sci->sc_flush_request)
2465  break;
2466  else
2467  mode = nilfs_segctor_flush_mode(sci);
2468 
2469  spin_unlock(&sci->sc_state_lock);
2470  nilfs_segctor_thread_construct(sci, mode);
2471  spin_lock(&sci->sc_state_lock);
2472  timeout = 0;
2473  }
2474 
2475 
2476  if (freezing(current)) {
2477  spin_unlock(&sci->sc_state_lock);
2478  try_to_freeze();
2479  spin_lock(&sci->sc_state_lock);
2480  } else {
2481  DEFINE_WAIT(wait);
2482  int should_sleep = 1;
2483 
2484  prepare_to_wait(&sci->sc_wait_daemon, &wait,
2485  TASK_INTERRUPTIBLE);
2486 
2487  if (sci->sc_seq_request != sci->sc_seq_done)
2488  should_sleep = 0;
2489  else if (sci->sc_flush_request)
2490  should_sleep = 0;
2491  else if (sci->sc_state & NILFS_SEGCTOR_COMMIT)
2492  should_sleep = time_before(jiffies,
2493  sci->sc_timer.expires);
2494 
2495  if (should_sleep) {
2496  spin_unlock(&sci->sc_state_lock);
2497  schedule();
2498  spin_lock(&sci->sc_state_lock);
2499  }
2500  finish_wait(&sci->sc_wait_daemon, &wait);
2501  timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
2502  time_after_eq(jiffies, sci->sc_timer.expires));
2503 
2504  if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs))
2505  set_nilfs_discontinued(nilfs);
2506  }
2507  goto loop;
2508 
2509  end_thread:
2510  spin_unlock(&sci->sc_state_lock);
2511 
2512  /* end sync. */
2513  sci->sc_task = NULL;
2514  wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
2515  return 0;
2516 }
2517 
2518 static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci)
2519 {
2520  struct task_struct *t;
2521 
2522  t = kthread_run(nilfs_segctor_thread, sci, "segctord");
2523  if (IS_ERR(t)) {
2524  int err = PTR_ERR(t);
2525 
2526  printk(KERN_ERR "NILFS: error %d creating segctord thread\n",
2527  err);
2528  return err;
2529  }
2530  wait_event(sci->sc_wait_task, sci->sc_task != NULL);
2531  return 0;
2532 }
2533 
2534 static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
2535  __acquires(&sci->sc_state_lock)
2536  __releases(&sci->sc_state_lock)
2537 {
2538  sci->sc_state |= NILFS_SEGCTOR_QUIT;
2539 
2540  while (sci->sc_task) {
2541  wake_up(&sci->sc_wait_daemon);
2542  spin_unlock(&sci->sc_state_lock);
2543  wait_event(sci->sc_wait_task, sci->sc_task == NULL);
2544  spin_lock(&sci->sc_state_lock);
2545  }
2546 }
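The start/stop pair above is a handshake on sc_wait_task: the starter blocks until the thread publishes itself in sc_task, and teardown raises NILFS_SEGCTOR_QUIT, then blocks until sc_task is cleared again. A generic, hypothetical sketch of the thread side of the same pattern:

/* Hypothetical, minimal thread body using the same handshake. */
struct sketch_daemon {
        struct task_struct *task;
        wait_queue_head_t wait_task;
        unsigned long flags;             /* bit 0: quit requested */
};

static int sketch_thread(void *arg)
{
        struct sketch_daemon *d = arg;

        d->task = current;
        wake_up(&d->wait_task);          /* unblock the starter */
        while (!test_bit(0, &d->flags))
                schedule_timeout_interruptible(HZ);
        d->task = NULL;
        wake_up(&d->wait_task);          /* unblock the stopper */
        return 0;
}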
2547 
2548 /*
2549  * Setup & clean-up functions
2550  */
2551 static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
2552  struct nilfs_root *root)
2553 {
2554  struct the_nilfs *nilfs = sb->s_fs_info;
2555  struct nilfs_sc_info *sci;
2556 
2557  sci = kzalloc(sizeof(*sci), GFP_KERNEL);
2558  if (!sci)
2559  return NULL;
2560 
2561  sci->sc_super = sb;
2562 
2563  nilfs_get_root(root);
2564  sci->sc_root = root;
2565 
2566  init_waitqueue_head(&sci->sc_wait_request);
2567  init_waitqueue_head(&sci->sc_wait_daemon);
2568  init_waitqueue_head(&sci->sc_wait_task);
2569  spin_lock_init(&sci->sc_state_lock);
2570  INIT_LIST_HEAD(&sci->sc_dirty_files);
2571  INIT_LIST_HEAD(&sci->sc_segbufs);
2572  INIT_LIST_HEAD(&sci->sc_write_logs);
2573  INIT_LIST_HEAD(&sci->sc_gc_inodes);
2574  init_timer(&sci->sc_timer);
2575 
2576  sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
2577  sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_FREQ;
2578  sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK;
2579 
2580  if (nilfs->ns_interval)
2581  sci->sc_interval = HZ * nilfs->ns_interval;
2582  if (nilfs->ns_watermark)
2583  sci->sc_watermark = nilfs->ns_watermark;
2584  return sci;
2585 }
2586 
2587 static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
2588 {
2589  int ret, retrycount = NILFS_SC_CLEANUP_RETRY;
2590 
2591  /* The segctord thread was stopped and its timer was removed,
2592  but some tasks remain. */
2593  do {
2594  struct nilfs_transaction_info ti;
2595 
2596  nilfs_transaction_lock(sci->sc_super, &ti, 0);
2597  ret = nilfs_segctor_construct(sci, SC_LSEG_SR);
2598  nilfs_transaction_unlock(sci->sc_super);
2599 
2600  } while (ret && retrycount-- > 0);
2601 }
2602 
2611 static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2612 {
2613  struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2614  int flag;
2615 
2616  up_write(&nilfs->ns_segctor_sem);
2617 
2618  spin_lock(&sci->sc_state_lock);
2619  nilfs_segctor_kill_thread(sci);
2620  flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request
2621  || sci->sc_seq_request != sci->sc_seq_done);
2622  spin_unlock(&sci->sc_state_lock);
2623 
2624  if (flag || !nilfs_segctor_confirm(sci))
2625  nilfs_segctor_write_out(sci);
2626 
2627  if (!list_empty(&sci->sc_dirty_files)) {
2628  nilfs_warning(sci->sc_super, __func__,
2629  "dirty file(s) after the final construction\n");
2630  nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1);
2631  }
2632 
2633  WARN_ON(!list_empty(&sci->sc_segbufs));
2634  WARN_ON(!list_empty(&sci->sc_write_logs));
2635 
2636  nilfs_put_root(sci->sc_root);
2637 
2638  down_write(&nilfs->ns_segctor_sem);
2639 
2640  del_timer_sync(&sci->sc_timer);
2641  kfree(sci);
2642 }
2643 
2657 int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
2658 {
2659  struct the_nilfs *nilfs = sb->s_fs_info;
2660  int err;
2661 
2662  if (nilfs->ns_writer) {
2663  /*
2664  * This happens if the filesystem was remounted
2665  * read/write after nilfs_error degenerated it into a
2666  * read-only mount.
2667  */
2668  nilfs_detach_log_writer(sb);
2669  }
2670 
2671  nilfs->ns_writer = nilfs_segctor_new(sb, root);
2672  if (!nilfs->ns_writer)
2673  return -ENOMEM;
2674 
2675  err = nilfs_segctor_start_thread(nilfs->ns_writer);
2676  if (err) {
2677  kfree(nilfs->ns_writer);
2678  nilfs->ns_writer = NULL;
2679  }
2680  return err;
2681 }
2682 
2690 void nilfs_detach_log_writer(struct super_block *sb)
2691 {
2692  struct the_nilfs *nilfs = sb->s_fs_info;
2693  LIST_HEAD(garbage_list);
2694 
2695  down_write(&nilfs->ns_segctor_sem);
2696  if (nilfs->ns_writer) {
2697  nilfs_segctor_destroy(nilfs->ns_writer);
2698  nilfs->ns_writer = NULL;
2699  }
2700 
2701  /* Forcibly free the list of dirty files */
2702  spin_lock(&nilfs->ns_inode_lock);
2703  if (!list_empty(&nilfs->ns_dirty_files)) {
2704  list_splice_init(&nilfs->ns_dirty_files, &garbage_list);
2705  nilfs_warning(sb, __func__,
2706  "Hit dirty file after stopped log writer\n");
2707  }
2708  spin_unlock(&nilfs->ns_inode_lock);
2709  up_write(&nilfs->ns_segctor_sem);
2710 
2711  nilfs_dispose_list(nilfs, &garbage_list, 1);
2712 }
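Attachment and detachment bracket the writable lifetime of a mount: the log writer is attached when the filesystem goes (or returns to) read/write and detached on unmount or remount read-only. A simplified, hypothetical pairing, not the literal code from super.c:

/* Hypothetical sketch of the attach/detach pairing on the mount path. */
static int sketch_enable_log_writes(struct super_block *sb,
                                    struct nilfs_root *root)
{
        return nilfs_attach_log_writer(sb, root);  /* spawns segctord */
}

static void sketch_disable_log_writes(struct super_block *sb)
{
        nilfs_detach_log_writer(sb);  /* stops segctord, disposes dirty files */
}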