fs/ubifs/commit.c — UBIFS commit-process implementation.
Listing extracted from the Linux kernel 3.7.1 source documentation.
/*
 * This file is part of UBIFS.
 *
 * Copyright (C) 2006-2008 Nokia Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Authors: Adrian Hunter
 *          Artem Bityutskiy (Битюцкий Артём)
 */
22 
/*
 * This file implements functions that manage the running of the commit process.
 * Each affected module has its own functions to accomplish their part in the
 * commit and those functions are called here.
 *
 * The commit is the process whereby all updates to the index and LEB properties
 * are written out together and the journal becomes empty. This keeps the
 * file system consistent - at all times the state can be recreated by reading
 * the index and LEB properties and then replaying the journal.
 *
 * The commit is split into two parts named "commit start" and "commit end".
 * During commit start, the commit process has exclusive access to the journal
 * by holding the commit semaphore down for writing. As few I/O operations as
 * possible are performed during commit start, instead the nodes that are to be
 * written are merely identified. During commit end, the commit semaphore is no
 * longer held and the journal is again in operation, allowing users to continue
 * to use the file system while the bulk of the commit I/O is performed. The
 * purpose of this two-step approach is to prevent the commit from causing any
 * latency blips. Note that in any case, the commit does not prevent lookups
 * (as permitted by the TNC mutex), or access to VFS data structures e.g. page
 * cache.
 */
45 
46 #include <linux/freezer.h>
47 #include <linux/kthread.h>
48 #include <linux/slab.h>
49 #include "ubifs.h"
50 
/**
 * nothing_to_commit - check if there is nothing to commit.
 * @c: UBIFS file-system description object
 *
 * This is a helper function which checks if there is anything to commit. It is
 * used as an optimization to avoid starting the commit if it is not really
 * necessary. Indeed, the commit operation always assumes flash I/O (e.g.,
 * writing the commit start node to the log), and it is better to avoid doing
 * this unnecessarily. E.g., 'ubifs_sync_fs()' runs the commit, but if there is
 * nothing to commit, it is more optimal to avoid any flash I/O.
 *
 * This function has to be called with @c->commit_sem locked for writing -
 * this function does not take LPT/TNC locks because the @c->commit_sem
 * guarantees that we have exclusive access to the TNC and LPT data structures.
 *
 * This function returns %1 if there is nothing to commit and %0 otherwise.
 */
68 static int nothing_to_commit(struct ubifs_info *c)
69 {
70  /*
71  * During mounting or remounting from R/O mode to R/W mode we may
72  * commit for various recovery-related reasons.
73  */
74  if (c->mounting || c->remounting_rw)
75  return 0;
76 
77  /*
78  * If the root TNC node is dirty, we definitely have something to
79  * commit.
80  */
81  if (c->zroot.znode && ubifs_zn_dirty(c->zroot.znode))
82  return 0;
83 
84  /*
85  * Even though the TNC is clean, the LPT tree may have dirty nodes. For
86  * example, this may happen if the budgeting subsystem invoked GC to
87  * make some free space, and the GC found an LEB with only dirty and
88  * free space. In this case GC would just change the lprops of this
89  * LEB (by turning all space into free space) and unmap it.
90  */
91  if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags))
92  return 0;
93 
94  ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0);
95  ubifs_assert(c->dirty_pn_cnt == 0);
96  ubifs_assert(c->dirty_nn_cnt == 0);
97 
98  return 1;
99 }
100 
109 static int do_commit(struct ubifs_info *c)
110 {
111  int err, new_ltail_lnum, old_ltail_lnum, i;
112  struct ubifs_zbranch zroot;
113  struct ubifs_lp_stats lst;
114 
115  dbg_cmt("start");
116  ubifs_assert(!c->ro_media && !c->ro_mount);
117 
118  if (c->ro_error) {
119  err = -EROFS;
120  goto out_up;
121  }
122 
123  if (nothing_to_commit(c)) {
124  up_write(&c->commit_sem);
125  err = 0;
126  goto out_cancel;
127  }
128 
129  /* Sync all write buffers (necessary for recovery) */
130  for (i = 0; i < c->jhead_cnt; i++) {
131  err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
132  if (err)
133  goto out_up;
134  }
135 
136  c->cmt_no += 1;
137  err = ubifs_gc_start_commit(c);
138  if (err)
139  goto out_up;
140  err = dbg_check_lprops(c);
141  if (err)
142  goto out_up;
143  err = ubifs_log_start_commit(c, &new_ltail_lnum);
144  if (err)
145  goto out_up;
146  err = ubifs_tnc_start_commit(c, &zroot);
147  if (err)
148  goto out_up;
149  err = ubifs_lpt_start_commit(c);
150  if (err)
151  goto out_up;
152  err = ubifs_orphan_start_commit(c);
153  if (err)
154  goto out_up;
155 
156  ubifs_get_lp_stats(c, &lst);
157 
158  up_write(&c->commit_sem);
159 
160  err = ubifs_tnc_end_commit(c);
161  if (err)
162  goto out;
163  err = ubifs_lpt_end_commit(c);
164  if (err)
165  goto out;
166  err = ubifs_orphan_end_commit(c);
167  if (err)
168  goto out;
169  old_ltail_lnum = c->ltail_lnum;
170  err = ubifs_log_end_commit(c, new_ltail_lnum);
171  if (err)
172  goto out;
173  err = dbg_check_old_index(c, &zroot);
174  if (err)
175  goto out;
176 
177  mutex_lock(&c->mst_mutex);
178  c->mst_node->cmt_no = cpu_to_le64(c->cmt_no);
179  c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum);
180  c->mst_node->root_lnum = cpu_to_le32(zroot.lnum);
181  c->mst_node->root_offs = cpu_to_le32(zroot.offs);
182  c->mst_node->root_len = cpu_to_le32(zroot.len);
183  c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum);
184  c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs);
185  c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz);
186  c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum);
187  c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs);
188  c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum);
189  c->mst_node->nhead_offs = cpu_to_le32(c->nhead_offs);
190  c->mst_node->ltab_lnum = cpu_to_le32(c->ltab_lnum);
191  c->mst_node->ltab_offs = cpu_to_le32(c->ltab_offs);
192  c->mst_node->lsave_lnum = cpu_to_le32(c->lsave_lnum);
193  c->mst_node->lsave_offs = cpu_to_le32(c->lsave_offs);
194  c->mst_node->lscan_lnum = cpu_to_le32(c->lscan_lnum);
195  c->mst_node->empty_lebs = cpu_to_le32(lst.empty_lebs);
196  c->mst_node->idx_lebs = cpu_to_le32(lst.idx_lebs);
197  c->mst_node->total_free = cpu_to_le64(lst.total_free);
198  c->mst_node->total_dirty = cpu_to_le64(lst.total_dirty);
199  c->mst_node->total_used = cpu_to_le64(lst.total_used);
200  c->mst_node->total_dead = cpu_to_le64(lst.total_dead);
201  c->mst_node->total_dark = cpu_to_le64(lst.total_dark);
202  if (c->no_orphs)
204  else
205  c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS);
206  err = ubifs_write_master(c);
207  mutex_unlock(&c->mst_mutex);
208  if (err)
209  goto out;
210 
211  err = ubifs_log_post_commit(c, old_ltail_lnum);
212  if (err)
213  goto out;
214  err = ubifs_gc_end_commit(c);
215  if (err)
216  goto out;
217  err = ubifs_lpt_post_commit(c);
218  if (err)
219  goto out;
220 
221 out_cancel:
222  spin_lock(&c->cs_lock);
224  wake_up(&c->cmt_wq);
225  dbg_cmt("commit end");
226  spin_unlock(&c->cs_lock);
227  return 0;
228 
229 out_up:
230  up_write(&c->commit_sem);
231 out:
232  ubifs_err("commit failed, error %d", err);
233  spin_lock(&c->cs_lock);
235  wake_up(&c->cmt_wq);
236  spin_unlock(&c->cs_lock);
237  ubifs_ro_mode(c, err);
238  return err;
239 }
240 
248 static int run_bg_commit(struct ubifs_info *c)
249 {
250  spin_lock(&c->cs_lock);
251  /*
252  * Run background commit only if background commit was requested or if
253  * commit is required.
254  */
255  if (c->cmt_state != COMMIT_BACKGROUND &&
257  goto out;
258  spin_unlock(&c->cs_lock);
259 
260  down_write(&c->commit_sem);
261  spin_lock(&c->cs_lock);
262  if (c->cmt_state == COMMIT_REQUIRED)
264  else if (c->cmt_state == COMMIT_BACKGROUND)
266  else
267  goto out_cmt_unlock;
268  spin_unlock(&c->cs_lock);
269 
270  return do_commit(c);
271 
272 out_cmt_unlock:
273  up_write(&c->commit_sem);
274 out:
275  spin_unlock(&c->cs_lock);
276  return 0;
277 }
278 
292 {
293  int err;
294  struct ubifs_info *c = info;
295 
296  ubifs_msg("background thread \"%s\" started, PID %d",
297  c->bgt_name, current->pid);
298  set_freezable();
299 
300  while (1) {
301  if (kthread_should_stop())
302  break;
303 
304  if (try_to_freeze())
305  continue;
306 
308  /* Check if there is something to do */
309  if (!c->need_bgt) {
310  /*
311  * Nothing prevents us from going sleep now and
312  * be never woken up and block the task which
313  * could wait in 'kthread_stop()' forever.
314  */
315  if (kthread_should_stop())
316  break;
317  schedule();
318  continue;
319  } else
321 
322  c->need_bgt = 0;
323  err = ubifs_bg_wbufs_sync(c);
324  if (err)
325  ubifs_ro_mode(c, err);
326 
327  run_bg_commit(c);
328  cond_resched();
329  }
330 
331  ubifs_msg("background thread \"%s\" stops", c->bgt_name);
332  return 0;
333 }
334 
343 {
344  spin_lock(&c->cs_lock);
345  switch (c->cmt_state) {
346  case COMMIT_RESTING:
347  case COMMIT_BACKGROUND:
348  dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
351  break;
353  dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
356  break;
357  case COMMIT_REQUIRED:
359  case COMMIT_BROKEN:
360  break;
361  }
362  spin_unlock(&c->cs_lock);
363 }
364 
373 {
374  spin_lock(&c->cs_lock);
375  if (c->cmt_state == COMMIT_RESTING) {
376  dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
379  spin_unlock(&c->cs_lock);
380  ubifs_wake_up_bgt(c);
381  } else
382  spin_unlock(&c->cs_lock);
383 }
384 
391 static int wait_for_commit(struct ubifs_info *c)
392 {
393  dbg_cmt("pid %d goes sleep", current->pid);
394 
395  /*
396  * The following sleeps if the condition is false, and will be woken
397  * when the commit ends. It is possible, although very unlikely, that we
398  * will wake up and see the subsequent commit running, rather than the
399  * one we were waiting for, and go back to sleep. However, we will be
400  * woken again, so there is no danger of sleeping forever.
401  */
404  dbg_cmt("commit finished, pid %d woke up", current->pid);
405  return 0;
406 }
407 
416 {
417  int err = 0;
418 
419  spin_lock(&c->cs_lock);
420  if (c->cmt_state == COMMIT_BROKEN) {
421  err = -EROFS;
422  goto out;
423  }
424 
426  /*
427  * We set the commit state to 'running required' to indicate
428  * that we want it to complete as quickly as possible.
429  */
431 
433  spin_unlock(&c->cs_lock);
434  return wait_for_commit(c);
435  }
436  spin_unlock(&c->cs_lock);
437 
438  /* Ok, the commit is indeed needed */
439 
440  down_write(&c->commit_sem);
441  spin_lock(&c->cs_lock);
442  /*
443  * Since we unlocked 'c->cs_lock', the state may have changed, so
444  * re-check it.
445  */
446  if (c->cmt_state == COMMIT_BROKEN) {
447  err = -EROFS;
448  goto out_cmt_unlock;
449  }
450 
453 
455  up_write(&c->commit_sem);
456  spin_unlock(&c->cs_lock);
457  return wait_for_commit(c);
458  }
460  spin_unlock(&c->cs_lock);
461 
462  err = do_commit(c);
463  return err;
464 
465 out_cmt_unlock:
466  up_write(&c->commit_sem);
467 out:
468  spin_unlock(&c->cs_lock);
469  return err;
470 }
471 
484 {
485  int ret = 0;
486 
487  spin_lock(&c->cs_lock);
488  if (c->cmt_state == COMMIT_BACKGROUND) {
489  dbg_cmt("commit required now");
491  } else
492  dbg_cmt("commit not requested");
493  if (c->cmt_state == COMMIT_REQUIRED)
494  ret = 1;
495  spin_unlock(&c->cs_lock);
496  return ret;
497 }
498 
/*
 * Everything below is related to debugging.
 */
502 
513 struct idx_node {
514  struct list_head list;
515  int iip;
517  struct ubifs_idx_node idx __aligned(8);
518 };
519 
530 int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot)
531 {
532  struct ubifs_idx_node *idx;
533  int lnum, offs, len, err = 0;
534  struct ubifs_debug_info *d = c->dbg;
535 
536  d->old_zroot = *zroot;
537  lnum = d->old_zroot.lnum;
538  offs = d->old_zroot.offs;
539  len = d->old_zroot.len;
540 
541  idx = kmalloc(c->max_idx_node_sz, GFP_NOFS);
542  if (!idx)
543  return -ENOMEM;
544 
545  err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs);
546  if (err)
547  goto out;
548 
549  d->old_zroot_level = le16_to_cpu(idx->level);
550  d->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum);
551 out:
552  kfree(idx);
553  return err;
554 }
555 
569 int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
570 {
571  int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt;
572  int first = 1, iip;
573  struct ubifs_debug_info *d = c->dbg;
574  union ubifs_key uninitialized_var(lower_key), upper_key, l_key, u_key;
575  unsigned long long uninitialized_var(last_sqnum);
576  struct ubifs_idx_node *idx;
577  struct list_head list;
578  struct idx_node *i;
579  size_t sz;
580 
581  if (!dbg_is_chk_index(c))
582  return 0;
583 
584  INIT_LIST_HEAD(&list);
585 
586  sz = sizeof(struct idx_node) + ubifs_idx_node_sz(c, c->fanout) -
587  UBIFS_IDX_NODE_SZ;
588 
589  /* Start at the old zroot */
590  lnum = d->old_zroot.lnum;
591  offs = d->old_zroot.offs;
592  len = d->old_zroot.len;
593  iip = 0;
594 
595  /*
596  * Traverse the index tree preorder depth-first i.e. do a node and then
597  * its subtrees from left to right.
598  */
599  while (1) {
600  struct ubifs_branch *br;
601 
602  /* Get the next index node */
603  i = kmalloc(sz, GFP_NOFS);
604  if (!i) {
605  err = -ENOMEM;
606  goto out_free;
607  }
608  i->iip = iip;
609  /* Keep the index nodes on our path in a linked list */
610  list_add_tail(&i->list, &list);
611  /* Read the index node */
612  idx = &i->idx;
613  err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs);
614  if (err)
615  goto out_free;
616  /* Validate index node */
617  child_cnt = le16_to_cpu(idx->child_cnt);
618  if (child_cnt < 1 || child_cnt > c->fanout) {
619  err = 1;
620  goto out_dump;
621  }
622  if (first) {
623  first = 0;
624  /* Check root level and sqnum */
625  if (le16_to_cpu(idx->level) != d->old_zroot_level) {
626  err = 2;
627  goto out_dump;
628  }
629  if (le64_to_cpu(idx->ch.sqnum) != d->old_zroot_sqnum) {
630  err = 3;
631  goto out_dump;
632  }
633  /* Set last values as though root had a parent */
634  last_level = le16_to_cpu(idx->level) + 1;
635  last_sqnum = le64_to_cpu(idx->ch.sqnum) + 1;
636  key_read(c, ubifs_idx_key(c, idx), &lower_key);
637  highest_ino_key(c, &upper_key, INUM_WATERMARK);
638  }
639  key_copy(c, &upper_key, &i->upper_key);
640  if (le16_to_cpu(idx->level) != last_level - 1) {
641  err = 3;
642  goto out_dump;
643  }
644  /*
645  * The index is always written bottom up hence a child's sqnum
646  * is always less than the parents.
647  */
648  if (le64_to_cpu(idx->ch.sqnum) >= last_sqnum) {
649  err = 4;
650  goto out_dump;
651  }
652  /* Check key range */
653  key_read(c, ubifs_idx_key(c, idx), &l_key);
654  br = ubifs_idx_branch(c, idx, child_cnt - 1);
655  key_read(c, &br->key, &u_key);
656  if (keys_cmp(c, &lower_key, &l_key) > 0) {
657  err = 5;
658  goto out_dump;
659  }
660  if (keys_cmp(c, &upper_key, &u_key) < 0) {
661  err = 6;
662  goto out_dump;
663  }
664  if (keys_cmp(c, &upper_key, &u_key) == 0)
665  if (!is_hash_key(c, &u_key)) {
666  err = 7;
667  goto out_dump;
668  }
669  /* Go to next index node */
670  if (le16_to_cpu(idx->level) == 0) {
671  /* At the bottom, so go up until can go right */
672  while (1) {
673  /* Drop the bottom of the list */
674  list_del(&i->list);
675  kfree(i);
676  /* No more list means we are done */
677  if (list_empty(&list))
678  goto out;
679  /* Look at the new bottom */
680  i = list_entry(list.prev, struct idx_node,
681  list);
682  idx = &i->idx;
683  /* Can we go right */
684  if (iip + 1 < le16_to_cpu(idx->child_cnt)) {
685  iip = iip + 1;
686  break;
687  } else
688  /* Nope, so go up again */
689  iip = i->iip;
690  }
691  } else
692  /* Go down left */
693  iip = 0;
694  /*
695  * We have the parent in 'idx' and now we set up for reading the
696  * child pointed to by slot 'iip'.
697  */
698  last_level = le16_to_cpu(idx->level);
699  last_sqnum = le64_to_cpu(idx->ch.sqnum);
700  br = ubifs_idx_branch(c, idx, iip);
701  lnum = le32_to_cpu(br->lnum);
702  offs = le32_to_cpu(br->offs);
703  len = le32_to_cpu(br->len);
704  key_read(c, &br->key, &lower_key);
705  if (iip + 1 < le16_to_cpu(idx->child_cnt)) {
706  br = ubifs_idx_branch(c, idx, iip + 1);
707  key_read(c, &br->key, &upper_key);
708  } else
709  key_copy(c, &i->upper_key, &upper_key);
710  }
711 out:
712  err = dbg_old_index_check_init(c, zroot);
713  if (err)
714  goto out_free;
715 
716  return 0;
717 
718 out_dump:
719  ubifs_err("dumping index node (iip=%d)", i->iip);
720  ubifs_dump_node(c, idx);
721  list_del(&i->list);
722  kfree(i);
723  if (!list_empty(&list)) {
724  i = list_entry(list.prev, struct idx_node, list);
725  ubifs_err("dumping parent index node");
726  ubifs_dump_node(c, &i->idx);
727  }
728 out_free:
729  while (!list_empty(&list)) {
730  i = list_entry(list.next, struct idx_node, list);
731  list_del(&i->list);
732  kfree(i);
733  }
734  ubifs_err("failed, error %d", err);
735  if (err > 0)
736  err = -EINVAL;
737  return err;
738 }