Linux Kernel 3.7.1
log.c
/*
 * This file is part of UBIFS.
 *
 * Copyright (C) 2006-2008 Nokia Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Authors: Artem Bityutskiy (Битюцкий Артём)
 *          Adrian Hunter
 */

/*
 * This file is a part of UBIFS journal implementation and contains various
 * functions which manipulate the log. The log is a fixed area on the flash
 * which does not contain any data but refers to buds. The log is a part of the
 * journal.
 */

#include "ubifs.h"

static int dbg_check_bud_bytes(struct ubifs_info *c);

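/*
 * ubifs_search_bud - search bud LEB.
 * @c: UBIFS file-system description object
 * @lnum: logical eraseblock number to search
 *
 * This function searches bud LEB @lnum in the @c->buds tree. Returns the bud
 * description object in case of success and %NULL if there is no bud with
 * this LEB number.
 */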
struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum)
{
	struct rb_node *p;
	struct ubifs_bud *bud;

	spin_lock(&c->buds_lock);
	p = c->buds.rb_node;
	while (p) {
		bud = rb_entry(p, struct ubifs_bud, rb);
		if (lnum < bud->lnum)
			p = p->rb_left;
		else if (lnum > bud->lnum)
			p = p->rb_right;
		else {
			spin_unlock(&c->buds_lock);
			return bud;
		}
	}
	spin_unlock(&c->buds_lock);
	return NULL;
}

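/*
 * ubifs_get_wbuf - get the wbuf associated with a LEB, if there is one.
 * @c: UBIFS file-system description object
 * @lnum: logical eraseblock number to search
 *
 * This function returns the write-buffer of the journal head which owns bud
 * LEB @lnum, or %NULL if the LEB is not a bud or the journal heads are not
 * initialized yet.
 */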
struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum)
{
	struct rb_node *p;
	struct ubifs_bud *bud;
	int jhead;

	if (!c->jheads)
		return NULL;

	spin_lock(&c->buds_lock);
	p = c->buds.rb_node;
	while (p) {
		bud = rb_entry(p, struct ubifs_bud, rb);
		if (lnum < bud->lnum)
			p = p->rb_left;
		else if (lnum > bud->lnum)
			p = p->rb_right;
		else {
			jhead = bud->jhead;
			spin_unlock(&c->buds_lock);
			return &c->jheads[jhead].wbuf;
		}
	}
	spin_unlock(&c->buds_lock);
	return NULL;
}

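/*
 * empty_log_bytes - calculate amount of empty space in the log.
 * @c: UBIFS file-system description object
 */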
static inline long long empty_log_bytes(const struct ubifs_info *c)
{
	long long h, t;

	h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs;
	t = (long long)c->ltail_lnum * c->leb_size;

	if (h >= t)
		return c->log_bytes - h + t;
	else
		return t - h;
}

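/*
 * ubifs_add_bud - add bud LEB to the tree of buds and its journal head list.
 * @c: UBIFS file-system description object
 * @bud: the bud to add
 */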
void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
{
	struct rb_node **p, *parent = NULL;
	struct ubifs_bud *b;
	struct ubifs_jhead *jhead;

	spin_lock(&c->buds_lock);
	p = &c->buds.rb_node;
	while (*p) {
		parent = *p;
		b = rb_entry(parent, struct ubifs_bud, rb);
		ubifs_assert(bud->lnum != b->lnum);
		if (bud->lnum < b->lnum)
			p = &(*p)->rb_left;
		else
			p = &(*p)->rb_right;
	}

	rb_link_node(&bud->rb, parent, p);
	rb_insert_color(&bud->rb, &c->buds);
	if (c->jheads) {
		jhead = &c->jheads[bud->jhead];
		list_add_tail(&bud->list, &jhead->buds_list);
	} else
		ubifs_assert(c->replaying && c->ro_mount);

	/*
	 * Note, although this is a new bud, we anyway account this space now,
	 * before any data has been written to it, because this is about to
	 * guarantee fixed mount time, and this bud will anyway be read and
	 * scanned.
	 */
	c->bud_bytes += c->leb_size - bud->start;

	dbg_log("LEB %d:%d, jhead %s, bud_bytes %lld", bud->lnum,
		bud->start, dbg_jhead(bud->jhead), c->bud_bytes);
	spin_unlock(&c->buds_lock);
}

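/*
 * ubifs_add_bud_to_log - add a new bud to the log.
 * @c: UBIFS file-system description object
 * @jhead: journal head the bud belongs to
 * @lnum: LEB number of the bud
 * @offs: starting offset of the bud
 *
 * This function writes a reference node for the new bud LEB @lnum to the log,
 * and adds it to the buds tree. It also makes sure that log size does not
 * exceed the 'c->max_bud_bytes' limit. Returns zero in case of success,
 * %-EAGAIN if commit is required, and a negative error code in case of
 * failure.
 */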
int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
{
	int err;
	struct ubifs_bud *bud;
	struct ubifs_ref_node *ref;

	bud = kmalloc(sizeof(struct ubifs_bud), GFP_NOFS);
	if (!bud)
		return -ENOMEM;
	ref = kzalloc(c->ref_node_alsz, GFP_NOFS);
	if (!ref) {
		kfree(bud);
		return -ENOMEM;
	}

	mutex_lock(&c->log_mutex);
	ubifs_assert(!c->ro_media && !c->ro_mount);
	if (c->ro_error) {
		err = -EROFS;
		goto out_unlock;
	}

	/* Make sure we have enough space in the log */
	if (empty_log_bytes(c) - c->ref_node_alsz < c->min_log_bytes) {
		dbg_log("not enough log space - %lld, required %d",
			empty_log_bytes(c), c->min_log_bytes);
		ubifs_commit_required(c);
		err = -EAGAIN;
		goto out_unlock;
	}

	/*
	 * Make sure the amount of space in buds will not exceed the
	 * 'c->max_bud_bytes' limit, because we want to guarantee mount time
	 * limits.
	 *
	 * It is not necessary to hold @c->buds_lock when reading @c->bud_bytes
	 * because we are holding @c->log_mutex. All @c->bud_bytes changes take
	 * place when both @c->log_mutex and @c->buds_lock are locked.
	 */
	if (c->bud_bytes + c->leb_size - offs > c->max_bud_bytes) {
		dbg_log("bud bytes %lld (%lld max), require commit",
			c->bud_bytes, c->max_bud_bytes);
		ubifs_commit_required(c);
		err = -EAGAIN;
		goto out_unlock;
	}

	/*
	 * If the journal is full enough - start background commit. Note, it is
	 * OK to read 'c->cmt_state' without spinlock because integer reads
	 * are atomic in the kernel.
	 */
	if (c->bud_bytes >= c->bg_bud_bytes &&
	    c->cmt_state == COMMIT_RESTING) {
		dbg_log("bud bytes %lld (%lld max), initiate BG commit",
			c->bud_bytes, c->max_bud_bytes);
		ubifs_request_bg_commit(c);
	}

	bud->lnum = lnum;
	bud->start = offs;
	bud->jhead = jhead;

	ref->ch.node_type = UBIFS_REF_NODE;
	ref->lnum = cpu_to_le32(bud->lnum);
	ref->offs = cpu_to_le32(bud->start);
	ref->jhead = cpu_to_le32(jhead);

	if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
		c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
		c->lhead_offs = 0;
	}

	if (c->lhead_offs == 0) {
		/* Must ensure next log LEB has been unmapped */
		err = ubifs_leb_unmap(c, c->lhead_lnum);
		if (err)
			goto out_unlock;
	}

	if (bud->start == 0) {
		/*
		 * Before writing the LEB reference which refers an empty LEB
		 * to the log, we have to make sure it is mapped, because
		 * otherwise we would risk referring to an LEB with garbage in
		 * case of an unclean reboot, because the target LEB might have
		 * been unmapped, but not yet physically erased.
		 */
		err = ubifs_leb_map(c, bud->lnum);
		if (err)
			goto out_unlock;
	}

	dbg_log("write ref LEB %d:%d",
		c->lhead_lnum, c->lhead_offs);
	err = ubifs_write_node(c, ref, UBIFS_REF_NODE_SZ, c->lhead_lnum,
			       c->lhead_offs);
	if (err)
		goto out_unlock;

	c->lhead_offs += c->ref_node_alsz;

	ubifs_add_bud(c, bud);

	mutex_unlock(&c->log_mutex);
	kfree(ref);
	return 0;

out_unlock:
	mutex_unlock(&c->log_mutex);
	kfree(ref);
	kfree(bud);
	return err;
}

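/*
 * remove_buds - remove used buds.
 * @c: UBIFS file-system description object
 *
 * This function removes the buds which are no longer pointed to by journal
 * heads from the buds tree and moves them to the @c->old_buds list, so they
 * can be released after the commit finishes. Buds which are still pointed to
 * by journal heads are preserved, and only their committed part is accounted
 * in @c->cmt_bud_bytes.
 */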
static void remove_buds(struct ubifs_info *c)
{
	struct rb_node *p;

	ubifs_assert(list_empty(&c->old_buds));
	c->cmt_bud_bytes = 0;
	spin_lock(&c->buds_lock);
	p = rb_first(&c->buds);
	while (p) {
		struct rb_node *p1 = p;
		struct ubifs_bud *bud;
		struct ubifs_wbuf *wbuf;

		p = rb_next(p);
		bud = rb_entry(p1, struct ubifs_bud, rb);
		wbuf = &c->jheads[bud->jhead].wbuf;

		if (wbuf->lnum == bud->lnum) {
			/*
			 * Do not remove buds which are pointed to by journal
			 * heads (non-closed buds).
			 */
			c->cmt_bud_bytes += wbuf->offs - bud->start;
			dbg_log("preserve %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld",
				bud->lnum, bud->start, dbg_jhead(bud->jhead),
				wbuf->offs - bud->start, c->cmt_bud_bytes);
			bud->start = wbuf->offs;
		} else {
			c->cmt_bud_bytes += c->leb_size - bud->start;
			dbg_log("remove %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld",
				bud->lnum, bud->start, dbg_jhead(bud->jhead),
				c->leb_size - bud->start, c->cmt_bud_bytes);
			rb_erase(p1, &c->buds);
			/*
			 * If the commit does not finish, the recovery will need
			 * to replay the journal, in which case the old buds
			 * must be unchanged. Do not release them until post
			 * commit i.e. do not allow them to be garbage
			 * collected.
			 */
			list_move(&bud->list, &c->old_buds);
		}
	}
	spin_unlock(&c->buds_lock);
}

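/*
 * ubifs_log_start_commit - start commit.
 * @c: UBIFS file-system description object
 * @ltail_lnum: return new log tail LEB number
 *
 * The commit operation starts with writing a "commit start" node to the log
 * and reference nodes for all journal heads which define the new journal after
 * the commit has finished. The commit start and reference nodes are written in
 * one go to the nearest empty log LEB, so when the commit is finished UBIFS
 * may safely unmap all the previous log LEBs. This function returns zero in
 * case of success and a negative error code in case of failure.
 */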
int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
{
	void *buf;
	struct ubifs_cs_node *cs;
	struct ubifs_ref_node *ref;
	int err, i, max_len, len;

	err = dbg_check_bud_bytes(c);
	if (err)
		return err;

	max_len = UBIFS_CS_NODE_SZ + c->jhead_cnt * UBIFS_REF_NODE_SZ;
	max_len = ALIGN(max_len, c->min_io_size);
	buf = cs = kmalloc(max_len, GFP_NOFS);
	if (!buf)
		return -ENOMEM;

	cs->ch.node_type = UBIFS_CS_NODE;
	cs->cmt_no = cpu_to_le64(c->cmt_no);
	ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0);

	/*
	 * Note, we do not lock 'c->log_mutex' because this is the commit start
	 * phase and we are exclusively using the log. And we do not lock
	 * write-buffer because nobody can write to the file-system at this
	 * phase.
	 */

	len = UBIFS_CS_NODE_SZ;
	for (i = 0; i < c->jhead_cnt; i++) {
		int lnum = c->jheads[i].wbuf.lnum;
		int offs = c->jheads[i].wbuf.offs;

		if (lnum == -1 || offs == c->leb_size)
			continue;

		dbg_log("add ref to LEB %d:%d for jhead %s",
			lnum, offs, dbg_jhead(i));
		ref = buf + len;
		ref->ch.node_type = UBIFS_REF_NODE;
		ref->lnum = cpu_to_le32(lnum);
		ref->offs = cpu_to_le32(offs);
		ref->jhead = cpu_to_le32(i);

		ubifs_prepare_node(c, ref, UBIFS_REF_NODE_SZ, 0);
		len += UBIFS_REF_NODE_SZ;
	}

	ubifs_pad(c, buf + len, ALIGN(len, c->min_io_size) - len);

	/* Switch to the next log LEB */
	if (c->lhead_offs) {
		c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
		c->lhead_offs = 0;
	}

	if (c->lhead_offs == 0) {
		/* Must ensure next LEB has been unmapped */
		err = ubifs_leb_unmap(c, c->lhead_lnum);
		if (err)
			goto out;
	}

	len = ALIGN(len, c->min_io_size);
	dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len);
	err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len);
	if (err)
		goto out;

	*ltail_lnum = c->lhead_lnum;

	c->lhead_offs += len;
	if (c->lhead_offs == c->leb_size) {
		c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
		c->lhead_offs = 0;
	}

	remove_buds(c);

	/*
	 * We have started the commit and now users may use the rest of the log
	 * for new writes.
	 */
	c->min_log_bytes = 0;

out:
	kfree(buf);
	return err;
}

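/*
 * ubifs_log_end_commit - end commit.
 * @c: UBIFS file-system description object
 * @ltail_lnum: new log tail LEB number
 *
 * This function is called when the commit operation has finished. It moves the
 * log tail to @ltail_lnum, restores the minimum log space reservation and
 * subtracts the committed bud bytes from @c->bud_bytes. Returns zero in case
 * of success and a negative error code in case of failure.
 */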
int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum)
{
	int err;

	/*
	 * At this phase we have to lock 'c->log_mutex' because UBIFS allows FS
	 * writes during commit. It is only during the short "commit start"
	 * phase that writers are blocked.
	 */
	mutex_lock(&c->log_mutex);

	dbg_log("old tail was LEB %d:0, new tail is LEB %d:0",
		c->ltail_lnum, ltail_lnum);

	c->ltail_lnum = ltail_lnum;
	/*
	 * The commit is finished and from now on it must be guaranteed that
	 * there is always enough space for the next commit.
	 */
	c->min_log_bytes = c->leb_size;

	spin_lock(&c->buds_lock);
	c->bud_bytes -= c->cmt_bud_bytes;
	spin_unlock(&c->buds_lock);

	err = dbg_check_bud_bytes(c);

	mutex_unlock(&c->log_mutex);
	return err;
}

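/*
 * ubifs_log_post_commit - things to do after commit is completed.
 * @c: UBIFS file-system description object
 * @old_ltail_lnum: old log tail LEB number
 *
 * Release buds only after the commit is completed, because they must be
 * unchanged if recovery is needed. Similarly, unmap the old log LEBs only
 * after the commit is completed. This function returns %0 on success and a
 * negative error code on failure.
 */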
int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
{
	int lnum, err = 0;

	while (!list_empty(&c->old_buds)) {
		struct ubifs_bud *bud;

		bud = list_entry(c->old_buds.next, struct ubifs_bud, list);
		err = ubifs_return_leb(c, bud->lnum);
		if (err)
			return err;
		list_del(&bud->list);
		kfree(bud);
	}
	mutex_lock(&c->log_mutex);
	for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
	     lnum = ubifs_next_log_lnum(c, lnum)) {
		dbg_log("unmap log LEB %d", lnum);
		err = ubifs_leb_unmap(c, lnum);
		if (err)
			goto out;
	}
out:
	mutex_unlock(&c->log_mutex);
	return err;
}

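/*
 * struct done_ref - references that have been done.
 * @rb: rb-tree node
 * @lnum: LEB number
 */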
struct done_ref {
	struct rb_node rb;
	int lnum;
};

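/*
 * done_already - determine if a reference has been done already.
 * @done_tree: rb-tree to store references that have been done
 * @lnum: LEB number of reference
 *
 * This function returns %1 if the reference has been done, %0 if not, and a
 * negative error code if an error occurred.
 */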
static int done_already(struct rb_root *done_tree, int lnum)
{
	struct rb_node **p = &done_tree->rb_node, *parent = NULL;
	struct done_ref *dr;

	while (*p) {
		parent = *p;
		dr = rb_entry(parent, struct done_ref, rb);
		if (lnum < dr->lnum)
			p = &(*p)->rb_left;
		else if (lnum > dr->lnum)
			p = &(*p)->rb_right;
		else
			return 1;
	}

	dr = kzalloc(sizeof(struct done_ref), GFP_NOFS);
	if (!dr)
		return -ENOMEM;

	dr->lnum = lnum;

	rb_link_node(&dr->rb, parent, p);
	rb_insert_color(&dr->rb, done_tree);

	return 0;
}

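/*
 * destroy_done_tree - destroy the done tree.
 * @done_tree: done tree to destroy
 */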
static void destroy_done_tree(struct rb_root *done_tree)
{
	struct rb_node *this = done_tree->rb_node;
	struct done_ref *dr;

	while (this) {
		if (this->rb_left) {
			this = this->rb_left;
			continue;
		} else if (this->rb_right) {
			this = this->rb_right;
			continue;
		}
		dr = rb_entry(this, struct done_ref, rb);
		this = rb_parent(this);
		if (this) {
			if (this->rb_left == &dr->rb)
				this->rb_left = NULL;
			else
				this->rb_right = NULL;
		}
		kfree(dr);
	}
}

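/*
 * add_node - add a node to the consolidated log.
 * @c: UBIFS file-system description object
 * @buf: buffer in which to accumulate the consolidated log
 * @lnum: LEB number to which to write is passed and returned here
 * @offs: offset to which to write is passed and returned here
 * @node: node to add
 *
 * This function returns %0 on success and a negative error code on failure.
 */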
static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
		    void *node)
{
	struct ubifs_ch *ch = node;
	int len = le32_to_cpu(ch->len), remains = c->leb_size - *offs;

	if (len > remains) {
		int sz = ALIGN(*offs, c->min_io_size), err;

		ubifs_pad(c, buf + *offs, sz - *offs);
		err = ubifs_leb_change(c, *lnum, buf, sz);
		if (err)
			return err;
		*lnum = ubifs_next_log_lnum(c, *lnum);
		*offs = 0;
	}
	memcpy(buf + *offs, node, len);
	*offs += ALIGN(len, 8);
	return 0;
}

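/*
 * ubifs_consolidate_log - consolidate the log.
 * @c: UBIFS file-system description object
 *
 * This function re-writes the log, keeping only the first commit start node
 * and a single reference node per referred LEB, leaving no gaps, and unmaps
 * the log LEBs which are freed up by this. It is used during recovery when the
 * log has become too full. Returns %0 on success and a negative error code on
 * failure.
 */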
int ubifs_consolidate_log(struct ubifs_info *c)
{
	struct ubifs_scan_leb *sleb;
	struct ubifs_scan_node *snod;
	struct rb_root done_tree = RB_ROOT;
	int lnum, err, first = 1, write_lnum, offs = 0;
	void *buf;

	dbg_rcvry("log tail LEB %d, log head LEB %d", c->ltail_lnum,
		  c->lhead_lnum);
	buf = vmalloc(c->leb_size);
	if (!buf)
		return -ENOMEM;
	lnum = c->ltail_lnum;
	write_lnum = lnum;
	while (1) {
		sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0);
		if (IS_ERR(sleb)) {
			err = PTR_ERR(sleb);
			goto out_free;
		}
		list_for_each_entry(snod, &sleb->nodes, list) {
			switch (snod->type) {
			case UBIFS_REF_NODE: {
				struct ubifs_ref_node *ref = snod->node;
				int ref_lnum = le32_to_cpu(ref->lnum);

				err = done_already(&done_tree, ref_lnum);
				if (err < 0)
					goto out_scan;
				if (err != 1) {
					err = add_node(c, buf, &write_lnum,
						       &offs, snod->node);
					if (err)
						goto out_scan;
				}
				break;
			}
			case UBIFS_CS_NODE:
				if (!first)
					break;
				err = add_node(c, buf, &write_lnum, &offs,
					       snod->node);
				if (err)
					goto out_scan;
				first = 0;
				break;
			}
		}
		ubifs_scan_destroy(sleb);
		if (lnum == c->lhead_lnum)
			break;
		lnum = ubifs_next_log_lnum(c, lnum);
	}
	if (offs) {
		int sz = ALIGN(offs, c->min_io_size);

		ubifs_pad(c, buf + offs, sz - offs);
		err = ubifs_leb_change(c, write_lnum, buf, sz);
		if (err)
			goto out_free;
		offs = ALIGN(offs, c->min_io_size);
	}
	destroy_done_tree(&done_tree);
	vfree(buf);
	if (write_lnum == c->lhead_lnum) {
		ubifs_err("log is too full");
		return -EINVAL;
	}
	/* Unmap remaining LEBs */
	lnum = write_lnum;
	do {
		lnum = ubifs_next_log_lnum(c, lnum);
		err = ubifs_leb_unmap(c, lnum);
		if (err)
			return err;
	} while (lnum != c->lhead_lnum);
	c->lhead_lnum = write_lnum;
	c->lhead_offs = offs;
	dbg_rcvry("new log head at %d:%d", c->lhead_lnum, c->lhead_offs);
	return 0;

out_scan:
	ubifs_scan_destroy(sleb);
out_free:
	destroy_done_tree(&done_tree);
	vfree(buf);
	return err;
}

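/*
 * dbg_check_bud_bytes - make sure the bud bytes calculation is correct.
 * @c: UBIFS file-system description object
 *
 * This function makes sure the amount of flash space used by buds
 * ('c->bud_bytes') is correct. Returns zero in case of success and %-EINVAL in
 * case of failure.
 */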
static int dbg_check_bud_bytes(struct ubifs_info *c)
{
	int i, err = 0;
	struct ubifs_bud *bud;
	long long bud_bytes = 0;

	if (!dbg_is_chk_gen(c))
		return 0;

	spin_lock(&c->buds_lock);
	for (i = 0; i < c->jhead_cnt; i++)
		list_for_each_entry(bud, &c->jheads[i].buds_list, list)
			bud_bytes += c->leb_size - bud->start;

	if (c->bud_bytes != bud_bytes) {
		ubifs_err("bad bud_bytes %lld, calculated %lld",
			  c->bud_bytes, bud_bytes);
		err = -EINVAL;
	}
	spin_unlock(&c->buds_lock);

	return err;
}