Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
recovery.c
Go to the documentation of this file.
1 /*
2  * This file is part of UBIFS.
3  *
4  * Copyright (C) 2006-2008 Nokia Corporation
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 as published by
8  * the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program; if not, write to the Free Software Foundation, Inc., 51
17  * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18  *
19  * Authors: Adrian Hunter
20  * Artem Bityutskiy (Битюцкий Артём)
21  */
22 
23 /*
24  * This file implements functions needed to recover from unclean un-mounts.
25  * When UBIFS is mounted, it checks a flag on the master node to determine if
26  * an un-mount was completed successfully. If not, the process of mounting
27  * incorporates additional checking and fixing of on-flash data structures.
28  * UBIFS always cleans away all remnants of an unclean un-mount, so that
29  * errors do not accumulate. However UBIFS defers recovery if it is mounted
30  * read-only, and the flash is not modified in that case.
31  *
32  * The general UBIFS approach to the recovery is that it recovers from
33  * corruptions which could be caused by power cuts, but it refuses to recover
34  * from corruption caused by other reasons. And UBIFS tries to distinguish
35  * between these 2 reasons of corruptions and silently recover in the former
36  * case and loudly complain in the latter case.
37  *
38  * UBIFS writes only to erased LEBs, so it writes only to the flash space
39  * containing only 0xFFs. UBIFS also always writes strictly from the beginning
40  * of the LEB to the end. And UBIFS assumes that the underlying flash media
41  * writes in @c->max_write_size bytes at a time.
42  *
43  * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min.
44  * I/O unit corresponding to offset X to contain corrupted data, all the
45  * following min. I/O units have to contain empty space (all 0xFFs). If this is
46  * not true, the corruption cannot be the result of a power cut, and UBIFS
47  * refuses to mount.
48  */
49 
50 #include <linux/crc32.h>
51 #include <linux/slab.h>
52 #include "ubifs.h"
53 
62 static int is_empty(void *buf, int len)
63 {
64  uint8_t *p = buf;
65  int i;
66 
67  for (i = 0; i < len; i++)
68  if (*p++ != 0xff)
69  return 0;
70  return 1;
71 }
72 
81 static int first_non_ff(void *buf, int len)
82 {
83  uint8_t *p = buf;
84  int i;
85 
86  for (i = 0; i < len; i++)
87  if (*p++ != 0xff)
88  return i;
89  return -1;
90 }
91 
109 static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf,
110  struct ubifs_mst_node **mst, void **cor)
111 {
112  const int sz = c->mst_node_alsz;
113  int err, offs, len;
114  void *sbuf, *buf;
115 
116  sbuf = vmalloc(c->leb_size);
117  if (!sbuf)
118  return -ENOMEM;
119 
120  err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0);
121  if (err && err != -EBADMSG)
122  goto out_free;
123 
124  /* Find the first position that is definitely not a node */
125  offs = 0;
126  buf = sbuf;
127  len = c->leb_size;
128  while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) {
129  struct ubifs_ch *ch = buf;
130 
131  if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC)
132  break;
133  offs += sz;
134  buf += sz;
135  len -= sz;
136  }
137  /* See if there was a valid master node before that */
138  if (offs) {
139  int ret;
140 
141  offs -= sz;
142  buf -= sz;
143  len += sz;
144  ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
145  if (ret != SCANNED_A_NODE && offs) {
146  /* Could have been corruption so check one place back */
147  offs -= sz;
148  buf -= sz;
149  len += sz;
150  ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
151  if (ret != SCANNED_A_NODE)
152  /*
153  * We accept only one area of corruption because
154  * we are assuming that it was caused while
155  * trying to write a master node.
156  */
157  goto out_err;
158  }
159  if (ret == SCANNED_A_NODE) {
160  struct ubifs_ch *ch = buf;
161 
162  if (ch->node_type != UBIFS_MST_NODE)
163  goto out_err;
164  dbg_rcvry("found a master node at %d:%d", lnum, offs);
165  *mst = buf;
166  offs += sz;
167  buf += sz;
168  len -= sz;
169  }
170  }
171  /* Check for corruption */
172  if (offs < c->leb_size) {
173  if (!is_empty(buf, min_t(int, len, sz))) {
174  *cor = buf;
175  dbg_rcvry("found corruption at %d:%d", lnum, offs);
176  }
177  offs += sz;
178  buf += sz;
179  len -= sz;
180  }
181  /* Check remaining empty space */
182  if (offs < c->leb_size)
183  if (!is_empty(buf, len))
184  goto out_err;
185  *pbuf = sbuf;
186  return 0;
187 
188 out_err:
189  err = -EINVAL;
190 out_free:
191  vfree(sbuf);
192  *mst = NULL;
193  *cor = NULL;
194  return err;
195 }
196 
204 static int write_rcvrd_mst_node(struct ubifs_info *c,
205  struct ubifs_mst_node *mst)
206 {
207  int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz;
208  __le32 save_flags;
209 
210  dbg_rcvry("recovery");
211 
212  save_flags = mst->flags;
214 
216  err = ubifs_leb_change(c, lnum, mst, sz);
217  if (err)
218  goto out;
219  err = ubifs_leb_change(c, lnum + 1, mst, sz);
220  if (err)
221  goto out;
222 out:
223  mst->flags = save_flags;
224  return err;
225 }
226 
237 {
238  void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL;
239  struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst;
240  const int sz = c->mst_node_alsz;
241  int err, offs1, offs2;
242 
243  dbg_rcvry("recovery");
244 
245  err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1);
246  if (err)
247  goto out_free;
248 
249  err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2);
250  if (err)
251  goto out_free;
252 
253  if (mst1) {
254  offs1 = (void *)mst1 - buf1;
255  if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) &&
256  (offs1 == 0 && !cor1)) {
257  /*
258  * mst1 was written by recovery at offset 0 with no
259  * corruption.
260  */
261  dbg_rcvry("recovery recovery");
262  mst = mst1;
263  } else if (mst2) {
264  offs2 = (void *)mst2 - buf2;
265  if (offs1 == offs2) {
266  /* Same offset, so must be the same */
267  if (memcmp((void *)mst1 + UBIFS_CH_SZ,
268  (void *)mst2 + UBIFS_CH_SZ,
270  goto out_err;
271  mst = mst1;
272  } else if (offs2 + sz == offs1) {
273  /* 1st LEB was written, 2nd was not */
274  if (cor1)
275  goto out_err;
276  mst = mst1;
277  } else if (offs1 == 0 &&
278  c->leb_size - offs2 - sz < sz) {
279  /* 1st LEB was unmapped and written, 2nd not */
280  if (cor1)
281  goto out_err;
282  mst = mst1;
283  } else
284  goto out_err;
285  } else {
286  /*
287  * 2nd LEB was unmapped and about to be written, so
288  * there must be only one master node in the first LEB
289  * and no corruption.
290  */
291  if (offs1 != 0 || cor1)
292  goto out_err;
293  mst = mst1;
294  }
295  } else {
296  if (!mst2)
297  goto out_err;
298  /*
299  * 1st LEB was unmapped and about to be written, so there must
300  * be no room left in 2nd LEB.
301  */
302  offs2 = (void *)mst2 - buf2;
303  if (offs2 + sz + sz <= c->leb_size)
304  goto out_err;
305  mst = mst2;
306  }
307 
308  ubifs_msg("recovered master node from LEB %d",
309  (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
310 
312 
313  if (c->ro_mount) {
314  /* Read-only mode. Keep a copy for switching to rw mode */
316  if (!c->rcvrd_mst_node) {
317  err = -ENOMEM;
318  goto out_free;
319  }
321 
322  /*
323  * We had to recover the master node, which means there was an
324  * unclean reboot. However, it is possible that the master node
325  * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
326  * E.g., consider the following chain of events:
327  *
328  * 1. UBIFS was cleanly unmounted, so the master node is clean
329  * 2. UBIFS is being mounted R/W and starts changing the master
330  * node in the first (%UBIFS_MST_LNUM). A power cut happens,
331  * so this LEB ends up with some amount of garbage at the
332  * end.
333  * 3. UBIFS is being mounted R/O. We reach this place and
334  * recover the master node from the second LEB
335  * (%UBIFS_MST_LNUM + 1). But we cannot update the media
336  * because we are being mounted R/O. We have to defer the
337  * operation.
338  * 4. However, this master node (@c->mst_node) is marked as
339  * clean (since the step 1). And if we just return, the
340  * mount code will be confused and won't recover the master
341  * node when it is re-mounter R/W later.
342  *
343  * Thus, to force the recovery by marking the master node as
344  * dirty.
345  */
346  c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
347  } else {
348  /* Write the recovered master node */
349  c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
350  err = write_rcvrd_mst_node(c, c->mst_node);
351  if (err)
352  goto out_free;
353  }
354 
355  vfree(buf2);
356  vfree(buf1);
357 
358  return 0;
359 
360 out_err:
361  err = -EINVAL;
362 out_free:
363  ubifs_err("failed to recover master node");
364  if (mst1) {
365  ubifs_err("dumping first master node");
366  ubifs_dump_node(c, mst1);
367  }
368  if (mst2) {
369  ubifs_err("dumping second master node");
370  ubifs_dump_node(c, mst2);
371  }
372  vfree(buf2);
373  vfree(buf1);
374  return err;
375 }
376 
387 {
388  int err;
389 
390  if (!c->rcvrd_mst_node)
391  return 0;
393  c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
394  err = write_rcvrd_mst_node(c, c->rcvrd_mst_node);
395  if (err)
396  return err;
397  kfree(c->rcvrd_mst_node);
398  c->rcvrd_mst_node = NULL;
399  return 0;
400 }
401 
413 static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
414 {
415  int empty_offs, check_len;
416  uint8_t *p;
417 
418  /*
419  * Round up to the next @c->max_write_size boundary i.e. @offs is in
420  * the last wbuf written. After that should be empty space.
421  */
422  empty_offs = ALIGN(offs + 1, c->max_write_size);
423  check_len = c->leb_size - empty_offs;
424  p = buf + empty_offs - offs;
425  return is_empty(p, check_len);
426 }
427 
440 static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
441  int *offs, int *len)
442 {
443  int empty_offs, pad_len;
444 
445  lnum = lnum;
446  dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
447 
448  ubifs_assert(!(*offs & 7));
449  empty_offs = ALIGN(*offs, c->min_io_size);
450  pad_len = empty_offs - *offs;
451  ubifs_pad(c, *buf, pad_len);
452  *offs += pad_len;
453  *buf += pad_len;
454  *len -= pad_len;
455  memset(*buf, 0xff, c->leb_size - empty_offs);
456 }
457 
470 static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
471  int lnum, int offs)
472 {
473  struct ubifs_ch *ch = buf;
474  int skip, dlen = le32_to_cpu(ch->len);
475 
476  /* Check for empty space after the corrupt node's common header */
477  skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs;
478  if (is_empty(buf + skip, len - skip))
479  return 1;
480  /*
481  * The area after the common header size is not empty, so the common
482  * header must be intact. Check it.
483  */
484  if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) {
485  dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs);
486  return 0;
487  }
488  /* Now we know the corrupt node's length we can skip over it */
489  skip = ALIGN(offs + dlen, c->max_write_size) - offs;
490  /* After which there should be empty space */
491  if (is_empty(buf + skip, len - skip))
492  return 1;
493  dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip);
494  return 0;
495 }
496 
503 static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
504  int start)
505 {
506  int lnum = sleb->lnum, endpt = start;
507 
508  /* Get the end offset of the last node we are keeping */
509  if (!list_empty(&sleb->nodes)) {
510  struct ubifs_scan_node *snod;
511 
512  snod = list_entry(sleb->nodes.prev,
513  struct ubifs_scan_node, list);
514  endpt = snod->offs + snod->len;
515  }
516 
517  if (c->ro_mount && !c->remounting_rw) {
518  /* Add to recovery list */
519  struct ubifs_unclean_leb *ucleb;
520 
521  dbg_rcvry("need to fix LEB %d start %d endpt %d",
522  lnum, start, sleb->endpt);
523  ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS);
524  if (!ucleb)
525  return -ENOMEM;
526  ucleb->lnum = lnum;
527  ucleb->endpt = endpt;
528  list_add_tail(&ucleb->list, &c->unclean_leb_list);
529  } else {
530  /* Write the fixed LEB back to flash */
531  int err;
532 
533  dbg_rcvry("fixing LEB %d start %d endpt %d",
534  lnum, start, sleb->endpt);
535  if (endpt == 0) {
536  err = ubifs_leb_unmap(c, lnum);
537  if (err)
538  return err;
539  } else {
540  int len = ALIGN(endpt, c->min_io_size);
541 
542  if (start) {
543  err = ubifs_leb_read(c, lnum, sleb->buf, 0,
544  start, 1);
545  if (err)
546  return err;
547  }
548  /* Pad to min_io_size */
549  if (len > endpt) {
550  int pad_len = len - ALIGN(endpt, 8);
551 
552  if (pad_len > 0) {
553  void *buf = sleb->buf + len - pad_len;
554 
555  ubifs_pad(c, buf, pad_len);
556  }
557  }
558  err = ubifs_leb_change(c, lnum, sleb->buf, len);
559  if (err)
560  return err;
561  }
562  }
563  return 0;
564 }
565 
574 static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs)
575 {
576  while (!list_empty(&sleb->nodes)) {
577  struct ubifs_scan_node *snod;
578  struct ubifs_ch *ch;
579 
580  snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
581  list);
582  ch = snod->node;
583  if (ch->group_type != UBIFS_IN_NODE_GROUP)
584  break;
585 
586  dbg_rcvry("dropping grouped node at %d:%d",
587  sleb->lnum, snod->offs);
588  *offs = snod->offs;
589  list_del(&snod->list);
590  kfree(snod);
591  sleb->nodes_cnt -= 1;
592  }
593 }
594 
604 static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs)
605 {
606  struct ubifs_scan_node *snod;
607 
608  if (!list_empty(&sleb->nodes)) {
609  snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
610  list);
611 
612  dbg_rcvry("dropping last node at %d:%d",
613  sleb->lnum, snod->offs);
614  *offs = snod->offs;
615  list_del(&snod->list);
616  kfree(snod);
617  sleb->nodes_cnt -= 1;
618  }
619 }
620 
635 struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
636  int offs, void *sbuf, int jhead)
637 {
638  int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
639  int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped;
640  struct ubifs_scan_leb *sleb;
641  void *buf = sbuf + offs;
642 
643  dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped);
644 
645  sleb = ubifs_start_scan(c, lnum, offs, sbuf);
646  if (IS_ERR(sleb))
647  return sleb;
648 
649  ubifs_assert(len >= 8);
650  while (len >= 8) {
651  dbg_scan("look at LEB %d:%d (%d bytes left)",
652  lnum, offs, len);
653 
654  cond_resched();
655 
656  /*
657  * Scan quietly until there is an error from which we cannot
658  * recover
659  */
660  ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
661  if (ret == SCANNED_A_NODE) {
662  /* A valid node, and not a padding node */
663  struct ubifs_ch *ch = buf;
664  int node_len;
665 
666  err = ubifs_add_snod(c, sleb, buf, offs);
667  if (err)
668  goto error;
669  node_len = ALIGN(le32_to_cpu(ch->len), 8);
670  offs += node_len;
671  buf += node_len;
672  len -= node_len;
673  } else if (ret > 0) {
674  /* Padding bytes or a valid padding node */
675  offs += ret;
676  buf += ret;
677  len -= ret;
678  } else if (ret == SCANNED_EMPTY_SPACE ||
679  ret == SCANNED_GARBAGE ||
680  ret == SCANNED_A_BAD_PAD_NODE ||
681  ret == SCANNED_A_CORRUPT_NODE) {
682  dbg_rcvry("found corruption (%d) at %d:%d",
683  ret, lnum, offs);
684  break;
685  } else {
686  ubifs_err("unexpected return value %d", ret);
687  err = -EINVAL;
688  goto error;
689  }
690  }
691 
692  if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
693  if (!is_last_write(c, buf, offs))
694  goto corrupted_rescan;
695  } else if (ret == SCANNED_A_CORRUPT_NODE) {
696  if (!no_more_nodes(c, buf, len, lnum, offs))
697  goto corrupted_rescan;
698  } else if (!is_empty(buf, len)) {
699  if (!is_last_write(c, buf, offs)) {
700  int corruption = first_non_ff(buf, len);
701 
702  /*
703  * See header comment for this file for more
704  * explanations about the reasons we have this check.
705  */
706  ubifs_err("corrupt empty space LEB %d:%d, corruption starts at %d",
707  lnum, offs, corruption);
708  /* Make sure we dump interesting non-0xFF data */
709  offs += corruption;
710  buf += corruption;
711  goto corrupted;
712  }
713  }
714 
715  min_io_unit = round_down(offs, c->min_io_size);
716  if (grouped)
717  /*
718  * If nodes are grouped, always drop the incomplete group at
719  * the end.
720  */
721  drop_last_group(sleb, &offs);
722 
723  if (jhead == GCHD) {
724  /*
725  * If this LEB belongs to the GC head then while we are in the
726  * middle of the same min. I/O unit keep dropping nodes. So
727  * basically, what we want is to make sure that the last min.
728  * I/O unit where we saw the corruption is dropped completely
729  * with all the uncorrupted nodes which may possibly sit there.
730  *
731  * In other words, let's name the min. I/O unit where the
732  * corruption starts B, and the previous min. I/O unit A. The
733  * below code tries to deal with a situation when half of B
734  * contains valid nodes or the end of a valid node, and the
735  * second half of B contains corrupted data or garbage. This
736  * means that UBIFS had been writing to B just before the power
737  * cut happened. I do not know how realistic is this scenario
738  * that half of the min. I/O unit had been written successfully
739  * and the other half not, but this is possible in our 'failure
740  * mode emulation' infrastructure at least.
741  *
742  * So what is the problem, why we need to drop those nodes? Why
743  * can't we just clean-up the second half of B by putting a
744  * padding node there? We can, and this works fine with one
745  * exception which was reproduced with power cut emulation
746  * testing and happens extremely rarely.
747  *
748  * Imagine the file-system is full, we run GC which starts
749  * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is
750  * the current GC head LEB). The @c->gc_lnum is -1, which means
751  * that GC will retain LEB X and will try to continue. Imagine
752  * that LEB X is currently the dirtiest LEB, and the amount of
753  * used space in LEB Y is exactly the same as amount of free
754  * space in LEB X.
755  *
756  * And a power cut happens when nodes are moved from LEB X to
757  * LEB Y. We are here trying to recover LEB Y which is the GC
758  * head LEB. We find the min. I/O unit B as described above.
759  * Then we clean-up LEB Y by padding min. I/O unit. And later
760  * 'ubifs_rcvry_gc_commit()' function fails, because it cannot
761  * find a dirty LEB which could be GC'd into LEB Y! Even LEB X
762  * does not match because the amount of valid nodes there does
763  * not fit the free space in LEB Y any more! And this is
764  * because of the padding node which we added to LEB Y. The
765  * user-visible effect of this which I once observed and
766  * analysed is that we cannot mount the file-system with
767  * -ENOSPC error.
768  *
769  * So obviously, to make sure that situation does not happen we
770  * should free min. I/O unit B in LEB Y completely and the last
771  * used min. I/O unit in LEB Y should be A. This is basically
772  * what the below code tries to do.
773  */
774  while (offs > min_io_unit)
775  drop_last_node(sleb, &offs);
776  }
777 
778  buf = sbuf + offs;
779  len = c->leb_size - offs;
780 
781  clean_buf(c, &buf, lnum, &offs, &len);
782  ubifs_end_scan(c, sleb, lnum, offs);
783 
784  err = fix_unclean_leb(c, sleb, start);
785  if (err)
786  goto error;
787 
788  return sleb;
789 
790 corrupted_rescan:
791  /* Re-scan the corrupted data with verbose messages */
792  ubifs_err("corruption %d", ret);
793  ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
794 corrupted:
795  ubifs_scanned_corruption(c, lnum, offs, buf);
796  err = -EUCLEAN;
797 error:
798  ubifs_err("LEB %d scanning failed", lnum);
799  ubifs_scan_destroy(sleb);
800  return ERR_PTR(err);
801 }
802 
812 static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
813  unsigned long long *cs_sqnum)
814 {
815  struct ubifs_cs_node *cs_node = NULL;
816  int err, ret;
817 
818  dbg_rcvry("at %d:%d", lnum, offs);
819  cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL);
820  if (!cs_node)
821  return -ENOMEM;
822  if (c->leb_size - offs < UBIFS_CS_NODE_SZ)
823  goto out_err;
824  err = ubifs_leb_read(c, lnum, (void *)cs_node, offs,
825  UBIFS_CS_NODE_SZ, 0);
826  if (err && err != -EBADMSG)
827  goto out_free;
828  ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
829  if (ret != SCANNED_A_NODE) {
830  ubifs_err("Not a valid node");
831  goto out_err;
832  }
833  if (cs_node->ch.node_type != UBIFS_CS_NODE) {
834  ubifs_err("Node a CS node, type is %d", cs_node->ch.node_type);
835  goto out_err;
836  }
837  if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
838  ubifs_err("CS node cmt_no %llu != current cmt_no %llu",
839  (unsigned long long)le64_to_cpu(cs_node->cmt_no),
840  c->cmt_no);
841  goto out_err;
842  }
843  *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum);
844  dbg_rcvry("commit start sqnum %llu", *cs_sqnum);
845  kfree(cs_node);
846  return 0;
847 
848 out_err:
849  err = -EINVAL;
850 out_free:
851  ubifs_err("failed to get CS sqnum");
852  kfree(cs_node);
853  return err;
854 }
855 
869 struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
870  int offs, void *sbuf)
871 {
872  struct ubifs_scan_leb *sleb;
873  int next_lnum;
874 
875  dbg_rcvry("LEB %d", lnum);
876  next_lnum = lnum + 1;
877  if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs)
878  next_lnum = UBIFS_LOG_LNUM;
879  if (next_lnum != c->ltail_lnum) {
880  /*
881  * We can only recover at the end of the log, so check that the
882  * next log LEB is empty or out of date.
883  */
884  sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0);
885  if (IS_ERR(sleb))
886  return sleb;
887  if (sleb->nodes_cnt) {
888  struct ubifs_scan_node *snod;
889  unsigned long long cs_sqnum = c->cs_sqnum;
890 
891  snod = list_entry(sleb->nodes.next,
892  struct ubifs_scan_node, list);
893  if (cs_sqnum == 0) {
894  int err;
895 
896  err = get_cs_sqnum(c, lnum, offs, &cs_sqnum);
897  if (err) {
898  ubifs_scan_destroy(sleb);
899  return ERR_PTR(err);
900  }
901  }
902  if (snod->sqnum > cs_sqnum) {
903  ubifs_err("unrecoverable log corruption in LEB %d",
904  lnum);
905  ubifs_scan_destroy(sleb);
906  return ERR_PTR(-EUCLEAN);
907  }
908  }
909  ubifs_scan_destroy(sleb);
910  }
911  return ubifs_recover_leb(c, lnum, offs, sbuf, -1);
912 }
913 
925 static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf)
926 {
927  int len = c->max_write_size, err;
928 
929  if (offs + len > c->leb_size)
930  len = c->leb_size - offs;
931 
932  if (!len)
933  return 0;
934 
935  /* Read at the head location and check it is empty flash */
936  err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1);
937  if (err || !is_empty(sbuf, len)) {
938  dbg_rcvry("cleaning head at %d:%d", lnum, offs);
939  if (offs == 0)
940  return ubifs_leb_unmap(c, lnum);
941  err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1);
942  if (err)
943  return err;
944  return ubifs_leb_change(c, lnum, sbuf, offs);
945  }
946 
947  return 0;
948 }
949 
967 int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf)
968 {
969  int err;
970 
972 
973  dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
974  err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
975  if (err)
976  return err;
977 
978  dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs);
979  err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
980  if (err)
981  return err;
982 
983  return 0;
984 }
985 
998 static int clean_an_unclean_leb(struct ubifs_info *c,
999  struct ubifs_unclean_leb *ucleb, void *sbuf)
1000 {
1001  int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
1002  void *buf = sbuf;
1003 
1004  dbg_rcvry("LEB %d len %d", lnum, len);
1005 
1006  if (len == 0) {
1007  /* Nothing to read, just unmap it */
1008  err = ubifs_leb_unmap(c, lnum);
1009  if (err)
1010  return err;
1011  return 0;
1012  }
1013 
1014  err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
1015  if (err && err != -EBADMSG)
1016  return err;
1017 
1018  while (len >= 8) {
1019  int ret;
1020 
1021  cond_resched();
1022 
1023  /* Scan quietly until there is an error */
1024  ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
1025 
1026  if (ret == SCANNED_A_NODE) {
1027  /* A valid node, and not a padding node */
1028  struct ubifs_ch *ch = buf;
1029  int node_len;
1030 
1031  node_len = ALIGN(le32_to_cpu(ch->len), 8);
1032  offs += node_len;
1033  buf += node_len;
1034  len -= node_len;
1035  continue;
1036  }
1037 
1038  if (ret > 0) {
1039  /* Padding bytes or a valid padding node */
1040  offs += ret;
1041  buf += ret;
1042  len -= ret;
1043  continue;
1044  }
1045 
1046  if (ret == SCANNED_EMPTY_SPACE) {
1047  ubifs_err("unexpected empty space at %d:%d",
1048  lnum, offs);
1049  return -EUCLEAN;
1050  }
1051 
1052  if (quiet) {
1053  /* Redo the last scan but noisily */
1054  quiet = 0;
1055  continue;
1056  }
1057 
1058  ubifs_scanned_corruption(c, lnum, offs, buf);
1059  return -EUCLEAN;
1060  }
1061 
1062  /* Pad to min_io_size */
1063  len = ALIGN(ucleb->endpt, c->min_io_size);
1064  if (len > ucleb->endpt) {
1065  int pad_len = len - ALIGN(ucleb->endpt, 8);
1066 
1067  if (pad_len > 0) {
1068  buf = c->sbuf + len - pad_len;
1069  ubifs_pad(c, buf, pad_len);
1070  }
1071  }
1072 
1073  /* Write back the LEB atomically */
1074  err = ubifs_leb_change(c, lnum, sbuf, len);
1075  if (err)
1076  return err;
1077 
1078  dbg_rcvry("cleaned LEB %d", lnum);
1079 
1080  return 0;
1081 }
1082 
1094 int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf)
1095 {
1096  dbg_rcvry("recovery");
1097  while (!list_empty(&c->unclean_leb_list)) {
1098  struct ubifs_unclean_leb *ucleb;
1099  int err;
1100 
1101  ucleb = list_entry(c->unclean_leb_list.next,
1102  struct ubifs_unclean_leb, list);
1103  err = clean_an_unclean_leb(c, ucleb, sbuf);
1104  if (err)
1105  return err;
1106  list_del(&ucleb->list);
1107  kfree(ucleb);
1108  }
1109  return 0;
1110 }
1111 
1120 static int grab_empty_leb(struct ubifs_info *c)
1121 {
1122  int lnum, err;
1123 
1124  /*
1125  * Note, it is very important to first search for an empty LEB and then
1126  * run the commit, not vice-versa. The reason is that there might be
1127  * only one empty LEB at the moment, the one which has been the
1128  * @c->gc_lnum just before the power cut happened. During the regular
1129  * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
1130  * one but GC can grab it. But at this moment this single empty LEB is
1131  * not marked as taken, so if we run commit - what happens? Right, the
1132  * commit will grab it and write the index there. Remember that the
1133  * index always expands as long as there is free space, and it only
1134  * starts consolidating when we run out of space.
1135  *
1136  * IOW, if we run commit now, we might not be able to find a free LEB
1137  * after this.
1138  */
1139  lnum = ubifs_find_free_leb_for_idx(c);
1140  if (lnum < 0) {
1141  ubifs_err("could not find an empty LEB");
1142  ubifs_dump_lprops(c);
1143  ubifs_dump_budg(c, &c->bi);
1144  return lnum;
1145  }
1146 
1147  /* Reset the index flag */
1148  err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1149  LPROPS_INDEX, 0);
1150  if (err)
1151  return err;
1152 
1153  c->gc_lnum = lnum;
1154  dbg_rcvry("found empty LEB %d, run commit", lnum);
1155 
1156  return ubifs_run_commit(c);
1157 }
1158 
1178 {
1179  struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
1180  struct ubifs_lprops lp;
1181  int err;
1182 
1183  dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);
1184 
1185  c->gc_lnum = -1;
1186  if (wbuf->lnum == -1 || wbuf->offs == c->leb_size)
1187  return grab_empty_leb(c);
1188 
1189  err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
1190  if (err) {
1191  if (err != -ENOSPC)
1192  return err;
1193 
1194  dbg_rcvry("could not find a dirty LEB");
1195  return grab_empty_leb(c);
1196  }
1197 
1198  ubifs_assert(!(lp.flags & LPROPS_INDEX));
1199  ubifs_assert(lp.free + lp.dirty >= wbuf->offs);
1200 
1201  /*
1202  * We run the commit before garbage collection otherwise subsequent
1203  * mounts will see the GC and orphan deletion in a different order.
1204  */
1205  dbg_rcvry("committing");
1206  err = ubifs_run_commit(c);
1207  if (err)
1208  return err;
1209 
1210  dbg_rcvry("GC'ing LEB %d", lp.lnum);
1211  mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
1212  err = ubifs_garbage_collect_leb(c, &lp);
1213  if (err >= 0) {
1214  int err2 = ubifs_wbuf_sync_nolock(wbuf);
1215 
1216  if (err2)
1217  err = err2;
1218  }
1219  mutex_unlock(&wbuf->io_mutex);
1220  if (err < 0) {
1221  ubifs_err("GC failed, error %d", err);
1222  if (err == -EAGAIN)
1223  err = -EINVAL;
1224  return err;
1225  }
1226 
1227  ubifs_assert(err == LEB_RETAINED);
1228  if (err != LEB_RETAINED)
1229  return -EINVAL;
1230 
1231  err = ubifs_leb_unmap(c, c->gc_lnum);
1232  if (err)
1233  return err;
1234 
1235  dbg_rcvry("allocated LEB %d for GC", lp.lnum);
1236  return 0;
1237 }
1238 
1248 struct size_entry {
1249  struct rb_node rb;
1251  loff_t i_size;
1252  loff_t d_size;
1253  int exists;
1254  struct inode *inode;
1255 };
1256 
1265 static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size,
1266  loff_t d_size, int exists)
1267 {
1268  struct rb_node **p = &c->size_tree.rb_node, *parent = NULL;
1269  struct size_entry *e;
1270 
1271  while (*p) {
1272  parent = *p;
1273  e = rb_entry(parent, struct size_entry, rb);
1274  if (inum < e->inum)
1275  p = &(*p)->rb_left;
1276  else
1277  p = &(*p)->rb_right;
1278  }
1279 
1280  e = kzalloc(sizeof(struct size_entry), GFP_KERNEL);
1281  if (!e)
1282  return -ENOMEM;
1283 
1284  e->inum = inum;
1285  e->i_size = i_size;
1286  e->d_size = d_size;
1287  e->exists = exists;
1288 
1289  rb_link_node(&e->rb, parent, p);
1290  rb_insert_color(&e->rb, &c->size_tree);
1291 
1292  return 0;
1293 }
1294 
1300 static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum)
1301 {
1302  struct rb_node *p = c->size_tree.rb_node;
1303  struct size_entry *e;
1304 
1305  while (p) {
1306  e = rb_entry(p, struct size_entry, rb);
1307  if (inum < e->inum)
1308  p = p->rb_left;
1309  else if (inum > e->inum)
1310  p = p->rb_right;
1311  else
1312  return e;
1313  }
1314  return NULL;
1315 }
1316 
1322 static void remove_ino(struct ubifs_info *c, ino_t inum)
1323 {
1324  struct size_entry *e = find_ino(c, inum);
1325 
1326  if (!e)
1327  return;
1328  rb_erase(&e->rb, &c->size_tree);
1329  kfree(e);
1330 }
1331 
1337 {
1338  struct rb_node *this = c->size_tree.rb_node;
1339  struct size_entry *e;
1340 
1341  while (this) {
1342  if (this->rb_left) {
1343  this = this->rb_left;
1344  continue;
1345  } else if (this->rb_right) {
1346  this = this->rb_right;
1347  continue;
1348  }
1349  e = rb_entry(this, struct size_entry, rb);
1350  if (e->inode)
1351  iput(e->inode);
1352  this = rb_parent(this);
1353  if (this) {
1354  if (this->rb_left == &e->rb)
1355  this->rb_left = NULL;
1356  else
1357  this->rb_right = NULL;
1358  }
1359  kfree(e);
1360  }
1361  c->size_tree = RB_ROOT;
1362 }
1363 
1390  int deletion, loff_t new_size)
1391 {
1392  ino_t inum = key_inum(c, key);
1393  struct size_entry *e;
1394  int err;
1395 
1396  switch (key_type(c, key)) {
1397  case UBIFS_INO_KEY:
1398  if (deletion)
1399  remove_ino(c, inum);
1400  else {
1401  e = find_ino(c, inum);
1402  if (e) {
1403  e->i_size = new_size;
1404  e->exists = 1;
1405  } else {
1406  err = add_ino(c, inum, new_size, 0, 1);
1407  if (err)
1408  return err;
1409  }
1410  }
1411  break;
1412  case UBIFS_DATA_KEY:
1413  e = find_ino(c, inum);
1414  if (e) {
1415  if (new_size > e->d_size)
1416  e->d_size = new_size;
1417  } else {
1418  err = add_ino(c, inum, 0, new_size, 0);
1419  if (err)
1420  return err;
1421  }
1422  break;
1423  case UBIFS_TRUN_KEY:
1424  e = find_ino(c, inum);
1425  if (e)
1426  e->d_size = new_size;
1427  break;
1428  }
1429  return 0;
1430 }
1431 
1437 static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1438 {
1439  struct ubifs_ino_node *ino = c->sbuf;
1440  unsigned char *p;
1441  union ubifs_key key;
1442  int err, lnum, offs, len;
1443  loff_t i_size;
1444  uint32_t crc;
1445 
1446  /* Locate the inode node LEB number and offset */
1447  ino_key_init(c, &key, e->inum);
1448  err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs);
1449  if (err)
1450  goto out;
1451  /*
1452  * If the size recorded on the inode node is greater than the size that
1453  * was calculated from nodes in the journal then don't change the inode.
1454  */
1455  i_size = le64_to_cpu(ino->size);
1456  if (i_size >= e->d_size)
1457  return 0;
1458  /* Read the LEB */
1459  err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1);
1460  if (err)
1461  goto out;
1462  /* Change the size field and recalculate the CRC */
1463  ino = c->sbuf + offs;
1464  ino->size = cpu_to_le64(e->d_size);
1465  len = le32_to_cpu(ino->ch.len);
1466  crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8);
1467  ino->ch.crc = cpu_to_le32(crc);
1468  /* Work out where data in the LEB ends and free space begins */
1469  p = c->sbuf;
1470  len = c->leb_size - 1;
1471  while (p[len] == 0xff)
1472  len -= 1;
1473  len = ALIGN(len + 1, c->min_io_size);
1474  /* Atomically write the fixed LEB back again */
1475  err = ubifs_leb_change(c, lnum, c->sbuf, len);
1476  if (err)
1477  goto out;
1478  dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
1479  (unsigned long)e->inum, lnum, offs, i_size, e->d_size);
1480  return 0;
1481 
1482 out:
1483  ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d",
1484  (unsigned long)e->inum, e->i_size, e->d_size, err);
1485  return err;
1486 }
1487 
1498 {
1499  struct rb_node *this = rb_first(&c->size_tree);
1500 
1501  while (this) {
1502  struct size_entry *e;
1503  int err;
1504 
1505  e = rb_entry(this, struct size_entry, rb);
1506  if (!e->exists) {
1507  union ubifs_key key;
1508 
1509  ino_key_init(c, &key, e->inum);
1510  err = ubifs_tnc_lookup(c, &key, c->sbuf);
1511  if (err && err != -ENOENT)
1512  return err;
1513  if (err == -ENOENT) {
1514  /* Remove data nodes that have no inode */
1515  dbg_rcvry("removing ino %lu",
1516  (unsigned long)e->inum);
1517  err = ubifs_tnc_remove_ino(c, e->inum);
1518  if (err)
1519  return err;
1520  } else {
1521  struct ubifs_ino_node *ino = c->sbuf;
1522 
1523  e->exists = 1;
1524  e->i_size = le64_to_cpu(ino->size);
1525  }
1526  }
1527 
1528  if (e->exists && e->i_size < e->d_size) {
1529  if (c->ro_mount) {
1530  /* Fix the inode size and pin it in memory */
1531  struct inode *inode;
1532  struct ubifs_inode *ui;
1533 
1534  ubifs_assert(!e->inode);
1535 
1536  inode = ubifs_iget(c->vfs_sb, e->inum);
1537  if (IS_ERR(inode))
1538  return PTR_ERR(inode);
1539 
1540  ui = ubifs_inode(inode);
1541  if (inode->i_size < e->d_size) {
1542  dbg_rcvry("ino %lu size %lld -> %lld",
1543  (unsigned long)e->inum,
1544  inode->i_size, e->d_size);
1545  inode->i_size = e->d_size;
1546  ui->ui_size = e->d_size;
1547  ui->synced_i_size = e->d_size;
1548  e->inode = inode;
1549  this = rb_next(this);
1550  continue;
1551  }
1552  iput(inode);
1553  } else {
1554  /* Fix the size in place */
1555  err = fix_size_in_place(c, e);
1556  if (err)
1557  return err;
1558  if (e->inode)
1559  iput(e->inode);
1560  }
1561  }
1562 
1563  this = rb_next(this);
1564  rb_erase(&e->rb, &c->size_tree);
1565  kfree(e);
1566  }
1567 
1568  return 0;
1569 }