Linux Kernel 3.7.1
wl.c
1 /*
2  * Copyright (c) International Business Machines Corp., 2006
3  *
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13  * the GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  *
19  * Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner
20  */
21 
22 /*
23  * UBI wear-leveling sub-system.
24  *
25  * This sub-system is responsible for wear-leveling. It works in terms of
26  * physical eraseblocks and erase counters and knows nothing about logical
27  * eraseblocks, volumes, etc. From this sub-system's perspective all physical
28  * eraseblocks are of two types - used and free. Used physical eraseblocks are
29  * those that were obtained via the 'ubi_wl_get_peb()' function, and free
30  * physical eraseblocks are those that were returned via 'ubi_wl_put_peb()'.
31  *
32  * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter
33  * header. The rest of the physical eraseblock contains only %0xFF bytes.
34  *
35  * When physical eraseblocks are returned to the WL sub-system by means of the
36  * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
37  * done asynchronously in context of the per-UBI device background thread,
38  * which is also managed by the WL sub-system.
39  *
40  * The wear-leveling is ensured by means of moving the contents of used
41  * physical eraseblocks with low erase counter to free physical eraseblocks
42  * with high erase counter.
43  *
44  * If the WL sub-system fails to erase a physical eraseblock, it marks it as
45  * bad.
46  *
47  * This sub-system is also responsible for scrubbing. If a bit-flip is detected
48  * in a physical eraseblock, it has to be moved. Technically this is the same
49  * as moving it for wear-leveling reasons.
50  *
51  * As it was said, for the UBI sub-system all physical eraseblocks are either
52  * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while
53  * used eraseblocks are kept in @wl->used, @wl->erroneous, or @wl->scrub
54  * RB-trees, as well as (temporarily) in the @wl->pq queue.
55  *
56  * When the WL sub-system returns a physical eraseblock, the physical
57  * eraseblock is protected from being moved for some "time". For this reason,
58  * the physical eraseblock is not directly moved from the @wl->free tree to the
59  * @wl->used tree. There is a protection queue in between where this
60  * physical eraseblock is temporarily stored (@wl->pq).
61  *
62  * All this protection stuff is needed because:
63  * o we don't want to move physical eraseblocks just after we have given them
64  * to the user; instead, we first want to let users fill them up with data;
65  *
66  * o there is a chance that the user will put the physical eraseblock very
67  * soon, so it makes sense not to move it for some time, but wait.
68  *
69  * Physical eraseblocks stay protected only for limited time. But the "time" is
70  * measured in erase cycles in this case. This is implemented with help of the
71  * protection queue. Eraseblocks are put to the tail of this queue when they
72  * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the
73  * head of the queue on each erase operation (for any eraseblock). So the
74  * length of the queue defines how many (global) erase cycles PEBs are protected.
75  *
76  * To put it differently, each physical eraseblock has 2 main states: free and
77  * used. The former state corresponds to the @wl->free tree. The latter state
78  * is split up into several sub-states:
79  * o the WL movement is allowed (@wl->used tree);
80  * o the WL movement is disallowed (@wl->erroneous) because the PEB is
81  * erroneous - e.g., there was a read error;
82  * o the WL movement is temporarily prohibited (@wl->pq queue);
83  * o scrubbing is needed (@wl->scrub tree).
84  *
85  * Depending on the sub-state, wear-leveling entries of the used physical
86  * eraseblocks may be kept in one of those structures.
87  *
88  * Note, in this implementation, we keep a small in-RAM object for each physical
89  * eraseblock. This is surely not a scalable solution. But it appears to be good
90  * enough for moderately large flashes and it is simple. In future, one may
91  * re-work this sub-system and make it more scalable.
92  *
93  * At the moment this sub-system does not utilize the sequence number, which
94  * was introduced relatively recently. But it would be wise to do this because
95  * the sequence number of a logical eraseblock characterizes how old it is. For
96  * example, when we move a PEB with low erase counter, and we need to pick the
97  * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
98  * pick a target PEB with an average EC if our PEB is not very "old". This is
99  * room for future re-work of the WL sub-system.
100  */
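The protection-queue behaviour described in the comment above can be modelled in a few lines of user-space C. The sketch below is illustrative only: it keeps a single PEB number per slot instead of a list of wear-leveling entries, and the names (EXAMPLE_PROT_QUEUE_LEN, protect(), serve()) are made up for the example, not taken from the kernel sources.

#include <stdio.h>

#define EXAMPLE_PROT_QUEUE_LEN	10	/* stand-in for UBI_PROT_QUEUE_LEN */

static int pq[EXAMPLE_PROT_QUEUE_LEN];	/* one slot per pending erase cycle */
static int pq_head;

/* Called when a PEB is handed out: park it at the tail of the queue. */
static void protect(int pnum)
{
	int tail = pq_head - 1;

	if (tail < 0)
		tail = EXAMPLE_PROT_QUEUE_LEN - 1;
	pq[tail] = pnum;
}

/* Called after every erase operation: release whatever sits at the head. */
static int serve(void)
{
	int released = pq[pq_head];

	pq[pq_head] = -1;
	pq_head = (pq_head + 1) % EXAMPLE_PROT_QUEUE_LEN;
	return released;	/* -1 means the slot held nothing */
}

int main(void)
{
	int i;

	for (i = 0; i < EXAMPLE_PROT_QUEUE_LEN; i++)
		pq[i] = -1;

	protect(42);
	for (i = 1; i <= EXAMPLE_PROT_QUEUE_LEN; i++)
		if (serve() == 42)
			printf("PEB 42 released after %d erase cycles\n", i);
	return 0;
}

Running the sketch reports that PEB 42 is released after exactly EXAMPLE_PROT_QUEUE_LEN erase operations, which is the guarantee the comment describes: the queue length bounds how many global erase cycles a freshly handed-out PEB stays protected.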
101 
102 #include <linux/slab.h>
103 #include <linux/crc32.h>
104 #include <linux/freezer.h>
105 #include <linux/kthread.h>
106 #include "ubi.h"
107 
108 /* Number of physical eraseblocks reserved for wear-leveling purposes */
109 #define WL_RESERVED_PEBS 1
110 
111 /*
112  * Maximum difference between two erase counters. If this threshold is
113  * exceeded, the WL sub-system starts moving data from used physical
114  * eraseblocks with low erase counter to free physical eraseblocks with high
115  * erase counter.
116  */
117 #define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD
118 
119 /*
120  * When a physical eraseblock is moved, the WL sub-system has to pick the target
121  * physical eraseblock to move to. The simplest way would be just to pick the
122  * one with the highest erase counter. But in certain workloads this could lead
123  * to unlimited wear of one or a few physical eraseblocks. Indeed, imagine a
124  * situation when the picked physical eraseblock is constantly erased after the
125  * data is written to it. So, we have a constant which limits the highest erase
126  * counter of the free physical eraseblock to pick. Namely, the WL sub-system
127  * does not pick eraseblocks with erase counter greater than the lowest erase
128  * counter plus %WL_FREE_MAX_DIFF.
129  */
130 #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
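As a rough illustration of how the two constants interact, the user-space sketch below checks the wear-leveling trigger and the move-target bound. The value 4096 is only an assumed example of what CONFIG_MTD_UBI_WL_THRESHOLD might be set to, and the EXAMPLE_* macros and helper names are invented for this sketch.

#include <stdbool.h>
#include <stdio.h>

/* Assumed example value; the real one comes from CONFIG_MTD_UBI_WL_THRESHOLD. */
#define EXAMPLE_WL_THRESHOLD	4096
#define EXAMPLE_FREE_MAX_DIFF	(2 * EXAMPLE_WL_THRESHOLD)

/* Wear-leveling is worthwhile only when the erase-counter gap between the
 * least worn used PEB and the most worn free PEB reaches the threshold. */
static bool needs_wear_leveling(int min_used_ec, int max_free_ec)
{
	return max_free_ec - min_used_ec >= EXAMPLE_WL_THRESHOLD;
}

/* A free PEB is an acceptable move target only while its erase counter
 * stays below "lowest free EC + EXAMPLE_FREE_MAX_DIFF". */
static bool acceptable_target(int candidate_ec, int lowest_free_ec)
{
	return candidate_ec < lowest_free_ec + EXAMPLE_FREE_MAX_DIFF;
}

int main(void)
{
	printf("%d\n", needs_wear_leveling(100, 5000));	/* 1: the gap is 4900 */
	printf("%d\n", acceptable_target(9000, 100));	/* 0: 9000 exceeds 100 + 8192 */
	return 0;
}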
131 
132 /*
133  * Maximum number of consecutive background thread failures which is enough to
134  * switch to read-only mode.
135  */
136 #define WL_MAX_FAILURES 32
137 
138 static int self_check_ec(struct ubi_device *ubi, int pnum, int ec);
139 static int self_check_in_wl_tree(const struct ubi_device *ubi,
140  struct ubi_wl_entry *e, struct rb_root *root);
141 static int self_check_in_pq(const struct ubi_device *ubi,
142  struct ubi_wl_entry *e);
143 
144 #ifdef CONFIG_MTD_UBI_FASTMAP
145 
149 static void update_fastmap_work_fn(struct work_struct *wrk)
150 {
151  struct ubi_device *ubi = container_of(wrk, struct ubi_device, fm_work);
152  ubi_update_fastmap(ubi);
153 }
154 
160 static int ubi_is_fm_block(struct ubi_device *ubi, int pnum)
161 {
162  int i;
163 
164  if (!ubi->fm)
165  return 0;
166 
167  for (i = 0; i < ubi->fm->used_blocks; i++)
168  if (ubi->fm->e[i]->pnum == pnum)
169  return 1;
170 
171  return 0;
172 }
173 #else
174 static int ubi_is_fm_block(struct ubi_device *ubi, int pnum)
175 {
176  return 0;
177 }
178 #endif
179 
188 static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root)
189 {
190  struct rb_node **p, *parent = NULL;
191 
192  p = &root->rb_node;
193  while (*p) {
194  struct ubi_wl_entry *e1;
195 
196  parent = *p;
197  e1 = rb_entry(parent, struct ubi_wl_entry, u.rb);
198 
199  if (e->ec < e1->ec)
200  p = &(*p)->rb_left;
201  else if (e->ec > e1->ec)
202  p = &(*p)->rb_right;
203  else {
204  ubi_assert(e->pnum != e1->pnum);
205  if (e->pnum < e1->pnum)
206  p = &(*p)->rb_left;
207  else
208  p = &(*p)->rb_right;
209  }
210  }
211 
212  rb_link_node(&e->u.rb, parent, p);
213  rb_insert_color(&e->u.rb, root);
214 }
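wl_tree_add() keeps the RB-trees ordered primarily by erase counter, with the PEB number as a tie-breaker. The user-space sketch below expresses the same total order as a qsort() comparator; the hypothetical struct wle and wle_cmp() merely stand in for struct ubi_wl_entry and the in-tree comparisons.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical flat stand-in for struct ubi_wl_entry. */
struct wle {
	int pnum;
	int ec;
};

/* Same ordering as wl_tree_add(): erase counter first, PEB number as the
 * tie-breaker, so entries with equal EC still compare as distinct. */
static int wle_cmp(const void *a, const void *b)
{
	const struct wle *x = a, *y = b;

	if (x->ec != y->ec)
		return x->ec < y->ec ? -1 : 1;
	return (x->pnum > y->pnum) - (x->pnum < y->pnum);
}

int main(void)
{
	struct wle v[] = { { 7, 30 }, { 3, 10 }, { 9, 10 }, { 1, 20 } };
	size_t i;

	qsort(v, sizeof(v) / sizeof(v[0]), sizeof(v[0]), wle_cmp);
	for (i = 0; i < sizeof(v) / sizeof(v[0]); i++)
		printf("PEB %d EC %d\n", v[i].pnum, v[i].ec);
	return 0;
}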
215 
223 static int do_work(struct ubi_device *ubi)
224 {
225  int err;
226  struct ubi_work *wrk;
227 
228  cond_resched();
229 
230  /*
231  * @ubi->work_sem is used to synchronize with the workers. Workers take
232  * it in read mode, so many of them may be doing works at a time. But
233  * the queue flush code has to be sure the whole queue of works is
235  * done, and it takes the semaphore in write mode.
235  */
236  down_read(&ubi->work_sem);
237  spin_lock(&ubi->wl_lock);
238  if (list_empty(&ubi->works)) {
239  spin_unlock(&ubi->wl_lock);
240  up_read(&ubi->work_sem);
241  return 0;
242  }
243 
244  wrk = list_entry(ubi->works.next, struct ubi_work, list);
245  list_del(&wrk->list);
246  ubi->works_count -= 1;
247  ubi_assert(ubi->works_count >= 0);
248  spin_unlock(&ubi->wl_lock);
249 
250  /*
251  * Call the worker function. Do not touch the work structure
252  * after this call as it will have been freed or reused by that
253  * time by the worker function.
254  */
255  err = wrk->func(ubi, wrk, 0);
256  if (err)
257  ubi_err("work failed with error code %d", err);
258  up_read(&ubi->work_sem);
259 
260  return err;
261 }
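The @work_sem usage explained in the comment inside do_work() (workers hold the semaphore shared while running one work item, the flush path acquires it exclusively to wait for them) can be sketched with a POSIX rwlock. This is a hypothetical user-space analogy, not kernel code; build it with -lpthread.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_rwlock_t work_sem = PTHREAD_RWLOCK_INITIALIZER;

static void *worker(void *arg)
{
	/* Like down_read(&ubi->work_sem): many workers may run in parallel. */
	pthread_rwlock_rdlock(&work_sem);
	usleep(100 * 1000);		/* pretend to process one work item */
	printf("worker %ld finished a work item\n", (long)arg);
	/* Like up_read(&ubi->work_sem). */
	pthread_rwlock_unlock(&work_sem);
	return NULL;
}

int main(void)
{
	pthread_t threads[4];
	long i;

	for (i = 0; i < 4; i++)
		pthread_create(&threads[i], NULL, worker, (void *)i);
	usleep(10 * 1000);	/* give the workers a moment to start (demo only) */

	/*
	 * Like the down_write()/up_write() pair at the end of ubi_wl_flush():
	 * the exclusive lock is granted only after every reader has dropped
	 * it, i.e. after all in-flight works have completed.
	 */
	pthread_rwlock_wrlock(&work_sem);
	pthread_rwlock_unlock(&work_sem);
	printf("all in-flight works are done\n");

	for (i = 0; i < 4; i++)
		pthread_join(threads[i], NULL);
	return 0;
}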
262 
272 static int produce_free_peb(struct ubi_device *ubi)
273 {
274  int err;
275 
276  while (!ubi->free.rb_node) {
277  spin_unlock(&ubi->wl_lock);
278 
279  dbg_wl("do one work synchronously");
280  err = do_work(ubi);
281 
282  spin_lock(&ubi->wl_lock);
283  if (err)
284  return err;
285  }
286 
287  return 0;
288 }
289 
298 static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root)
299 {
300  struct rb_node *p;
301 
302  p = root->rb_node;
303  while (p) {
304  struct ubi_wl_entry *e1;
305 
306  e1 = rb_entry(p, struct ubi_wl_entry, u.rb);
307 
308  if (e->pnum == e1->pnum) {
309  ubi_assert(e == e1);
310  return 1;
311  }
312 
313  if (e->ec < e1->ec)
314  p = p->rb_left;
315  else if (e->ec > e1->ec)
316  p = p->rb_right;
317  else {
318  ubi_assert(e->pnum != e1->pnum);
319  if (e->pnum < e1->pnum)
320  p = p->rb_left;
321  else
322  p = p->rb_right;
323  }
324  }
325 
326  return 0;
327 }
328 
339 static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e)
340 {
341  int pq_tail = ubi->pq_head - 1;
342 
343  if (pq_tail < 0)
344  pq_tail = UBI_PROT_QUEUE_LEN - 1;
345  ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN);
346  list_add_tail(&e->u.list, &ubi->pq[pq_tail]);
347  dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec);
348 }
349 
359 static struct ubi_wl_entry *find_wl_entry(struct ubi_device *ubi,
360  struct rb_root *root, int diff)
361 {
362  struct rb_node *p;
363  struct ubi_wl_entry *e, *prev_e = NULL;
364  int max;
365 
366  e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb);
367  max = e->ec + diff;
368 
369  p = root->rb_node;
370  while (p) {
371  struct ubi_wl_entry *e1;
372 
373  e1 = rb_entry(p, struct ubi_wl_entry, u.rb);
374  if (e1->ec >= max)
375  p = p->rb_left;
376  else {
377  p = p->rb_right;
378  prev_e = e;
379  e = e1;
380  }
381  }
382 
383  /* If no fastmap has been written and this WL entry can be used
384  * as anchor PEB, hold it back and return the second best WL entry
385  * such that fastmap can use the anchor PEB later. */
386  if (prev_e && !ubi->fm_disabled &&
387  !ubi->fm && e->pnum < UBI_FM_MAX_START)
388  return prev_e;
389 
390  return e;
391 }
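Ignoring the fastmap anchor special case, the selection rule of find_wl_entry() boils down to "take the most worn free PEB whose erase counter still stays below the lowest free EC plus @diff". The sketch below demonstrates that rule on a plain sorted array instead of an RB-tree; struct peb and pick_target() are invented for the example.

#include <stdio.h>

struct peb {
	int pnum;
	int ec;
};

/* @pebs must be sorted by ascending erase counter (like the @free tree). */
static const struct peb *pick_target(const struct peb *pebs, int n, int diff)
{
	int max = pebs[0].ec + diff;	/* upper bound, as in find_wl_entry() */
	const struct peb *best = &pebs[0];
	int i;

	for (i = 1; i < n; i++)
		if (pebs[i].ec < max)
			best = &pebs[i];	/* highest EC still under the bound */
	return best;
}

int main(void)
{
	const struct peb free_pebs[] = {
		{ 10, 100 }, { 11, 150 }, { 12, 180 }, { 13, 900 },
	};
	const struct peb *t = pick_target(free_pebs, 4, 100);

	/* Prints PEB 12: EC 180 is the largest one below 100 + 100 = 200. */
	printf("target PEB %d, EC %d\n", t->pnum, t->ec);
	return 0;
}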
392 
402 static struct ubi_wl_entry *find_mean_wl_entry(struct ubi_device *ubi,
403  struct rb_root *root)
404 {
405  struct ubi_wl_entry *e, *first, *last;
406 
407  first = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb);
408  last = rb_entry(rb_last(root), struct ubi_wl_entry, u.rb);
409 
410  if (last->ec - first->ec < WL_FREE_MAX_DIFF) {
411  e = rb_entry(root->rb_node, struct ubi_wl_entry, u.rb);
412 
413 #ifdef CONFIG_MTD_UBI_FASTMAP
414  /* If no fastmap has been written and this WL entry can be used
415  * as anchor PEB, hold it back and return the second best
416  * WL entry such that fastmap can use the anchor PEB later. */
417  if (e && !ubi->fm_disabled && !ubi->fm &&
418  e->pnum < UBI_FM_MAX_START)
419  e = rb_entry(rb_next(root->rb_node),
420  struct ubi_wl_entry, u.rb);
421 #endif
422  } else
423  e = find_wl_entry(ubi, root, WL_FREE_MAX_DIFF/2);
424 
425  return e;
426 }
427 
428 #ifdef CONFIG_MTD_UBI_FASTMAP
429 
433 static struct ubi_wl_entry *find_anchor_wl_entry(struct rb_root *root)
434 {
435  struct rb_node *p;
436  struct ubi_wl_entry *e, *victim = NULL;
437  int max_ec = UBI_MAX_ERASECOUNTER;
438 
439  ubi_rb_for_each_entry(p, e, root, u.rb) {
440  if (e->pnum < UBI_FM_MAX_START && e->ec < max_ec) {
441  victim = e;
442  max_ec = e->ec;
443  }
444  }
445 
446  return victim;
447 }
448 
449 static int anchor_pebs_avalible(struct rb_root *root)
450 {
451  struct rb_node *p;
452  struct ubi_wl_entry *e;
453 
454  ubi_rb_for_each_entry(p, e, root, u.rb)
455  if (e->pnum < UBI_FM_MAX_START)
456  return 1;
457 
458  return 0;
459 }
460 
470 struct ubi_wl_entry *ubi_wl_get_fm_peb(struct ubi_device *ubi, int anchor)
471 {
472  struct ubi_wl_entry *e = NULL;
473 
474  if (!ubi->free.rb_node || (ubi->free_count - ubi->beb_rsvd_pebs < 1))
475  goto out;
476 
477  if (anchor)
478  e = find_anchor_wl_entry(&ubi->free);
479  else
480  e = find_mean_wl_entry(ubi, &ubi->free);
481 
482  if (!e)
483  goto out;
484 
485  self_check_in_wl_tree(ubi, e, &ubi->free);
486 
487  /* remove it from the free list,
488  * the wl subsystem no longer knows this erase block */
489  rb_erase(&e->u.rb, &ubi->free);
490  ubi->free_count--;
491 out:
492  return e;
493 }
494 #endif
495 
503 static int __wl_get_peb(struct ubi_device *ubi)
504 {
505  int err;
506  struct ubi_wl_entry *e;
507 
508 retry:
509  if (!ubi->free.rb_node) {
510  if (ubi->works_count == 0) {
511  ubi_err("no free eraseblocks");
512  ubi_assert(list_empty(&ubi->works));
513  return -ENOSPC;
514  }
515 
516  err = produce_free_peb(ubi);
517  if (err < 0)
518  return err;
519  goto retry;
520  }
521 
522  e = find_mean_wl_entry(ubi, &ubi->free);
523  if (!e) {
524  ubi_err("no free eraseblocks");
525  return -ENOSPC;
526  }
527 
528  self_check_in_wl_tree(ubi, e, &ubi->free);
529 
530  /*
531  * Move the physical eraseblock to the protection queue where it will
532  * be protected from being moved for some time.
533  */
534  rb_erase(&e->u.rb, &ubi->free);
535  ubi->free_count--;
536  dbg_wl("PEB %d EC %d", e->pnum, e->ec);
537 #ifndef CONFIG_MTD_UBI_FASTMAP
538  /* We have to enqueue e only if fastmap is disabled;
539  * if fastmap is enabled, prot_queue_add() will be called by
540  * ubi_wl_get_peb() after removing e from the pool. */
541  prot_queue_add(ubi, e);
542 #endif
543  return e->pnum;
544 }
545 
546 #ifdef CONFIG_MTD_UBI_FASTMAP
547 
552 static void return_unused_pool_pebs(struct ubi_device *ubi,
553  struct ubi_fm_pool *pool)
554 {
555  int i;
556  struct ubi_wl_entry *e;
557 
558  for (i = pool->used; i < pool->size; i++) {
559  e = ubi->lookuptbl[pool->pebs[i]];
560  wl_tree_add(e, &ubi->free);
561  ubi->free_count++;
562  }
563 }
564 
570 static void refill_wl_pool(struct ubi_device *ubi)
571 {
572  struct ubi_wl_entry *e;
573  struct ubi_fm_pool *pool = &ubi->fm_wl_pool;
574 
575  return_unused_pool_pebs(ubi, pool);
576 
577  for (pool->size = 0; pool->size < pool->max_size; pool->size++) {
578  if (!ubi->free.rb_node ||
579  (ubi->free_count - ubi->beb_rsvd_pebs < 5))
580  break;
581 
582  e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF);
583  self_check_in_wl_tree(ubi, e, &ubi->free);
584  rb_erase(&e->u.rb, &ubi->free);
585  ubi->free_count--;
586 
587  pool->pebs[pool->size] = e->pnum;
588  }
589  pool->used = 0;
590 }
591 
596 static void refill_wl_user_pool(struct ubi_device *ubi)
597 {
598  struct ubi_fm_pool *pool = &ubi->fm_pool;
599 
600  return_unused_pool_pebs(ubi, pool);
601 
602  for (pool->size = 0; pool->size < pool->max_size; pool->size++) {
603  if (!ubi->free.rb_node ||
604  (ubi->free_count - ubi->beb_rsvd_pebs < 1))
605  break;
606 
607  pool->pebs[pool->size] = __wl_get_peb(ubi);
608  if (pool->pebs[pool->size] < 0)
609  break;
610  }
611  pool->used = 0;
612 }
613 
618 void ubi_refill_pools(struct ubi_device *ubi)
619 {
620  spin_lock(&ubi->wl_lock);
621  refill_wl_pool(ubi);
622  refill_wl_user_pool(ubi);
623  spin_unlock(&ubi->wl_lock);
624 }
625 
626 /* ubi_wl_get_peb - works exactly like __wl_get_peb but keeps track of
627  * the fastmap pool.
628  */
629 int ubi_wl_get_peb(struct ubi_device *ubi)
630 {
631  int ret;
632  struct ubi_fm_pool *pool = &ubi->fm_pool;
633  struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool;
634 
635  if (!pool->size || !wl_pool->size || pool->used == pool->size ||
636  wl_pool->used == wl_pool->size)
637  ubi_update_fastmap(ubi);
638 
639  /* we did not get a single free PEB */
640  if (!pool->size)
641  ret = -ENOSPC;
642  else {
643  spin_lock(&ubi->wl_lock);
644  ret = pool->pebs[pool->used++];
645  prot_queue_add(ubi, ubi->lookuptbl[ret]);
646  spin_unlock(&ubi->wl_lock);
647  }
648 
649  return ret;
650 }
651 
652 /* get_peb_for_wl - returns a PEB to be used internally by the WL sub-system.
653  *
654  * @ubi: UBI device description object
655  */
656 static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi)
657 {
658  struct ubi_fm_pool *pool = &ubi->fm_wl_pool;
659  int pnum;
660 
661  if (pool->used == pool->size || !pool->size) {
662  /* We cannot update the fastmap here because this
663  * function is called in atomic context.
664  * Let's fail here and refill/update it as soon as possible. */
665  schedule_work(&ubi->fm_work);
666  return NULL;
667  } else {
668  pnum = pool->pebs[pool->used++];
669  return ubi->lookuptbl[pnum];
670  }
671 }
672 #else
673 static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi)
674 {
675  struct ubi_wl_entry *e;
676 
677  e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF);
678  self_check_in_wl_tree(ubi, e, &ubi->free);
679  rb_erase(&e->u.rb, &ubi->free);
680 
681  return e;
682 }
683 
684 int ubi_wl_get_peb(struct ubi_device *ubi)
685 {
686  int peb, err;
687 
688  spin_lock(&ubi->wl_lock);
689  peb = __wl_get_peb(ubi);
690  spin_unlock(&ubi->wl_lock);
691 
692  err = ubi_self_check_all_ff(ubi, peb, ubi->vid_hdr_aloffset,
693  ubi->peb_size - ubi->vid_hdr_aloffset);
694  if (err) {
695  ubi_err("new PEB %d does not contain all 0xFF bytes", peb);
696  return err;
697  }
698 
699  return peb;
700 }
701 #endif
702 
711 static int prot_queue_del(struct ubi_device *ubi, int pnum)
712 {
713  struct ubi_wl_entry *e;
714 
715  e = ubi->lookuptbl[pnum];
716  if (!e)
717  return -ENODEV;
718 
719  if (self_check_in_pq(ubi, e))
720  return -ENODEV;
721 
722  list_del(&e->u.list);
723  dbg_wl("deleted PEB %d from the protection queue", e->pnum);
724  return 0;
725 }
726 
736 static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
737  int torture)
738 {
739  int err;
740  struct ubi_ec_hdr *ec_hdr;
741  unsigned long long ec = e->ec;
742 
743  dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec);
744 
745  err = self_check_ec(ubi, e->pnum, e->ec);
746  if (err)
747  return -EINVAL;
748 
749  ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
750  if (!ec_hdr)
751  return -ENOMEM;
752 
753  err = ubi_io_sync_erase(ubi, e->pnum, torture);
754  if (err < 0)
755  goto out_free;
756 
757  ec += err;
758  if (ec > UBI_MAX_ERASECOUNTER) {
759  /*
760  * Erase counter overflow. Upgrade UBI and use 64-bit
761  * erase counters internally.
762  */
763  ubi_err("erase counter overflow at PEB %d, EC %llu",
764  e->pnum, ec);
765  err = -EINVAL;
766  goto out_free;
767  }
768 
769  dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec);
770 
771  ec_hdr->ec = cpu_to_be64(ec);
772 
773  err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr);
774  if (err)
775  goto out_free;
776 
777  e->ec = ec;
778  spin_lock(&ubi->wl_lock);
779  if (e->ec > ubi->max_ec)
780  ubi->max_ec = e->ec;
781  spin_unlock(&ubi->wl_lock);
782 
783 out_free:
784  kfree(ec_hdr);
785  return err;
786 }
787 
796 static void serve_prot_queue(struct ubi_device *ubi)
797 {
798  struct ubi_wl_entry *e, *tmp;
799  int count;
800 
801  /*
802  * There may be several protected physical eraseblocks to remove,
803  * process them all.
804  */
805 repeat:
806  count = 0;
807  spin_lock(&ubi->wl_lock);
808  list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) {
809  dbg_wl("PEB %d EC %d protection over, move to used tree",
810  e->pnum, e->ec);
811 
812  list_del(&e->u.list);
813  wl_tree_add(e, &ubi->used);
814  if (count++ > 32) {
815  /*
816  * Let's be nice and avoid holding the spinlock for
817  * too long.
818  */
819  spin_unlock(&ubi->wl_lock);
820  cond_resched();
821  goto repeat;
822  }
823  }
824 
825  ubi->pq_head += 1;
826  if (ubi->pq_head == UBI_PROT_QUEUE_LEN)
827  ubi->pq_head = 0;
828  ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN);
829  spin_unlock(&ubi->wl_lock);
830 }
831 
840 static void __schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
841 {
842  spin_lock(&ubi->wl_lock);
843  list_add_tail(&wrk->list, &ubi->works);
844  ubi_assert(ubi->works_count >= 0);
845  ubi->works_count += 1;
846  if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled(ubi))
847  wake_up_process(ubi->bgt_thread);
848  spin_unlock(&ubi->wl_lock);
849 }
850 
859 static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
860 {
861  down_read(&ubi->work_sem);
862  __schedule_ubi_work(ubi, wrk);
863  up_read(&ubi->work_sem);
864 }
865 
866 static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
867  int cancel);
868 
869 #ifdef CONFIG_MTD_UBI_FASTMAP
870 
874 int ubi_is_erase_work(struct ubi_work *wrk)
875 {
876  return wrk->func == erase_worker;
877 }
878 #endif
879 
891 static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
892  int vol_id, int lnum, int torture)
893 {
894  struct ubi_work *wl_wrk;
895 
896  ubi_assert(e);
897  ubi_assert(!ubi_is_fm_block(ubi, e->pnum));
898 
899  dbg_wl("schedule erasure of PEB %d, EC %d, torture %d",
900  e->pnum, e->ec, torture);
901 
902  wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
903  if (!wl_wrk)
904  return -ENOMEM;
905 
906  wl_wrk->func = &erase_worker;
907  wl_wrk->e = e;
908  wl_wrk->vol_id = vol_id;
909  wl_wrk->lnum = lnum;
910  wl_wrk->torture = torture;
911 
912  schedule_ubi_work(ubi, wl_wrk);
913  return 0;
914 }
915 
925 static int do_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
926  int vol_id, int lnum, int torture)
927 {
928  struct ubi_work *wl_wrk;
929 
930  dbg_wl("sync erase of PEB %i", e->pnum);
931 
932  wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
933  if (!wl_wrk)
934  return -ENOMEM;
935 
936  wl_wrk->e = e;
937  wl_wrk->vol_id = vol_id;
938  wl_wrk->lnum = lnum;
939  wl_wrk->torture = torture;
940 
941  return erase_worker(ubi, wl_wrk, 0);
942 }
943 
944 #ifdef CONFIG_MTD_UBI_FASTMAP
945 
955 int ubi_wl_put_fm_peb(struct ubi_device *ubi, struct ubi_wl_entry *fm_e,
956  int lnum, int torture)
957 {
958  struct ubi_wl_entry *e;
959  int vol_id, pnum = fm_e->pnum;
960 
961  dbg_wl("PEB %d", pnum);
962 
963  ubi_assert(pnum >= 0);
964  ubi_assert(pnum < ubi->peb_count);
965 
966  spin_lock(&ubi->wl_lock);
967  e = ubi->lookuptbl[pnum];
968 
969  /* This can happen if we recovered from a fastmap the very
970  * first time and are now writing a new one. In this case the wl system
971  * has never seen any PEB used by the original fastmap.
972  */
973  if (!e) {
974  e = fm_e;
975  ubi_assert(e->ec >= 0);
976  ubi->lookuptbl[pnum] = e;
977  } else {
978  e->ec = fm_e->ec;
979  kfree(fm_e);
980  }
981 
982  spin_unlock(&ubi->wl_lock);
983 
984  vol_id = lnum ? UBI_FM_DATA_VOLUME_ID : UBI_FM_SB_VOLUME_ID;
985  return schedule_erase(ubi, e, vol_id, lnum, torture);
986 }
987 #endif
988 
999 static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
1000  int cancel)
1001 {
1002  int err, scrubbing = 0, torture = 0, protect = 0, erroneous = 0;
1003  int vol_id = -1, uninitialized_var(lnum);
1004 #ifdef CONFIG_MTD_UBI_FASTMAP
1005  int anchor = wrk->anchor;
1006 #endif
1007  struct ubi_wl_entry *e1, *e2;
1008  struct ubi_vid_hdr *vid_hdr;
1009 
1010  kfree(wrk);
1011  if (cancel)
1012  return 0;
1013 
1014  vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
1015  if (!vid_hdr)
1016  return -ENOMEM;
1017 
1018  mutex_lock(&ubi->move_mutex);
1019  spin_lock(&ubi->wl_lock);
1020  ubi_assert(!ubi->move_from && !ubi->move_to);
1021  ubi_assert(!ubi->move_to_put);
1022 
1023  if (!ubi->free.rb_node ||
1024  (!ubi->used.rb_node && !ubi->scrub.rb_node)) {
1025  /*
1026  * No free physical eraseblocks? Well, they must be waiting in
1027  * the queue to be erased. Cancel movement - it will be
1028  * triggered again when a free physical eraseblock appears.
1029  *
1030  * No used physical eraseblocks? They must be temporarily
1031  * protected from being moved. They will be moved to the
1032  * @ubi->used tree later and the wear-leveling will be
1033  * triggered again.
1034  */
1035  dbg_wl("cancel WL, a list is empty: free %d, used %d",
1036  !ubi->free.rb_node, !ubi->used.rb_node);
1037  goto out_cancel;
1038  }
1039 
1040 #ifdef CONFIG_MTD_UBI_FASTMAP
1041  /* Check whether we need to produce an anchor PEB */
1042  if (!anchor)
1043  anchor = !anchor_pebs_avalible(&ubi->free);
1044 
1045  if (anchor) {
1046  e1 = find_anchor_wl_entry(&ubi->used);
1047  if (!e1)
1048  goto out_cancel;
1049  e2 = get_peb_for_wl(ubi);
1050  if (!e2)
1051  goto out_cancel;
1052 
1053  self_check_in_wl_tree(ubi, e1, &ubi->used);
1054  rb_erase(&e1->u.rb, &ubi->used);
1055  dbg_wl("anchor-move PEB %d to PEB %d", e1->pnum, e2->pnum);
1056  } else if (!ubi->scrub.rb_node) {
1057 #else
1058  if (!ubi->scrub.rb_node) {
1059 #endif
1060  /*
1061  * Now pick the least worn-out used physical eraseblock and a
1062  * highly worn-out free physical eraseblock. If the erase
1063  * counters differ enough, start wear-leveling.
1064  */
1065  e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
1066  e2 = get_peb_for_wl(ubi);
1067  if (!e2)
1068  goto out_cancel;
1069 
1070  if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
1071  dbg_wl("no WL needed: min used EC %d, max free EC %d",
1072  e1->ec, e2->ec);
1073  goto out_cancel;
1074  }
1075  self_check_in_wl_tree(ubi, e1, &ubi->used);
1076  rb_erase(&e1->u.rb, &ubi->used);
1077  dbg_wl("move PEB %d EC %d to PEB %d EC %d",
1078  e1->pnum, e1->ec, e2->pnum, e2->ec);
1079  } else {
1080  /* Perform scrubbing */
1081  scrubbing = 1;
1082  e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb);
1083  e2 = get_peb_for_wl(ubi);
1084  if (!e2)
1085  goto out_cancel;
1086 
1087  self_check_in_wl_tree(ubi, e1, &ubi->scrub);
1088  rb_erase(&e1->u.rb, &ubi->scrub);
1089  dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum);
1090  }
1091 
1092  ubi->move_from = e1;
1093  ubi->move_to = e2;
1094  spin_unlock(&ubi->wl_lock);
1095 
1096  /*
1097  * Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum.
1098  * We so far do not know which logical eraseblock our physical
1099  * eraseblock (@e1) belongs to. We have to read the volume identifier
1100  * header first.
1101  *
1102  * Note, we are protected from this PEB being unmapped and erased. The
1103  * 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB
1104  * which is being moved was unmapped.
1105  */
1106 
1107  err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0);
1108  if (err && err != UBI_IO_BITFLIPS) {
1109  if (err == UBI_IO_FF) {
1110  /*
1111  * We are trying to move PEB without a VID header. UBI
1112  * always writes VID headers shortly after the PEB was
1113  * given, so we have a situation when it has not yet
1114  * had a chance to write it, because it was preempted.
1115  * So add this PEB to the protection queue for now,
1116  * because presumably more data will be written there
1117  * (including the missing VID header), and then we'll
1118  * move it.
1119  */
1120  dbg_wl("PEB %d has no VID header", e1->pnum);
1121  protect = 1;
1122  goto out_not_moved;
1123  } else if (err == UBI_IO_FF_BITFLIPS) {
1124  /*
1125  * The same situation as %UBI_IO_FF, but bit-flips were
1126  * detected. It is better to schedule this PEB for
1127  * scrubbing.
1128  */
1129  dbg_wl("PEB %d has no VID header but has bit-flips",
1130  e1->pnum);
1131  scrubbing = 1;
1132  goto out_not_moved;
1133  }
1134 
1135  ubi_err("error %d while reading VID header from PEB %d",
1136  err, e1->pnum);
1137  goto out_error;
1138  }
1139 
1140  vol_id = be32_to_cpu(vid_hdr->vol_id);
1141  lnum = be32_to_cpu(vid_hdr->lnum);
1142 
1143  err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr);
1144  if (err) {
1145  if (err == MOVE_CANCEL_RACE) {
1146  /*
1147  * The LEB has not been moved because the volume is
1148  * being deleted or the PEB has been put meanwhile. We
1149  * should prevent this PEB from being selected for
1150  * wear-leveling movement again, so put it to the
1151  * protection queue.
1152  */
1153  protect = 1;
1154  goto out_not_moved;
1155  }
1156  if (err == MOVE_RETRY) {
1157  scrubbing = 1;
1158  goto out_not_moved;
1159  }
1160  if (err == MOVE_TARGET_BITFLIPS || err == MOVE_TARGET_WR_ERR ||
1161  err == MOVE_TARGET_RD_ERR) {
1162  /*
1163  * Target PEB had bit-flips or write error - torture it.
1164  */
1165  torture = 1;
1166  goto out_not_moved;
1167  }
1168 
1169  if (err == MOVE_SOURCE_RD_ERR) {
1170  /*
1171  * An error happened while reading the source PEB. Do
1172  * not switch to R/O mode in this case, and give the
1173  * upper layers a possibility to recover from this,
1174  * e.g. by unmapping corresponding LEB. Instead, just
1175  * put this PEB to the @ubi->erroneous list to prevent
1176  * UBI from trying to move it over and over again.
1177  */
1178  if (ubi->erroneous_peb_count > ubi->max_erroneous) {
1179  ubi_err("too many erroneous eraseblocks (%d)",
1180  ubi->erroneous_peb_count);
1181  goto out_error;
1182  }
1183  erroneous = 1;
1184  goto out_not_moved;
1185  }
1186 
1187  if (err < 0)
1188  goto out_error;
1189 
1190  ubi_assert(0);
1191  }
1192 
1193  /* The PEB has been successfully moved */
1194  if (scrubbing)
1195  ubi_msg("scrubbed PEB %d (LEB %d:%d), data moved to PEB %d",
1196  e1->pnum, vol_id, lnum, e2->pnum);
1197  ubi_free_vid_hdr(ubi, vid_hdr);
1198 
1199  spin_lock(&ubi->wl_lock);
1200  if (!ubi->move_to_put) {
1201  wl_tree_add(e2, &ubi->used);
1202  e2 = NULL;
1203  }
1204  ubi->move_from = ubi->move_to = NULL;
1205  ubi->move_to_put = ubi->wl_scheduled = 0;
1206  spin_unlock(&ubi->wl_lock);
1207 
1208  err = do_sync_erase(ubi, e1, vol_id, lnum, 0);
1209  if (err) {
1210  kmem_cache_free(ubi_wl_entry_slab, e1);
1211  if (e2)
1212  kmem_cache_free(ubi_wl_entry_slab, e2);
1213  goto out_ro;
1214  }
1215 
1216  if (e2) {
1217  /*
1218  * Well, the target PEB was put meanwhile, schedule it for
1219  * erasure.
1220  */
1221  dbg_wl("PEB %d (LEB %d:%d) was put meanwhile, erase",
1222  e2->pnum, vol_id, lnum);
1223  err = do_sync_erase(ubi, e2, vol_id, lnum, 0);
1224  if (err) {
1225  kmem_cache_free(ubi_wl_entry_slab, e2);
1226  goto out_ro;
1227  }
1228  }
1229 
1230  dbg_wl("done");
1231  mutex_unlock(&ubi->move_mutex);
1232  return 0;
1233 
1234  /*
1235  * For some reason the LEB was not moved; it might be an error, or it might be
1236  * something else. @e1 was not changed, so return it back. @e2 might
1237  * have been changed, schedule it for erasure.
1238  */
1239 out_not_moved:
1240  if (vol_id != -1)
1241  dbg_wl("cancel moving PEB %d (LEB %d:%d) to PEB %d (%d)",
1242  e1->pnum, vol_id, lnum, e2->pnum, err);
1243  else
1244  dbg_wl("cancel moving PEB %d to PEB %d (%d)",
1245  e1->pnum, e2->pnum, err);
1246  spin_lock(&ubi->wl_lock);
1247  if (protect)
1248  prot_queue_add(ubi, e1);
1249  else if (erroneous) {
1250  wl_tree_add(e1, &ubi->erroneous);
1251  ubi->erroneous_peb_count += 1;
1252  } else if (scrubbing)
1253  wl_tree_add(e1, &ubi->scrub);
1254  else
1255  wl_tree_add(e1, &ubi->used);
1256  ubi_assert(!ubi->move_to_put);
1257  ubi->move_from = ubi->move_to = NULL;
1258  ubi->wl_scheduled = 0;
1259  spin_unlock(&ubi->wl_lock);
1260 
1261  ubi_free_vid_hdr(ubi, vid_hdr);
1262  err = do_sync_erase(ubi, e2, vol_id, lnum, torture);
1263  if (err) {
1264  kmem_cache_free(ubi_wl_entry_slab, e1);
1265  goto out_ro;
1266  }
1267  mutex_unlock(&ubi->move_mutex);
1268  return 0;
1269 
1270 out_error:
1271  if (vol_id != -1)
1272  ubi_err("error %d while moving PEB %d to PEB %d",
1273  err, e1->pnum, e2->pnum);
1274  else
1275  ubi_err("error %d while moving PEB %d (LEB %d:%d) to PEB %d",
1276  err, e1->pnum, vol_id, lnum, e2->pnum);
1277  spin_lock(&ubi->wl_lock);
1278  ubi->move_from = ubi->move_to = NULL;
1279  ubi->move_to_put = ubi->wl_scheduled = 0;
1280  spin_unlock(&ubi->wl_lock);
1281 
1282  ubi_free_vid_hdr(ubi, vid_hdr);
1283  kmem_cache_free(ubi_wl_entry_slab, e1);
1284  kmem_cache_free(ubi_wl_entry_slab, e2);
1285 
1286 out_ro:
1287  ubi_ro_mode(ubi);
1288  mutex_unlock(&ubi->move_mutex);
1289  ubi_assert(err != 0);
1290  return err < 0 ? err : -EIO;
1291 
1292 out_cancel:
1293  ubi->wl_scheduled = 0;
1294  spin_unlock(&ubi->wl_lock);
1295  mutex_unlock(&ubi->move_mutex);
1296  ubi_free_vid_hdr(ubi, vid_hdr);
1297  return 0;
1298 }
1299 
1309 static int ensure_wear_leveling(struct ubi_device *ubi, int nested)
1310 {
1311  int err = 0;
1312  struct ubi_wl_entry *e1;
1313  struct ubi_wl_entry *e2;
1314  struct ubi_work *wrk;
1315 
1316  spin_lock(&ubi->wl_lock);
1317  if (ubi->wl_scheduled)
1318  /* Wear-leveling is already in the work queue */
1319  goto out_unlock;
1320 
1321  /*
1322  * If the ubi->scrub tree is not empty, scrubbing is needed, and the
1323  * WL worker has to be scheduled anyway.
1324  */
1325  if (!ubi->scrub.rb_node) {
1326  if (!ubi->used.rb_node || !ubi->free.rb_node)
1327  /* No physical eraseblocks - no deal */
1328  goto out_unlock;
1329 
1330  /*
1331  * We schedule wear-leveling only if the difference between the
1332  * lowest erase counter of used physical eraseblocks and a high
1333  * erase counter of free physical eraseblocks is greater than
1334  * %UBI_WL_THRESHOLD.
1335  */
1336  e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
1337  e2 = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF);
1338 
1339  if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD))
1340  goto out_unlock;
1341  dbg_wl("schedule wear-leveling");
1342  } else
1343  dbg_wl("schedule scrubbing");
1344 
1345  ubi->wl_scheduled = 1;
1346  spin_unlock(&ubi->wl_lock);
1347 
1348  wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
1349  if (!wrk) {
1350  err = -ENOMEM;
1351  goto out_cancel;
1352  }
1353 
1354  wrk->anchor = 0;
1355  wrk->func = &wear_leveling_worker;
1356  if (nested)
1357  __schedule_ubi_work(ubi, wrk);
1358  else
1359  schedule_ubi_work(ubi, wrk);
1360  return err;
1361 
1362 out_cancel:
1363  spin_lock(&ubi->wl_lock);
1364  ubi->wl_scheduled = 0;
1365 out_unlock:
1366  spin_unlock(&ubi->wl_lock);
1367  return err;
1368 }
1369 
1370 #ifdef CONFIG_MTD_UBI_FASTMAP
1371 
1375 int ubi_ensure_anchor_pebs(struct ubi_device *ubi)
1376 {
1377  struct ubi_work *wrk;
1378 
1379  spin_lock(&ubi->wl_lock);
1380  if (ubi->wl_scheduled) {
1381  spin_unlock(&ubi->wl_lock);
1382  return 0;
1383  }
1384  ubi->wl_scheduled = 1;
1385  spin_unlock(&ubi->wl_lock);
1386 
1387  wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
1388  if (!wrk) {
1389  spin_lock(&ubi->wl_lock);
1390  ubi->wl_scheduled = 0;
1391  spin_unlock(&ubi->wl_lock);
1392  return -ENOMEM;
1393  }
1394 
1395  wrk->anchor = 1;
1396  wrk->func = &wear_leveling_worker;
1397  schedule_ubi_work(ubi, wrk);
1398  return 0;
1399 }
1400 #endif
1401 
1413 static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
1414  int cancel)
1415 {
1416  struct ubi_wl_entry *e = wl_wrk->e;
1417  int pnum = e->pnum;
1418  int vol_id = wl_wrk->vol_id;
1419  int lnum = wl_wrk->lnum;
1420  int err, available_consumed = 0;
1421 
1422  if (cancel) {
1423  dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec);
1424  kfree(wl_wrk);
1425  kmem_cache_free(ubi_wl_entry_slab, e);
1426  return 0;
1427  }
1428 
1429  dbg_wl("erase PEB %d EC %d LEB %d:%d",
1430  pnum, e->ec, wl_wrk->vol_id, wl_wrk->lnum);
1431 
1432  ubi_assert(!ubi_is_fm_block(ubi, e->pnum));
1433 
1434  err = sync_erase(ubi, e, wl_wrk->torture);
1435  if (!err) {
1436  /* Fine, we've erased it successfully */
1437  kfree(wl_wrk);
1438 
1439  spin_lock(&ubi->wl_lock);
1440  wl_tree_add(e, &ubi->free);
1441  ubi->free_count++;
1442  spin_unlock(&ubi->wl_lock);
1443 
1444  /*
1445  * One more erase operation has happened, take care about
1446  * protected physical eraseblocks.
1447  */
1448  serve_prot_queue(ubi);
1449 
1450  /* And take care about wear-leveling */
1451  err = ensure_wear_leveling(ubi, 1);
1452  return err;
1453  }
1454 
1455  ubi_err("failed to erase PEB %d, error %d", pnum, err);
1456  kfree(wl_wrk);
1457 
1458  if (err == -EINTR || err == -ENOMEM || err == -EAGAIN ||
1459  err == -EBUSY) {
1460  int err1;
1461 
1462  /* Re-schedule the LEB for erasure */
1463  err1 = schedule_erase(ubi, e, vol_id, lnum, 0);
1464  if (err1) {
1465  err = err1;
1466  goto out_ro;
1467  }
1468  return err;
1469  }
1470 
1471  kmem_cache_free(ubi_wl_entry_slab, e);
1472  if (err != -EIO)
1473  /*
1474  * If this is not %-EIO, we have no idea what to do. Scheduling
1475  * this physical eraseblock for erasure again would cause
1476  * errors again and again. Well, let's switch to R/O mode.
1477  */
1478  goto out_ro;
1479 
1480  /* It is %-EIO, the PEB went bad */
1481 
1482  if (!ubi->bad_allowed) {
1483  ubi_err("bad physical eraseblock %d detected", pnum);
1484  goto out_ro;
1485  }
1486 
1487  spin_lock(&ubi->volumes_lock);
1488  if (ubi->beb_rsvd_pebs == 0) {
1489  if (ubi->avail_pebs == 0) {
1490  spin_unlock(&ubi->volumes_lock);
1491  ubi_err("no reserved/available physical eraseblocks");
1492  goto out_ro;
1493  }
1494  ubi->avail_pebs -= 1;
1495  available_consumed = 1;
1496  }
1497  spin_unlock(&ubi->volumes_lock);
1498 
1499  ubi_msg("mark PEB %d as bad", pnum);
1500  err = ubi_io_mark_bad(ubi, pnum);
1501  if (err)
1502  goto out_ro;
1503 
1504  spin_lock(&ubi->volumes_lock);
1505  if (ubi->beb_rsvd_pebs > 0) {
1506  if (available_consumed) {
1507  /*
1508  * The amount of reserved PEBs increased since we last
1509  * checked.
1510  */
1511  ubi->avail_pebs += 1;
1512  available_consumed = 0;
1513  }
1514  ubi->beb_rsvd_pebs -= 1;
1515  }
1516  ubi->bad_peb_count += 1;
1517  ubi->good_peb_count -= 1;
1518  ubi_calculate_reserved(ubi);
1519  if (available_consumed)
1520  ubi_warn("no PEBs in the reserved pool, used an available PEB");
1521  else if (ubi->beb_rsvd_pebs)
1522  ubi_msg("%d PEBs left in the reserve", ubi->beb_rsvd_pebs);
1523  else
1524  ubi_warn("last PEB from the reserve was used");
1525  spin_unlock(&ubi->volumes_lock);
1526 
1527  return err;
1528 
1529 out_ro:
1530  if (available_consumed) {
1531  spin_lock(&ubi->volumes_lock);
1532  ubi->avail_pebs += 1;
1533  spin_unlock(&ubi->volumes_lock);
1534  }
1535  ubi_ro_mode(ubi);
1536  return err;
1537 }
1538 
1552 int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum,
1553  int pnum, int torture)
1554 {
1555  int err;
1556  struct ubi_wl_entry *e;
1557 
1558  dbg_wl("PEB %d", pnum);
1559  ubi_assert(pnum >= 0);
1560  ubi_assert(pnum < ubi->peb_count);
1561 
1562 retry:
1563  spin_lock(&ubi->wl_lock);
1564  e = ubi->lookuptbl[pnum];
1565  if (e == ubi->move_from) {
1566  /*
1567  * User is putting the physical eraseblock which was selected to
1568  * be moved. It will be scheduled for erasure in the
1569  * wear-leveling worker.
1570  */
1571  dbg_wl("PEB %d is being moved, wait", pnum);
1572  spin_unlock(&ubi->wl_lock);
1573 
1574  /* Wait for the WL worker by taking the @ubi->move_mutex */
1575  mutex_lock(&ubi->move_mutex);
1576  mutex_unlock(&ubi->move_mutex);
1577  goto retry;
1578  } else if (e == ubi->move_to) {
1579  /*
1580  * User is putting the physical eraseblock which was selected
1581  * as the target the data is moved to. It may happen if the EBA
1582  * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()'
1583  * but the WL sub-system has not put the PEB to the "used" tree
1584  * yet, but it is about to do this. So we just set a flag which
1585  * will tell the WL worker that the PEB is not needed anymore
1586  * and should be scheduled for erasure.
1587  */
1588  dbg_wl("PEB %d is the target of data moving", pnum);
1589  ubi_assert(!ubi->move_to_put);
1590  ubi->move_to_put = 1;
1591  spin_unlock(&ubi->wl_lock);
1592  return 0;
1593  } else {
1594  if (in_wl_tree(e, &ubi->used)) {
1595  self_check_in_wl_tree(ubi, e, &ubi->used);
1596  rb_erase(&e->u.rb, &ubi->used);
1597  } else if (in_wl_tree(e, &ubi->scrub)) {
1598  self_check_in_wl_tree(ubi, e, &ubi->scrub);
1599  rb_erase(&e->u.rb, &ubi->scrub);
1600  } else if (in_wl_tree(e, &ubi->erroneous)) {
1601  self_check_in_wl_tree(ubi, e, &ubi->erroneous);
1602  rb_erase(&e->u.rb, &ubi->erroneous);
1603  ubi->erroneous_peb_count -= 1;
1604  ubi_assert(ubi->erroneous_peb_count >= 0);
1605  /* Erroneous PEBs should be tortured */
1606  torture = 1;
1607  } else {
1608  err = prot_queue_del(ubi, e->pnum);
1609  if (err) {
1610  ubi_err("PEB %d not found", pnum);
1611  ubi_ro_mode(ubi);
1612  spin_unlock(&ubi->wl_lock);
1613  return err;
1614  }
1615  }
1616  }
1617  spin_unlock(&ubi->wl_lock);
1618 
1619  err = schedule_erase(ubi, e, vol_id, lnum, torture);
1620  if (err) {
1621  spin_lock(&ubi->wl_lock);
1622  wl_tree_add(e, &ubi->used);
1623  spin_unlock(&ubi->wl_lock);
1624  }
1625 
1626  return err;
1627 }
1628 
1639 int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
1640 {
1641  struct ubi_wl_entry *e;
1642 
1643  ubi_msg("schedule PEB %d for scrubbing", pnum);
1644 
1645 retry:
1646  spin_lock(&ubi->wl_lock);
1647  e = ubi->lookuptbl[pnum];
1648  if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub) ||
1649  in_wl_tree(e, &ubi->erroneous)) {
1650  spin_unlock(&ubi->wl_lock);
1651  return 0;
1652  }
1653 
1654  if (e == ubi->move_to) {
1655  /*
1656  * This physical eraseblock was used to move data to. The data
1657  * was moved but the PEB was not yet inserted into the proper
1658  * tree. We should just wait a little and let the WL worker
1659  * proceed.
1660  */
1661  spin_unlock(&ubi->wl_lock);
1662  dbg_wl("the PEB %d is not in proper tree, retry", pnum);
1663  yield();
1664  goto retry;
1665  }
1666 
1667  if (in_wl_tree(e, &ubi->used)) {
1668  self_check_in_wl_tree(ubi, e, &ubi->used);
1669  rb_erase(&e->u.rb, &ubi->used);
1670  } else {
1671  int err;
1672 
1673  err = prot_queue_del(ubi, e->pnum);
1674  if (err) {
1675  ubi_err("PEB %d not found", pnum);
1676  ubi_ro_mode(ubi);
1677  spin_unlock(&ubi->wl_lock);
1678  return err;
1679  }
1680  }
1681 
1682  wl_tree_add(e, &ubi->scrub);
1683  spin_unlock(&ubi->wl_lock);
1684 
1685  /*
1686  * Technically scrubbing is the same as wear-leveling, so it is done
1687  * by the WL worker.
1688  */
1689  return ensure_wear_leveling(ubi, 0);
1690 }
1691 
1704 int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum)
1705 {
1706  int err = 0;
1707  int found = 1;
1708 
1709  /*
1710  * Erase while the pending works queue is not empty, but not more than
1711  * the number of currently pending works.
1712  */
1713  dbg_wl("flush pending work for LEB %d:%d (%d pending works)",
1714  vol_id, lnum, ubi->works_count);
1715 
1716  while (found) {
1717  struct ubi_work *wrk;
1718  found = 0;
1719 
1720  down_read(&ubi->work_sem);
1721  spin_lock(&ubi->wl_lock);
1722  list_for_each_entry(wrk, &ubi->works, list) {
1723  if ((vol_id == UBI_ALL || wrk->vol_id == vol_id) &&
1724  (lnum == UBI_ALL || wrk->lnum == lnum)) {
1725  list_del(&wrk->list);
1726  ubi->works_count -= 1;
1727  ubi_assert(ubi->works_count >= 0);
1728  spin_unlock(&ubi->wl_lock);
1729 
1730  err = wrk->func(ubi, wrk, 0);
1731  if (err) {
1732  up_read(&ubi->work_sem);
1733  return err;
1734  }
1735 
1736  spin_lock(&ubi->wl_lock);
1737  found = 1;
1738  break;
1739  }
1740  }
1741  spin_unlock(&ubi->wl_lock);
1742  up_read(&ubi->work_sem);
1743  }
1744 
1745  /*
1746  * Make sure all the works which have been done in parallel are
1747  * finished.
1748  */
1749  down_write(&ubi->work_sem);
1750  up_write(&ubi->work_sem);
1751 
1752  return err;
1753 }
1754 
1759 static void tree_destroy(struct rb_root *root)
1760 {
1761  struct rb_node *rb;
1762  struct ubi_wl_entry *e;
1763 
1764  rb = root->rb_node;
1765  while (rb) {
1766  if (rb->rb_left)
1767  rb = rb->rb_left;
1768  else if (rb->rb_right)
1769  rb = rb->rb_right;
1770  else {
1771  e = rb_entry(rb, struct ubi_wl_entry, u.rb);
1772 
1773  rb = rb_parent(rb);
1774  if (rb) {
1775  if (rb->rb_left == &e->u.rb)
1776  rb->rb_left = NULL;
1777  else
1778  rb->rb_right = NULL;
1779  }
1780 
1781  kmem_cache_free(ubi_wl_entry_slab, e);
1782  }
1783  }
1784 }
1785 
1790 int ubi_thread(void *u)
1791 {
1792  int failures = 0;
1793  struct ubi_device *ubi = u;
1794 
1795  ubi_msg("background thread \"%s\" started, PID %d",
1796  ubi->bgt_name, task_pid_nr(current));
1797 
1798  set_freezable();
1799  for (;;) {
1800  int err;
1801 
1802  if (kthread_should_stop())
1803  break;
1804 
1805  if (try_to_freeze())
1806  continue;
1807 
1808  spin_lock(&ubi->wl_lock);
1809  if (list_empty(&ubi->works) || ubi->ro_mode ||
1810  !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) {
1811  set_current_state(TASK_INTERRUPTIBLE);
1812  spin_unlock(&ubi->wl_lock);
1813  schedule();
1814  continue;
1815  }
1816  spin_unlock(&ubi->wl_lock);
1817 
1818  err = do_work(ubi);
1819  if (err) {
1820  ubi_err("%s: work failed with error code %d",
1821  ubi->bgt_name, err);
1822  if (failures++ > WL_MAX_FAILURES) {
1823  /*
1824  * Too many failures, disable the thread and
1825  * switch to read-only mode.
1826  */
1827  ubi_msg("%s: %d consecutive failures",
1828  ubi->bgt_name, WL_MAX_FAILURES);
1829  ubi_ro_mode(ubi);
1830  ubi->thread_enabled = 0;
1831  continue;
1832  }
1833  } else
1834  failures = 0;
1835 
1836  cond_resched();
1837  }
1838 
1839  dbg_wl("background thread \"%s\" is killed", ubi->bgt_name);
1840  return 0;
1841 }
1842 
1847 static void cancel_pending(struct ubi_device *ubi)
1848 {
1849  while (!list_empty(&ubi->works)) {
1850  struct ubi_work *wrk;
1851 
1852  wrk = list_entry(ubi->works.next, struct ubi_work, list);
1853  list_del(&wrk->list);
1854  wrk->func(ubi, wrk, 1);
1855  ubi->works_count -= 1;
1856  ubi_assert(ubi->works_count >= 0);
1857  }
1858 }
1859 
1868 int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
1869 {
1870  int err, i, reserved_pebs, found_pebs = 0;
1871  struct rb_node *rb1, *rb2;
1872  struct ubi_ainf_volume *av;
1873  struct ubi_ainf_peb *aeb, *tmp;
1874  struct ubi_wl_entry *e;
1875 
1876  ubi->used = ubi->erroneous = ubi->free = ubi->scrub = RB_ROOT;
1877  spin_lock_init(&ubi->wl_lock);
1878  mutex_init(&ubi->move_mutex);
1879  init_rwsem(&ubi->work_sem);
1880  ubi->max_ec = ai->max_ec;
1881  INIT_LIST_HEAD(&ubi->works);
1882 #ifdef CONFIG_MTD_UBI_FASTMAP
1883  INIT_WORK(&ubi->fm_work, update_fastmap_work_fn);
1884 #endif
1885 
1886  sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num);
1887 
1888  err = -ENOMEM;
1889  ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL);
1890  if (!ubi->lookuptbl)
1891  return err;
1892 
1893  for (i = 0; i < UBI_PROT_QUEUE_LEN; i++)
1894  INIT_LIST_HEAD(&ubi->pq[i]);
1895  ubi->pq_head = 0;
1896 
1897  list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) {
1898  cond_resched();
1899 
1900  e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
1901  if (!e)
1902  goto out_free;
1903 
1904  e->pnum = aeb->pnum;
1905  e->ec = aeb->ec;
1906  ubi_assert(!ubi_is_fm_block(ubi, e->pnum));
1907  ubi->lookuptbl[e->pnum] = e;
1908  if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) {
1909  kmem_cache_free(ubi_wl_entry_slab, e);
1910  goto out_free;
1911  }
1912 
1913  found_pebs++;
1914  }
1915 
1916  ubi->free_count = 0;
1917  list_for_each_entry(aeb, &ai->free, u.list) {
1918  cond_resched();
1919 
1920  e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
1921  if (!e)
1922  goto out_free;
1923 
1924  e->pnum = aeb->pnum;
1925  e->ec = aeb->ec;
1926  ubi_assert(e->ec >= 0);
1927  ubi_assert(!ubi_is_fm_block(ubi, e->pnum));
1928 
1929  wl_tree_add(e, &ubi->free);
1930  ubi->free_count++;
1931 
1932  ubi->lookuptbl[e->pnum] = e;
1933 
1934  found_pebs++;
1935  }
1936 
1937  ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) {
1938  ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) {
1939  cond_resched();
1940 
1941  e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
1942  if (!e)
1943  goto out_free;
1944 
1945  e->pnum = aeb->pnum;
1946  e->ec = aeb->ec;
1947  ubi->lookuptbl[e->pnum] = e;
1948 
1949  if (!aeb->scrub) {
1950  dbg_wl("add PEB %d EC %d to the used tree",
1951  e->pnum, e->ec);
1952  wl_tree_add(e, &ubi->used);
1953  } else {
1954  dbg_wl("add PEB %d EC %d to the scrub tree",
1955  e->pnum, e->ec);
1956  wl_tree_add(e, &ubi->scrub);
1957  }
1958 
1959  found_pebs++;
1960  }
1961  }
1962 
1963  dbg_wl("found %i PEBs", found_pebs);
1964 
1965  if (ubi->fm)
1966  ubi_assert(ubi->good_peb_count == \
1967  found_pebs + ubi->fm->used_blocks);
1968  else
1969  ubi_assert(ubi->good_peb_count == found_pebs);
1970 
1971  reserved_pebs = WL_RESERVED_PEBS;
1972 #ifdef CONFIG_MTD_UBI_FASTMAP
1973  /* Reserve enough LEBs to store two fastmaps. */
1974  reserved_pebs += (ubi->fm_size / ubi->leb_size) * 2;
1975 #endif
1976 
1977  if (ubi->avail_pebs < reserved_pebs) {
1978  ubi_err("no enough physical eraseblocks (%d, need %d)",
1979  ubi->avail_pebs, reserved_pebs);
1980  if (ubi->corr_peb_count)
1981  ubi_err("%d PEBs are corrupted and not used",
1982  ubi->corr_peb_count);
1983  goto out_free;
1984  }
1985  ubi->avail_pebs -= reserved_pebs;
1986  ubi->rsvd_pebs += reserved_pebs;
1987 
1988  /* Schedule wear-leveling if needed */
1989  err = ensure_wear_leveling(ubi, 0);
1990  if (err)
1991  goto out_free;
1992 
1993  return 0;
1994 
1995 out_free:
1996  cancel_pending(ubi);
1997  tree_destroy(&ubi->used);
1998  tree_destroy(&ubi->free);
1999  tree_destroy(&ubi->scrub);
2000  kfree(ubi->lookuptbl);
2001  return err;
2002 }
2003 
2008 static void protection_queue_destroy(struct ubi_device *ubi)
2009 {
2010  int i;
2011  struct ubi_wl_entry *e, *tmp;
2012 
2013  for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) {
2014  list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) {
2015  list_del(&e->u.list);
2016  kmem_cache_free(ubi_wl_entry_slab, e);
2017  }
2018  }
2019 }
2020 
2025 void ubi_wl_close(struct ubi_device *ubi)
2026 {
2027  dbg_wl("close the WL sub-system");
2028  cancel_pending(ubi);
2029  protection_queue_destroy(ubi);
2030  tree_destroy(&ubi->used);
2031  tree_destroy(&ubi->erroneous);
2032  tree_destroy(&ubi->free);
2033  tree_destroy(&ubi->scrub);
2034  kfree(ubi->lookuptbl);
2035 }
2036 
2047 static int self_check_ec(struct ubi_device *ubi, int pnum, int ec)
2048 {
2049  int err;
2050  long long read_ec;
2051  struct ubi_ec_hdr *ec_hdr;
2052 
2053  if (!ubi->dbg->chk_gen)
2054  return 0;
2055 
2056  ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
2057  if (!ec_hdr)
2058  return -ENOMEM;
2059 
2060  err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0);
2061  if (err && err != UBI_IO_BITFLIPS) {
2062  /* The header does not have to exist */
2063  err = 0;
2064  goto out_free;
2065  }
2066 
2067  read_ec = be64_to_cpu(ec_hdr->ec);
2068  if (ec != read_ec && read_ec - ec > 1) {
2069  ubi_err("self-check failed for PEB %d", pnum);
2070  ubi_err("read EC is %lld, should be %d", read_ec, ec);
2071  dump_stack();
2072  err = 1;
2073  } else
2074  err = 0;
2075 
2076 out_free:
2077  kfree(ec_hdr);
2078  return err;
2079 }
2080 
2090 static int self_check_in_wl_tree(const struct ubi_device *ubi,
2091  struct ubi_wl_entry *e, struct rb_root *root)
2092 {
2093  if (!ubi->dbg->chk_gen)
2094  return 0;
2095 
2096  if (in_wl_tree(e, root))
2097  return 0;
2098 
2099  ubi_err("self-check failed for PEB %d, EC %d, RB-tree %p ",
2100  e->pnum, e->ec, root);
2101  dump_stack();
2102  return -EINVAL;
2103 }
2104 
2113 static int self_check_in_pq(const struct ubi_device *ubi,
2114  struct ubi_wl_entry *e)
2115 {
2116  struct ubi_wl_entry *p;
2117  int i;
2118 
2119  if (!ubi->dbg->chk_gen)
2120  return 0;
2121 
2122  for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i)
2123  list_for_each_entry(p, &ubi->pq[i], u.list)
2124  if (p == e)
2125  return 0;
2126 
2127  ubi_err("self-check failed for PEB %d, EC %d, Protect queue",
2128  e->pnum, e->ec);
2129  dump_stack();
2130  return -EINVAL;
2131 }