Linux Kernel 3.7.1
budget.c
/*
 * This file is part of UBIFS.
 *
 * Copyright (C) 2006-2008 Nokia Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Authors: Adrian Hunter
 *          Artem Bityutskiy (Битюцкий Артём)
 */

/*
 * This file implements the budgeting sub-system which is responsible for UBIFS
 * space management.
 *
 * Factors such as compression, wasted space at the ends of LEBs, space in other
 * journal heads, the effect of updates on the index, and so on, make it
 * impossible to accurately predict the amount of space needed. Consequently,
 * approximations are used.
 */

#include "ubifs.h"
#include <linux/writeback.h>
#include <linux/math64.h>

/*
 * When pessimistic budget calculations say that there is not enough space,
 * UBIFS starts writing back dirty inodes and pages, doing garbage collection,
 * or committing. The constant below defines the maximum number of times UBIFS
 * repeats these operations.
 */
#define MAX_MKSPC_RETRIES 3

/*
 * The constant below defines the number of dirty pages which should be written
 * back when trying to shrink the liability.
 */
#define NR_TO_WRITE 16

/**
 * shrink_liability - write-back some dirty pages/inodes.
 * @c: UBIFS file-system description object
 * @nr_to_write: how many dirty pages to write-back
 *
 * This function shrinks UBIFS liability by writing back some amount of dirty
 * inodes and their dirty pages.
 */
static void shrink_liability(struct ubifs_info *c, int nr_to_write)
{
        down_read(&c->vfs_sb->s_umount);
        writeback_inodes_sb(c->vfs_sb, WB_REASON_FS_FREE_SPACE);
        up_read(&c->vfs_sb->s_umount);
}

/**
 * run_gc - run garbage collector.
 * @c: UBIFS file-system description object
 *
 * This function runs the garbage collector to make some more free space.
 * Returns zero if a free LEB was produced and a negative error code in case
 * of failure.
 */
static int run_gc(struct ubifs_info *c)
{
        int err, lnum;

        /* Make some free space by garbage-collecting dirty space */
        down_read(&c->commit_sem);
        lnum = ubifs_garbage_collect(c, 1);
        up_read(&c->commit_sem);
        if (lnum < 0)
                return lnum;

        /* GC freed one LEB, return it to lprops */
        dbg_budg("GC freed LEB %d", lnum);
        err = ubifs_return_leb(c, lnum);
        if (err)
                return err;
        return 0;
}

/**
 * get_liability - calculate current liability.
 * @c: UBIFS file-system description object
 *
 * This function calculates and returns the current UBIFS liability, i.e. the
 * amount of bytes UBIFS has "promised" to write to the media.
 */
static long long get_liability(struct ubifs_info *c)
{
        long long liab;

        spin_lock(&c->space_lock);
        liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth;
        spin_unlock(&c->space_lock);
        return liab;
}

/**
 * make_free_space - make more free space on the file-system.
 * @c: UBIFS file-system description object
 *
 * This function is called when an operation cannot be budgeted because there
 * is supposedly no free space. It tries write-back, garbage collection and
 * committing, in that order. Returns %-EAGAIN if it made some progress (so
 * the caller should re-try budgeting), %-ENOSPC if it could not free
 * anything, and other negative error codes in case of failure.
 */
static int make_free_space(struct ubifs_info *c)
{
        int err, retries = 0;
        long long liab1, liab2;

        do {
                liab1 = get_liability(c);
                /*
                 * We probably have some dirty pages or inodes (liability), try
                 * to write them back.
                 */
                dbg_budg("liability %lld, run write-back", liab1);
                shrink_liability(c, NR_TO_WRITE);

                liab2 = get_liability(c);
                if (liab2 < liab1)
                        return -EAGAIN;

                dbg_budg("new liability %lld (not shrunk)", liab2);

                /* Liability did not shrink, so try GC next */
                dbg_budg("Run GC");
                err = run_gc(c);
                if (!err)
                        return -EAGAIN;

                if (err != -EAGAIN && err != -ENOSPC)
                        /* Some real error happened */
                        return err;

                dbg_budg("Run commit (retries %d)", retries);
                err = ubifs_run_commit(c);
                if (err)
                        return err;
        } while (retries++ < MAX_MKSPC_RETRIES);

        return -ENOSPC;
}
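
/*
 * Illustrative trace of make_free_space() (the numbers are invented for the
 * example): suppose the liability is 4 MiB. On the first pass, write-back
 * shrinks it to 3 MiB, so liab2 < liab1 and -EAGAIN is returned, telling the
 * caller to re-try budgeting. Had write-back not helped, a successful GC pass
 * would also yield -EAGAIN. Only when write-back, GC and commit all fail to
 * make progress for MAX_MKSPC_RETRIES passes does the function give up and
 * return -ENOSPC.
 */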

/**
 * ubifs_calc_min_idx_lebs - calculate amount of LEBs for the index.
 * @c: UBIFS file-system description object
 *
 * This function calculates and returns the number of LEBs which should be
 * kept for index usage.
 */
int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
{
        int idx_lebs;
        long long idx_size;

        idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx;
        /* And make sure we have thrice the index size of space reserved */
        idx_size += idx_size << 1;
        /*
         * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes'
         * pair, nor similarly the two variables for the new index size, so we
         * have to do this costly 64-bit division on the fast path.
         */
        idx_lebs = div_u64(idx_size + c->idx_leb_size - 1, c->idx_leb_size);
        /*
         * The index head is not available for the in-the-gaps method, so add an
         * extra LEB to compensate.
         */
        idx_lebs += 1;
        if (idx_lebs < MIN_INDEX_LEBS)
                idx_lebs = MIN_INDEX_LEBS;
        return idx_lebs;
}
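
/*
 * Worked example with hypothetical numbers: if old_idx_sz + idx_growth +
 * uncommitted_idx = 1000000 bytes, tripling gives idx_size = 3000000. With
 * an idx_leb_size of 126976 bytes, the rounding-up division yields
 * (3000000 + 126975) / 126976 = 24 LEBs, plus one more for the index head,
 * i.e. 25. Since this is not below MIN_INDEX_LEBS, 25 is returned.
 */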

/**
 * ubifs_calc_available - calculate available FS space.
 * @c: UBIFS file-system description object
 * @min_idx_lebs: minimum number of LEBs reserved for the index
 *
 * This function calculates and returns the amount of FS space available for
 * use.
 */
long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
{
        int subtract_lebs;
        long long available;

        available = c->main_bytes - c->lst.total_used;

        /*
         * Now 'available' contains theoretically available flash space
         * assuming there is no index, so we have to subtract the space which
         * is reserved for the index.
         */
        subtract_lebs = min_idx_lebs;

        /* Take into account that GC reserves one LEB for its own needs */
        subtract_lebs += 1;

        /*
         * The GC journal head LEB is not really accessible. And since
         * different write types go to different heads, we may count only on
         * one head's space.
         */
        subtract_lebs += c->jhead_cnt - 1;

        /* We also reserve one LEB for deletions, which bypass budgeting */
        subtract_lebs += 1;

        available -= (long long)subtract_lebs * c->leb_size;

        /* Subtract the dead space which is not available for use */
        available -= c->lst.total_dead;

        /*
         * Subtract dark space, which might or might not be usable - it depends
         * on the data which we have on the media and which will be written. If
         * this is a lot of uncompressed or incompressible data, the dark
         * space cannot be used.
         */
        available -= c->lst.total_dark;

        /*
         * However, there is more dark space. The index may be bigger than
         * @min_idx_lebs. Those extra LEBs are assumed to be available, but
         * their dark space is not included in total_dark, so it is subtracted
         * here.
         */
        if (c->lst.idx_lebs > min_idx_lebs) {
                subtract_lebs = c->lst.idx_lebs - min_idx_lebs;
                available -= subtract_lebs * c->dark_wm;
        }

        /* The calculations are rough and may end up with a negative number */
        return available > 0 ? available : 0;
}
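
/*
 * Hypothetical example of the subtraction above: with main_bytes = 100 MiB
 * and total_used = 40 MiB, 60 MiB look free. Assuming min_idx_lebs = 10,
 * jhead_cnt = 3 and leb_size = 128 KiB, subtract_lebs = 10 (index) + 1 (GC)
 * + 2 (non-GC journal heads) + 1 (deletions) = 14 LEBs, i.e. 1.75 MiB, and
 * total_dead and total_dark come off on top of that.
 */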

/**
 * can_use_rp - check whether the user is allowed to use the reserved pool.
 * @c: UBIFS file-system description object
 *
 * UBIFS has a so-called "reserved pool" - flash space reserved for users
 * whose fsuid matches @c->rp_uid, for processes with %CAP_SYS_RESOURCE, and
 * for members of the @c->rp_gid group. Returns %1 if the current user is
 * allowed to use the reserved pool and %0 otherwise.
 */
static int can_use_rp(struct ubifs_info *c)
{
        if (uid_eq(current_fsuid(), c->rp_uid) || capable(CAP_SYS_RESOURCE) ||
            (!gid_eq(c->rp_gid, GLOBAL_ROOT_GID) && in_group_p(c->rp_gid)))
                return 1;
        return 0;
}

/**
 * do_budget_space - reserve flash space for index and data growth.
 * @c: UBIFS file-system description object
 *
 * This function makes sure UBIFS has enough free LEBs for index growth and
 * data. Returns zero in case of success and %-ENOSPC in case of failure.
 */
static int do_budget_space(struct ubifs_info *c)
{
        long long outstanding, available;
        int lebs, rsvd_idx_lebs, min_idx_lebs;

        /* First budget index space */
        min_idx_lebs = ubifs_calc_min_idx_lebs(c);

        /* Now 'min_idx_lebs' contains the number of LEBs to reserve */
        if (min_idx_lebs > c->lst.idx_lebs)
                rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
        else
                rsvd_idx_lebs = 0;

        /*
         * The number of LEBs that are available to be used by the index is:
         *
         *    @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt -
         *    @c->lst.taken_empty_lebs
         *
         * @c->lst.empty_lebs are available because they are empty.
         * @c->freeable_cnt are available because they contain only free and
         * dirty space, @c->idx_gc_cnt are available because they are index
         * LEBs that have been garbage collected and are awaiting the commit
         * before they can be used - and the in-the-gaps method will grab these
         * if it needs them. @c->lst.taken_empty_lebs are empty LEBs that have
         * already been allocated for some purpose.
         *
         * Note, @c->idx_gc_cnt is included in both @c->lst.empty_lebs (because
         * these LEBs are empty) and in @c->lst.taken_empty_lebs (because they
         * are taken until after the commit).
         *
         * Note, @c->lst.taken_empty_lebs may temporarily be higher by one
         * because of the way we serialize LEB allocations and budgeting. See a
         * comment in 'ubifs_find_free_space()'.
         */
        lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
               c->lst.taken_empty_lebs;
        if (unlikely(rsvd_idx_lebs > lebs)) {
                dbg_budg("out of indexing space: min_idx_lebs %d (old %d), rsvd_idx_lebs %d",
                         min_idx_lebs, c->bi.min_idx_lebs, rsvd_idx_lebs);
                return -ENOSPC;
        }

        available = ubifs_calc_available(c, min_idx_lebs);
        outstanding = c->bi.data_growth + c->bi.dd_growth;

        if (unlikely(available < outstanding)) {
                dbg_budg("out of data space: available %lld, outstanding %lld",
                         available, outstanding);
                return -ENOSPC;
        }

        if (available - outstanding <= c->rp_size && !can_use_rp(c))
                return -ENOSPC;

        c->bi.min_idx_lebs = min_idx_lebs;
        return 0;
}
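
/*
 * Illustrative numbers for the 'lebs' computation above: with
 * empty_lebs = 12, freeable_cnt = 3, idx_gc_cnt = 1 and taken_empty_lebs = 2,
 * 'lebs' is 14. If the index needs rsvd_idx_lebs = 5 more LEBs, indexing
 * space is sufficient (5 <= 14) and the function goes on to compare
 * 'available' with the outstanding data and dirty-data budget.
 */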

/**
 * calc_idx_growth - calculate approximate index growth from a budgeting
 * request.
 * @c: UBIFS file-system description object
 * @req: budgeting request
 *
 * For now we assume that each new node adds one znode, which is a rather
 * rough approximation.
 */
static int calc_idx_growth(const struct ubifs_info *c,
                           const struct ubifs_budget_req *req)
{
        int znodes;

        znodes = req->new_ino + (req->new_page << UBIFS_BLOCKS_PER_PAGE_SHIFT) +
                 req->new_dent;
        return znodes * c->max_idx_node_sz;
}
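
/*
 * Example with assumed parameters: on a machine with 4 KiB pages,
 * UBIFS_BLOCKS_PER_PAGE_SHIFT is 0 (one 4 KiB UBIFS block per page), so a
 * request with new_ino = 1, new_page = 1 and new_dent = 1 gives znodes = 3
 * and an index growth estimate of 3 * @c->max_idx_node_sz bytes. On 64 KiB
 * pages the shift is 4, and the same request counts 1 + 16 + 1 = 18 znodes.
 */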

/**
 * calc_data_growth - calculate approximate amount of new data from a
 * budgeting request.
 * @c: UBIFS file-system description object
 * @req: budgeting request
 */
static int calc_data_growth(const struct ubifs_info *c,
                            const struct ubifs_budget_req *req)
{
        int data_growth;

        data_growth = req->new_ino ? c->bi.inode_budget : 0;
        if (req->new_page)
                data_growth += c->bi.page_budget;
        if (req->new_dent)
                data_growth += c->bi.dent_budget;
        data_growth += req->new_ino_d;
        return data_growth;
}

/**
 * calc_dd_growth - calculate approximate amount of data which makes other
 * data dirty from a budgeting request.
 * @c: UBIFS file-system description object
 * @req: budgeting request
 */
static int calc_dd_growth(const struct ubifs_info *c,
                          const struct ubifs_budget_req *req)
{
        int dd_growth;

        dd_growth = req->dirtied_page ? c->bi.page_budget : 0;

        if (req->dirtied_ino)
                dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1);
        if (req->mod_dent)
                dd_growth += c->bi.dent_budget;
        dd_growth += req->dirtied_ino_d;
        return dd_growth;
}
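
/*
 * The shift above doubles the inode budget for each additional dirtied
 * inode: dirtied_ino = 1 charges one inode_budget, 2 charges 2x, 3 charges
 * 4x and the maximum of 4 charges 8x. For three or four inodes this
 * over-charges, which is presumably a deliberate cheap upper bound - it is
 * in keeping with the pessimistic budgeting approach and always errs on the
 * safe side.
 */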

/**
 * ubifs_budget_space - ensure there is enough space to complete an operation.
 * @c: UBIFS file-system description object
 * @req: budget request
 *
 * This function allocates budget for an operation. It uses a pessimistic
 * approximation of how much flash space the operation needs. Returns zero in
 * case of success and %-ENOSPC in case of failure.
 */
int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
{
        int uninitialized_var(cmt_retries), uninitialized_var(wb_retries);
        int err, idx_growth, data_growth, dd_growth, retried = 0;

        ubifs_assert(req->new_page <= 1);
        ubifs_assert(req->dirtied_page <= 1);
        ubifs_assert(req->new_dent <= 1);
        ubifs_assert(req->mod_dent <= 1);
        ubifs_assert(req->new_ino <= 1);
        ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA);
        ubifs_assert(req->dirtied_ino <= 4);
        ubifs_assert(req->dirtied_ino_d <= 4 * UBIFS_MAX_INO_DATA);
        ubifs_assert(!(req->new_ino_d & 7));
        ubifs_assert(!(req->dirtied_ino_d & 7));

        data_growth = calc_data_growth(c, req);
        dd_growth = calc_dd_growth(c, req);
        if (!data_growth && !dd_growth)
                return 0;
        idx_growth = calc_idx_growth(c, req);

again:
        spin_lock(&c->space_lock);
        ubifs_assert(c->bi.idx_growth >= 0);
        ubifs_assert(c->bi.data_growth >= 0);
        ubifs_assert(c->bi.dd_growth >= 0);

        if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) {
                dbg_budg("no space");
                spin_unlock(&c->space_lock);
                return -ENOSPC;
        }

        c->bi.idx_growth += idx_growth;
        c->bi.data_growth += data_growth;
        c->bi.dd_growth += dd_growth;

        err = do_budget_space(c);
        if (likely(!err)) {
                req->idx_growth = idx_growth;
                req->data_growth = data_growth;
                req->dd_growth = dd_growth;
                spin_unlock(&c->space_lock);
                return 0;
        }

        /* Restore the old values */
        c->bi.idx_growth -= idx_growth;
        c->bi.data_growth -= data_growth;
        c->bi.dd_growth -= dd_growth;
        spin_unlock(&c->space_lock);

        if (req->fast) {
                dbg_budg("no space for fast budgeting");
                return err;
        }

        err = make_free_space(c);
        cond_resched();
        if (err == -EAGAIN) {
                dbg_budg("try again");
                goto again;
        } else if (err == -ENOSPC) {
                if (!retried) {
                        retried = 1;
                        dbg_budg("-ENOSPC, but anyway try once again");
                        goto again;
                }
                dbg_budg("FS is full, -ENOSPC");
                c->bi.nospace = 1;
                if (can_use_rp(c) || c->rp_size == 0)
                        c->bi.nospace_rp = 1;
                smp_wmb();
        } else
                ubifs_err("cannot budget space, error %d", err);
        return err;
}
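
/*
 * Sketch of the caller pattern (hypothetical caller - the real users are the
 * VFS operation handlers elsewhere in UBIFS). A request describes the
 * operation, is budgeted before the change is made, and is released once the
 * change has been written out:
 *
 *        struct ubifs_budget_req req = { .dirtied_ino = 1,
 *                                        .dirtied_ino_d = ALIGN(ui->data_len, 8) };
 *        int err;
 *
 *        err = ubifs_budget_space(c, &req);
 *        if (err)
 *                return err;
 *        ... modify the inode ...
 *        ubifs_release_budget(c, &req);
 */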

/**
 * ubifs_release_budget - release budgeted free space.
 * @c: UBIFS file-system description object
 * @req: budget request
 *
 * This function releases the space budgeted by 'ubifs_budget_space()'. Note,
 * since the index changes (which were budgeted for in @req->idx_growth) will
 * only be written to the media on commit, this function moves the index
 * budget from @c->bi.idx_growth to @c->bi.uncommitted_idx.
 */
void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
{
        ubifs_assert(req->new_page <= 1);
        ubifs_assert(req->dirtied_page <= 1);
        ubifs_assert(req->new_dent <= 1);
        ubifs_assert(req->mod_dent <= 1);
        ubifs_assert(req->new_ino <= 1);
        ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA);
        ubifs_assert(req->dirtied_ino <= 4);
        ubifs_assert(req->dirtied_ino_d <= 4 * UBIFS_MAX_INO_DATA);
        ubifs_assert(!(req->new_ino_d & 7));
        ubifs_assert(!(req->dirtied_ino_d & 7));
        if (!req->recalculate) {
                ubifs_assert(req->idx_growth >= 0);
                ubifs_assert(req->data_growth >= 0);
                ubifs_assert(req->dd_growth >= 0);
        }

        if (req->recalculate) {
                req->data_growth = calc_data_growth(c, req);
                req->dd_growth = calc_dd_growth(c, req);
                req->idx_growth = calc_idx_growth(c, req);
        }

        if (!req->data_growth && !req->dd_growth)
                return;

        c->bi.nospace = c->bi.nospace_rp = 0;
        smp_wmb();

        spin_lock(&c->space_lock);
        c->bi.idx_growth -= req->idx_growth;
        c->bi.uncommitted_idx += req->idx_growth;
        c->bi.data_growth -= req->data_growth;
        c->bi.dd_growth -= req->dd_growth;
        c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);

        ubifs_assert(c->bi.idx_growth >= 0);
        ubifs_assert(c->bi.data_growth >= 0);
        ubifs_assert(c->bi.dd_growth >= 0);
        ubifs_assert(c->bi.min_idx_lebs < c->main_lebs);
        ubifs_assert(!(c->bi.idx_growth & 7));
        ubifs_assert(!(c->bi.data_growth & 7));
        ubifs_assert(!(c->bi.dd_growth & 7));
        spin_unlock(&c->space_lock);
}

/**
 * ubifs_convert_page_budget - convert budget of a new page.
 * @c: UBIFS file-system description object
 *
 * This function converts budget which was allocated for a new page of data to
 * the budget of changing an existing page of data. The latter is smaller, so
 * this only involves simple re-calculation.
 */
void ubifs_convert_page_budget(struct ubifs_info *c)
{
        spin_lock(&c->space_lock);
        /* Release the index growth reservation */
        c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
        /* Release the data growth reservation */
        c->bi.data_growth -= c->bi.page_budget;
        /* Increase the dirty data growth reservation instead */
        c->bi.dd_growth += c->bi.page_budget;
        /* And re-calculate the indexing space reservation */
        c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
        spin_unlock(&c->space_lock);
}
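
/*
 * In other words, the conversion above is pure re-accounting: a budgeted new
 * page carried one page_budget of data growth plus index growth for up to one
 * znode per block of the page; when the page turns out to already exist, the
 * same page_budget is re-classified as dirtied data (dd_growth) and the index
 * reservation for it is dropped.
 */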

/**
 * ubifs_release_dirty_inode_budget - release dirty inode budget.
 * @c: UBIFS file-system description object
 * @ui: UBIFS inode to release the budget for
 *
 * This function releases the budget corresponding to a dirty inode. It is
 * usually called after the inode has been written to the media and marked as
 * clean.
 */
void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
                                      struct ubifs_inode *ui)
{
        struct ubifs_budget_req req;

        memset(&req, 0, sizeof(struct ubifs_budget_req));
        /* The "no space" flags will be cleared because dd_growth is > 0 */
        req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8);
        ubifs_release_budget(c, &req);
}

/**
 * ubifs_reported_space - calculate reported free space.
 * @c: the UBIFS file-system description object
 * @free: amount of free space
 *
 * This function calculates the amount of free space which will be reported to
 * user-space, because the raw amount of free flash space cannot be reported
 * exactly.
 */
long long ubifs_reported_space(const struct ubifs_info *c, long long free)
{
        int divisor, factor, f;

        /*
         * Reported space size is @free * X, where X is the UBIFS block size
         * divided by the UBIFS block size plus all the overhead one data block
         * introduces. The overhead is the node header + indexing overhead.
         *
         * Indexing overhead calculations are based on the following formula:
         * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number
         * of data nodes, f - fanout. Because the effective UBIFS fanout is
         * half the maximum fanout, we assume that each data node introduces
         * 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. Note, the
         * multiplier 3 is because UBIFS reserves thrice as much space for the
         * index.
         */
        f = c->fanout > 3 ? c->fanout >> 1 : 2;
        factor = UBIFS_BLOCK_SIZE;
        divisor = UBIFS_MAX_DATA_NODE_SZ;
        divisor += (c->max_idx_node_sz * 3) / (f - 1);
        free *= factor;
        return div_u64(free, divisor);
}
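
/*
 * Worked example with assumed geometry: suppose UBIFS_MAX_DATA_NODE_SZ is
 * roughly 4144 bytes (a 4096-byte block plus its header), c->fanout = 8 and
 * c->max_idx_node_sz = 300. Then f = 4, the divisor is
 * 4144 + (300 * 3) / 3 = 4444, and each byte of raw free space is reported
 * as 4096 / 4444 of a byte, i.e. about 92% of it.
 */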

/**
 * ubifs_get_free_space_nolock - return amount of free space.
 * @c: UBIFS file-system description object
 *
 * This function calculates and returns the amount of free space to report to
 * user-space. It must be called with @c->space_lock held.
 */
long long ubifs_get_free_space_nolock(struct ubifs_info *c)
{
        int rsvd_idx_lebs, lebs;
        long long available, outstanding, free;

        ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
        outstanding = c->bi.data_growth + c->bi.dd_growth;
        available = ubifs_calc_available(c, c->bi.min_idx_lebs);

        /*
         * When reporting free space to user-space, UBIFS guarantees that it is
         * possible to write a file of free space size. This means that for
         * empty LEBs we may use more precise calculations than
         * 'ubifs_calc_available()' is using. Namely, we know that in empty
         * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm.
         * Thus, amend the available space.
         *
         * Note, the calculations below are similar to what we have in
         * 'do_budget_space()', so refer there for comments.
         */
        if (c->bi.min_idx_lebs > c->lst.idx_lebs)
                rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
        else
                rsvd_idx_lebs = 0;
        lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
               c->lst.taken_empty_lebs;
        lebs -= rsvd_idx_lebs;
        available += lebs * (c->dark_wm - c->leb_overhead);

        if (available > outstanding)
                free = ubifs_reported_space(c, available - outstanding);
        else
                free = 0;
        return free;
}
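
/*
 * Example of the amendment above (hypothetical numbers): with dark_wm = 8192
 * and leb_overhead = 2048, every empty or freeable LEB beyond those reserved
 * for the index adds back 6144 bytes relative to the pessimistic
 * 'ubifs_calc_available()' estimate, because an empty LEB only wastes
 * @c->leb_overhead bytes, not a whole @c->dark_wm.
 */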

/**
 * ubifs_get_free_space - return amount of free space.
 * @c: UBIFS file-system description object
 *
 * This function calculates and returns the amount of free space to report to
 * user-space.
 */
long long ubifs_get_free_space(struct ubifs_info *c)
{
        long long free;

        spin_lock(&c->space_lock);
        free = ubifs_get_free_space_nolock(c);
        spin_unlock(&c->space_lock);

        return free;
}