Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
shrinker.c
Go to the documentation of this file.
1 /*
2  * This file is part of UBIFS.
3  *
4  * Copyright (C) 2006-2008 Nokia Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 as published by
8  * the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program; if not, write to the Free Software Foundation, Inc., 51
17  * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18  *
19  * Authors: Artem Bityutskiy (Битюцкий Артём)
20  * Adrian Hunter
21  */
22 
23 /*
24  * This file implements UBIFS shrinker which evicts clean znodes from the TNC
25  * tree when Linux VM needs more RAM.
26  *
27  * We do not implement any LRU lists to find oldest znodes to free because it
28  * would add additional overhead to the file system fast paths. So the shrinker
29  * just walks the TNC tree when searching for znodes to free.
30  *
31  * If the root of a TNC sub-tree is clean and old enough, then the children are
32  * also clean and old enough. So the shrinker walks the TNC in level order and
33  * dumps entire sub-trees.
34  *
35  * The age of znodes is just the time-stamp when they were last looked at.
36  * The current shrinker first tries to evict old znodes, then young ones.
37  *
38  * Since the shrinker is global, it has to protect against races with FS
39  * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
40  */
41 
42 #include "ubifs.h"
43 
44 /* List of all UBIFS file-system instances */
45 LIST_HEAD(ubifs_infos);
46 
47 /*
48  * We number each shrinker run and record the number on the ubifs_info structure
49  * so that we can easily work out which ubifs_info structures have already been
50  * done by the current run.
51  */
52 static unsigned int shrinker_run_no;
53 
54 /* Protects 'ubifs_infos' list */
55 DEFINE_SPINLOCK(ubifs_infos_lock);
56 
57 /* Global clean znode counter (for all mounted UBIFS instances) */
59 
70 static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
71 {
72  int total_freed = 0;
73  struct ubifs_znode *znode, *zprev;
74  int time = get_seconds();
75 
76  ubifs_assert(mutex_is_locked(&c->umount_mutex));
77  ubifs_assert(mutex_is_locked(&c->tnc_mutex));
78 
79  if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
80  return 0;
81 
82  /*
83  * Traverse the TNC tree in levelorder manner, so that it is possible
84  * to destroy large sub-trees. Indeed, if a znode is old, then all its
85  * children are older or of the same age.
86  *
87  * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
88  * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
89  * changed only when the 'c->tnc_mutex' is held.
90  */
91  zprev = NULL;
92  znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
93  while (znode && total_freed < nr &&
94  atomic_long_read(&c->clean_zn_cnt) > 0) {
95  int freed;
96 
97  /*
98  * If the znode is clean, but it is in the 'c->cnext' list, this
99  * means that this znode has just been written to flash as a
100  * part of commit and was marked clean. They will be removed
101  * from the list at end commit. We cannot change the list,
102  * because it is not protected by any mutex (design decision to
103  * make commit really independent and parallel to main I/O). So
104  * we just skip these znodes.
105  *
106  * Note, the 'clean_zn_cnt' counters are not updated until
107  * after the commit, so the UBIFS shrinker does not report
108  * the znodes which are in the 'c->cnext' list as freeable.
109  *
110  * Also note, if the root of a sub-tree is not in 'c->cnext',
111  * then the whole sub-tree is not in 'c->cnext' as well, so it
112  * is safe to dump whole sub-tree.
113  */
114 
115  if (znode->cnext) {
116  /*
117  * Very soon these znodes will be removed from the list
118  * and become freeable.
119  */
120  *contention = 1;
121  } else if (!ubifs_zn_dirty(znode) &&
122  abs(time - znode->time) >= age) {
123  if (znode->parent)
124  znode->parent->zbranch[znode->iip].znode = NULL;
125  else
126  c->zroot.znode = NULL;
127 
128  freed = ubifs_destroy_tnc_subtree(znode);
129  atomic_long_sub(freed, &ubifs_clean_zn_cnt);
130  atomic_long_sub(freed, &c->clean_zn_cnt);
131  ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0);
132  total_freed += freed;
133  znode = zprev;
134  }
135 
136  if (unlikely(!c->zroot.znode))
137  break;
138 
139  zprev = znode;
140  znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
141  cond_resched();
142  }
143 
144  return total_freed;
145 }
146 
157 static int shrink_tnc_trees(int nr, int age, int *contention)
158 {
159  struct ubifs_info *c;
160  struct list_head *p;
161  unsigned int run_no;
162  int freed = 0;
163 
164  spin_lock(&ubifs_infos_lock);
165  do {
166  run_no = ++shrinker_run_no;
167  } while (run_no == 0);
168  /* Iterate over all mounted UBIFS file-systems and try to shrink them */
169  p = ubifs_infos.next;
170  while (p != &ubifs_infos) {
171  c = list_entry(p, struct ubifs_info, infos_list);
172  /*
173  * We move the ones we do to the end of the list, so we stop
174  * when we see one we have already done.
175  */
176  if (c->shrinker_run_no == run_no)
177  break;
178  if (!mutex_trylock(&c->umount_mutex)) {
179  /* Some un-mount is in progress, try next FS */
180  *contention = 1;
181  p = p->next;
182  continue;
183  }
184  /*
185  * We're holding 'c->umount_mutex', so the file-system won't go
186  * away.
187  */
188  if (!mutex_trylock(&c->tnc_mutex)) {
190  *contention = 1;
191  p = p->next;
192  continue;
193  }
194  spin_unlock(&ubifs_infos_lock);
195  /*
196  * OK, now we have TNC locked, the file-system cannot go away -
197  * it is safe to reap the cache.
198  */
199  c->shrinker_run_no = run_no;
200  freed += shrink_tnc(c, nr, age, contention);
201  mutex_unlock(&c->tnc_mutex);
202  spin_lock(&ubifs_infos_lock);
203  /* Get the next list element before we move this one */
204  p = p->next;
205  /*
206  * Move this one to the end of the list to provide some
207  * fairness.
208  */
209  list_move_tail(&c->infos_list, &ubifs_infos);
211  if (freed >= nr)
212  break;
213  }
214  spin_unlock(&ubifs_infos_lock);
215  return freed;
216 }
217 
226 static int kick_a_thread(void)
227 {
228  int i;
229  struct ubifs_info *c;
230 
231  /*
232  * Iterate over all mounted UBIFS file-systems and find out if there is
233  * already an ongoing commit operation there. If no, then iterate for
234  * the second time and initiate background commit.
235  */
236  spin_lock(&ubifs_infos_lock);
237  for (i = 0; i < 2; i++) {
238  list_for_each_entry(c, &ubifs_infos, infos_list) {
239  long dirty_zn_cnt;
240 
241  if (!mutex_trylock(&c->umount_mutex)) {
242  /*
243  * Some un-mount is in progress, it will
244  * certainly free memory, so just return.
245  */
246  spin_unlock(&ubifs_infos_lock);
247  return -1;
248  }
249 
250  dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
251 
252  if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
253  c->ro_mount || c->ro_error) {
255  continue;
256  }
257 
258  if (c->cmt_state != COMMIT_RESTING) {
259  spin_unlock(&ubifs_infos_lock);
261  return -1;
262  }
263 
264  if (i == 1) {
265  list_move_tail(&c->infos_list, &ubifs_infos);
266  spin_unlock(&ubifs_infos_lock);
267 
270  return -1;
271  }
273  }
274  }
275  spin_unlock(&ubifs_infos_lock);
276 
277  return 0;
278 }
279 
281 {
282  int nr = sc->nr_to_scan;
283  int freed, contention = 0;
284  long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
285 
286  if (nr == 0)
287  /*
288  * Due to the way UBIFS updates the clean znode counter it may
289  * temporarily be negative.
290  */
291  return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
292 
293  if (!clean_zn_cnt) {
294  /*
295  * No clean znodes, nothing to reap. All we can do in this case
296  * is to kick background threads to start commit, which will
297  * probably make clean znodes which, in turn, will be freeable.
298  * And we return -1 which means will make VM call us again
299  * later.
300  */
301  dbg_tnc("no clean znodes, kick a thread");
302  return kick_a_thread();
303  }
304 
305  freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
306  if (freed >= nr)
307  goto out;
308 
309  dbg_tnc("not enough old znodes, try to free young ones");
310  freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
311  if (freed >= nr)
312  goto out;
313 
314  dbg_tnc("not enough young znodes, free all");
315  freed += shrink_tnc_trees(nr - freed, 0, &contention);
316 
317  if (!freed && contention) {
318  dbg_tnc("freed nothing, but contention");
319  return -1;
320  }
321 
322 out:
323  dbg_tnc("%d znodes were freed, requested %d", freed, nr);
324  return freed;
325 }