Linux Kernel  3.7.1
xfs_extent_busy.c
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * Copyright (c) 2010 David Chinner.
 * Copyright (c) 2011 Christoph Hellwig.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_log.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc.h"
#include "xfs_inode.h"
#include "xfs_extent_busy.h"
#include "xfs_trace.h"

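/*
 * Insert a new busy extent into the per-AG rbtree, keyed by start block, and
 * add it to the transaction's list of busy extents.  If we cannot allocate a
 * tracking structure, fall back to making the transaction synchronous so the
 * freed extent cannot be reused before the transaction commits.
 */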
void
xfs_extent_busy_insert(
	struct xfs_trans	*tp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	unsigned int		flags)
{
	struct xfs_extent_busy	*new;
	struct xfs_extent_busy	*busyp;
	struct xfs_perag	*pag;
	struct rb_node		**rbp;
	struct rb_node		*parent = NULL;

	new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL);
	if (!new) {
		/*
		 * No Memory!  Since it is now not possible to track the free
		 * block, make this a synchronous transaction to ensure that
		 * the block is not reused before this transaction commits.
		 */
		trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len);
		xfs_trans_set_sync(tp);
		return;
	}

	new->agno = agno;
	new->bno = bno;
	new->length = len;
	INIT_LIST_HEAD(&new->list);
	new->flags = flags;

	/* trace before insert to be able to see failed inserts */
	trace_xfs_extent_busy(tp->t_mountp, agno, bno, len);

	pag = xfs_perag_get(tp->t_mountp, new->agno);
	spin_lock(&pag->pagb_lock);
	rbp = &pag->pagb_tree.rb_node;
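	/*
	 * Busy extents within an AG never overlap, so the tree is keyed by
	 * start block alone; the ASSERTs below check that invariant while we
	 * walk down to the insertion point.
	 */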
	while (*rbp) {
		parent = *rbp;
		busyp = rb_entry(parent, struct xfs_extent_busy, rb_node);

		if (new->bno < busyp->bno) {
			rbp = &(*rbp)->rb_left;
			ASSERT(new->bno + new->length <= busyp->bno);
		} else if (new->bno > busyp->bno) {
			rbp = &(*rbp)->rb_right;
			ASSERT(bno >= busyp->bno + busyp->length);
		} else {
			ASSERT(0);
		}
	}

	rb_link_node(&new->rb_node, parent, rbp);
	rb_insert_color(&new->rb_node, &pag->pagb_tree);

	list_add(&new->list, &tp->t_busy);
	spin_unlock(&pag->pagb_lock);
	xfs_perag_put(pag);
}

/*
 * Search for a busy extent within the range of the extent we are about to
 * allocate.  You need to be holding the busy extent tree lock when calling
 * xfs_extent_busy_search().  This function returns 0 for no overlapping busy
 * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
 * match.  This is done so that a non-zero return indicates an overlap that
 * will require a synchronous transaction, but it can still be
 * used to distinguish between a partial and an exact match.
 */
int
xfs_extent_busy_search(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	xfs_extlen_t		len)
{
	struct xfs_perag	*pag;
	struct rb_node		*rbp;
	struct xfs_extent_busy	*busyp;
	int			match = 0;

	pag = xfs_perag_get(mp, agno);
	spin_lock(&pag->pagb_lock);

	rbp = pag->pagb_tree.rb_node;

	/* find closest start bno overlap */
	while (rbp) {
		busyp = rb_entry(rbp, struct xfs_extent_busy, rb_node);
		if (bno < busyp->bno) {
			/* may overlap, but exact start block is lower */
			if (bno + len > busyp->bno)
				match = -1;
			rbp = rbp->rb_left;
		} else if (bno > busyp->bno) {
			/* may overlap, but exact start block is higher */
			if (bno < busyp->bno + busyp->length)
				match = -1;
			rbp = rbp->rb_right;
		} else {
			/* bno matches busyp, length determines exact match */
			match = (busyp->length == len) ? 1 : -1;
			break;
		}
	}
	spin_unlock(&pag->pagb_lock);
	xfs_perag_put(pag);
	return match;
}
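
/*
 * Illustrative use (not a caller in this file): because any non-zero return
 * signals an overlap that requires a synchronous transaction, a caller might
 * simply do
 *
 *	if (xfs_extent_busy_search(mp, agno, bno, len))
 *		xfs_trans_set_sync(tp);
 *
 * and only look at the 1 vs. -1 distinction when the exact/partial difference
 * matters.
 */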

/*
 * The found free extent [fbno, fend] overlaps part or all of the given busy
 * extent.  If the overlap covers the beginning, the end, or all of the busy
 * extent, the overlapping portion can be made unbusy and used for the
 * allocation.  We can't split a busy extent because we can't modify a
 * transaction/CIL context busy list, but we can update an entry's block
 * number or length.
 *
 * Returns true if the extent can safely be reused, or false if the search
 * needs to be restarted.
 */
STATIC bool
xfs_extent_busy_update_extent(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	struct xfs_extent_busy	*busyp,
	xfs_agblock_t		fbno,
	xfs_extlen_t		flen,
	bool			userdata)
{
	xfs_agblock_t		fend = fbno + flen;
	xfs_agblock_t		bbno = busyp->bno;
	xfs_agblock_t		bend = bbno + busyp->length;

	/*
	 * This extent is currently being discarded.  Give the thread
	 * performing the discard a chance to mark the extent unbusy
	 * and retry.
	 */
	if (busyp->flags & XFS_EXTENT_BUSY_DISCARDED) {
		spin_unlock(&pag->pagb_lock);
		delay(1);
		spin_lock(&pag->pagb_lock);
		return false;
	}

	/*
	 * If there is a busy extent overlapping a user allocation, we have
	 * no choice but to force the log and retry the search.
	 *
	 * Fortunately this does not happen during normal operation, but
	 * only if the filesystem is very low on space and has to dip into
	 * the AGFL for normal allocations.
	 */
	if (userdata)
		goto out_force_log;

	if (bbno < fbno && bend > fend) {
		/*
		 * Case 1:
		 *    bbno           bend
		 *    +BBBBBBBBBBBBBBBBB+
		 *        +---------+
		 *        fbno   fend
		 */

		/*
		 * We would have to split the busy extent to be able to track
		 * it correctly, which we cannot do because we would have to
		 * modify the list of busy extents attached to the transaction
		 * or CIL context, which is immutable.
		 *
		 * Force out the log to clear the busy extent and retry the
		 * search.
		 */
		goto out_force_log;
	} else if (bbno >= fbno && bend <= fend) {
		/*
		 * Case 2:
		 *    bbno           bend
		 *    +BBBBBBBBBBBBBBBBB+
		 *    +-----------------+
		 *    fbno           fend
		 *
		 * Case 3:
		 *    bbno           bend
		 *    +BBBBBBBBBBBBBBBBB+
		 *    +--------------------------+
		 *    fbno                    fend
		 *
		 * Case 4:
		 *             bbno           bend
		 *             +BBBBBBBBBBBBBBBBB+
		 *    +--------------------------+
		 *    fbno                    fend
		 *
		 * Case 5:
		 *             bbno           bend
		 *             +BBBBBBBBBBBBBBBBB+
		 *    +-----------------------------------+
		 *    fbno                             fend
		 *
		 */

		/*
		 * The busy extent is fully covered by the extent we are
		 * allocating, and can simply be removed from the rbtree.
		 * However we cannot remove it from the immutable list
		 * tracking busy extents in the transaction or CIL context,
		 * so set the length to zero to mark it invalid.
		 *
		 * We also need to restart the busy extent search from the
		 * tree root, because erasing the node can rearrange the
		 * tree topology.
		 */
		rb_erase(&busyp->rb_node, &pag->pagb_tree);
		busyp->length = 0;
		return false;
	} else if (fend < bend) {
		/*
		 * Case 6:
		 *              bbno           bend
		 *              +BBBBBBBBBBBBBBBBB+
		 *              +---------+
		 *              fbno   fend
		 *
		 * Case 7:
		 *             bbno           bend
		 *             +BBBBBBBBBBBBBBBBB+
		 *    +------------------+
		 *    fbno            fend
		 *
		 */
		busyp->bno = fend;
	} else if (bbno < fbno) {
		/*
		 * Case 8:
		 *    bbno           bend
		 *    +BBBBBBBBBBBBBBBBB+
		 *        +-------------+
		 *        fbno       fend
		 *
		 * Case 9:
		 *    bbno           bend
		 *    +BBBBBBBBBBBBBBBBB+
		 *        +----------------------+
		 *        fbno                fend
		 */
		busyp->length = fbno - busyp->bno;
	} else {
		ASSERT(0);
	}

	trace_xfs_extent_busy_reuse(mp, pag->pag_agno, fbno, flen);
	return true;

out_force_log:
	spin_unlock(&pag->pagb_lock);
	xfs_log_force(mp, XFS_LOG_SYNC);
	trace_xfs_extent_busy_force(mp, pag->pag_agno, fbno, flen);
	spin_lock(&pag->pagb_lock);
	return false;
}


/*
 * For a given extent [fbno, flen], make sure we can reuse it safely.
 */
void
xfs_extent_busy_reuse(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		fbno,
	xfs_extlen_t		flen,
	bool			userdata)
{
	struct xfs_perag	*pag;
	struct rb_node		*rbp;

	ASSERT(flen > 0);

	pag = xfs_perag_get(mp, agno);
	spin_lock(&pag->pagb_lock);
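	/*
	 * Walk the tree for busy extents that overlap [fbno, fbno + flen).
	 * xfs_extent_busy_update_extent() drops and retakes pagb_lock when it
	 * forces the log or erases a node, so every time it returns false we
	 * restart the walk from the tree root.
	 */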
restart:
	rbp = pag->pagb_tree.rb_node;
	while (rbp) {
		struct xfs_extent_busy *busyp =
			rb_entry(rbp, struct xfs_extent_busy, rb_node);
		xfs_agblock_t	bbno = busyp->bno;
		xfs_agblock_t	bend = bbno + busyp->length;

		if (fbno + flen <= bbno) {
			rbp = rbp->rb_left;
			continue;
		} else if (fbno >= bend) {
			rbp = rbp->rb_right;
			continue;
		}

		if (!xfs_extent_busy_update_extent(mp, pag, busyp, fbno, flen,
						  userdata))
			goto restart;
	}
	spin_unlock(&pag->pagb_lock);
	xfs_perag_put(pag);
}

/*
 * For a given extent [fbno, flen], search the busy extent list to find a
 * subset of the extent that is not busy.  If *rlen is smaller than
 * args->minlen no suitable extent could be found, and the higher level
 * code needs to force out the log and retry the allocation.
 */
void
xfs_extent_busy_trim(
	struct xfs_alloc_arg	*args,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	xfs_agblock_t		*rbno,
	xfs_extlen_t		*rlen)
{
	xfs_agblock_t		fbno;
	xfs_extlen_t		flen;
	struct rb_node		*rbp;

	ASSERT(len > 0);

	spin_lock(&args->pag->pagb_lock);
restart:
	fbno = bno;
	flen = len;
	rbp = args->pag->pagb_tree.rb_node;
	while (rbp && flen >= args->minlen) {
		struct xfs_extent_busy *busyp =
			rb_entry(rbp, struct xfs_extent_busy, rb_node);
		xfs_agblock_t	fend = fbno + flen;
		xfs_agblock_t	bbno = busyp->bno;
		xfs_agblock_t	bend = bbno + busyp->length;

		if (fend <= bbno) {
			rbp = rbp->rb_left;
			continue;
		} else if (fbno >= bend) {
			rbp = rbp->rb_right;
			continue;
		}

		/*
		 * If this is a metadata allocation, try to reuse the busy
		 * extent instead of trimming the allocation.
		 */
		if (!args->userdata &&
		    !(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
			if (!xfs_extent_busy_update_extent(args->mp, args->pag,
							  busyp, fbno, flen,
							  false))
				goto restart;
			continue;
		}

		if (bbno <= fbno) {
			/* start overlap */

			/*
			 * Case 1:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *        +---------+
			 *        fbno   fend
			 *
			 * Case 2:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *    +-------------+
			 *    fbno       fend
			 *
			 * Case 3:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *        +-------------+
			 *        fbno       fend
			 *
			 * Case 4:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *    +-----------------+
			 *    fbno           fend
			 *
			 * No unbusy region in extent, return failure.
			 */
			if (fend <= bend)
				goto fail;

			/*
			 * Case 5:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *        +----------------------+
			 *        fbno                fend
			 *
			 * Case 6:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *    +--------------------------+
			 *    fbno                    fend
			 *
			 * Needs to be trimmed to:
			 *                       +-------+
			 *                       fbno fend
			 */
			fbno = bend;
		} else if (bend >= fend) {
			/* end overlap */

			/*
			 * Case 7:
			 *             bbno           bend
			 *             +BBBBBBBBBBBBBBBBB+
			 *    +------------------+
			 *    fbno            fend
			 *
			 * Case 8:
			 *             bbno           bend
			 *             +BBBBBBBBBBBBBBBBB+
			 *    +--------------------------+
			 *    fbno                    fend
			 *
			 * Needs to be trimmed to:
			 *    +-------+
			 *    fbno fend
			 */
			fend = bbno;
		} else {
			/* middle overlap */

			/*
			 * Case 9:
			 *             bbno           bend
			 *             +BBBBBBBBBBBBBBBBB+
			 *    +-----------------------------------+
			 *    fbno                             fend
			 *
			 * Can be trimmed to:
			 *    +-------+        OR         +-------+
			 *    fbno fend                   fbno fend
			 *
			 * Backward allocation leads to significant
			 * fragmentation of directories, which degrades
			 * directory performance, therefore we always want to
			 * choose the option that produces forward allocation
			 * patterns.
			 * Preferring the lower bno extent will make the next
			 * request use "fend" as the start of the next
			 * allocation; if the segment is no longer busy at
			 * that point, we'll get a contiguous allocation, but
			 * even if it is still busy, we will get a forward
			 * allocation.
			 * We try to avoid choosing the segment at "bend",
			 * because that can lead to the next allocation
			 * taking the segment at "fbno", which would be a
			 * backward allocation.  We only use the segment at
			 * "fbno" if it is much larger than the current
			 * requested size, because in that case there's a
			 * good chance subsequent allocations will be
			 * contiguous.
			 */
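			/*
			 * Illustration with made-up numbers: with
			 * args->minlen = 4 and args->maxlen = 16 blocks, a
			 * left-hand gap of 16 or more blocks is taken
			 * outright; failing that, a right-hand gap of 64 or
			 * more blocks (4 * maxlen) is used; failing that, a
			 * left-hand gap of at least 4 blocks is accepted;
			 * anything smaller fails the trim.
			 */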
			if (bbno - fbno >= args->maxlen) {
				/* left candidate fits perfect */
				fend = bbno;
			} else if (fend - bend >= args->maxlen * 4) {
				/* right candidate has enough free space */
				fbno = bend;
			} else if (bbno - fbno >= args->minlen) {
				/* left candidate fits minimum requirement */
				fend = bbno;
			} else {
				goto fail;
			}
		}

		flen = fend - fbno;
	}
	spin_unlock(&args->pag->pagb_lock);

	if (fbno != bno || flen != len) {
		trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len,
					  fbno, flen);
	}
	*rbno = fbno;
	*rlen = flen;
	return;
fail:
	/*
	 * Return a zero extent length as failure indication.  All callers
	 * re-check if the trimmed extent satisfies the minlen requirement.
	 */
	spin_unlock(&args->pag->pagb_lock);
	trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
	*rbno = fbno;
	*rlen = 0;
}

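/*
 * Remove a single busy extent from the rbtree and free it.  Entries whose
 * length was zeroed by xfs_extent_busy_update_extent() have already been
 * erased from the tree, so only unlink entries that still have a length.
 * The caller must hold pag->pagb_lock.
 */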
STATIC void
xfs_extent_busy_clear_one(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	struct xfs_extent_busy	*busyp)
{
	if (busyp->length) {
		trace_xfs_extent_busy_clear(mp, busyp->agno, busyp->bno,
						busyp->length);
		rb_erase(&busyp->rb_node, &pag->pagb_tree);
	}

	list_del_init(&busyp->list);
	kmem_free(busyp);
}

/*
 * Remove all extents on the passed in list from the busy extents tree.
 * If do_discard is set skip extents that need to be discarded, and mark
 * these as undergoing a discard operation instead.
 */
void
xfs_extent_busy_clear(
	struct xfs_mount	*mp,
	struct list_head	*list,
	bool			do_discard)
{
	struct xfs_extent_busy	*busyp, *n;
	struct xfs_perag	*pag = NULL;
	xfs_agnumber_t		agno = NULLAGNUMBER;

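	/*
	 * Walk the list, switching the perag and its lock each time we cross
	 * into a different AG.  Callers typically sort the list by AG first
	 * (see xfs_extent_busy_ag_cmp below) so this happens at most once
	 * per AG.
	 */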
	list_for_each_entry_safe(busyp, n, list, list) {
		if (busyp->agno != agno) {
			if (pag) {
				spin_unlock(&pag->pagb_lock);
				xfs_perag_put(pag);
			}
			pag = xfs_perag_get(mp, busyp->agno);
			spin_lock(&pag->pagb_lock);
			agno = busyp->agno;
		}

		if (do_discard && busyp->length &&
		    !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD))
			busyp->flags = XFS_EXTENT_BUSY_DISCARDED;
		else
			xfs_extent_busy_clear_one(mp, pag, busyp);
	}

	if (pag) {
		spin_unlock(&pag->pagb_lock);
		xfs_perag_put(pag);
	}
}

/*
 * Callback for list_sort to sort busy extents by the AG they reside in.
 */
int
xfs_extent_busy_ag_cmp(
	void			*priv,
	struct list_head	*a,
	struct list_head	*b)
{
	return container_of(a, struct xfs_extent_busy, list)->agno -
	       container_of(b, struct xfs_extent_busy, list)->agno;
}
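
/*
 * Illustrative use (assumed caller, not part of this file): the comparator is
 * meant to be handed to list_sort(), e.g.
 *
 *	list_sort(NULL, &busy_list, xfs_extent_busy_ag_cmp);
 *	xfs_extent_busy_clear(mp, &busy_list, false);
 *
 * so that xfs_extent_busy_clear() only has to switch AG locks once per group.
 */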