Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
xfs_alloc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_mount.h"
27 #include "xfs_bmap_btree.h"
28 #include "xfs_alloc_btree.h"
29 #include "xfs_ialloc_btree.h"
30 #include "xfs_dinode.h"
31 #include "xfs_inode.h"
32 #include "xfs_btree.h"
33 #include "xfs_alloc.h"
34 #include "xfs_extent_busy.h"
35 #include "xfs_error.h"
36 #include "xfs_trace.h"
37 
39 
40 #define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
41 
42 #define XFSA_FIXUP_BNO_OK 1
43 #define XFSA_FIXUP_CNT_OK 2
44 
50 
51 /*
52  * Lookup the record equal to [bno, len] in the btree given by cur.
53  */
54 STATIC int /* error */
56  struct xfs_btree_cur *cur, /* btree cursor */
57  xfs_agblock_t bno, /* starting block of extent */
58  xfs_extlen_t len, /* length of extent */
59  int *stat) /* success/failure */
60 {
61  cur->bc_rec.a.ar_startblock = bno;
62  cur->bc_rec.a.ar_blockcount = len;
63  return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
64 }
65 
66 /*
67  * Lookup the first record greater than or equal to [bno, len]
68  * in the btree given by cur.
69  */
70 int /* error */
72  struct xfs_btree_cur *cur, /* btree cursor */
73  xfs_agblock_t bno, /* starting block of extent */
74  xfs_extlen_t len, /* length of extent */
75  int *stat) /* success/failure */
76 {
77  cur->bc_rec.a.ar_startblock = bno;
78  cur->bc_rec.a.ar_blockcount = len;
79  return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
80 }
81 
82 /*
83  * Lookup the first record less than or equal to [bno, len]
84  * in the btree given by cur.
85  */
86 int /* error */
88  struct xfs_btree_cur *cur, /* btree cursor */
89  xfs_agblock_t bno, /* starting block of extent */
90  xfs_extlen_t len, /* length of extent */
91  int *stat) /* success/failure */
92 {
93  cur->bc_rec.a.ar_startblock = bno;
94  cur->bc_rec.a.ar_blockcount = len;
95  return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
96 }
97 
98 /*
99  * Update the record referred to by cur to the value given
100  * by [bno, len].
101  * This either works (return 0) or gets an EFSCORRUPTED error.
102  */
103 STATIC int /* error */
105  struct xfs_btree_cur *cur, /* btree cursor */
106  xfs_agblock_t bno, /* starting block of extent */
107  xfs_extlen_t len) /* length of extent */
108 {
109  union xfs_btree_rec rec;
110 
111  rec.alloc.ar_startblock = cpu_to_be32(bno);
112  rec.alloc.ar_blockcount = cpu_to_be32(len);
113  return xfs_btree_update(cur, &rec);
114 }
115 
116 /*
117  * Get the data from the pointed-to record.
118  */
119 int /* error */
121  struct xfs_btree_cur *cur, /* btree cursor */
122  xfs_agblock_t *bno, /* output: starting block of extent */
123  xfs_extlen_t *len, /* output: length of extent */
124  int *stat) /* output: success/failure */
125 {
126  union xfs_btree_rec *rec;
127  int error;
128 
129  error = xfs_btree_get_rec(cur, &rec, stat);
130  if (!error && *stat == 1) {
131  *bno = be32_to_cpu(rec->alloc.ar_startblock);
132  *len = be32_to_cpu(rec->alloc.ar_blockcount);
133  }
134  return error;
135 }
136 
137 /*
138  * Compute aligned version of the found extent.
139  * Takes alignment and min length into account.
140  */
141 STATIC void
143  xfs_alloc_arg_t *args, /* allocation argument structure */
144  xfs_agblock_t foundbno, /* starting block in found extent */
145  xfs_extlen_t foundlen, /* length in found extent */
146  xfs_agblock_t *resbno, /* result block number */
147  xfs_extlen_t *reslen) /* result length */
148 {
149  xfs_agblock_t bno;
151 
152  /* Trim busy sections out of found extent */
153  xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
154 
155  if (args->alignment > 1 && len >= args->minlen) {
156  xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
157  xfs_extlen_t diff = aligned_bno - bno;
158 
159  *resbno = aligned_bno;
160  *reslen = diff >= len ? 0 : len - diff;
161  } else {
162  *resbno = bno;
163  *reslen = len;
164  }
165 }
166 
167 /*
168  * Compute best start block and diff for "near" allocations.
169  * freelen >= wantlen already checked by caller.
170  */
171 STATIC xfs_extlen_t /* difference value (absolute) */
173  xfs_agblock_t wantbno, /* target starting block */
174  xfs_extlen_t wantlen, /* target length */
175  xfs_extlen_t alignment, /* target alignment */
176  xfs_agblock_t freebno, /* freespace's starting block */
177  xfs_extlen_t freelen, /* freespace's length */
178  xfs_agblock_t *newbnop) /* result: best start block from free */
179 {
180  xfs_agblock_t freeend; /* end of freespace extent */
181  xfs_agblock_t newbno1; /* return block number */
182  xfs_agblock_t newbno2; /* other new block number */
183  xfs_extlen_t newlen1=0; /* length with newbno1 */
184  xfs_extlen_t newlen2=0; /* length with newbno2 */
185  xfs_agblock_t wantend; /* end of target extent */
186 
187  ASSERT(freelen >= wantlen);
188  freeend = freebno + freelen;
189  wantend = wantbno + wantlen;
190  if (freebno >= wantbno) {
191  if ((newbno1 = roundup(freebno, alignment)) >= freeend)
192  newbno1 = NULLAGBLOCK;
193  } else if (freeend >= wantend && alignment > 1) {
194  newbno1 = roundup(wantbno, alignment);
195  newbno2 = newbno1 - alignment;
196  if (newbno1 >= freeend)
197  newbno1 = NULLAGBLOCK;
198  else
199  newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
200  if (newbno2 < freebno)
201  newbno2 = NULLAGBLOCK;
202  else
203  newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
204  if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
205  if (newlen1 < newlen2 ||
206  (newlen1 == newlen2 &&
207  XFS_ABSDIFF(newbno1, wantbno) >
208  XFS_ABSDIFF(newbno2, wantbno)))
209  newbno1 = newbno2;
210  } else if (newbno2 != NULLAGBLOCK)
211  newbno1 = newbno2;
212  } else if (freeend >= wantend) {
213  newbno1 = wantbno;
214  } else if (alignment > 1) {
215  newbno1 = roundup(freeend - wantlen, alignment);
216  if (newbno1 > freeend - wantlen &&
217  newbno1 - alignment >= freebno)
218  newbno1 -= alignment;
219  else if (newbno1 >= freeend)
220  newbno1 = NULLAGBLOCK;
221  } else
222  newbno1 = freeend - wantlen;
223  *newbnop = newbno1;
224  return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
225 }
226 
227 /*
228  * Fix up the length, based on mod and prod.
229  * len should be k * prod + mod for some k.
230  * If len is too small it is returned unchanged.
231  * If len hits maxlen it is left alone.
232  */
233 STATIC void
235  xfs_alloc_arg_t *args) /* allocation argument structure */
236 {
237  xfs_extlen_t k;
238  xfs_extlen_t rlen;
239 
240  ASSERT(args->mod < args->prod);
241  rlen = args->len;
242  ASSERT(rlen >= args->minlen);
243  ASSERT(rlen <= args->maxlen);
244  if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
245  (args->mod == 0 && rlen < args->prod))
246  return;
247  k = rlen % args->prod;
248  if (k == args->mod)
249  return;
250  if (k > args->mod) {
251  if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen)
252  return;
253  } else {
254  if ((int)(rlen = rlen - args->prod - (args->mod - k)) <
255  (int)args->minlen)
256  return;
257  }
258  ASSERT(rlen >= args->minlen);
259  ASSERT(rlen <= args->maxlen);
260  args->len = rlen;
261 }
262 
263 /*
264  * Fix up length if there is too little space left in the a.g.
265  * Return 1 if ok, 0 if too little, should give up.
266  */
267 STATIC int
269  xfs_alloc_arg_t *args) /* allocation argument structure */
270 {
271  xfs_agf_t *agf; /* a.g. freelist header */
272  int diff; /* free space difference */
273 
274  if (args->minleft == 0)
275  return 1;
276  agf = XFS_BUF_TO_AGF(args->agbp);
277  diff = be32_to_cpu(agf->agf_freeblks)
278  - args->len - args->minleft;
279  if (diff >= 0)
280  return 1;
281  args->len += diff; /* shrink the allocated space */
282  if (args->len >= args->minlen)
283  return 1;
284  args->agbno = NULLAGBLOCK;
285  return 0;
286 }
287 
288 /*
289  * Update the two btrees, logically removing from freespace the extent
290  * starting at rbno, rlen blocks. The extent is contained within the
291  * actual (current) free extent fbno for flen blocks.
292  * Flags are passed in indicating whether the cursors are set to the
293  * relevant records.
294  */
295 STATIC int /* error code */
297  xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */
298  xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */
299  xfs_agblock_t fbno, /* starting block of free extent */
300  xfs_extlen_t flen, /* length of free extent */
301  xfs_agblock_t rbno, /* starting block of returned extent */
302  xfs_extlen_t rlen, /* length of returned extent */
303  int flags) /* flags, XFSA_FIXUP_... */
304 {
305  int error; /* error code */
306  int i; /* operation results */
307  xfs_agblock_t nfbno1; /* first new free startblock */
308  xfs_agblock_t nfbno2; /* second new free startblock */
309  xfs_extlen_t nflen1=0; /* first new free length */
310  xfs_extlen_t nflen2=0; /* second new free length */
311 
312  /*
313  * Look up the record in the by-size tree if necessary.
314  */
315  if (flags & XFSA_FIXUP_CNT_OK) {
316 #ifdef DEBUG
317  if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
318  return error;
320  i == 1 && nfbno1 == fbno && nflen1 == flen);
321 #endif
322  } else {
323  if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
324  return error;
326  }
327  /*
328  * Look up the record in the by-block tree if necessary.
329  */
330  if (flags & XFSA_FIXUP_BNO_OK) {
331 #ifdef DEBUG
332  if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
333  return error;
335  i == 1 && nfbno1 == fbno && nflen1 == flen);
336 #endif
337  } else {
338  if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
339  return error;
341  }
342 
343 #ifdef DEBUG
344  if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
345  struct xfs_btree_block *bnoblock;
346  struct xfs_btree_block *cntblock;
347 
348  bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
349  cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
350 
352  bnoblock->bb_numrecs == cntblock->bb_numrecs);
353  }
354 #endif
355 
356  /*
357  * Deal with all four cases: the allocated record is contained
358  * within the freespace record, so we can have new freespace
359  * at either (or both) end, or no freespace remaining.
360  */
361  if (rbno == fbno && rlen == flen)
362  nfbno1 = nfbno2 = NULLAGBLOCK;
363  else if (rbno == fbno) {
364  nfbno1 = rbno + rlen;
365  nflen1 = flen - rlen;
366  nfbno2 = NULLAGBLOCK;
367  } else if (rbno + rlen == fbno + flen) {
368  nfbno1 = fbno;
369  nflen1 = flen - rlen;
370  nfbno2 = NULLAGBLOCK;
371  } else {
372  nfbno1 = fbno;
373  nflen1 = rbno - fbno;
374  nfbno2 = rbno + rlen;
375  nflen2 = (fbno + flen) - nfbno2;
376  }
377  /*
378  * Delete the entry from the by-size btree.
379  */
380  if ((error = xfs_btree_delete(cnt_cur, &i)))
381  return error;
383  /*
384  * Add new by-size btree entry(s).
385  */
386  if (nfbno1 != NULLAGBLOCK) {
387  if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
388  return error;
390  if ((error = xfs_btree_insert(cnt_cur, &i)))
391  return error;
393  }
394  if (nfbno2 != NULLAGBLOCK) {
395  if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
396  return error;
398  if ((error = xfs_btree_insert(cnt_cur, &i)))
399  return error;
401  }
402  /*
403  * Fix up the by-block btree entry(s).
404  */
405  if (nfbno1 == NULLAGBLOCK) {
406  /*
407  * No remaining freespace, just delete the by-block tree entry.
408  */
409  if ((error = xfs_btree_delete(bno_cur, &i)))
410  return error;
412  } else {
413  /*
414  * Update the by-block entry to start later|be shorter.
415  */
416  if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
417  return error;
418  }
419  if (nfbno2 != NULLAGBLOCK) {
420  /*
421  * 2 resulting free entries, need to add one.
422  */
423  if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
424  return error;
426  if ((error = xfs_btree_insert(bno_cur, &i)))
427  return error;
429  }
430  return 0;
431 }
432 
433 /*
434  * Read in the allocation group free block array.
435  */
436 STATIC int /* error */
438  xfs_mount_t *mp, /* mount point structure */
439  xfs_trans_t *tp, /* transaction pointer */
440  xfs_agnumber_t agno, /* allocation group number */
441  xfs_buf_t **bpp) /* buffer for the ag free block array */
442 {
443  xfs_buf_t *bp; /* return value */
444  int error;
445 
446  ASSERT(agno != NULLAGNUMBER);
447  error = xfs_trans_read_buf(
448  mp, tp, mp->m_ddev_targp,
449  XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
450  XFS_FSS_TO_BB(mp, 1), 0, &bp);
451  if (error)
452  return error;
453  ASSERT(!xfs_buf_geterror(bp));
454  xfs_buf_set_ref(bp, XFS_AGFL_REF);
455  *bpp = bp;
456  return 0;
457 }
458 
459 STATIC int
461  struct xfs_trans *tp,
462  struct xfs_perag *pag,
463  struct xfs_buf *agbp,
464  long len)
465 {
466  struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
467 
468  pag->pagf_freeblks += len;
469  be32_add_cpu(&agf->agf_freeblks, len);
470 
471  xfs_trans_agblocks_delta(tp, len);
472  if (unlikely(be32_to_cpu(agf->agf_freeblks) >
473  be32_to_cpu(agf->agf_length)))
474  return EFSCORRUPTED;
475 
477  return 0;
478 }
479 
480 /*
481  * Allocation group level functions.
482  */
483 
484 /*
485  * Allocate a variable extent in the allocation group agno.
486  * Type and bno are used to determine where in the allocation group the
487  * extent will start.
488  * Extent's length (returned in *len) will be between minlen and maxlen,
489  * and of the form k * prod + mod unless there's nothing that large.
490  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
491  */
492 STATIC int /* error */
494  xfs_alloc_arg_t *args) /* argument structure for allocation */
495 {
496  int error=0;
497 
498  ASSERT(args->minlen > 0);
499  ASSERT(args->maxlen > 0);
500  ASSERT(args->minlen <= args->maxlen);
501  ASSERT(args->mod < args->prod);
502  ASSERT(args->alignment > 0);
503  /*
504  * Branch to correct routine based on the type.
505  */
506  args->wasfromfl = 0;
507  switch (args->type) {
509  error = xfs_alloc_ag_vextent_size(args);
510  break;
512  error = xfs_alloc_ag_vextent_near(args);
513  break;
515  error = xfs_alloc_ag_vextent_exact(args);
516  break;
517  default:
518  ASSERT(0);
519  /* NOTREACHED */
520  }
521 
522  if (error || args->agbno == NULLAGBLOCK)
523  return error;
524 
525  ASSERT(args->len >= args->minlen);
526  ASSERT(args->len <= args->maxlen);
527  ASSERT(!args->wasfromfl || !args->isfl);
528  ASSERT(args->agbno % args->alignment == 0);
529 
530  if (!args->wasfromfl) {
531  error = xfs_alloc_update_counters(args->tp, args->pag,
532  args->agbp,
533  -((long)(args->len)));
534  if (error)
535  return error;
536 
537  ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
538  args->agbno, args->len));
539  }
540 
541  if (!args->isfl) {
542  xfs_trans_mod_sb(args->tp, args->wasdel ?
545  -((long)(args->len)));
546  }
547 
548  XFS_STATS_INC(xs_allocx);
549  XFS_STATS_ADD(xs_allocb, args->len);
550  return error;
551 }
552 
553 /*
554  * Allocate a variable extent at exactly agno/bno.
555  * Extent's length (returned in *len) will be between minlen and maxlen,
556  * and of the form k * prod + mod unless there's nothing that large.
557  * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
558  */
559 STATIC int /* error */
561  xfs_alloc_arg_t *args) /* allocation argument structure */
562 {
563  xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
564  xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
565  int error;
566  xfs_agblock_t fbno; /* start block of found extent */
567  xfs_extlen_t flen; /* length of found extent */
568  xfs_agblock_t tbno; /* start block of trimmed extent */
569  xfs_extlen_t tlen; /* length of trimmed extent */
570  xfs_agblock_t tend; /* end block of trimmed extent */
571  int i; /* success/failure of operation */
572 
573  ASSERT(args->alignment == 1);
574 
575  /*
576  * Allocate/initialize a cursor for the by-number freespace btree.
577  */
578  bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
579  args->agno, XFS_BTNUM_BNO);
580 
581  /*
582  * Lookup bno and minlen in the btree (minlen is irrelevant, really).
583  * Look for the closest free block <= bno, it must contain bno
584  * if any free block does.
585  */
586  error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
587  if (error)
588  goto error0;
589  if (!i)
590  goto not_found;
591 
592  /*
593  * Grab the freespace record.
594  */
595  error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
596  if (error)
597  goto error0;
598  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
599  ASSERT(fbno <= args->agbno);
600 
601  /*
602  * Check for overlapping busy extents.
603  */
604  xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
605 
606  /*
607  * Give up if the start of the extent is busy, or the freespace isn't
608  * long enough for the minimum request.
609  */
610  if (tbno > args->agbno)
611  goto not_found;
612  if (tlen < args->minlen)
613  goto not_found;
614  tend = tbno + tlen;
615  if (tend < args->agbno + args->minlen)
616  goto not_found;
617 
618  /*
619  * End of extent will be smaller of the freespace end and the
620  * maximal requested end.
621  *
622  * Fix the length according to mod and prod if given.
623  */
624  args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
625  - args->agbno;
626  xfs_alloc_fix_len(args);
627  if (!xfs_alloc_fix_minleft(args))
628  goto not_found;
629 
630  ASSERT(args->agbno + args->len <= tend);
631 
632  /*
633  * We are allocating agbno for args->len
634  * Allocate/initialize a cursor for the by-size btree.
635  */
636  cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
637  args->agno, XFS_BTNUM_CNT);
638  ASSERT(args->agbno + args->len <=
639  be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
640  error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
641  args->len, XFSA_FIXUP_BNO_OK);
642  if (error) {
644  goto error0;
645  }
646 
649 
650  args->wasfromfl = 0;
651  trace_xfs_alloc_exact_done(args);
652  return 0;
653 
654 not_found:
655  /* Didn't find it, return null. */
657  args->agbno = NULLAGBLOCK;
658  trace_xfs_alloc_exact_notfound(args);
659  return 0;
660 
661 error0:
663  trace_xfs_alloc_exact_error(args);
664  return error;
665 }
666 
667 /*
668  * Search the btree in a given direction via the search cursor and compare
669  * the records found against the good extent we've already found.
670  */
671 STATIC int
673  struct xfs_alloc_arg *args, /* allocation argument structure */
674  struct xfs_btree_cur **gcur, /* good cursor */
675  struct xfs_btree_cur **scur, /* searching cursor */
676  xfs_agblock_t gdiff, /* difference for search comparison */
677  xfs_agblock_t *sbno, /* extent found by search */
678  xfs_extlen_t *slen, /* extent length */
679  xfs_agblock_t *sbnoa, /* aligned extent found by search */
680  xfs_extlen_t *slena, /* aligned extent length */
681  int dir) /* 0 = search right, 1 = search left */
682 {
683  xfs_agblock_t new;
684  xfs_agblock_t sdiff;
685  int error;
686  int i;
687 
688  /* The good extent is perfect, no need to search. */
689  if (!gdiff)
690  goto out_use_good;
691 
692  /*
693  * Look until we find a better one, run out of space or run off the end.
694  */
695  do {
696  error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
697  if (error)
698  goto error0;
699  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
700  xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
701 
702  /*
703  * The good extent is closer than this one.
704  */
705  if (!dir) {
706  if (*sbnoa >= args->agbno + gdiff)
707  goto out_use_good;
708  } else {
709  if (*sbnoa <= args->agbno - gdiff)
710  goto out_use_good;
711  }
712 
713  /*
714  * Same distance, compare length and pick the best.
715  */
716  if (*slena >= args->minlen) {
717  args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
718  xfs_alloc_fix_len(args);
719 
720  sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
721  args->alignment, *sbnoa,
722  *slena, &new);
723 
724  /*
725  * Choose closer size and invalidate other cursor.
726  */
727  if (sdiff < gdiff)
728  goto out_use_search;
729  goto out_use_good;
730  }
731 
732  if (!dir)
733  error = xfs_btree_increment(*scur, 0, &i);
734  else
735  error = xfs_btree_decrement(*scur, 0, &i);
736  if (error)
737  goto error0;
738  } while (i);
739 
740 out_use_good:
742  *scur = NULL;
743  return 0;
744 
745 out_use_search:
747  *gcur = NULL;
748  return 0;
749 
750 error0:
751  /* caller invalidates cursors */
752  return error;
753 }
754 
755 /*
756  * Allocate a variable extent near bno in the allocation group agno.
757  * Extent's length (returned in len) will be between minlen and maxlen,
758  * and of the form k * prod + mod unless there's nothing that large.
759  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
760  */
761 STATIC int /* error */
763  xfs_alloc_arg_t *args) /* allocation argument structure */
764 {
765  xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */
766  xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */
767  xfs_btree_cur_t *cnt_cur; /* cursor for count btree */
768  xfs_agblock_t gtbno; /* start bno of right side entry */
769  xfs_agblock_t gtbnoa; /* aligned ... */
770  xfs_extlen_t gtdiff; /* difference to right side entry */
771  xfs_extlen_t gtlen; /* length of right side entry */
772  xfs_extlen_t gtlena; /* aligned ... */
773  xfs_agblock_t gtnew; /* useful start bno of right side */
774  int error; /* error code */
775  int i; /* result code, temporary */
776  int j; /* result code, temporary */
777  xfs_agblock_t ltbno; /* start bno of left side entry */
778  xfs_agblock_t ltbnoa; /* aligned ... */
779  xfs_extlen_t ltdiff; /* difference to left side entry */
780  xfs_extlen_t ltlen; /* length of left side entry */
781  xfs_extlen_t ltlena; /* aligned ... */
782  xfs_agblock_t ltnew; /* useful start bno of left side */
783  xfs_extlen_t rlen; /* length of returned extent */
784  int forced = 0;
785 #if defined(DEBUG) && defined(__KERNEL__)
786  /*
787  * Randomly don't execute the first algorithm.
788  */
789  int dofirst; /* set to do first algorithm */
790 
791  dofirst = random32() & 1;
792 #endif
793 
794 restart:
795  bno_cur_lt = NULL;
796  bno_cur_gt = NULL;
797  ltlen = 0;
798  gtlena = 0;
799  ltlena = 0;
800 
801  /*
802  * Get a cursor for the by-size btree.
803  */
804  cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
805  args->agno, XFS_BTNUM_CNT);
806 
807  /*
808  * See if there are any free extents as big as maxlen.
809  */
810  if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
811  goto error0;
812  /*
813  * If none, then pick up the last entry in the tree unless the
814  * tree is empty.
815  */
816  if (!i) {
817  if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
818  &ltlen, &i)))
819  goto error0;
820  if (i == 0 || ltlen == 0) {
822  trace_xfs_alloc_near_noentry(args);
823  return 0;
824  }
825  ASSERT(i == 1);
826  }
827  args->wasfromfl = 0;
828 
829  /*
830  * First algorithm.
831  * If the requested extent is large wrt the freespaces available
832  * in this a.g., then the cursor will be pointing to a btree entry
833  * near the right edge of the tree. If it's in the last btree leaf
834  * block, then we just examine all the entries in that block
835  * that are big enough, and pick the best one.
836  * This is written as a while loop so we can break out of it,
837  * but we never loop back to the top.
838  */
839  while (xfs_btree_islastblock(cnt_cur, 0)) {
840  xfs_extlen_t bdiff;
841  int besti=0;
842  xfs_extlen_t blen=0;
843  xfs_agblock_t bnew=0;
844 
845 #if defined(DEBUG) && defined(__KERNEL__)
846  if (!dofirst)
847  break;
848 #endif
849  /*
850  * Start from the entry that lookup found, sequence through
851  * all larger free blocks. If we're actually pointing at a
852  * record smaller than maxlen, go to the start of this block,
853  * and skip all those smaller than minlen.
854  */
855  if (ltlen || args->alignment > 1) {
856  cnt_cur->bc_ptrs[0] = 1;
857  do {
858  if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
859  &ltlen, &i)))
860  goto error0;
861  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
862  if (ltlen >= args->minlen)
863  break;
864  if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
865  goto error0;
866  } while (i);
867  ASSERT(ltlen >= args->minlen);
868  if (!i)
869  break;
870  }
871  i = cnt_cur->bc_ptrs[0];
872  for (j = 1, blen = 0, bdiff = 0;
873  !error && j && (blen < args->maxlen || bdiff > 0);
874  error = xfs_btree_increment(cnt_cur, 0, &j)) {
875  /*
876  * For each entry, decide if it's better than
877  * the previous best entry.
878  */
879  if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
880  goto error0;
881  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
882  xfs_alloc_compute_aligned(args, ltbno, ltlen,
883  &ltbnoa, &ltlena);
884  if (ltlena < args->minlen)
885  continue;
886  args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
887  xfs_alloc_fix_len(args);
888  ASSERT(args->len >= args->minlen);
889  if (args->len < blen)
890  continue;
891  ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
892  args->alignment, ltbnoa, ltlena, &ltnew);
893  if (ltnew != NULLAGBLOCK &&
894  (args->len > blen || ltdiff < bdiff)) {
895  bdiff = ltdiff;
896  bnew = ltnew;
897  blen = args->len;
898  besti = cnt_cur->bc_ptrs[0];
899  }
900  }
901  /*
902  * It didn't work. We COULD be in a case where
903  * there's a good record somewhere, so try again.
904  */
905  if (blen == 0)
906  break;
907  /*
908  * Point at the best entry, and retrieve it again.
909  */
910  cnt_cur->bc_ptrs[0] = besti;
911  if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
912  goto error0;
913  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
914  ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
915  args->len = blen;
916  if (!xfs_alloc_fix_minleft(args)) {
918  trace_xfs_alloc_near_nominleft(args);
919  return 0;
920  }
921  blen = args->len;
922  /*
923  * We are allocating starting at bnew for blen blocks.
924  */
925  args->agbno = bnew;
926  ASSERT(bnew >= ltbno);
927  ASSERT(bnew + blen <= ltbno + ltlen);
928  /*
929  * Set up a cursor for the by-bno tree.
930  */
931  bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
932  args->agbp, args->agno, XFS_BTNUM_BNO);
933  /*
934  * Fix up the btree entries.
935  */
936  if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
937  ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
938  goto error0;
941 
942  trace_xfs_alloc_near_first(args);
943  return 0;
944  }
945  /*
946  * Second algorithm.
947  * Search in the by-bno tree to the left and to the right
948  * simultaneously, until in each case we find a space big enough,
949  * or run into the edge of the tree. When we run into the edge,
950  * we deallocate that cursor.
951  * If both searches succeed, we compare the two spaces and pick
952  * the better one.
953  * With alignment, it's possible for both to fail; the upper
954  * level algorithm that picks allocation groups for allocations
955  * is not supposed to do this.
956  */
957  /*
958  * Allocate and initialize the cursor for the leftward search.
959  */
960  bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
961  args->agno, XFS_BTNUM_BNO);
962  /*
963  * Lookup <= bno to find the leftward search's starting point.
964  */
965  if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
966  goto error0;
967  if (!i) {
968  /*
969  * Didn't find anything; use this cursor for the rightward
970  * search.
971  */
972  bno_cur_gt = bno_cur_lt;
973  bno_cur_lt = NULL;
974  }
975  /*
976  * Found something. Duplicate the cursor for the rightward search.
977  */
978  else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
979  goto error0;
980  /*
981  * Increment the cursor, so we will point at the entry just right
982  * of the leftward entry if any, or to the leftmost entry.
983  */
984  if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
985  goto error0;
986  if (!i) {
987  /*
988  * It failed, there are no rightward entries.
989  */
991  bno_cur_gt = NULL;
992  }
993  /*
994  * Loop going left with the leftward cursor, right with the
995  * rightward cursor, until either both directions give up or
996  * we find an entry at least as big as minlen.
997  */
998  do {
999  if (bno_cur_lt) {
1000  if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
1001  goto error0;
1002  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1003  xfs_alloc_compute_aligned(args, ltbno, ltlen,
1004  &ltbnoa, &ltlena);
1005  if (ltlena >= args->minlen)
1006  break;
1007  if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
1008  goto error0;
1009  if (!i) {
1010  xfs_btree_del_cursor(bno_cur_lt,
1012  bno_cur_lt = NULL;
1013  }
1014  }
1015  if (bno_cur_gt) {
1016  if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1017  goto error0;
1018  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1019  xfs_alloc_compute_aligned(args, gtbno, gtlen,
1020  &gtbnoa, &gtlena);
1021  if (gtlena >= args->minlen)
1022  break;
1023  if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1024  goto error0;
1025  if (!i) {
1026  xfs_btree_del_cursor(bno_cur_gt,
1028  bno_cur_gt = NULL;
1029  }
1030  }
1031  } while (bno_cur_lt || bno_cur_gt);
1032 
1033  /*
1034  * Got both cursors still active, need to find better entry.
1035  */
1036  if (bno_cur_lt && bno_cur_gt) {
1037  if (ltlena >= args->minlen) {
1038  /*
1039  * Left side is good, look for a right side entry.
1040  */
1041  args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1042  xfs_alloc_fix_len(args);
1043  ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1044  args->alignment, ltbnoa, ltlena, &ltnew);
1045 
1046  error = xfs_alloc_find_best_extent(args,
1047  &bno_cur_lt, &bno_cur_gt,
1048  ltdiff, &gtbno, &gtlen,
1049  &gtbnoa, &gtlena,
1050  0 /* search right */);
1051  } else {
1052  ASSERT(gtlena >= args->minlen);
1053 
1054  /*
1055  * Right side is good, look for a left side entry.
1056  */
1057  args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1058  xfs_alloc_fix_len(args);
1059  gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1060  args->alignment, gtbnoa, gtlena, &gtnew);
1061 
1062  error = xfs_alloc_find_best_extent(args,
1063  &bno_cur_gt, &bno_cur_lt,
1064  gtdiff, &ltbno, &ltlen,
1065  &ltbnoa, &ltlena,
1066  1 /* search left */);
1067  }
1068 
1069  if (error)
1070  goto error0;
1071  }
1072 
1073  /*
1074  * If we couldn't get anything, give up.
1075  */
1076  if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1078 
1079  if (!forced++) {
1080  trace_xfs_alloc_near_busy(args);
1081  xfs_log_force(args->mp, XFS_LOG_SYNC);
1082  goto restart;
1083  }
1084  trace_xfs_alloc_size_neither(args);
1085  args->agbno = NULLAGBLOCK;
1086  return 0;
1087  }
1088 
1089  /*
1090  * At this point we have selected a freespace entry, either to the
1091  * left or to the right. If it's on the right, copy all the
1092  * useful variables to the "left" set so we only have one
1093  * copy of this code.
1094  */
1095  if (bno_cur_gt) {
1096  bno_cur_lt = bno_cur_gt;
1097  bno_cur_gt = NULL;
1098  ltbno = gtbno;
1099  ltbnoa = gtbnoa;
1100  ltlen = gtlen;
1101  ltlena = gtlena;
1102  j = 1;
1103  } else
1104  j = 0;
1105 
1106  /*
1107  * Fix up the length and compute the useful address.
1108  */
1109  args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1110  xfs_alloc_fix_len(args);
1111  if (!xfs_alloc_fix_minleft(args)) {
1112  trace_xfs_alloc_near_nominleft(args);
1115  return 0;
1116  }
1117  rlen = args->len;
1118  (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1119  ltbnoa, ltlena, &ltnew);
1120  ASSERT(ltnew >= ltbno);
1121  ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1122  ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1123  args->agbno = ltnew;
1124 
1125  if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1126  ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1127  goto error0;
1128 
1129  if (j)
1130  trace_xfs_alloc_near_greater(args);
1131  else
1132  trace_xfs_alloc_near_lesser(args);
1133 
1136  return 0;
1137 
1138  error0:
1139  trace_xfs_alloc_near_error(args);
1140  if (cnt_cur != NULL)
1142  if (bno_cur_lt != NULL)
1144  if (bno_cur_gt != NULL)
1146  return error;
1147 }
1148 
1149 /*
1150  * Allocate a variable extent anywhere in the allocation group agno.
1151  * Extent's length (returned in len) will be between minlen and maxlen,
1152  * and of the form k * prod + mod unless there's nothing that large.
1153  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1154  */
/*
 * xfs_alloc_ag_vextent_size: allocate a variable-sized extent anywhere in
 * the AG, preferring the largest available freespace via the by-size
 * (CNT) btree.  On success args->agbno/args->len describe the allocation;
 * args->agbno == NULLAGBLOCK means nothing suitable was found.
 *
 * NOTE(review): this listing was extracted from a doxygen HTML page;
 * lines that carried hyperlinks (including the function-name line and
 * the arguments of the xfs_btree_del_cursor() calls) were dropped, so
 * several statements below appear truncated.  Verify against upstream
 * Linux 3.7 fs/xfs/xfs_alloc.c before relying on exact details.
 */
1155 STATIC int /* error */
1157  xfs_alloc_arg_t *args) /* allocation argument structure */
1158 {
1159  xfs_btree_cur_t *bno_cur; /* cursor for bno btree */
1160  xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */
1161  int error; /* error result */
1162  xfs_agblock_t fbno; /* start of found freespace */
1163  xfs_extlen_t flen; /* length of found freespace */
1164  int i; /* temp status variable */
1165  xfs_agblock_t rbno; /* returned block number */
1166  xfs_extlen_t rlen; /* length of returned extent */
1167  int forced = 0;
1168 
1169 restart:
1170  /*
1171  * Allocate and initialize a cursor for the by-size btree.
1172  */
1173  cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1174  args->agno, XFS_BTNUM_CNT);
1175  bno_cur = NULL;
1176 
1177  /*
1178  * Look for an entry >= maxlen+alignment-1 blocks.
1179  */
1180  if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1181  args->maxlen + args->alignment - 1, &i)))
1182  goto error0;
1183 
1184  /*
1185  * If none or we have busy extents that we cannot allocate from, then
1186  * we have to settle for a smaller extent. In the case that there are
1187  * no large extents, this will return the last entry in the tree unless
1188  * the tree is empty. In the case that there are only busy large
1189  * extents, this will return the largest small extent unless there
1190  * are no smaller extents available.
1191  */
1192  if (!i || forced > 1) {
1193  error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1194  &fbno, &flen, &i);
1195  if (error)
1196  goto error0;
1197  if (i == 0 || flen == 0) {
1199  trace_xfs_alloc_size_noentry(args);
1200  return 0;
1201  }
1202  ASSERT(i == 1);
1203  xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1204  } else {
1205  /*
1206  * Search for a non-busy extent that is large enough.
1207  * If we are at low space, don't check, or if we fall off
1208  * the end of the btree, turn off the busy check and
1209  * restart.
1210  */
1211  for (;;) {
1212  error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1213  if (error)
1214  goto error0;
1215  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1216 
1217  xfs_alloc_compute_aligned(args, fbno, flen,
1218  &rbno, &rlen);
1219 
1220  if (rlen >= args->maxlen)
1221  break;
1222 
1223  error = xfs_btree_increment(cnt_cur, 0, &i);
1224  if (error)
1225  goto error0;
1226  if (i == 0) {
1227  /*
1228  * Our only valid extents must have been busy.
1229  * Make it unbusy by forcing the log out and
1230  * retrying. If we've been here before, forcing
1231  * the log isn't making the extents available,
1232  * which means they have probably been freed in
1233  * this transaction. In that case, we have to
1234  * give up on them and we'll attempt a minlen
1235  * allocation the next time around.
1236  */
1237  xfs_btree_del_cursor(cnt_cur,
1239  trace_xfs_alloc_size_busy(args);
1240  if (!forced++)
1241  xfs_log_force(args->mp, XFS_LOG_SYNC);
1242  goto restart;
1243  }
1244  }
1245  }
1246 
1247  /*
1248  * In the first case above, we got the last entry in the
1249  * by-size btree. Now we check to see if the space hits maxlen
1250  * once aligned; if not, we search left for something better.
1251  * This can't happen in the second case above.
1252  */
1253  rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1254  XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1255  (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1256  if (rlen < args->maxlen) {
1257  xfs_agblock_t bestfbno;
1258  xfs_extlen_t bestflen;
1259  xfs_agblock_t bestrbno;
1260  xfs_extlen_t bestrlen;
1261 
1262  bestrlen = rlen;
1263  bestrbno = rbno;
1264  bestflen = flen;
1265  bestfbno = fbno;
1266  for (;;) {
1267  if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1268  goto error0;
1269  if (i == 0)
1270  break;
1271  if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1272  &i)))
1273  goto error0;
1274  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1275  if (flen < bestrlen)
1276  break;
1277  xfs_alloc_compute_aligned(args, fbno, flen,
1278  &rbno, &rlen);
1279  rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1280  XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1281  (rlen <= flen && rbno + rlen <= fbno + flen),
1282  error0);
1283  if (rlen > bestrlen) {
1284  bestrlen = rlen;
1285  bestrbno = rbno;
1286  bestflen = flen;
1287  bestfbno = fbno;
1288  if (rlen == args->maxlen)
1289  break;
1290  }
1291  }
1292  if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1293  &i)))
1294  goto error0;
1295  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1296  rlen = bestrlen;
1297  rbno = bestrbno;
1298  flen = bestflen;
1299  fbno = bestfbno;
1300  }
1301  args->wasfromfl = 0;
1302  /*
1303  * Fix up the length.
1304  */
1305  args->len = rlen;
1306  if (rlen < args->minlen) {
1307  if (!forced++) {
1309  trace_xfs_alloc_size_busy(args);
1310  xfs_log_force(args->mp, XFS_LOG_SYNC);
1311  goto restart;
1312  }
1313  goto out_nominleft;
1314  }
1315  xfs_alloc_fix_len(args);
1316 
1317  if (!xfs_alloc_fix_minleft(args))
1318  goto out_nominleft;
1319  rlen = args->len;
1320  XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
1321  /*
1322  * Allocate and initialize a cursor for the by-block tree.
1323  */
1324  bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1325  args->agno, XFS_BTNUM_BNO);
1326  if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1327  rbno, rlen, XFSA_FIXUP_CNT_OK)))
1328  goto error0;
1331  cnt_cur = bno_cur = NULL;
1332  args->len = rlen;
1333  args->agbno = rbno;
1335  args->agbno + args->len <=
1336  be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1337  error0);
1338  trace_xfs_alloc_size_done(args);
1339  return 0;
1340 
1341 error0:
1342  trace_xfs_alloc_size_error(args);
1343  if (cnt_cur)
1345  if (bno_cur)
1347  return error;
1348 
1349 out_nominleft:
1351  trace_xfs_alloc_size_nominleft(args);
1352  args->agbno = NULLAGBLOCK;
1353  return 0;
1354 }
1355 
1356 /*
1357  * Deal with the case where only small freespaces remain.
1358  * Either return the contents of the last freespace record,
1359  * or allocate space from the freelist if there is nothing in the tree.
1360  */
/*
 * xfs_alloc_ag_vextent_small: last-resort allocation when only small
 * freespaces remain.  Returns the last (smallest-end) record of the
 * by-size btree, or pulls a single block from the AG freelist when the
 * tree is empty and the request permits it (minlen == 1, no alignment,
 * minleft respected).  *stat: 0 = allocated from freelist (args already
 * filled in), 1 = caller should use *fbnop/*flenp (flen may be 0).
 *
 * NOTE(review): doxygen extraction dropped hyperlinked lines here (the
 * function-name line and the opening of one XFS_WANT_CORRUPTED_GOTO),
 * so two statements below appear truncated.  Verify against upstream
 * Linux 3.7 fs/xfs/xfs_alloc.c.
 */
1361 STATIC int /* error */
1363  xfs_alloc_arg_t *args, /* allocation argument structure */
1364  xfs_btree_cur_t *ccur, /* by-size cursor */
1365  xfs_agblock_t *fbnop, /* result block number */
1366  xfs_extlen_t *flenp, /* result length */
1367  int *stat) /* status: 0-freelist, 1-normal/none */
1368 {
1369  int error;
1370  xfs_agblock_t fbno;
1371  xfs_extlen_t flen;
1372  int i;
1373 
1374  if ((error = xfs_btree_decrement(ccur, 0, &i)))
1375  goto error0;
1376  if (i) {
1377  if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
1378  goto error0;
1379  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1380  }
1381  /*
1382  * Nothing in the btree, try the freelist. Make sure
1383  * to respect minleft even when pulling from the
1384  * freelist.
1385  */
1386  else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
1387  (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1388  > args->minleft)) {
1389  error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1390  if (error)
1391  goto error0;
1392  if (fbno != NULLAGBLOCK) {
1393  xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
1394  args->userdata);
1395 
1396  if (args->userdata) {
1397  xfs_buf_t *bp;
1398 
1399  bp = xfs_btree_get_bufs(args->mp, args->tp,
1400  args->agno, fbno, 0);
1401  xfs_trans_binval(args->tp, bp);
1402  }
1403  args->len = 1;
1404  args->agbno = fbno;
1406  args->agbno + args->len <=
1407  be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1408  error0);
1409  args->wasfromfl = 1;
1410  trace_xfs_alloc_small_freelist(args);
1411  *stat = 0;
1412  return 0;
1413  }
1414  /*
1415  * Nothing in the freelist.
1416  */
1417  else
1418  flen = 0;
1419  }
1420  /*
1421  * Can't allocate from the freelist for some reason.
1422  */
1423  else {
1424  fbno = NULLAGBLOCK;
1425  flen = 0;
1426  }
1427  /*
1428  * Can't do the allocation, give up.
1429  */
1430  if (flen < args->minlen) {
1431  args->agbno = NULLAGBLOCK;
1432  trace_xfs_alloc_small_notenough(args);
1433  flen = 0;
1434  }
1435  *fbnop = fbno;
1436  *flenp = flen;
1437  *stat = 1;
1438  trace_xfs_alloc_small_done(args);
1439  return 0;
1440 
1441 error0:
1442  trace_xfs_alloc_small_error(args);
1443  return error;
1444 }
1445 
1446 /*
1447  * Free the extent starting at agno/bno for length.
1448  */
/*
 * xfs_free_ag_extent: free the extent [bno, bno+len) in AG agno.
 * Coalesces with contiguous left/right free neighbors in the by-block
 * (BNO) btree, keeps the by-size (CNT) btree in sync, then updates the
 * per-AG and (unless isfl) superblock free-space counters.
 *
 * NOTE(review): doxygen extraction dropped hyperlinked lines here (the
 * function-name line, the xfs_btree_del_cursor() argument lines, and
 * the opening of one XFS_WANT_CORRUPTED_GOTO in the DEBUG section), so
 * several statements below appear truncated.  The statement ordering of
 * the four merge cases is delicate; do not edit without comparing
 * against upstream Linux 3.7 fs/xfs/xfs_alloc.c.
 */
1449 STATIC int /* error */
1451  xfs_trans_t *tp, /* transaction pointer */
1452  xfs_buf_t *agbp, /* buffer for a.g. freelist header */
1453  xfs_agnumber_t agno, /* allocation group number */
1454  xfs_agblock_t bno, /* starting block number */
1455  xfs_extlen_t len, /* length of extent */
1456  int isfl) /* set if is freelist blocks - no sb acctg */
1457 {
1458  xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
1459  xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
1460  int error; /* error return value */
1461  xfs_agblock_t gtbno; /* start of right neighbor block */
1462  xfs_extlen_t gtlen; /* length of right neighbor block */
1463  int haveleft; /* have a left neighbor block */
1464  int haveright; /* have a right neighbor block */
1465  int i; /* temp, result code */
1466  xfs_agblock_t ltbno; /* start of left neighbor block */
1467  xfs_extlen_t ltlen; /* length of left neighbor block */
1468  xfs_mount_t *mp; /* mount point struct for filesystem */
1469  xfs_agblock_t nbno; /* new starting block of freespace */
1470  xfs_extlen_t nlen; /* new length of freespace */
1471  xfs_perag_t *pag; /* per allocation group data */
1472 
1473  mp = tp->t_mountp;
1474  /*
1475  * Allocate and initialize a cursor for the by-block btree.
1476  */
1477  bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1478  cnt_cur = NULL;
1479  /*
1480  * Look for a neighboring block on the left (lower block numbers)
1481  * that is contiguous with this space.
1482  */
1483  if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1484  goto error0;
1485  if (haveleft) {
1486  /*
1487  * There is a block to our left.
1488  */
1489  if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1490  goto error0;
1491  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1492  /*
1493  * It's not contiguous, though.
1494  */
1495  if (ltbno + ltlen < bno)
1496  haveleft = 0;
1497  else {
1498  /*
1499  * If this failure happens the request to free this
1500  * space was invalid, it's (partly) already free.
1501  * Very bad.
1502  */
1503  XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
1504  }
1505  }
1506  /*
1507  * Look for a neighboring block on the right (higher block numbers)
1508  * that is contiguous with this space.
1509  */
1510  if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1511  goto error0;
1512  if (haveright) {
1513  /*
1514  * There is a block to our right.
1515  */
1516  if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1517  goto error0;
1518  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1519  /*
1520  * It's not contiguous, though.
1521  */
1522  if (bno + len < gtbno)
1523  haveright = 0;
1524  else {
1525  /*
1526  * If this failure happens the request to free this
1527  * space was invalid, it's (partly) already free.
1528  * Very bad.
1529  */
1530  XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
1531  }
1532  }
1533  /*
1534  * Now allocate and initialize a cursor for the by-size tree.
1535  */
1536  cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1537  /*
1538  * Have both left and right contiguous neighbors.
1539  * Merge all three into a single free block.
1540  */
1541  if (haveleft && haveright) {
1542  /*
1543  * Delete the old by-size entry on the left.
1544  */
1545  if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1546  goto error0;
1547  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1548  if ((error = xfs_btree_delete(cnt_cur, &i)))
1549  goto error0;
1550  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1551  /*
1552  * Delete the old by-size entry on the right.
1553  */
1554  if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1555  goto error0;
1556  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1557  if ((error = xfs_btree_delete(cnt_cur, &i)))
1558  goto error0;
1559  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1560  /*
1561  * Delete the old by-block entry for the right block.
1562  */
1563  if ((error = xfs_btree_delete(bno_cur, &i)))
1564  goto error0;
1565  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1566  /*
1567  * Move the by-block cursor back to the left neighbor.
1568  */
1569  if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1570  goto error0;
1571  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1572 #ifdef DEBUG
1573  /*
1574  * Check that this is the right record: delete didn't
1575  * mangle the cursor.
1576  */
1577  {
1578  xfs_agblock_t xxbno;
1579  xfs_extlen_t xxlen;
1580 
1581  if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1582  &i)))
1583  goto error0;
1585  i == 1 && xxbno == ltbno && xxlen == ltlen,
1586  error0);
1587  }
1588 #endif
1589  /*
1590  * Update remaining by-block entry to the new, joined block.
1591  */
1592  nbno = ltbno;
1593  nlen = len + ltlen + gtlen;
1594  if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1595  goto error0;
1596  }
1597  /*
1598  * Have only a left contiguous neighbor.
1599  * Merge it together with the new freespace.
1600  */
1601  else if (haveleft) {
1602  /*
1603  * Delete the old by-size entry on the left.
1604  */
1605  if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1606  goto error0;
1607  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1608  if ((error = xfs_btree_delete(cnt_cur, &i)))
1609  goto error0;
1610  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1611  /*
1612  * Back up the by-block cursor to the left neighbor, and
1613  * update its length.
1614  */
1615  if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1616  goto error0;
1617  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1618  nbno = ltbno;
1619  nlen = len + ltlen;
1620  if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1621  goto error0;
1622  }
1623  /*
1624  * Have only a right contiguous neighbor.
1625  * Merge it together with the new freespace.
1626  */
1627  else if (haveright) {
1628  /*
1629  * Delete the old by-size entry on the right.
1630  */
1631  if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1632  goto error0;
1633  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1634  if ((error = xfs_btree_delete(cnt_cur, &i)))
1635  goto error0;
1636  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1637  /*
1638  * Update the starting block and length of the right
1639  * neighbor in the by-block tree.
1640  */
1641  nbno = bno;
1642  nlen = len + gtlen;
1643  if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1644  goto error0;
1645  }
1646  /*
1647  * No contiguous neighbors.
1648  * Insert the new freespace into the by-block tree.
1649  */
1650  else {
1651  nbno = bno;
1652  nlen = len;
1653  if ((error = xfs_btree_insert(bno_cur, &i)))
1654  goto error0;
1655  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1656  }
1658  bno_cur = NULL;
1659  /*
1660  * In all cases we need to insert the new freespace in the by-size tree.
1661  */
1662  if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1663  goto error0;
1664  XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
1665  if ((error = xfs_btree_insert(cnt_cur, &i)))
1666  goto error0;
1667  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1669  cnt_cur = NULL;
1670 
1671  /*
1672  * Update the freespace totals in the ag and superblock.
1673  */
1674  pag = xfs_perag_get(mp, agno);
1675  error = xfs_alloc_update_counters(tp, pag, agbp, len);
1676  xfs_perag_put(pag);
1677  if (error)
1678  goto error0;
1679 
1680  if (!isfl)
1681  xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1682  XFS_STATS_INC(xs_freex);
1683  XFS_STATS_ADD(xs_freeb, len);
1684 
1685  trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1686 
1687  return 0;
1688 
1689  error0:
1690  trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
1691  if (bno_cur)
1693  if (cnt_cur)
1695  return error;
1696 }
1697 
1698 /*
1699  * Visible (exported) allocation/free functions.
1700  * Some of these are used just by xfs_alloc_btree.c and this file.
1701  */
1702 
1703 /*
1704  * Compute and fill in value of m_ag_maxlevels.
1705  */
1706 void
1708  xfs_mount_t *mp) /* file system mount structure */
1709 {
1710  int level;
1711  uint maxblocks;
1712  uint maxleafents;
1713  int minleafrecs;
1714  int minnoderecs;
1715 
1716  maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
1717  minleafrecs = mp->m_alloc_mnr[0];
1718  minnoderecs = mp->m_alloc_mnr[1];
1719  maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1720  for (level = 1; maxblocks > 1; level++)
1721  maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1722  mp->m_ag_maxlevels = level;
1723 }
1724 
1725 /*
1726  * Find the length of the longest extent in an AG.
1727  */
1730  struct xfs_mount *mp,
1731  struct xfs_perag *pag)
1732 {
1733  xfs_extlen_t need, delta = 0;
1734 
1735  need = XFS_MIN_FREELIST_PAG(pag, mp);
1736  if (need > pag->pagf_flcount)
1737  delta = need - pag->pagf_flcount;
1738 
1739  if (pag->pagf_longest > delta)
1740  return pag->pagf_longest - delta;
1741  return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1742 }
1743 
1744 /*
1745  * Decide whether to use this allocation group for this allocation.
1746  * If so, fix up the btree freelist's size.
1747  */
/*
 * xfs_alloc_fix_freelist: decide whether this AG can satisfy the
 * allocation in *args and, if so, bring the AG freelist to its required
 * size (shrinking it by freeing blocks, or growing it by allocating
 * THIS_AG extents).  On success args->agbp holds the locked AGF buffer;
 * args->agbp == NULL with a 0 return means "try another AG".
 *
 * NOTE(review): doxygen extraction dropped the hyperlinked
 * function-name line, so the definition below appears to start at its
 * parameter list.  Verify against upstream Linux 3.7 fs/xfs/xfs_alloc.c.
 */
1748 STATIC int /* error */
1750  xfs_alloc_arg_t *args, /* allocation argument structure */
1751  int flags) /* XFS_ALLOC_FLAG_... */
1752 {
1753  xfs_buf_t *agbp; /* agf buffer pointer */
1754  xfs_agf_t *agf; /* a.g. freespace structure pointer */
1755  xfs_buf_t *agflbp;/* agfl buffer pointer */
1756  xfs_agblock_t bno; /* freelist block */
1757  xfs_extlen_t delta; /* new blocks needed in freelist */
1758  int error; /* error result code */
1759  xfs_extlen_t longest;/* longest extent in allocation group */
1760  xfs_mount_t *mp; /* file system mount point structure */
1761  xfs_extlen_t need; /* total blocks needed in freelist */
1762  xfs_perag_t *pag; /* per-ag information structure */
1763  xfs_alloc_arg_t targs; /* local allocation arguments */
1764  xfs_trans_t *tp; /* transaction pointer */
1765 
1766  mp = args->mp;
1767 
1768  pag = args->pag;
1769  tp = args->tp;
1770  if (!pag->pagf_init) {
1771  if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1772  &agbp)))
1773  return error;
1774  if (!pag->pagf_init) {
1775  ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1776  ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1777  args->agbp = NULL;
1778  return 0;
1779  }
1780  } else
1781  agbp = NULL;
1782 
1783  /*
1784  * If this is a metadata preferred pag and we are user data
1785  * then try somewhere else if we are not being asked to
1786  * try harder at this point
1787  */
1788  if (pag->pagf_metadata && args->userdata &&
1789  (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
1790  ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1791  args->agbp = NULL;
1792  return 0;
1793  }
1794 
1795  if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1796  /*
1797  * If it looks like there isn't a long enough extent, or enough
1798  * total blocks, reject it.
1799  */
1800  need = XFS_MIN_FREELIST_PAG(pag, mp);
1801  longest = xfs_alloc_longest_free_extent(mp, pag);
1802  if ((args->minlen + args->alignment + args->minalignslop - 1) >
1803  longest ||
1804  ((int)(pag->pagf_freeblks + pag->pagf_flcount -
1805  need - args->total) < (int)args->minleft)) {
1806  if (agbp)
1807  xfs_trans_brelse(tp, agbp);
1808  args->agbp = NULL;
1809  return 0;
1810  }
1811  }
1812 
1813  /*
1814  * Get the a.g. freespace buffer.
1815  * Can fail if we're not blocking on locks, and it's held.
1816  */
1817  if (agbp == NULL) {
1818  if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1819  &agbp)))
1820  return error;
1821  if (agbp == NULL) {
1822  ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1823  ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1824  args->agbp = NULL;
1825  return 0;
1826  }
1827  }
1828  /*
1829  * Figure out how many blocks we should have in the freelist.
1830  */
1831  agf = XFS_BUF_TO_AGF(agbp);
1832  need = XFS_MIN_FREELIST(agf, mp);
1833  /*
1834  * If there isn't enough total or single-extent, reject it.
1835  */
1836  if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1837  delta = need > be32_to_cpu(agf->agf_flcount) ?
1838  (need - be32_to_cpu(agf->agf_flcount)) : 0;
1839  longest = be32_to_cpu(agf->agf_longest);
1840  longest = (longest > delta) ? (longest - delta) :
1841  (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
1842  if ((args->minlen + args->alignment + args->minalignslop - 1) >
1843  longest ||
1844  ((int)(be32_to_cpu(agf->agf_freeblks) +
1845  be32_to_cpu(agf->agf_flcount) - need - args->total) <
1846  (int)args->minleft)) {
1847  xfs_trans_brelse(tp, agbp);
1848  args->agbp = NULL;
1849  return 0;
1850  }
1851  }
1852  /*
1853  * Make the freelist shorter if it's too long.
1854  */
1855  while (be32_to_cpu(agf->agf_flcount) > need) {
1856  xfs_buf_t *bp;
1857 
1858  error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
1859  if (error)
1860  return error;
1861  if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
1862  return error;
1863  bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
1864  xfs_trans_binval(tp, bp);
1865  }
1866  /*
1867  * Initialize the args structure.
1868  */
1869  memset(&targs, 0, sizeof(targs));
1870  targs.tp = tp;
1871  targs.mp = mp;
1872  targs.agbp = agbp;
1873  targs.agno = args->agno;
1874  targs.mod = targs.minleft = targs.wasdel = targs.userdata =
1875  targs.minalignslop = 0;
1876  targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1877  targs.type = XFS_ALLOCTYPE_THIS_AG;
1878  targs.pag = pag;
1879  if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
1880  return error;
1881  /*
1882  * Make the freelist longer if it's too short.
1883  */
1884  while (be32_to_cpu(agf->agf_flcount) < need) {
1885  targs.agbno = 0;
1886  targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
1887  /*
1888  * Allocate as many blocks as possible at once.
1889  */
1890  if ((error = xfs_alloc_ag_vextent(&targs))) {
1891  xfs_trans_brelse(tp, agflbp);
1892  return error;
1893  }
1894  /*
1895  * Stop if we run out. Won't happen if callers are obeying
1896  * the restrictions correctly. Can happen for free calls
1897  * on a completely full ag.
1898  */
1899  if (targs.agbno == NULLAGBLOCK) {
1900  if (flags & XFS_ALLOC_FLAG_FREEING)
1901  break;
1902  xfs_trans_brelse(tp, agflbp);
1903  args->agbp = NULL;
1904  return 0;
1905  }
1906  /*
1907  * Put each allocated block on the list.
1908  */
1909  for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
1910  error = xfs_alloc_put_freelist(tp, agbp,
1911  agflbp, bno, 0);
1912  if (error)
1913  return error;
1914  }
1915  }
1916  xfs_trans_brelse(tp, agflbp);
1917  args->agbp = agbp;
1918  return 0;
1919 }
1920 
1921 /*
1922  * Get a block from the freelist.
1923  * Returns with the buffer for the block gotten.
1924  */
/*
 * xfs_alloc_get_freelist: pop one block off the AG freelist.  Advances
 * agf_flfirst (wrapping at XFS_AGFL_SIZE), decrements the in-core and
 * on-disk freelist counts, and logs the changed AGF fields.  *bnop is
 * NULLAGBLOCK when the freelist is empty.
 *
 * NOTE(review): doxygen extraction dropped the hyperlinked
 * function-name line.  Also note pag is used after xfs_perag_put() in
 * the btreeblk branch below — this matches the extracted listing; check
 * upstream Linux 3.7 fs/xfs/xfs_alloc.c before changing anything here.
 */
1925 int /* error */
1927  xfs_trans_t *tp, /* transaction pointer */
1928  xfs_buf_t *agbp, /* buffer containing the agf structure */
1929  xfs_agblock_t *bnop, /* block address retrieved from freelist */
1930  int btreeblk) /* destination is a AGF btree */
1931 {
1932  xfs_agf_t *agf; /* a.g. freespace structure */
1933  xfs_agfl_t *agfl; /* a.g. freelist structure */
1934  xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
1935  xfs_agblock_t bno; /* block number returned */
1936  int error;
1937  int logflags;
1938  xfs_mount_t *mp; /* mount structure */
1939  xfs_perag_t *pag; /* per allocation group data */
1940 
1941  agf = XFS_BUF_TO_AGF(agbp);
1942  /*
1943  * Freelist is empty, give up.
1944  */
1945  if (!agf->agf_flcount) {
1946  *bnop = NULLAGBLOCK;
1947  return 0;
1948  }
1949  /*
1950  * Read the array of free blocks.
1951  */
1952  mp = tp->t_mountp;
1953  if ((error = xfs_alloc_read_agfl(mp, tp,
1954  be32_to_cpu(agf->agf_seqno), &agflbp)))
1955  return error;
1956  agfl = XFS_BUF_TO_AGFL(agflbp);
1957  /*
1958  * Get the block number and update the data structures.
1959  */
1960  bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
1961  be32_add_cpu(&agf->agf_flfirst, 1);
1962  xfs_trans_brelse(tp, agflbp);
1963  if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
1964  agf->agf_flfirst = 0;
1965 
1966  pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
1967  be32_add_cpu(&agf->agf_flcount, -1);
1968  xfs_trans_agflist_delta(tp, -1);
1969  pag->pagf_flcount--;
1970  xfs_perag_put(pag);
1971 
1972  logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
1973  if (btreeblk) {
1974  be32_add_cpu(&agf->agf_btreeblks, 1);
1975  pag->pagf_btreeblks++;
1976  logflags |= XFS_AGF_BTREEBLKS;
1977  }
1978 
1979  xfs_alloc_log_agf(tp, agbp, logflags);
1980  *bnop = bno;
1981 
1982  return 0;
1983 }
1984 
1985 /*
1986  * Log the given fields from the agf structure.
1987  */
/*
 * xfs_alloc_log_agf: log the given AGF fields.  Maps the field mask to
 * a first/last byte range via xfs_btree_offsets() and the static
 * offsets[] table, then logs that range of the AGF buffer.
 *
 * NOTE(review): doxygen extraction dropped the hyperlinked
 * function-name line and the interior entries of the offsets[] table
 * (original source lines 1997-2008), which list the byte offset of each
 * xfs_agf_t field; only the sizeof terminator survived.  Verify against
 * upstream Linux 3.7 fs/xfs/xfs_alloc.c.
 */
1988 void
1990  xfs_trans_t *tp, /* transaction pointer */
1991  xfs_buf_t *bp, /* buffer for a.g. freelist header */
1992  int fields) /* mask of fields to be logged (XFS_AGF_...) */
1993 {
1994  int first; /* first byte offset */
1995  int last; /* last byte offset */
1996  static const short offsets[] = {
2009  sizeof(xfs_agf_t)
2010  };
2011 
2012  trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2013 
2014  xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2015  xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2016 }
2017 
2018 /*
2019  * Interface for inode allocation to force the pag data to be initialized.
2020  */
2021 int /* error */
2023  xfs_mount_t *mp, /* file system mount structure */
2024  xfs_trans_t *tp, /* transaction pointer */
2025  xfs_agnumber_t agno, /* allocation group number */
2026  int flags) /* XFS_ALLOC_FLAGS_... */
2027 {
2028  xfs_buf_t *bp;
2029  int error;
2030 
2031  if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2032  return error;
2033  if (bp)
2034  xfs_trans_brelse(tp, bp);
2035  return 0;
2036 }
2037 
2038 /*
2039  * Put the block on the freelist for the allocation group.
2040  */
/*
 * xfs_alloc_put_freelist: push one block onto the AG freelist.
 * Advances agf_fllast (wrapping at XFS_AGFL_SIZE), bumps the in-core
 * and on-disk freelist counts, stores the block number into the AGFL
 * array slot, and logs both the AGF fields and the touched AGFL bytes.
 * agflbp may be NULL, in which case the AGFL buffer is read here.
 *
 * NOTE(review): doxygen extraction dropped the hyperlinked
 * function-name line and one line before the blockp assignment
 * (original source line 2083, likely an ASSERT on agf_flcount);
 * xfs_alloc_log_agf() appearing twice below matches the extracted
 * listing — verify against upstream Linux 3.7 fs/xfs/xfs_alloc.c.
 */
2041 int /* error */
2043  xfs_trans_t *tp, /* transaction pointer */
2044  xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2045  xfs_buf_t *agflbp,/* buffer for a.g. free block array */
2046  xfs_agblock_t bno, /* block being freed */
2047  int btreeblk) /* block came from a AGF btree */
2048 {
2049  xfs_agf_t *agf; /* a.g. freespace structure */
2050  xfs_agfl_t *agfl; /* a.g. free block array */
2051  __be32 *blockp;/* pointer to array entry */
2052  int error;
2053  int logflags;
2054  xfs_mount_t *mp; /* mount structure */
2055  xfs_perag_t *pag; /* per allocation group data */
2056 
2057  agf = XFS_BUF_TO_AGF(agbp);
2058  mp = tp->t_mountp;
2059 
2060  if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
2061  be32_to_cpu(agf->agf_seqno), &agflbp)))
2062  return error;
2063  agfl = XFS_BUF_TO_AGFL(agflbp);
2064  be32_add_cpu(&agf->agf_fllast, 1);
2065  if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
2066  agf->agf_fllast = 0;
2067 
2068  pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2069  be32_add_cpu(&agf->agf_flcount, 1);
2070  xfs_trans_agflist_delta(tp, 1);
2071  pag->pagf_flcount++;
2072 
2073  logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2074  if (btreeblk) {
2075  be32_add_cpu(&agf->agf_btreeblks, -1);
2076  pag->pagf_btreeblks--;
2077  logflags |= XFS_AGF_BTREEBLKS;
2078  }
2079  xfs_perag_put(pag);
2080 
2081  xfs_alloc_log_agf(tp, agbp, logflags);
2082 
2084  blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
2085  *blockp = cpu_to_be32(bno);
2086  xfs_alloc_log_agf(tp, agbp, logflags);
2087  xfs_trans_log_buf(tp, agflbp,
2088  (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
2089  (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
2090  sizeof(xfs_agblock_t) - 1));
2091  return 0;
2092 }
2093 
2094 /*
2095  * Read in the allocation group header (free/alloc section).
2096  */
/*
 * xfs_read_agf: read the AGF (allocation group free-space header) block
 * for agno into *bpp and sanity-check it (magic/version are among the
 * checks; freelist indices and counts bounded by XFS_AGFL_SIZE; seqno
 * matches agno; with lazy sb counters, btreeblks <= agf_length).
 * Returns EFSCORRUPTED if validation fails.  A NULL *bpp with a 0
 * return means the trylock-style read got nothing.
 *
 * NOTE(review): doxygen extraction dropped the hyperlinked
 * function-name line, the first lines of the agf_ok expression
 * (original source lines 2126-2128, presumably the magic-number and
 * version checks), and the "if (unlikely(...))" opener before the
 * XFS_CORRUPTION_ERROR call.  Verify against upstream Linux 3.7
 * fs/xfs/xfs_alloc.c.
 */
2097 int /* error */
2099  struct xfs_mount *mp, /* mount point structure */
2100  struct xfs_trans *tp, /* transaction pointer */
2101  xfs_agnumber_t agno, /* allocation group number */
2102  int flags, /* XFS_BUF_ */
2103  struct xfs_buf **bpp) /* buffer for the ag freelist header */
2104 {
2105  struct xfs_agf *agf; /* ag freelist header */
2106  int agf_ok; /* set if agf is consistent */
2107  int error;
2108 
2109  ASSERT(agno != NULLAGNUMBER);
2110  error = xfs_trans_read_buf(
2111  mp, tp, mp->m_ddev_targp,
2112  XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2113  XFS_FSS_TO_BB(mp, 1), flags, bpp);
2114  if (error)
2115  return error;
2116  if (!*bpp)
2117  return 0;
2118 
2119  ASSERT(!(*bpp)->b_error);
2120  agf = XFS_BUF_TO_AGF(*bpp);
2121 
2122  /*
2123  * Validate the magic number of the agf block.
2124  */
2125  agf_ok =
2129  be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2130  be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2131  be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp) &&
2132  be32_to_cpu(agf->agf_seqno) == agno;
2133  if (xfs_sb_version_haslazysbcount(&mp->m_sb))
2134  agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
2135  be32_to_cpu(agf->agf_length);
2138  XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
2139  XFS_ERRLEVEL_LOW, mp, agf);
2140  xfs_trans_brelse(tp, *bpp);
2141  return XFS_ERROR(EFSCORRUPTED);
2142  }
2143  xfs_buf_set_ref(*bpp, XFS_AGF_REF);
2144  return 0;
2145 }
2146 
2147 /*
2148  * Read in the allocation group header (free/alloc section).
2149  */
/*
 * xfs_alloc_read_agf: read the AGF via xfs_read_agf() (translating
 * XFS_ALLOC_FLAG_TRYLOCK to XBF_TRYLOCK) and, on the first read of this
 * AG, populate the in-core per-ag structure (free block counts,
 * freelist count, longest extent, btree levels, busy-extent tree).
 * On later reads the DEBUG build cross-checks the cached values.
 *
 * NOTE(review): doxygen extraction dropped several hyperlinked lines
 * (the function-name line; one pagf field assignment between freeblks
 * and flcount, likely pagf_btreeblks; the be32_to_cpu sources of the
 * pagf_levels assignments; and two ASSERT lines in the DEBUG block).
 * Verify against upstream Linux 3.7 fs/xfs/xfs_alloc.c.
 */
2150 int /* error */
2152  struct xfs_mount *mp, /* mount point structure */
2153  struct xfs_trans *tp, /* transaction pointer */
2154  xfs_agnumber_t agno, /* allocation group number */
2155  int flags, /* XFS_ALLOC_FLAG_... */
2156  struct xfs_buf **bpp) /* buffer for the ag freelist header */
2157 {
2158  struct xfs_agf *agf; /* ag freelist header */
2159  struct xfs_perag *pag; /* per allocation group data */
2160  int error;
2161 
2162  ASSERT(agno != NULLAGNUMBER);
2163 
2164  error = xfs_read_agf(mp, tp, agno,
2165  (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2166  bpp);
2167  if (error)
2168  return error;
2169  if (!*bpp)
2170  return 0;
2171  ASSERT(!(*bpp)->b_error);
2172 
2173  agf = XFS_BUF_TO_AGF(*bpp);
2174  pag = xfs_perag_get(mp, agno);
2175  if (!pag->pagf_init) {
2176  pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
2178  pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2179  pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2180  pag->pagf_levels[XFS_BTNUM_BNOi] =
2182  pag->pagf_levels[XFS_BTNUM_CNTi] =
2184  spin_lock_init(&pag->pagb_lock);
2185  pag->pagb_count = 0;
2186  pag->pagb_tree = RB_ROOT;
2187  pag->pagf_init = 1;
2188  }
2189 #ifdef DEBUG
2190  else if (!XFS_FORCED_SHUTDOWN(mp)) {
2192  ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2193  ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2199  }
2200 #endif
2201  xfs_perag_put(pag);
2202  return 0;
2203 }
2204 
2205 /*
2206  * Allocate an extent (variable-size).
2207  * Depending on the allocation type, we either look in a single allocation
2208  * group or loop over the allocation groups to find the result.
2209  */
2210 int /* error */
2212  xfs_alloc_arg_t *args) /* allocation argument structure */
2213 {
2214  xfs_agblock_t agsize; /* allocation group size */
2215  int error;
2216  int flags; /* XFS_ALLOC_FLAG_... locking flags */
2217  xfs_extlen_t minleft;/* minimum left value, temp copy */
2218  xfs_mount_t *mp; /* mount structure pointer */
2219  xfs_agnumber_t sagno; /* starting allocation group number */
2220  xfs_alloctype_t type; /* input allocation type */
2221  int bump_rotor = 0;
2222  int no_min = 0;
2223  xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2224 
2225  mp = args->mp;
2226  type = args->otype = args->type;
2227  args->agbno = NULLAGBLOCK;
2228  /*
2229  * Just fix this up, for the case where the last a.g. is shorter
2230  * (or there's only one a.g.) and the caller couldn't easily figure
2231  * that out (xfs_bmap_alloc).
2232  */
2233  agsize = mp->m_sb.sb_agblocks;
2234  if (args->maxlen > agsize)
2235  args->maxlen = agsize;
2236  if (args->alignment == 0)
2237  args->alignment = 1;
2238  ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2239  ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2240  ASSERT(args->minlen <= args->maxlen);
2241  ASSERT(args->minlen <= agsize);
2242  ASSERT(args->mod < args->prod);
2243  if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2244  XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2245  args->minlen > args->maxlen || args->minlen > agsize ||
2246  args->mod >= args->prod) {
2247  args->fsbno = NULLFSBLOCK;
2248  trace_xfs_alloc_vextent_badargs(args);
2249  return 0;
2250  }
2251  minleft = args->minleft;
2252 
2253  switch (type) {
2254  case XFS_ALLOCTYPE_THIS_AG:
2257  /*
2258  * These three force us into a single a.g.
2259  */
2260  args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2261  args->pag = xfs_perag_get(mp, args->agno);
2262  args->minleft = 0;
2263  error = xfs_alloc_fix_freelist(args, 0);
2264  args->minleft = minleft;
2265  if (error) {
2266  trace_xfs_alloc_vextent_nofix(args);
2267  goto error0;
2268  }
2269  if (!args->agbp) {
2270  trace_xfs_alloc_vextent_noagbp(args);
2271  break;
2272  }
2273  args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2274  if ((error = xfs_alloc_ag_vextent(args)))
2275  goto error0;
2276  break;
2278  /*
2279  * Try near allocation first, then anywhere-in-ag after
2280  * the first a.g. fails.
2281  */
2282  if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) &&
2283  (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2284  args->fsbno = XFS_AGB_TO_FSB(mp,
2285  ((mp->m_agfrotor / rotorstep) %
2286  mp->m_sb.sb_agcount), 0);
2287  bump_rotor = 1;
2288  }
2289  args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2290  args->type = XFS_ALLOCTYPE_NEAR_BNO;
2291  /* FALLTHROUGH */
2292  case XFS_ALLOCTYPE_ANY_AG:
2295  /*
2296  * Rotate through the allocation groups looking for a winner.
2297  */
2298  if (type == XFS_ALLOCTYPE_ANY_AG) {
2299  /*
2300  * Start with the last place we left off.
2301  */
2302  args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2303  mp->m_sb.sb_agcount;
2304  args->type = XFS_ALLOCTYPE_THIS_AG;
2305  flags = XFS_ALLOC_FLAG_TRYLOCK;
2306  } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2307  /*
2308  * Start with allocation group given by bno.
2309  */
2310  args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2311  args->type = XFS_ALLOCTYPE_THIS_AG;
2312  sagno = 0;
2313  flags = 0;
2314  } else {
2315  if (type == XFS_ALLOCTYPE_START_AG)
2316  args->type = XFS_ALLOCTYPE_THIS_AG;
2317  /*
2318  * Start with the given allocation group.
2319  */
2320  args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2321  flags = XFS_ALLOC_FLAG_TRYLOCK;
2322  }
2323  /*
2324  * Loop over allocation groups twice; first time with
2325  * trylock set, second time without.
2326  */
2327  for (;;) {
2328  args->pag = xfs_perag_get(mp, args->agno);
2329  if (no_min) args->minleft = 0;
2330  error = xfs_alloc_fix_freelist(args, flags);
2331  args->minleft = minleft;
2332  if (error) {
2333  trace_xfs_alloc_vextent_nofix(args);
2334  goto error0;
2335  }
2336  /*
2337  * If we get a buffer back then the allocation will fly.
2338  */
2339  if (args->agbp) {
2340  if ((error = xfs_alloc_ag_vextent(args)))
2341  goto error0;
2342  break;
2343  }
2344 
2345  trace_xfs_alloc_vextent_loopfailed(args);
2346 
2347  /*
2348  * Didn't work, figure out the next iteration.
2349  */
2350  if (args->agno == sagno &&
2351  type == XFS_ALLOCTYPE_START_BNO)
2352  args->type = XFS_ALLOCTYPE_THIS_AG;
2353  /*
2354  * For the first allocation, we can try any AG to get
2355  * space. However, if we already have allocated a
2356  * block, we don't want to try AGs whose number is below
2357  * sagno. Otherwise, we may end up with out-of-order
2358  * locking of AGF, which might cause deadlock.
2359  */
2360  if (++(args->agno) == mp->m_sb.sb_agcount) {
2361  if (args->firstblock != NULLFSBLOCK)
2362  args->agno = sagno;
2363  else
2364  args->agno = 0;
2365  }
2366  /*
2367  * Reached the starting a.g., must either be done
2368  * or switch to non-trylock mode.
2369  */
2370  if (args->agno == sagno) {
2371  if (no_min == 1) {
2372  args->agbno = NULLAGBLOCK;
2373  trace_xfs_alloc_vextent_allfailed(args);
2374  break;
2375  }
2376  if (flags == 0) {
2377  no_min = 1;
2378  } else {
2379  flags = 0;
2380  if (type == XFS_ALLOCTYPE_START_BNO) {
2381  args->agbno = XFS_FSB_TO_AGBNO(mp,
2382  args->fsbno);
2383  args->type = XFS_ALLOCTYPE_NEAR_BNO;
2384  }
2385  }
2386  }
2387  xfs_perag_put(args->pag);
2388  }
2389  if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2390  if (args->agno == sagno)
2391  mp->m_agfrotor = (mp->m_agfrotor + 1) %
2392  (mp->m_sb.sb_agcount * rotorstep);
2393  else
2394  mp->m_agfrotor = (args->agno * rotorstep + 1) %
2395  (mp->m_sb.sb_agcount * rotorstep);
2396  }
2397  break;
2398  default:
2399  ASSERT(0);
2400  /* NOTREACHED */
2401  }
2402  if (args->agbno == NULLAGBLOCK)
2403  args->fsbno = NULLFSBLOCK;
2404  else {
2405  args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2406 #ifdef DEBUG
2407  ASSERT(args->len >= args->minlen);
2408  ASSERT(args->len <= args->maxlen);
2409  ASSERT(args->agbno % args->alignment == 0);
2410  XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2411  args->len);
2412 #endif
2413  }
2414  xfs_perag_put(args->pag);
2415  return 0;
2416 error0:
2417  xfs_perag_put(args->pag);
2418  return error;
2419 }
2420 
2421 /*
2422  * Free an extent.
2423  * Just break up the extent address and hand off to xfs_free_ag_extent
2424  * after fixing up the freelist.
2425  */
2426 int /* error */
2428  xfs_trans_t *tp, /* transaction pointer */
2429  xfs_fsblock_t bno, /* starting block number of extent */
2430  xfs_extlen_t len) /* length of extent */
2431 {
2433  int error;
2434 
2435  ASSERT(len != 0);
2436  memset(&args, 0, sizeof(xfs_alloc_arg_t));
2437  args.tp = tp;
2438  args.mp = tp->t_mountp;
2439 
2440  /*
2441  * validate that the block number is legal - the enables us to detect
2442  * and handle a silent filesystem corruption rather than crashing.
2443  */
2444  args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2445  if (args.agno >= args.mp->m_sb.sb_agcount)
2446  return EFSCORRUPTED;
2447 
2448  args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2449  if (args.agbno >= args.mp->m_sb.sb_agblocks)
2450  return EFSCORRUPTED;
2451 
2452  args.pag = xfs_perag_get(args.mp, args.agno);
2453  ASSERT(args.pag);
2454 
2456  if (error)
2457  goto error0;
2458 
2459  /* validate the extent size is legal now we have the agf locked */
2460  if (args.agbno + len >
2461  be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
2462  error = EFSCORRUPTED;
2463  goto error0;
2464  }
2465 
2466  error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2467  if (!error)
2468  xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
2469 error0:
2470  xfs_perag_put(args.pag);
2471  return error;
2472 }