/*
 * linux/fs/ext3/balloc.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card ([email protected])
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 * Enhanced block allocation by Stephen Tweedie ([email protected]), 1993
 * Big-endian to little-endian byte-swapping/bitmaps by
 * David S. Miller ([email protected]), 1995
 */

#include <linux/quotaops.h>
#include <linux/blkdev.h>
#include "ext3.h"

/*
 * balloc.c contains the block allocation and deallocation routines
 */

/*
 * The free blocks are managed by bitmaps. A file system contains several
 * block groups. Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block. Each descriptor contains the number of the bitmap block and
 * the free blocks count in the group. The descriptors are loaded in memory
 * when a file system is mounted (see ext3_fill_super).
 */
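
/*
 * Editor's sketch (not text from the original source): the rough order of
 * metadata inside one block group, with sizes depending on the filesystem
 * geometry:
 *
 *   [super block copy] [group descriptors] [block bitmap] [inode bitmap]
 *   [inode table: s_itb_per_group blocks] [data blocks ...]
 *
 * Only some groups carry the super block / descriptor copies; see
 * ext3_bg_has_super() near the end of this file.
 */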

#define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)

/*
 * Calculate the block group number and offset, given a block number
 */
static void ext3_get_group_no_and_offset(struct super_block *sb,
	ext3_fsblk_t blocknr, unsigned long *blockgrpp, ext3_grpblk_t *offsetp)
{
	struct ext3_super_block *es = EXT3_SB(sb)->s_es;

	blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
	if (offsetp)
		*offsetp = blocknr % EXT3_BLOCKS_PER_GROUP(sb);
	if (blockgrpp)
		*blockgrpp = blocknr / EXT3_BLOCKS_PER_GROUP(sb);
}
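
/*
 * Worked example (editor's illustration): with 32768 blocks per group and
 * s_first_data_block == 1, block 40000 falls in group (40000 - 1) / 32768
 * == 1, at offset (40000 - 1) % 32768 == 7231 within that group.
 */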

struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
					     unsigned int block_group,
					     struct buffer_head ** bh)
{
	unsigned long group_desc;
	unsigned long offset;
	struct ext3_group_desc * desc;
	struct ext3_sb_info *sbi = EXT3_SB(sb);

	if (block_group >= sbi->s_groups_count) {
		ext3_error (sb, "ext3_get_group_desc",
			    "block_group >= groups_count - "
			    "block_group = %d, groups_count = %lu",
			    block_group, sbi->s_groups_count);

		return NULL;
	}
	smp_rmb();

	group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb);
	offset = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
	if (!sbi->s_group_desc[group_desc]) {
		ext3_error (sb, "ext3_get_group_desc",
			    "Group descriptor not loaded - "
			    "block_group = %d, group_desc = %lu, desc = %lu",
			    block_group, group_desc, offset);
		return NULL;
	}

	desc = (struct ext3_group_desc *) sbi->s_group_desc[group_desc]->b_data;
	if (bh)
		*bh = sbi->s_group_desc[group_desc];
	return desc + offset;
}
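
/*
 * Typical use (editor's sketch, mirroring the callers below): look up a
 * group's descriptor and read its free-block count; gd_bh receives the
 * buffer head backing the descriptor table block.
 *
 *	struct buffer_head *gd_bh;
 *	struct ext3_group_desc *desc = ext3_get_group_desc(sb, group, &gd_bh);
 *	if (desc)
 *		free = le16_to_cpu(desc->bg_free_blocks_count);
 */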

static int ext3_valid_block_bitmap(struct super_block *sb,
					struct ext3_group_desc *desc,
					unsigned int block_group,
					struct buffer_head *bh)
{
	ext3_grpblk_t offset;
	ext3_grpblk_t next_zero_bit;
	ext3_fsblk_t bitmap_blk;
	ext3_fsblk_t group_first_block;

	group_first_block = ext3_group_first_block_no(sb, block_group);

	/* check whether block bitmap block number is set */
	bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
	offset = bitmap_blk - group_first_block;
	if (!ext3_test_bit(offset, bh->b_data))
		/* bad block bitmap */
		goto err_out;

	/* check whether the inode bitmap block number is set */
	bitmap_blk = le32_to_cpu(desc->bg_inode_bitmap);
	offset = bitmap_blk - group_first_block;
	if (!ext3_test_bit(offset, bh->b_data))
		/* bad block bitmap */
		goto err_out;

	/* check whether the inode table block number is set */
	bitmap_blk = le32_to_cpu(desc->bg_inode_table);
	offset = bitmap_blk - group_first_block;
	next_zero_bit = ext3_find_next_zero_bit(bh->b_data,
				offset + EXT3_SB(sb)->s_itb_per_group,
				offset);
	if (next_zero_bit >= offset + EXT3_SB(sb)->s_itb_per_group)
		/* good bitmap for inode tables */
		return 1;

err_out:
	ext3_error(sb, __func__,
			"Invalid block bitmap - "
			"block_group = %d, block = %lu",
			block_group, bitmap_blk);
	return 0;
}

static struct buffer_head *
read_block_bitmap(struct super_block *sb, unsigned int block_group)
{
	struct ext3_group_desc * desc;
	struct buffer_head * bh = NULL;
	ext3_fsblk_t bitmap_blk;

	desc = ext3_get_group_desc(sb, block_group, NULL);
	if (!desc)
		return NULL;
	trace_ext3_read_block_bitmap(sb, block_group);
	bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
	bh = sb_getblk(sb, bitmap_blk);
	if (unlikely(!bh)) {
		ext3_error(sb, __func__,
			"Cannot read block bitmap - "
			"block_group = %d, block_bitmap = %u",
			block_group, le32_to_cpu(desc->bg_block_bitmap));
		return NULL;
	}
	if (likely(bh_uptodate_or_lock(bh)))
		return bh;

	if (bh_submit_read(bh) < 0) {
		brelse(bh);
		ext3_error(sb, __func__,
			"Cannot read block bitmap - "
			"block_group = %d, block_bitmap = %u",
			block_group, le32_to_cpu(desc->bg_block_bitmap));
		return NULL;
	}
	ext3_valid_block_bitmap(sb, desc, block_group, bh);
	/*
	 * The file system was mounted not to panic on errors, so continue
	 * with the corrupt bitmap.
	 */
	return bh;
}
/*
 * The reservation window structure operations
 * --------------------------------------------
 * Operations include:
 * dump, find, add, remove, is_empty, find_next_reservable_window, etc.
 *
 * We use a red-black tree to represent per-filesystem reservation
 * windows.
 *
 */
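
/*
 * Editor's sketch of the tree: windows are disjoint [start, end] block
 * intervals keyed by their start block, e.g.
 *
 *                [200, 215]
 *               /          \
 *        [100, 119]     [300, 363]
 *
 * (block numbers invented for illustration). An in-order walk therefore
 * visits windows in ascending block order, which is what the dump and
 * search routines below rely on.
 */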

#if 1
static void __rsv_window_dump(struct rb_root *root, int verbose,
			      const char *fn)
{
	struct rb_node *n;
	struct ext3_reserve_window_node *rsv, *prev;
	int bad;

restart:
	n = rb_first(root);
	bad = 0;
	prev = NULL;

	printk("Block Allocation Reservation Windows Map (%s):\n", fn);
	while (n) {
		rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
		if (verbose)
			printk("reservation window 0x%p "
			       "start: %lu, end: %lu\n",
			       rsv, rsv->rsv_start, rsv->rsv_end);
		if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
			printk("Bad reservation %p (start >= end)\n",
			       rsv);
			bad = 1;
		}
		if (prev && prev->rsv_end >= rsv->rsv_start) {
			printk("Bad reservation %p (prev->end >= start)\n",
			       rsv);
			bad = 1;
		}
		if (bad) {
			if (!verbose) {
				printk("Restarting reservation walk in verbose mode\n");
				verbose = 1;
				goto restart;
			}
		}
		n = rb_next(n);
		prev = rsv;
	}
	printk("Window map complete.\n");
	BUG_ON(bad);
}
#define rsv_window_dump(root, verbose) \
	__rsv_window_dump((root), (verbose), __func__)
#else
#define rsv_window_dump(root, verbose) do {} while (0)
#endif

static int
goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal,
			unsigned int group, struct super_block * sb)
{
	ext3_fsblk_t group_first_block, group_last_block;

	group_first_block = ext3_group_first_block_no(sb, group);
	group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);

	if ((rsv->_rsv_start > group_last_block) ||
	    (rsv->_rsv_end < group_first_block))
		return 0;
	if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start)
		|| (grp_goal + group_first_block > rsv->_rsv_end)))
		return 0;
	return 1;
}
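
/*
 * In words (editor's note): the window must overlap the group at all, and
 * if a group-relative goal was given, the goal's absolute block must fall
 * inside [_rsv_start, _rsv_end]. E.g. a window [100, 150] rejects a goal
 * whose absolute block is 160 even though the window overlaps the group.
 */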

static struct ext3_reserve_window_node *
search_reserve_window(struct rb_root *root, ext3_fsblk_t goal)
{
	struct rb_node *n = root->rb_node;
	struct ext3_reserve_window_node *rsv;

	if (!n)
		return NULL;

	do {
		rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);

		if (goal < rsv->rsv_start)
			n = n->rb_left;
		else if (goal > rsv->rsv_end)
			n = n->rb_right;
		else
			return rsv;
	} while (n);
	/*
	 * We've fallen off the end of the tree: the goal wasn't inside
	 * any particular node. OK, the previous node must be to one
	 * side of the interval containing the goal. If it's the RHS,
	 * we need to back up one.
	 */
	if (rsv->rsv_start > goal) {
		n = rb_prev(&rsv->rsv_node);
		rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
	}
	return rsv;
}

void ext3_rsv_window_add(struct super_block *sb,
			 struct ext3_reserve_window_node *rsv)
{
	struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root;
	struct rb_node *node = &rsv->rsv_node;
	ext3_fsblk_t start = rsv->rsv_start;

	struct rb_node ** p = &root->rb_node;
	struct rb_node * parent = NULL;
	struct ext3_reserve_window_node *this;

	trace_ext3_rsv_window_add(sb, rsv);
	while (*p)
	{
		parent = *p;
		this = rb_entry(parent, struct ext3_reserve_window_node, rsv_node);

		if (start < this->rsv_start)
			p = &(*p)->rb_left;
		else if (start > this->rsv_end)
			p = &(*p)->rb_right;
		else {
			rsv_window_dump(root, 1);
			BUG();
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
}

static void rsv_window_remove(struct super_block *sb,
			      struct ext3_reserve_window_node *rsv)
{
	rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
	rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
	rsv->rsv_alloc_hit = 0;
	rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root);
}

/*
 * rsv_is_empty() -- Check if the reservation window is allocated.
 * @rsv:	given reservation window to check
 *
 * returns 1 if the end block is EXT3_RESERVE_WINDOW_NOT_ALLOCATED.
 */
static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
{
	/* a valid reservation end block could not be 0 */
	return rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
}

void ext3_init_block_alloc_info(struct inode *inode)
{
	struct ext3_inode_info *ei = EXT3_I(inode);
	struct ext3_block_alloc_info *block_i;
	struct super_block *sb = inode->i_sb;

	block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
	if (block_i) {
		struct ext3_reserve_window_node *rsv = &block_i->rsv_window_node;

		rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
		rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;

		/*
		 * if filesystem is mounted with NORESERVATION, the goal
		 * reservation window size is set to zero to indicate
		 * block reservation is off
		 */
		if (!test_opt(sb, RESERVATION))
			rsv->rsv_goal_size = 0;
		else
			rsv->rsv_goal_size = EXT3_DEFAULT_RESERVE_BLOCKS;
		rsv->rsv_alloc_hit = 0;
		block_i->last_alloc_logical_block = 0;
		block_i->last_alloc_physical_block = 0;
	}
	ei->i_block_alloc_info = block_i;
}

void ext3_discard_reservation(struct inode *inode)
{
	struct ext3_inode_info *ei = EXT3_I(inode);
	struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info;
	struct ext3_reserve_window_node *rsv;
	spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock;

	if (!block_i)
		return;

	rsv = &block_i->rsv_window_node;
	if (!rsv_is_empty(&rsv->rsv_window)) {
		spin_lock(rsv_lock);
		if (!rsv_is_empty(&rsv->rsv_window)) {
			trace_ext3_discard_reservation(inode, rsv);
			rsv_window_remove(inode->i_sb, rsv);
		}
		spin_unlock(rsv_lock);
	}
}
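
/*
 * Editor's note: the emptiness test above is deliberately made twice --
 * once unlocked as a cheap fast path, then again under s_rsv_window_lock,
 * since another task may empty the window between the two checks.
 */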

void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
			 ext3_fsblk_t block, unsigned long count,
			 unsigned long *pdquot_freed_blocks)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gd_bh;
	unsigned long block_group;
	ext3_grpblk_t bit;
	unsigned long i;
	unsigned long overflow;
	struct ext3_group_desc * desc;
	struct ext3_super_block * es;
	struct ext3_sb_info *sbi;
	int err = 0, ret;
	ext3_grpblk_t group_freed;

	*pdquot_freed_blocks = 0;
	sbi = EXT3_SB(sb);
	es = sbi->s_es;
	if (block < le32_to_cpu(es->s_first_data_block) ||
	    block + count < block ||
	    block + count > le32_to_cpu(es->s_blocks_count)) {
		ext3_error (sb, "ext3_free_blocks",
			    "Freeing blocks not in datazone - "
			    "block = "E3FSBLK", count = %lu", block, count);
		goto error_return;
	}

	ext3_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);

do_more:
	overflow = 0;
	block_group = (block - le32_to_cpu(es->s_first_data_block)) /
		      EXT3_BLOCKS_PER_GROUP(sb);
	bit = (block - le32_to_cpu(es->s_first_data_block)) %
		      EXT3_BLOCKS_PER_GROUP(sb);
	/*
	 * Check to see if we are freeing blocks across a group
	 * boundary.
	 */
	if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
		overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
		count -= overflow;
	}
	brelse(bitmap_bh);
	bitmap_bh = read_block_bitmap(sb, block_group);
	if (!bitmap_bh)
		goto error_return;
	desc = ext3_get_group_desc (sb, block_group, &gd_bh);
	if (!desc)
		goto error_return;

	if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
	    in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
	    in_range (block, le32_to_cpu(desc->bg_inode_table),
		      sbi->s_itb_per_group) ||
	    in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
		      sbi->s_itb_per_group)) {
		ext3_error (sb, "ext3_free_blocks",
			    "Freeing blocks in system zones - "
			    "Block = "E3FSBLK", count = %lu",
			    block, count);
		goto error_return;
	}

	/*
	 * We are about to start releasing blocks in the bitmap,
	 * so we need undo access.
	 */
	/* @@@ check errors */
	BUFFER_TRACE(bitmap_bh, "getting undo access");
	err = ext3_journal_get_undo_access(handle, bitmap_bh);
	if (err)
		goto error_return;

	/*
	 * We are about to modify some metadata. Call the journal APIs
	 * to unshare ->b_data if a currently-committing transaction is
	 * using it
	 */
	BUFFER_TRACE(gd_bh, "get_write_access");
	err = ext3_journal_get_write_access(handle, gd_bh);
	if (err)
		goto error_return;

	jbd_lock_bh_state(bitmap_bh);

	for (i = 0, group_freed = 0; i < count; i++) {
		/*
		 * An HJ special. This is expensive...
		 */
#ifdef CONFIG_JBD_DEBUG
		jbd_unlock_bh_state(bitmap_bh);
		{
			struct buffer_head *debug_bh;
			debug_bh = sb_find_get_block(sb, block + i);
			if (debug_bh) {
				BUFFER_TRACE(debug_bh, "Deleted!");
				if (!bh2jh(bitmap_bh)->b_committed_data)
					BUFFER_TRACE(debug_bh,
						"No committed data in bitmap");
				BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
				__brelse(debug_bh);
			}
		}
		jbd_lock_bh_state(bitmap_bh);
#endif
		if (need_resched()) {
			jbd_unlock_bh_state(bitmap_bh);
			cond_resched();
			jbd_lock_bh_state(bitmap_bh);
		}
		/* @@@ This prevents newly-allocated data from being
		 * freed and then reallocated within the same
		 * transaction.
		 *
		 * Ideally we would want to allow that to happen, but to
		 * do so requires making journal_forget() capable of
		 * revoking the queued write of a data block, which
		 * implies blocking on the journal lock. *forget()
		 * cannot block due to truncate races.
		 *
		 * Eventually we can fix this by making journal_forget()
		 * return a status indicating whether or not it was able
		 * to revoke the buffer. On successful revoke, it is
		 * safe not to set the allocation bit in the committed
		 * bitmap, because we know that there is no outstanding
		 * activity on the buffer any more and so it is safe to
		 * reallocate it.
		 */
		BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
		J_ASSERT_BH(bitmap_bh,
				bh2jh(bitmap_bh)->b_committed_data != NULL);
		ext3_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
				bh2jh(bitmap_bh)->b_committed_data);

		/*
		 * We clear the bit in the bitmap after setting the committed
		 * data bit, because this is the reverse order to that which
		 * the allocator uses.
		 */
		BUFFER_TRACE(bitmap_bh, "clear bit");
		if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
						bit + i, bitmap_bh->b_data)) {
			jbd_unlock_bh_state(bitmap_bh);
			ext3_error(sb, __func__,
				"bit already cleared for block "E3FSBLK,
				block + i);
			jbd_lock_bh_state(bitmap_bh);
			BUFFER_TRACE(bitmap_bh, "bit already cleared");
		} else {
			group_freed++;
		}
	}
	jbd_unlock_bh_state(bitmap_bh);

	spin_lock(sb_bgl_lock(sbi, block_group));
	le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
	spin_unlock(sb_bgl_lock(sbi, block_group));
	percpu_counter_add(&sbi->s_freeblocks_counter, count);

	/* We dirtied the bitmap block */
	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
	err = ext3_journal_dirty_metadata(handle, bitmap_bh);

	/* And the group descriptor block */
	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
	ret = ext3_journal_dirty_metadata(handle, gd_bh);
	if (!err) err = ret;
	*pdquot_freed_blocks += group_freed;

	if (overflow && !err) {
		block += count;
		count = overflow;
		goto do_more;
	}

error_return:
	brelse(bitmap_bh);
	ext3_std_error(sb, err);
	return;
}

void ext3_free_blocks(handle_t *handle, struct inode *inode,
			ext3_fsblk_t block, unsigned long count)
{
	struct super_block *sb = inode->i_sb;
	unsigned long dquot_freed_blocks;

	trace_ext3_free_blocks(inode, block, count);
	ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
	if (dquot_freed_blocks)
		dquot_free_block(inode, dquot_freed_blocks);
	return;
}

static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh)
{
	int ret;
	struct journal_head *jh = bh2jh(bh);

	if (ext3_test_bit(nr, bh->b_data))
		return 0;

	jbd_lock_bh_state(bh);
	if (!jh->b_committed_data)
		ret = 1;
	else
		ret = !ext3_test_bit(nr, jh->b_committed_data);
	jbd_unlock_bh_state(bh);
	return ret;
}
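
/*
 * Editor's note: a block is only considered allocatable when it is free in
 * both the live bitmap (bh->b_data) and the copy being written by the
 * committing transaction (jh->b_committed_data). This keeps a block freed
 * in the current transaction from being reused before the free commits.
 */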

static ext3_grpblk_t
bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
					ext3_grpblk_t maxblocks)
{
	ext3_grpblk_t next;
	struct journal_head *jh = bh2jh(bh);

	while (start < maxblocks) {
		next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start);
		if (next >= maxblocks)
			return -1;
		if (ext3_test_allocatable(next, bh))
			return next;
		jbd_lock_bh_state(bh);
		if (jh->b_committed_data)
			start = ext3_find_next_zero_bit(jh->b_committed_data,
							maxblocks, next);
		jbd_unlock_bh_state(bh);
	}
	return -1;
}

static ext3_grpblk_t
find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
			ext3_grpblk_t maxblocks)
{
	ext3_grpblk_t here, next;
	char *p, *r;

	if (start > 0) {
		/*
		 * The goal was occupied; search forward for a free
		 * block within the next XX blocks.
		 *
		 * end_goal is more or less random, but it has to be
		 * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the
		 * next 64-bit boundary is simple..
		 */
		ext3_grpblk_t end_goal = (start + 63) & ~63;
		if (end_goal > maxblocks)
			end_goal = maxblocks;
		here = ext3_find_next_zero_bit(bh->b_data, end_goal, start);
		if (here < end_goal && ext3_test_allocatable(here, bh))
			return here;
		ext3_debug("Bit not found near goal\n");
	}

	here = start;
	if (here < 0)
		here = 0;

	p = bh->b_data + (here >> 3);
	r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
	next = (r - bh->b_data) << 3;

	if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh))
		return next;

	/*
	 * The bitmap search --- search forward alternately through the actual
	 * bitmap and the last-committed copy until we find a bit free in
	 * both
	 */
	here = bitmap_search_next_usable_block(here, bh, maxblocks);
	return here;
}

static inline int
claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh)
{
	struct journal_head *jh = bh2jh(bh);
	int ret;

	if (ext3_set_bit_atomic(lock, block, bh->b_data))
		return 0;
	jbd_lock_bh_state(bh);
	if (jh->b_committed_data && ext3_test_bit(block,jh->b_committed_data)) {
		ext3_clear_bit_atomic(lock, block, bh->b_data);
		ret = 0;
	} else {
		ret = 1;
	}
	jbd_unlock_bh_state(bh);
	return ret;
}
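
/*
 * Editor's note: claim_block() is optimistic -- it sets the bit in the
 * live bitmap first and only then checks the committed copy. If the block
 * is still busy there, the bit is cleared again and the claim fails, so
 * callers simply advance to the next candidate and retry.
 */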

static ext3_grpblk_t
ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
			struct buffer_head *bitmap_bh, ext3_grpblk_t grp_goal,
			unsigned long *count, struct ext3_reserve_window *my_rsv)
{
	ext3_fsblk_t group_first_block;
	ext3_grpblk_t start, end;
	unsigned long num = 0;

	/* we do allocation within the reservation window if we have a window */
	if (my_rsv) {
		group_first_block = ext3_group_first_block_no(sb, group);
		if (my_rsv->_rsv_start >= group_first_block)
			start = my_rsv->_rsv_start - group_first_block;
		else
			/* reservation window crosses group boundary */
			start = 0;
		end = my_rsv->_rsv_end - group_first_block + 1;
		if (end > EXT3_BLOCKS_PER_GROUP(sb))
			/* reservation window crosses group boundary */
			end = EXT3_BLOCKS_PER_GROUP(sb);
		if ((start <= grp_goal) && (grp_goal < end))
			start = grp_goal;
		else
			grp_goal = -1;
	} else {
		if (grp_goal > 0)
			start = grp_goal;
		else
			start = 0;
		end = EXT3_BLOCKS_PER_GROUP(sb);
	}

	BUG_ON(start > EXT3_BLOCKS_PER_GROUP(sb));

repeat:
	if (grp_goal < 0 || !ext3_test_allocatable(grp_goal, bitmap_bh)) {
		grp_goal = find_next_usable_block(start, bitmap_bh, end);
		if (grp_goal < 0)
			goto fail_access;
		if (!my_rsv) {
			int i;

			for (i = 0; i < 7 && grp_goal > start &&
					ext3_test_allocatable(grp_goal - 1,
								bitmap_bh);
					i++, grp_goal--)
				;
		}
	}
	start = grp_goal;

	if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group),
		grp_goal, bitmap_bh)) {
		/*
		 * The block was allocated by another thread, or it was
		 * allocated and then freed by another thread
		 */
		start++;
		grp_goal++;
		if (start >= end)
			goto fail_access;
		goto repeat;
	}
	num++;
	grp_goal++;
	while (num < *count && grp_goal < end
		&& ext3_test_allocatable(grp_goal, bitmap_bh)
		&& claim_block(sb_bgl_lock(EXT3_SB(sb), group),
				grp_goal, bitmap_bh)) {
		num++;
		grp_goal++;
	}
	*count = num;
	return grp_goal - num;
fail_access:
	*count = num;
	return -1;
}
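
/*
 * Editor's note: on success ext3_try_to_allocate() returns the
 * group-relative number of the *first* block claimed (grp_goal - num) and
 * stores the number of contiguous blocks actually claimed back through
 * *count, which may be fewer than the caller asked for.
 */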

static int find_next_reservable_window(
				struct ext3_reserve_window_node *search_head,
				struct ext3_reserve_window_node *my_rsv,
				struct super_block * sb,
				ext3_fsblk_t start_block,
				ext3_fsblk_t last_block)
{
	struct rb_node *next;
	struct ext3_reserve_window_node *rsv, *prev;
	ext3_fsblk_t cur;
	int size = my_rsv->rsv_goal_size;

	/* TODO: make the start of the reservation window byte-aligned */
	/* cur = *start_block & ~7;*/
	cur = start_block;
	rsv = search_head;
	if (!rsv)
		return -1;

	while (1) {
		if (cur <= rsv->rsv_end)
			cur = rsv->rsv_end + 1;

		/* TODO?
		 * in the case we could not find a reservable space
		 * that is what is expected, during the re-search, we could
		 * remember what's the largest reservable space we could have
		 * and return that one.
		 *
		 * For now it will fail if we could not find the reservable
		 * space with expected-size (or more)...
		 */
		if (cur > last_block)
			return -1;		/* fail */

		prev = rsv;
		next = rb_next(&rsv->rsv_node);
		rsv = rb_entry(next,struct ext3_reserve_window_node,rsv_node);

		/*
		 * Reached the last reservation, we can just append to the
		 * previous one.
		 */
		if (!next)
			break;

		if (cur + size <= rsv->rsv_start) {
			/*
			 * Found a reservable space big enough. We could
			 * have a reservation across the group boundary here
			 */
			break;
		}
	}
	/*
	 * We get here in one of two ways: either we reached the end of the
	 * list and there is empty reservable space after its last entry, in
	 * which case we append to the end of the list; or we found a
	 * reservable space in the middle of the list, in which case we
	 * append after the window we stopped at.
	 */

	if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window)))
		rsv_window_remove(sb, my_rsv);

	/*
	 * Let's book the whole available window for now. We will check the
	 * disk bitmap later and then, if there are free blocks then we adjust
	 * the window size if it's larger than requested.
	 * Otherwise, we will remove this node from the tree next time
	 * we call find_next_reservable_window.
	 */
	my_rsv->rsv_start = cur;
	my_rsv->rsv_end = cur + size - 1;
	my_rsv->rsv_alloc_hit = 0;

	if (prev != my_rsv)
		ext3_rsv_window_add(sb, my_rsv);

	return 0;
}

static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
		ext3_grpblk_t grp_goal, struct super_block *sb,
		unsigned int group, struct buffer_head *bitmap_bh)
{
	struct ext3_reserve_window_node *search_head;
	ext3_fsblk_t group_first_block, group_end_block, start_block;
	ext3_grpblk_t first_free_block;
	struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root;
	unsigned long size;
	int ret;
	spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;

	group_first_block = ext3_group_first_block_no(sb, group);
	group_end_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);

	if (grp_goal < 0)
		start_block = group_first_block;
	else
		start_block = grp_goal + group_first_block;

	trace_ext3_alloc_new_reservation(sb, start_block);
	size = my_rsv->rsv_goal_size;

	if (!rsv_is_empty(&my_rsv->rsv_window)) {
		/*
		 * if the old reservation crosses the group boundary
		 * and the goal is inside the old reservation window,
		 * we will come here when we just failed to allocate from
		 * the first part of the window. We still have another part
		 * that belongs to the next group. In this case, there is no
		 * point in discarding our window and trying to allocate a
		 * new one in this group (which will fail). We should keep
		 * the reservation window and simply move on.
		 *
		 * Maybe we could shift the start block of the reservation
		 * window to the first block of the next group.
		 */

		if ((my_rsv->rsv_start <= group_end_block) &&
			(my_rsv->rsv_end > group_end_block) &&
			(start_block >= my_rsv->rsv_start))
			return -1;

		if ((my_rsv->rsv_alloc_hit >
		     (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
			/*
			 * if the previous allocation hit ratio is
			 * greater than 1/2, then we double the size of
			 * the reservation window the next time,
			 * otherwise we keep the same size window
			 */
			size = size * 2;
			if (size > EXT3_MAX_RESERVE_BLOCKS)
				size = EXT3_MAX_RESERVE_BLOCKS;
			my_rsv->rsv_goal_size = size;
		}
	}

	spin_lock(rsv_lock);
	/*
	 * shift the search start to the window near the goal block
	 */
	search_head = search_reserve_window(fs_rsv_root, start_block);

	/*
	 * find_next_reservable_window() simply finds a reservable window
	 * inside the given range(start_block, group_end_block).
	 *
	 * To make sure the reservation window has a free bit inside it, we
	 * need to check the bitmap after we found a reservable window.
	 */
retry:
	ret = find_next_reservable_window(search_head, my_rsv, sb,
						start_block, group_end_block);

	if (ret == -1) {
		if (!rsv_is_empty(&my_rsv->rsv_window))
			rsv_window_remove(sb, my_rsv);
		spin_unlock(rsv_lock);
		return -1;
	}

	/*
	 * On success, find_next_reservable_window() returns the
	 * reservation window where there is a reservable space after it.
	 * Before we reserve this reservable space, we need
	 * to make sure there is at least a free block inside this region.
	 *
	 * Search the first free bit on the block bitmap and the copy of the
	 * last committed bitmap alternately, until we find an allocatable
	 * block. The search starts from the start block of the reservable
	 * space we just found.
	 */
	spin_unlock(rsv_lock);
	first_free_block = bitmap_search_next_usable_block(
			my_rsv->rsv_start - group_first_block,
			bitmap_bh, group_end_block - group_first_block + 1);

	if (first_free_block < 0) {
		/*
		 * no free block left on the bitmap, no point
		 * in reserving the space. return failure.
		 */
		spin_lock(rsv_lock);
		if (!rsv_is_empty(&my_rsv->rsv_window))
			rsv_window_remove(sb, my_rsv);
		spin_unlock(rsv_lock);
		return -1;		/* failed */
	}

	start_block = first_free_block + group_first_block;
	/*
	 * check if the first free block is within the
	 * free space we just reserved
	 */
	if (start_block >= my_rsv->rsv_start &&
	    start_block <= my_rsv->rsv_end) {
		trace_ext3_reserved(sb, start_block, my_rsv);
		return 0;		/* success */
	}
	/*
	 * if the first free bit we found is out of the reservable space,
	 * continue the search for the next reservable space,
	 * starting from where the free block is;
	 * we also shift the list head to where we stopped last time
	 */
	search_head = my_rsv;
	spin_lock(rsv_lock);
	goto retry;
}

static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
			struct super_block *sb, int size)
{
	struct ext3_reserve_window_node *next_rsv;
	struct rb_node *next;
	spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;

	if (!spin_trylock(rsv_lock))
		return;

	next = rb_next(&my_rsv->rsv_node);

	if (!next)
		my_rsv->rsv_end += size;
	else {
		next_rsv = rb_entry(next, struct ext3_reserve_window_node, rsv_node);

		if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
			my_rsv->rsv_end += size;
		else
			my_rsv->rsv_end = next_rsv->rsv_start - 1;
	}
	spin_unlock(rsv_lock);
}

static ext3_grpblk_t
ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
			unsigned int group, struct buffer_head *bitmap_bh,
			ext3_grpblk_t grp_goal,
			struct ext3_reserve_window_node * my_rsv,
			unsigned long *count, int *errp)
{
	ext3_fsblk_t group_first_block, group_last_block;
	ext3_grpblk_t ret = 0;
	int fatal;
	unsigned long num = *count;

	*errp = 0;

	/*
	 * Make sure we use undo access for the bitmap, because it is critical
	 * that we do the frozen_data COW on bitmap buffers in all cases even
	 * if the buffer is in BJ_Forget state in the committing transaction.
	 */
	BUFFER_TRACE(bitmap_bh, "get undo access for new block");
	fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
	if (fatal) {
		*errp = fatal;
		return -1;
	}

	/*
	 * We don't deal with reservations when
	 * the filesystem is mounted without reservations,
	 * or the file is not a regular file,
	 * or the last attempt to allocate a block with a reservation
	 * turned on failed.
	 */
	if (my_rsv == NULL) {
		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
						grp_goal, count, NULL);
		goto out;
	}
	/*
	 * grp_goal is a group relative block number (if there is a goal)
	 * 0 <= grp_goal < EXT3_BLOCKS_PER_GROUP(sb)
	 * first block is a filesystem wide block number
	 * first block is the block number of the first block in this group
	 */
	group_first_block = ext3_group_first_block_no(sb, group);
	group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);

	/*
	 * Basically we will allocate a new block from inode's reservation
	 * window.
	 *
	 * We need to allocate a new reservation window, if:
	 * a) inode does not have a reservation window; or
	 * b) last attempt to allocate a block from existing reservation
	 *    failed; or
	 * c) we come here with a goal and with a reservation window
	 *
	 * We do not need to allocate a new reservation window if we come here
	 * at the beginning with a goal and the goal is inside the window, or
	 * we don't have a goal but already have a reservation window;
	 * then we could go and allocate from the reservation window directly.
	 */
	while (1) {
		if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
			!goal_in_my_reservation(&my_rsv->rsv_window,
						grp_goal, group, sb)) {
			if (my_rsv->rsv_goal_size < *count)
				my_rsv->rsv_goal_size = *count;
			ret = alloc_new_reservation(my_rsv, grp_goal, sb,
							group, bitmap_bh);
			if (ret < 0)
				break;			/* failed */

			if (!goal_in_my_reservation(&my_rsv->rsv_window,
							grp_goal, group, sb))
				grp_goal = -1;
		} else if (grp_goal >= 0) {
			int curr = my_rsv->rsv_end -
				   (grp_goal + group_first_block) + 1;

			if (curr < *count)
				try_to_extend_reservation(my_rsv, sb,
							  *count - curr);
		}

		if ((my_rsv->rsv_start > group_last_block) ||
		    (my_rsv->rsv_end < group_first_block)) {
			rsv_window_dump(&EXT3_SB(sb)->s_rsv_window_root, 1);
			BUG();
		}
		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
					   grp_goal, &num, &my_rsv->rsv_window);
		if (ret >= 0) {
			my_rsv->rsv_alloc_hit += num;
			*count = num;
			break;			/* succeed */
		}
		num = *count;
	}
out:
	if (ret >= 0) {
		BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for "
					"bitmap block");
		fatal = ext3_journal_dirty_metadata(handle, bitmap_bh);
		if (fatal) {
			*errp = fatal;
			return -1;
		}
		return ret;
	}

	BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
	ext3_journal_release_buffer(handle, bitmap_bh);
	return ret;
}

static int ext3_has_free_blocks(struct ext3_sb_info *sbi, int use_reservation)
{
	ext3_fsblk_t free_blocks, root_blocks;

	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
	root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
	if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
		!use_reservation && !uid_eq(sbi->s_resuid, current_fsuid()) &&
		(gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) ||
		 !in_group_p (sbi->s_resgid))) {
		return 0;
	}
	return 1;
}
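
/*
 * Editor's note: in other words, once free space drops into the reserved
 * pool (s_r_blocks_count), only CAP_SYS_RESOURCE holders, the reserved
 * uid/gid (s_resuid/s_resgid) and allocations already backed by a
 * reservation window are allowed to proceed.
 */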

int ext3_should_retry_alloc(struct super_block *sb, int *retries)
{
	if (!ext3_has_free_blocks(EXT3_SB(sb), 0) || (*retries)++ > 3)
		return 0;

	jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);

	return journal_force_commit_nested(EXT3_SB(sb)->s_journal);
}

ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
			ext3_fsblk_t goal, unsigned long *count, int *errp)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gdp_bh;
	int group_no;
	int goal_group;
	ext3_grpblk_t grp_target_blk;	/* blockgroup relative goal block */
	ext3_grpblk_t grp_alloc_blk;	/* blockgroup-relative allocated block*/
	ext3_fsblk_t ret_block;		/* filesystem-wide allocated block */
	int bgi;			/* blockgroup iteration index */
	int fatal = 0, err;
	int performed_allocation = 0;
	ext3_grpblk_t free_blocks;	/* number of free blocks in a group */
	struct super_block *sb;
	struct ext3_group_desc *gdp;
	struct ext3_super_block *es;
	struct ext3_sb_info *sbi;
	struct ext3_reserve_window_node *my_rsv = NULL;
	struct ext3_block_alloc_info *block_i;
	unsigned short windowsz = 0;
#ifdef EXT3FS_DEBUG
	static int goal_hits, goal_attempts;
#endif
	unsigned long ngroups;
	unsigned long num = *count;

	*errp = -ENOSPC;
	sb = inode->i_sb;

	/*
	 * Check quota for allocation of this block.
	 */
	err = dquot_alloc_block(inode, num);
	if (err) {
		*errp = err;
		return 0;
	}

	trace_ext3_request_blocks(inode, goal, num);

	sbi = EXT3_SB(sb);
	es = sbi->s_es;
	ext3_debug("goal=%lu.\n", goal);
	/*
	 * Allocate a block from reservation only when
	 * filesystem is mounted with reservation(default,-o reservation), and
	 * it's a regular file, and
	 * the desired window size is greater than 0 (One could use ioctl
	 * command EXT3_IOC_SETRSVSZ to set the window size to 0 to turn off
	 * reservation on that particular file)
	 */
	block_i = EXT3_I(inode)->i_block_alloc_info;
	if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
		my_rsv = &block_i->rsv_window_node;

	if (!ext3_has_free_blocks(sbi, IS_NOQUOTA(inode))) {
		*errp = -ENOSPC;
		goto out;
	}

	/*
	 * First, test whether the goal block is free.
	 */
	if (goal < le32_to_cpu(es->s_first_data_block) ||
	    goal >= le32_to_cpu(es->s_blocks_count))
		goal = le32_to_cpu(es->s_first_data_block);
	group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
			EXT3_BLOCKS_PER_GROUP(sb);
	goal_group = group_no;
retry_alloc:
	gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
	if (!gdp)
		goto io_error;

	free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
	/*
	 * if there are not enough free blocks to make a new reservation,
	 * turn off reservation for this allocation
	 */
	if (my_rsv && (free_blocks < windowsz)
		&& (free_blocks > 0)
		&& (rsv_is_empty(&my_rsv->rsv_window)))
		my_rsv = NULL;

	if (free_blocks > 0) {
		grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) %
				EXT3_BLOCKS_PER_GROUP(sb));
		bitmap_bh = read_block_bitmap(sb, group_no);
		if (!bitmap_bh)
			goto io_error;
		grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
					group_no, bitmap_bh, grp_target_blk,
					my_rsv, &num, &fatal);
		if (fatal)
			goto out;
		if (grp_alloc_blk >= 0)
			goto allocated;
	}

	ngroups = EXT3_SB(sb)->s_groups_count;
	smp_rmb();

	/*
	 * Now search the rest of the groups. We assume that
	 * group_no and gdp correctly point to the last group visited.
	 */
	for (bgi = 0; bgi < ngroups; bgi++) {
		group_no++;
		if (group_no >= ngroups)
			group_no = 0;
		gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
		if (!gdp)
			goto io_error;
		free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
		/*
		 * skip this group (and avoid loading bitmap) if there
		 * are no free blocks
		 */
		if (!free_blocks)
			continue;
		/*
		 * skip this group if the number of
		 * free blocks is less than half of the reservation
		 * window size.
		 */
		if (my_rsv && (free_blocks <= (windowsz/2)))
			continue;

		brelse(bitmap_bh);
		bitmap_bh = read_block_bitmap(sb, group_no);
		if (!bitmap_bh)
			goto io_error;
		/*
		 * try to allocate block(s) from this group, without a goal(-1).
		 */
		grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
					group_no, bitmap_bh, -1, my_rsv,
					&num, &fatal);
		if (fatal)
			goto out;
		if (grp_alloc_blk >= 0)
			goto allocated;
	}
	/*
	 * We may end up with a bogus ENOSPC error here: the filesystem can
	 * be "full" of reservations while free blocks are in fact still
	 * available on disk. In this case, we just forget about the
	 * reservations and do the block allocation as if there were no
	 * reservations.
	 */
	if (my_rsv) {
		my_rsv = NULL;
		windowsz = 0;
		group_no = goal_group;
		goto retry_alloc;
	}
	/* No space left on the device */
	*errp = -ENOSPC;
	goto out;

allocated:

	ext3_debug("using block group %d(%d)\n",
			group_no, gdp->bg_free_blocks_count);

	BUFFER_TRACE(gdp_bh, "get_write_access");
	fatal = ext3_journal_get_write_access(handle, gdp_bh);
	if (fatal)
		goto out;

	ret_block = grp_alloc_blk + ext3_group_first_block_no(sb, group_no);

	if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) ||
	    in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) ||
	    in_range(ret_block, le32_to_cpu(gdp->bg_inode_table),
		      EXT3_SB(sb)->s_itb_per_group) ||
	    in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table),
		      EXT3_SB(sb)->s_itb_per_group)) {
		ext3_error(sb, "ext3_new_block",
			    "Allocating block in system zone - "
			    "blocks from "E3FSBLK", length %lu",
			    ret_block, num);
		/*
		 * claim_block() marked the blocks we allocated as in use. So we
		 * may want to selectively mark some of the blocks as free.
		 */
		goto retry_alloc;
	}

	performed_allocation = 1;

#ifdef CONFIG_JBD_DEBUG
	{
		struct buffer_head *debug_bh;

		/* Record bitmap buffer state in the newly allocated block */
		debug_bh = sb_find_get_block(sb, ret_block);
		if (debug_bh) {
			BUFFER_TRACE(debug_bh, "state when allocated");
			BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state");
			brelse(debug_bh);
		}
	}
	jbd_lock_bh_state(bitmap_bh);
	spin_lock(sb_bgl_lock(sbi, group_no));
	if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) {
		int i;

		for (i = 0; i < num; i++) {
			if (ext3_test_bit(grp_alloc_blk+i,
					bh2jh(bitmap_bh)->b_committed_data)) {
				printk("%s: block was unexpectedly set in "
					"b_committed_data\n", __func__);
			}
		}
	}
	ext3_debug("found bit %d\n", grp_alloc_blk);
	spin_unlock(sb_bgl_lock(sbi, group_no));
	jbd_unlock_bh_state(bitmap_bh);
#endif

	if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) {
		ext3_error(sb, "ext3_new_block",
			    "block("E3FSBLK") >= blocks count(%d) - "
			    "block_group = %d, es == %p ", ret_block,
			le32_to_cpu(es->s_blocks_count), group_no, es);
		goto out;
	}

	/*
	 * It is up to the caller to add the new buffer to a journal
	 * list of some description. We don't know in advance whether
	 * the caller wants to use it as metadata or data.
	 */
	ext3_debug("allocating block %lu. Goal hits %d of %d.\n",
			ret_block, goal_hits, goal_attempts);

	spin_lock(sb_bgl_lock(sbi, group_no));
	le16_add_cpu(&gdp->bg_free_blocks_count, -num);
	spin_unlock(sb_bgl_lock(sbi, group_no));
	percpu_counter_sub(&sbi->s_freeblocks_counter, num);

	BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
	err = ext3_journal_dirty_metadata(handle, gdp_bh);
	if (!fatal)
		fatal = err;

	if (fatal)
		goto out;

	*errp = 0;
	brelse(bitmap_bh);

	if (num < *count) {
		dquot_free_block(inode, *count-num);
		*count = num;
	}

	trace_ext3_allocate_blocks(inode, goal, num,
				   (unsigned long long)ret_block);

	return ret_block;

io_error:
	*errp = -EIO;
out:
	if (fatal) {
		*errp = fatal;
		ext3_std_error(sb, fatal);
	}
	/*
	 * Undo the block allocation
	 */
	if (!performed_allocation)
		dquot_free_block(inode, *count);
	brelse(bitmap_bh);
	return 0;
}

ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode,
			ext3_fsblk_t goal, int *errp)
{
	unsigned long count = 1;

	return ext3_new_blocks(handle, inode, goal, &count, errp);
}

ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb)
{
	ext3_fsblk_t desc_count;
	struct ext3_group_desc *gdp;
	int i;
	unsigned long ngroups = EXT3_SB(sb)->s_groups_count;
#ifdef EXT3FS_DEBUG
	struct ext3_super_block *es;
	ext3_fsblk_t bitmap_count;
	unsigned long x;
	struct buffer_head *bitmap_bh = NULL;

	es = EXT3_SB(sb)->s_es;
	desc_count = 0;
	bitmap_count = 0;
	gdp = NULL;

	smp_rmb();
	for (i = 0; i < ngroups; i++) {
		gdp = ext3_get_group_desc(sb, i, NULL);
		if (!gdp)
			continue;
		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
		brelse(bitmap_bh);
		bitmap_bh = read_block_bitmap(sb, i);
		if (bitmap_bh == NULL)
			continue;

		x = ext3_count_free(bitmap_bh, sb->s_blocksize);
		printk("group %d: stored = %d, counted = %lu\n",
			i, le16_to_cpu(gdp->bg_free_blocks_count), x);
		bitmap_count += x;
	}
	brelse(bitmap_bh);
	printk("ext3_count_free_blocks: stored = "E3FSBLK
		", computed = "E3FSBLK", "E3FSBLK"\n",
		le32_to_cpu(es->s_free_blocks_count),
		desc_count, bitmap_count);
	return bitmap_count;
#else
	desc_count = 0;
	smp_rmb();
	for (i = 0; i < ngroups; i++) {
		gdp = ext3_get_group_desc(sb, i, NULL);
		if (!gdp)
			continue;
		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
	}

	return desc_count;
#endif
}

static inline int test_root(int a, int b)
{
	int num = b;

	while (a > num)
		num *= b;
	return num == a;
}
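
/*
 * Editor's example: test_root(49, 7) returns 1 because 49 == 7^2, while
 * test_root(14, 7) returns 0 (num steps 7 -> 49, and 49 != 14). b itself
 * counts as b^1, so test_root(b, b) is also 1.
 */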

static int ext3_group_sparse(int group)
{
	if (group <= 1)
		return 1;
	if (!(group & 1))
		return 0;
	return (test_root(group, 7) || test_root(group, 5) ||
		test_root(group, 3));
}
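
/*
 * Editor's note: with the sparse_super feature, super block backups are
 * kept only in groups 0 and 1 and in groups that are powers of 3, 5 or 7:
 * 0, 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
 */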

int ext3_bg_has_super(struct super_block *sb, int group)
{
	if (EXT3_HAS_RO_COMPAT_FEATURE(sb,
				EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
			!ext3_group_sparse(group))
		return 0;
	return 1;
}

static unsigned long ext3_bg_num_gdb_meta(struct super_block *sb, int group)
{
	unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb);
	unsigned long first = metagroup * EXT3_DESC_PER_BLOCK(sb);
	unsigned long last = first + EXT3_DESC_PER_BLOCK(sb) - 1;

	if (group == first || group == first + 1 || group == last)
		return 1;
	return 0;
}

static unsigned long ext3_bg_num_gdb_nometa(struct super_block *sb, int group)
{
	return ext3_bg_has_super(sb, group) ? EXT3_SB(sb)->s_gdb_count : 0;
}

unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
{
	unsigned long first_meta_bg =
			le32_to_cpu(EXT3_SB(sb)->s_es->s_first_meta_bg);
	unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb);

	if (!EXT3_HAS_INCOMPAT_FEATURE(sb,EXT3_FEATURE_INCOMPAT_META_BG) ||
			metagroup < first_meta_bg)
		return ext3_bg_num_gdb_nometa(sb,group);

	return ext3_bg_num_gdb_meta(sb,group);

}

static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
					unsigned int group,
					ext3_grpblk_t start, ext3_grpblk_t max,
					ext3_grpblk_t minblocks)
{
	handle_t *handle;
	ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
	ext3_fsblk_t discard_block;
	struct ext3_sb_info *sbi;
	struct buffer_head *gdp_bh, *bitmap_bh = NULL;
	struct ext3_group_desc *gdp;
	int err = 0, ret = 0;

	/*
	 * We will update one block bitmap, and one group descriptor
	 */
	handle = ext3_journal_start_sb(sb, 2);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	bitmap_bh = read_block_bitmap(sb, group);
	if (!bitmap_bh) {
		err = -EIO;
		goto err_out;
	}

	BUFFER_TRACE(bitmap_bh, "getting undo access");
	err = ext3_journal_get_undo_access(handle, bitmap_bh);
	if (err)
		goto err_out;

	gdp = ext3_get_group_desc(sb, group, &gdp_bh);
	if (!gdp) {
		err = -EIO;
		goto err_out;
	}

	BUFFER_TRACE(gdp_bh, "get_write_access");
	err = ext3_journal_get_write_access(handle, gdp_bh);
	if (err)
		goto err_out;

	free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
	sbi = EXT3_SB(sb);

	/* Walk through the whole group */
	while (start <= max) {
		start = bitmap_search_next_usable_block(start, bitmap_bh, max);
		if (start < 0)
			break;
		next = start;

		/*
		 * Allocate contiguous free extents by setting bits in the
		 * block bitmap
		 */
		while (next <= max
			&& claim_block(sb_bgl_lock(sbi, group),
					next, bitmap_bh)) {
			next++;
		}

		/* We did not claim any blocks */
		if (next == start)
			continue;

		discard_block = (ext3_fsblk_t)start +
				ext3_group_first_block_no(sb, group);

		/* Update counters */
		spin_lock(sb_bgl_lock(sbi, group));
		le16_add_cpu(&gdp->bg_free_blocks_count, start - next);
		spin_unlock(sb_bgl_lock(sbi, group));
		percpu_counter_sub(&sbi->s_freeblocks_counter, next - start);

		free_blocks -= next - start;
		/* Do not issue a TRIM on extents smaller than minblocks */
		if ((next - start) < minblocks)
			goto free_extent;

		trace_ext3_discard_blocks(sb, discard_block, next - start);
		/* Send the TRIM command down to the device */
		err = sb_issue_discard(sb, discard_block, next - start,
				       GFP_NOFS, 0);
		count += (next - start);
free_extent:
		freed = 0;

		/*
		 * Clear bits in the bitmap
		 */
		for (bit = start; bit < next; bit++) {
			BUFFER_TRACE(bitmap_bh, "clear bit");
			if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, group),
						bit, bitmap_bh->b_data)) {
				ext3_error(sb, __func__,
					"bit already cleared for block "E3FSBLK,
					(unsigned long)bit);
				BUFFER_TRACE(bitmap_bh, "bit already cleared");
			} else {
				freed++;
			}
		}

		/* Update counters */
		spin_lock(sb_bgl_lock(sbi, group));
		le16_add_cpu(&gdp->bg_free_blocks_count, freed);
		spin_unlock(sb_bgl_lock(sbi, group));
		percpu_counter_add(&sbi->s_freeblocks_counter, freed);

		start = next;
		if (err < 0) {
			if (err != -EOPNOTSUPP)
				ext3_warning(sb, __func__, "Discard command "
					     "returned error %d\n", err);
			break;
		}

		if (fatal_signal_pending(current)) {
			err = -ERESTARTSYS;
			break;
		}

		cond_resched();

		/* No more suitable extents */
		if (free_blocks < minblocks)
			break;
	}

	/* We dirtied the bitmap block */
	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
	ret = ext3_journal_dirty_metadata(handle, bitmap_bh);
	if (!err)
		err = ret;

	/* And the group descriptor block */
	BUFFER_TRACE(gdp_bh, "dirtied group descriptor block");
	ret = ext3_journal_dirty_metadata(handle, gdp_bh);
	if (!err)
		err = ret;

	ext3_debug("trimmed %d blocks in the group %d\n",
		count, group);

err_out:
	if (err)
		count = err;
	ext3_journal_stop(handle);
	brelse(bitmap_bh);

	return count;
}

int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
{
	ext3_grpblk_t last_block, first_block;
	unsigned long group, first_group, last_group;
	struct ext3_group_desc *gdp;
	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
	uint64_t start, minlen, end, trimmed = 0;
	ext3_fsblk_t first_data_blk =
			le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block);
	ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count);
	int ret = 0;

	start = range->start >> sb->s_blocksize_bits;
	end = start + (range->len >> sb->s_blocksize_bits) - 1;
	minlen = range->minlen >> sb->s_blocksize_bits;

	if (minlen > EXT3_BLOCKS_PER_GROUP(sb) ||
	    start >= max_blks ||
	    range->len < sb->s_blocksize)
		return -EINVAL;
	if (end >= max_blks)
		end = max_blks - 1;
	if (end <= first_data_blk)
		goto out;
	if (start < first_data_blk)
		start = first_data_blk;

	smp_rmb();

	/* Determine first and last group to examine based on start and len */
	ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start,
				     &first_group, &first_block);
	ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) end,
				     &last_group, &last_block);

	/* end now represents the last block to discard in this group */
	end = EXT3_BLOCKS_PER_GROUP(sb) - 1;

	for (group = first_group; group <= last_group; group++) {
		gdp = ext3_get_group_desc(sb, group, NULL);
		if (!gdp)
			break;

		/*
		 * For all the groups except the last one, last block will
		 * always be EXT3_BLOCKS_PER_GROUP(sb)-1, so we only need to
		 * change it for the last group, note that last_block is
		 * already computed earlier by ext3_get_group_no_and_offset()
		 */
		if (group == last_group)
			end = last_block;

		if (le16_to_cpu(gdp->bg_free_blocks_count) >= minlen) {
			ret = ext3_trim_all_free(sb, group, first_block,
						 end, minlen);
			if (ret < 0)
				break;
			trimmed += ret;
		}

		/*
		 * For every group except the first one, we are sure
		 * that the first block to discard will be block #0.
		 */
		first_block = 0;
	}

	if (ret > 0)
		ret = 0;

out:
	range->len = trimmed * sb->s_blocksize;
	return ret;
}