Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
buffer_head_io.c
Go to the documentation of this file.
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * io.c
5  *
6  * Buffer cache handling
7  *
8  * Copyright (C) 2002, 2004 Oracle. All rights reserved.
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public
12  * License as published by the Free Software Foundation; either
13  * version 2 of the License, or (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public
21  * License along with this program; if not, write to the
22  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23  * Boston, MA 02111-1307, USA.
24  */
25 
26 #include <linux/fs.h>
27 #include <linux/types.h>
28 #include <linux/highmem.h>
29 
30 #include <cluster/masklog.h>
31 
32 #include "ocfs2.h"
33 
34 #include "alloc.h"
35 #include "inode.h"
36 #include "journal.h"
37 #include "uptodate.h"
38 #include "buffer_head_io.h"
39 #include "ocfs2_trace.h"
40 
41 /*
42  * Bits on bh->b_state used by ocfs2.
43  *
44  * These MUST be after the JBD2 bits. Hence, we use BH_JBDPrivateStart.
45  */
48 };
49 
50 /* Expand the magic b_state functions */
51 BUFFER_FNS(NeedsValidate, needs_validate);
52 
53 int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
54  struct ocfs2_caching_info *ci)
55 {
56  int ret = 0;
57 
58  trace_ocfs2_write_block((unsigned long long)bh->b_blocknr, ci);
59 
60  BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO);
61  BUG_ON(buffer_jbd(bh));
62 
63  /* No need to check for a soft readonly file system here. non
64  * journalled writes are only ever done on system files which
65  * can get modified during recovery even if read-only. */
66  if (ocfs2_is_hard_readonly(osb)) {
67  ret = -EROFS;
68  mlog_errno(ret);
69  goto out;
70  }
71 
73 
74  lock_buffer(bh);
75  set_buffer_uptodate(bh);
76 
77  /* remove from dirty list before I/O. */
78  clear_buffer_dirty(bh);
79 
80  get_bh(bh); /* for end_buffer_write_sync() */
81  bh->b_end_io = end_buffer_write_sync;
82  submit_bh(WRITE, bh);
83 
84  wait_on_buffer(bh);
85 
86  if (buffer_uptodate(bh)) {
88  } else {
89  /* We don't need to remove the clustered uptodate
90  * information for this bh as it's not marked locally
91  * uptodate. */
92  ret = -EIO;
93  put_bh(bh);
94  mlog_errno(ret);
95  }
96 
98 out:
99  return ret;
100 }
101 
103  unsigned int nr, struct buffer_head *bhs[])
104 {
105  int status = 0;
106  unsigned int i;
107  struct buffer_head *bh;
108 
109  trace_ocfs2_read_blocks_sync((unsigned long long)block, nr);
110 
111  if (!nr)
112  goto bail;
113 
114  for (i = 0 ; i < nr ; i++) {
115  if (bhs[i] == NULL) {
116  bhs[i] = sb_getblk(osb->sb, block++);
117  if (bhs[i] == NULL) {
118  status = -EIO;
119  mlog_errno(status);
120  goto bail;
121  }
122  }
123  bh = bhs[i];
124 
125  if (buffer_jbd(bh)) {
126  trace_ocfs2_read_blocks_sync_jbd(
127  (unsigned long long)bh->b_blocknr);
128  continue;
129  }
130 
131  if (buffer_dirty(bh)) {
132  /* This should probably be a BUG, or
133  * at least return an error. */
134  mlog(ML_ERROR,
135  "trying to sync read a dirty "
136  "buffer! (blocknr = %llu), skipping\n",
137  (unsigned long long)bh->b_blocknr);
138  continue;
139  }
140 
141  lock_buffer(bh);
142  if (buffer_jbd(bh)) {
143  mlog(ML_ERROR,
144  "block %llu had the JBD bit set "
145  "while I was in lock_buffer!",
146  (unsigned long long)bh->b_blocknr);
147  BUG();
148  }
149 
150  clear_buffer_uptodate(bh);
151  get_bh(bh); /* for end_buffer_read_sync() */
152  bh->b_end_io = end_buffer_read_sync;
153  submit_bh(READ, bh);
154  }
155 
156  for (i = nr; i > 0; i--) {
157  bh = bhs[i - 1];
158 
159  /* No need to wait on the buffer if it's managed by JBD. */
160  if (!buffer_jbd(bh))
161  wait_on_buffer(bh);
162 
163  if (!buffer_uptodate(bh)) {
164  /* Status won't be cleared from here on out,
165  * so we can safely record this and loop back
166  * to cleanup the other buffers. */
167  status = -EIO;
168  put_bh(bh);
169  bhs[i - 1] = NULL;
170  }
171  }
172 
173 bail:
174  return status;
175 }
176 
178  struct buffer_head *bhs[], int flags,
179  int (*validate)(struct super_block *sb,
180  struct buffer_head *bh))
181 {
182  int status = 0;
183  int i, ignore_cache = 0;
184  struct buffer_head *bh;
186 
187  trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags);
188 
189  BUG_ON(!ci);
190  BUG_ON((flags & OCFS2_BH_READAHEAD) &&
191  (flags & OCFS2_BH_IGNORE_CACHE));
192 
193  if (bhs == NULL) {
194  status = -EINVAL;
195  mlog_errno(status);
196  goto bail;
197  }
198 
199  if (nr < 0) {
200  mlog(ML_ERROR, "asked to read %d blocks!\n", nr);
201  status = -EINVAL;
202  mlog_errno(status);
203  goto bail;
204  }
205 
206  if (nr == 0) {
207  status = 0;
208  goto bail;
209  }
210 
212  for (i = 0 ; i < nr ; i++) {
213  if (bhs[i] == NULL) {
214  bhs[i] = sb_getblk(sb, block++);
215  if (bhs[i] == NULL) {
217  status = -EIO;
218  mlog_errno(status);
219  goto bail;
220  }
221  }
222  bh = bhs[i];
223  ignore_cache = (flags & OCFS2_BH_IGNORE_CACHE);
224 
225  /* There are three read-ahead cases here which we need to
226  * be concerned with. All three assume a buffer has
227  * previously been submitted with OCFS2_BH_READAHEAD
228  * and it hasn't yet completed I/O.
229  *
230  * 1) The current request is sync to disk. This rarely
231  * happens these days, and never when performance
232  * matters - the code can just wait on the buffer
233  * lock and re-submit.
234  *
235  * 2) The current request is cached, but not
236  * readahead. ocfs2_buffer_uptodate() will return
237  * false anyway, so we'll wind up waiting on the
238  * buffer lock to do I/O. We re-check the request
239  * with after getting the lock to avoid a re-submit.
240  *
241  * 3) The current request is readahead (and so must
242  * also be a caching one). We short circuit if the
243  * buffer is locked (under I/O) and if it's in the
244  * uptodate cache. The re-check from #2 catches the
245  * case that the previous read-ahead completes just
246  * before our is-it-in-flight check.
247  */
248 
249  if (!ignore_cache && !ocfs2_buffer_uptodate(ci, bh)) {
250  trace_ocfs2_read_blocks_from_disk(
251  (unsigned long long)bh->b_blocknr,
252  (unsigned long long)ocfs2_metadata_cache_owner(ci));
253  /* We're using ignore_cache here to say
254  * "go to disk" */
255  ignore_cache = 1;
256  }
257 
258  trace_ocfs2_read_blocks_bh((unsigned long long)bh->b_blocknr,
259  ignore_cache, buffer_jbd(bh), buffer_dirty(bh));
260 
261  if (buffer_jbd(bh)) {
262  continue;
263  }
264 
265  if (ignore_cache) {
266  if (buffer_dirty(bh)) {
267  /* This should probably be a BUG, or
268  * at least return an error. */
269  continue;
270  }
271 
272  /* A read-ahead request was made - if the
273  * buffer is already under read-ahead from a
274  * previously submitted request than we are
275  * done here. */
276  if ((flags & OCFS2_BH_READAHEAD)
277  && ocfs2_buffer_read_ahead(ci, bh))
278  continue;
279 
280  lock_buffer(bh);
281  if (buffer_jbd(bh)) {
282 #ifdef CATCH_BH_JBD_RACES
283  mlog(ML_ERROR, "block %llu had the JBD bit set "
284  "while I was in lock_buffer!",
285  (unsigned long long)bh->b_blocknr);
286  BUG();
287 #else
288  unlock_buffer(bh);
289  continue;
290 #endif
291  }
292 
293  /* Re-check ocfs2_buffer_uptodate() as a
294  * previously read-ahead buffer may have
295  * completed I/O while we were waiting for the
296  * buffer lock. */
297  if (!(flags & OCFS2_BH_IGNORE_CACHE)
298  && !(flags & OCFS2_BH_READAHEAD)
299  && ocfs2_buffer_uptodate(ci, bh)) {
300  unlock_buffer(bh);
301  continue;
302  }
303 
304  clear_buffer_uptodate(bh);
305  get_bh(bh); /* for end_buffer_read_sync() */
306  if (validate)
307  set_buffer_needs_validate(bh);
308  bh->b_end_io = end_buffer_read_sync;
309  submit_bh(READ, bh);
310  continue;
311  }
312  }
313 
314  status = 0;
315 
316  for (i = (nr - 1); i >= 0; i--) {
317  bh = bhs[i];
318 
319  if (!(flags & OCFS2_BH_READAHEAD)) {
320  /* We know this can't have changed as we hold the
321  * owner sem. Avoid doing any work on the bh if the
322  * journal has it. */
323  if (!buffer_jbd(bh))
324  wait_on_buffer(bh);
325 
326  if (!buffer_uptodate(bh)) {
327  /* Status won't be cleared from here on out,
328  * so we can safely record this and loop back
329  * to cleanup the other buffers. Don't need to
330  * remove the clustered uptodate information
331  * for this bh as it's not marked locally
332  * uptodate. */
333  status = -EIO;
334  put_bh(bh);
335  bhs[i] = NULL;
336  continue;
337  }
338 
339  if (buffer_needs_validate(bh)) {
340  /* We never set NeedsValidate if the
341  * buffer was held by the journal, so
342  * that better not have changed */
343  BUG_ON(buffer_jbd(bh));
344  clear_buffer_needs_validate(bh);
345  status = validate(sb, bh);
346  if (status) {
347  put_bh(bh);
348  bhs[i] = NULL;
349  continue;
350  }
351  }
352  }
353 
354  /* Always set the buffer in the cache, even if it was
355  * a forced read, or read-ahead which hasn't yet
356  * completed. */
358  }
360 
361  trace_ocfs2_read_blocks_end((unsigned long long)block, nr,
362  flags, ignore_cache);
363 
364 bail:
365 
366  return status;
367 }
368 
369 /* Check whether the blkno is the super block or one of the backups. */
370 static void ocfs2_check_super_or_backup(struct super_block *sb,
371  sector_t blkno)
372 {
373  int i;
374  u64 backup_blkno;
375 
376  if (blkno == OCFS2_SUPER_BLOCK_BLKNO)
377  return;
378 
379  for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
380  backup_blkno = ocfs2_backup_super_blkno(sb, i);
381  if (backup_blkno == blkno)
382  return;
383  }
384 
385  BUG();
386 }
387 
388 /*
389  * Write super block and backups doesn't need to collaborate with journal,
390  * so we don't need to lock ip_io_mutex and ci doesn't need to bea passed
391  * into this function.
392  */
394  struct buffer_head *bh)
395 {
396  int ret = 0;
397  struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
398 
399  BUG_ON(buffer_jbd(bh));
400  ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr);
401 
402  if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) {
403  ret = -EROFS;
404  mlog_errno(ret);
405  goto out;
406  }
407 
408  lock_buffer(bh);
409  set_buffer_uptodate(bh);
410 
411  /* remove from dirty list before I/O. */
412  clear_buffer_dirty(bh);
413 
414  get_bh(bh); /* for end_buffer_write_sync() */
415  bh->b_end_io = end_buffer_write_sync;
416  ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check);
417  submit_bh(WRITE, bh);
418 
419  wait_on_buffer(bh);
420 
421  if (!buffer_uptodate(bh)) {
422  ret = -EIO;
423  put_bh(bh);
424  mlog_errno(ret);
425  }
426 
427 out:
428  return ret;
429 }