Linux Kernel 3.7.1
log.c
1 /*
2  * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3  * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
4  *
5  * This copyrighted material is made available to anyone wishing to use,
6  * modify, copy, or redistribute it subject to the terms and conditions
7  * of the GNU General Public License version 2.
8  */
9 
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/spinlock.h>
13 #include <linux/completion.h>
14 #include <linux/buffer_head.h>
15 #include <linux/gfs2_ondisk.h>
16 #include <linux/crc32.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/freezer.h>
20 #include <linux/bio.h>
21 #include <linux/writeback.h>
22 #include <linux/list_sort.h>
23 
24 #include "gfs2.h"
25 #include "incore.h"
26 #include "bmap.h"
27 #include "glock.h"
28 #include "log.h"
29 #include "lops.h"
30 #include "meta_io.h"
31 #include "util.h"
32 #include "dir.h"
33 #include "trace_gfs2.h"
34 
47 unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
48  unsigned int ssize)
49 {
50  unsigned int blks;
51  unsigned int first, second;
52 
53  blks = 1;
54  first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / ssize;
55 
56  if (nstruct > first) {
57  second = (sdp->sd_sb.sb_bsize -
58  sizeof(struct gfs2_meta_header)) / ssize;
59  blks += DIV_ROUND_UP(nstruct - first, second);
60  }
61 
62  return blks;
63 }
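
The arithmetic above: the first log block carries a struct gfs2_log_descriptor, so it holds fewer entries than the continuation blocks, which only lose room to a struct gfs2_meta_header; whatever does not fit in the first block is spread over continuation blocks, rounded up. A standalone userspace sketch of the same calculation, with assumed header sizes and a 4096-byte block (the real sizes come from gfs2_ondisk.h):

#include <stdio.h>

/* Example sizes only; the real values come from gfs2_ondisk.h. */
#define BSIZE            4096u
#define LOG_DESC_SIZE      64u  /* assumed sizeof(struct gfs2_log_descriptor) */
#define META_HDR_SIZE      32u  /* assumed sizeof(struct gfs2_meta_header)    */

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Mirrors gfs2_struct2blk(): blocks needed to log nstruct entries of ssize bytes. */
static unsigned int struct2blk(unsigned int nstruct, unsigned int ssize)
{
	unsigned int blks = 1;
	unsigned int first = (BSIZE - LOG_DESC_SIZE) / ssize;  /* entries in block 1 */
	unsigned int second = (BSIZE - META_HDR_SIZE) / ssize; /* entries per extra block */

	if (nstruct > first)
		blks += DIV_ROUND_UP(nstruct - first, second);
	return blks;
}

int main(void)
{
	/* e.g. revoke entries are logged as u64 block numbers (ssize = 8) */
	printf("100 revokes  -> %u block(s)\n", struct2blk(100, 8));
	printf("1000 revokes -> %u block(s)\n", struct2blk(1000, 8));
	return 0;
}
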
64 
74 void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
75 {
76  bd->bd_ail = NULL;
77  list_del_init(&bd->bd_ail_st_list);
78  list_del_init(&bd->bd_ail_gl_list);
79  atomic_dec(&bd->bd_gl->gl_ail_count);
80  brelse(bd->bd_bh);
81 }
82 
91 static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
92  struct writeback_control *wbc,
93  struct gfs2_ail *ai)
94 __releases(&sdp->sd_ail_lock)
95 __acquires(&sdp->sd_ail_lock)
96 {
97  struct gfs2_glock *gl = NULL;
98  struct address_space *mapping;
99  struct gfs2_bufdata *bd, *s;
100  struct buffer_head *bh;
101 
102  list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, bd_ail_st_list) {
103  bh = bd->bd_bh;
104 
105  gfs2_assert(sdp, bd->bd_ail == ai);
106 
107  if (!buffer_busy(bh)) {
108  if (!buffer_uptodate(bh))
109  gfs2_io_error_bh(sdp, bh);
110  list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
111  continue;
112  }
113 
114  if (!buffer_dirty(bh))
115  continue;
116  if (gl == bd->bd_gl)
117  continue;
118  gl = bd->bd_gl;
119  list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
120  mapping = bh->b_page->mapping;
121  if (!mapping)
122  continue;
123  spin_unlock(&sdp->sd_ail_lock);
124  generic_writepages(mapping, wbc);
125  spin_lock(&sdp->sd_ail_lock);
126  if (wbc->nr_to_write <= 0)
127  break;
128  return 1;
129  }
130 
131  return 0;
132 }
133 
134 
144 void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
145 {
146  struct list_head *head = &sdp->sd_ail1_list;
147  struct gfs2_ail *ai;
148 
149  trace_gfs2_ail_flush(sdp, wbc, 1);
150  spin_lock(&sdp->sd_ail_lock);
151 restart:
152  list_for_each_entry_reverse(ai, head, ai_list) {
153  if (wbc->nr_to_write <= 0)
154  break;
155  if (gfs2_ail1_start_one(sdp, wbc, ai))
156  goto restart;
157  }
158  spin_unlock(&sdp->sd_ail_lock);
159  trace_gfs2_ail_flush(sdp, wbc, 0);
160 }
161 
167 static void gfs2_ail1_start(struct gfs2_sbd *sdp)
168 {
169  struct writeback_control wbc = {
170  .sync_mode = WB_SYNC_NONE,
171  .nr_to_write = LONG_MAX,
172  .range_start = 0,
173  .range_end = LLONG_MAX,
174  };
175 
176  return gfs2_ail1_flush(sdp, &wbc);
177 }
178 
186 static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
187 {
188  struct gfs2_bufdata *bd, *s;
189  struct buffer_head *bh;
190 
191  list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
192  bd_ail_st_list) {
193  bh = bd->bd_bh;
194  gfs2_assert(sdp, bd->bd_ail == ai);
195  if (buffer_busy(bh))
196  continue;
197  if (!buffer_uptodate(bh))
198  gfs2_io_error_bh(sdp, bh);
199  list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
200  }
201 
202 }
203 
211 static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
212 {
213  struct gfs2_ail *ai, *s;
214  int ret;
215 
216  spin_lock(&sdp->sd_ail_lock);
217  list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
218  gfs2_ail1_empty_one(sdp, ai);
219  if (list_empty(&ai->ai_ail1_list))
220  list_move(&ai->ai_list, &sdp->sd_ail2_list);
221  else
222  break;
223  }
224  ret = list_empty(&sdp->sd_ail1_list);
225  spin_unlock(&sdp->sd_ail_lock);
226 
227  return ret;
228 }
229 
230 static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
231 {
232  struct gfs2_ail *ai;
233  struct gfs2_bufdata *bd;
234  struct buffer_head *bh;
235 
236  spin_lock(&sdp->sd_ail_lock);
237  list_for_each_entry_reverse(ai, &sdp->sd_ail1_list, ai_list) {
238  list_for_each_entry(bd, &ai->ai_ail1_list, bd_ail_st_list) {
239  bh = bd->bd_bh;
240  if (!buffer_locked(bh))
241  continue;
242  get_bh(bh);
243  spin_unlock(&sdp->sd_ail_lock);
244  wait_on_buffer(bh);
245  brelse(bh);
246  return;
247  }
248  }
249  spin_unlock(&sdp->sd_ail_lock);
250 }
251 
259 static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
260 {
261  struct list_head *head = &ai->ai_ail2_list;
262  struct gfs2_bufdata *bd;
263 
264  while (!list_empty(head)) {
265  bd = list_entry(head->prev, struct gfs2_bufdata,
266  bd_ail_st_list);
267  gfs2_assert(sdp, bd->bd_ail == ai);
268  gfs2_remove_from_ail(bd);
269  }
270 }
271 
272 static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
273 {
274  struct gfs2_ail *ai, *safe;
275  unsigned int old_tail = sdp->sd_log_tail;
276  int wrap = (new_tail < old_tail);
277  int a, b, rm;
278 
279  spin_lock(&sdp->sd_ail_lock);
280 
281  list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) {
282  a = (old_tail <= ai->ai_first);
283  b = (ai->ai_first < new_tail);
284  rm = (wrap) ? (a || b) : (a && b);
285  if (!rm)
286  continue;
287 
288  gfs2_ail2_empty_one(sdp, ai);
289  list_del(&ai->ai_list);
290  gfs2_assert_warn(sdp, list_empty(&ai->ai_ail1_list));
291  gfs2_assert_warn(sdp, list_empty(&ai->ai_ail2_list));
292  kfree(ai);
293  }
294 
295  spin_unlock(&sdp->sd_ail_lock);
296 }
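
ail2_empty() frees every AIL2 list whose starting block (ai_first) now lies behind the new log tail; the wrap flag handles the case where the tail has wrapped past the end of the circular journal. A minimal userspace sketch of the same membership test, with assumed block positions:

#include <stdio.h>

/*
 * Same predicate as in ail2_empty(): is 'first' inside the half-open
 * interval [old_tail, new_tail) on a circular journal?
 */
static int in_retired_range(unsigned int first,
			    unsigned int old_tail, unsigned int new_tail)
{
	int wrap = (new_tail < old_tail);  /* did the tail wrap around the log? */
	int a = (old_tail <= first);
	int b = (first < new_tail);

	return wrap ? (a || b) : (a && b);
}

int main(void)
{
	/* assumed journal positions, in blocks */
	printf("%d\n", in_retired_range(50, 40, 60));    /* 1: plainly inside       */
	printf("%d\n", in_retired_range(5, 1000, 10));   /* 1: inside, tail wrapped */
	printf("%d\n", in_retired_range(500, 1000, 10)); /* 0: outside              */
	return 0;
}
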
297 
318 int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
319 {
320  unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize);
321  unsigned wanted = blks + reserved_blks;
322  DEFINE_WAIT(wait);
323  int did_wait = 0;
324  unsigned int free_blocks;
325 
326  if (gfs2_assert_warn(sdp, blks) ||
327  gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
328  return -EINVAL;
329 retry:
330  free_blocks = atomic_read(&sdp->sd_log_blks_free);
331  if (unlikely(free_blocks <= wanted)) {
332  do {
333  prepare_to_wait_exclusive(&sdp->sd_log_waitq, &wait,
334  TASK_UNINTERRUPTIBLE);
335  wake_up(&sdp->sd_logd_waitq);
336  did_wait = 1;
337  if (atomic_read(&sdp->sd_log_blks_free) <= wanted)
338  io_schedule();
339  free_blocks = atomic_read(&sdp->sd_log_blks_free);
340  } while(free_blocks <= wanted);
341  finish_wait(&sdp->sd_log_waitq, &wait);
342  }
343  if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks,
344  free_blocks - blks) != free_blocks)
345  goto retry;
346  trace_gfs2_log_blocks(sdp, -blks);
347 
348  /*
349  * If we waited, then so might others, wake them up _after_ we get
350  * our share of the log.
351  */
352  if (unlikely(did_wait))
353  wake_up(&sdp->sd_log_waitq);
354 
355  down_read(&sdp->sd_log_flush_lock);
356 
357  return 0;
358 }
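
gfs2_log_reserve() takes its blocks out of sd_log_blks_free without holding a lock: it reads the counter, and if atomic_cmpxchg() shows that another task changed it in the meantime, it retries from the top. A minimal single-threaded userspace sketch of that retry pattern using C11 atomics (names are illustrative; the sleep/wake-logd path is omitted):

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint log_blks_free = 1024;  /* assumed starting free-block count */

/* Lock-free "take blks from the free counter", in the style of gfs2_log_reserve(). */
static int reserve(unsigned int blks)
{
	unsigned int free_blocks;

retry:
	free_blocks = atomic_load(&log_blks_free);
	if (free_blocks < blks)
		return -1;	/* the kernel code would sleep and wake logd instead */

	/* If another thread changed the counter since we read it, try again. */
	if (!atomic_compare_exchange_strong(&log_blks_free, &free_blocks,
					    free_blocks - blks))
		goto retry;

	return 0;
}

int main(void)
{
	if (reserve(10) == 0)
		printf("reserved, %u blocks left\n", atomic_load(&log_blks_free));
	return 0;
}
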
359 
372 static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer,
373  unsigned int older)
374 {
375  int dist;
376 
377  dist = newer - older;
378  if (dist < 0)
379  dist += sdp->sd_jdesc->jd_blocks;
380 
381  return dist;
382 }
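
log_distance() measures how far 'newer' is ahead of 'older' on the circular journal, adding the journal length back when the newer position has wrapped. A tiny userspace sketch with an assumed journal size:

#include <stdio.h>

#define JD_BLOCKS 8192	/* assumed journal size in blocks */

/* Circular distance from 'older' forward to 'newer', as in log_distance(). */
static unsigned int log_distance(unsigned int newer, unsigned int older)
{
	int dist = (int)newer - (int)older;

	if (dist < 0)
		dist += JD_BLOCKS;	/* newer has wrapped around the journal */
	return (unsigned int)dist;
}

int main(void)
{
	printf("%u\n", log_distance(120, 100));	/* 20           */
	printf("%u\n", log_distance(10, 8100));	/* 102: wrapped */
	return 0;
}
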
383 
409 static unsigned int calc_reserved(struct gfs2_sbd *sdp)
410 {
411  unsigned int reserved = 0;
412  unsigned int mbuf_limit, metabufhdrs_needed;
413  unsigned int dbuf_limit, databufhdrs_needed;
414  unsigned int revokes = 0;
415 
416  mbuf_limit = buf_limit(sdp);
417  metabufhdrs_needed = (sdp->sd_log_commited_buf +
418  (mbuf_limit - 1)) / mbuf_limit;
419  dbuf_limit = databuf_limit(sdp);
420  databufhdrs_needed = (sdp->sd_log_commited_databuf +
421  (dbuf_limit - 1)) / dbuf_limit;
422 
423  if (sdp->sd_log_commited_revoke > 0)
424  revokes = gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
425  sizeof(u64));
426 
427  reserved = sdp->sd_log_commited_buf + metabufhdrs_needed +
428  sdp->sd_log_commited_databuf + databufhdrs_needed +
429  revokes;
430  /* One for the overall header */
431  if (reserved)
432  reserved++;
433  return reserved;
434 }
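
calc_reserved() converts the committed buffer, data-buffer and revoke counts into journal blocks: every buf_limit()/databuf_limit() group of buffers needs its own log descriptor header, revokes are converted via gfs2_struct2blk(), and one extra block is added for the overall log header. A worked userspace sketch of the same arithmetic, with assumed per-descriptor limits:

#include <stdio.h>

/* Assumed per-descriptor limits; the real ones come from buf_limit()/databuf_limit(). */
#define MBUF_LIMIT  503
#define DBUF_LIMIT  251

static unsigned int calc_reserved(unsigned int commited_buf,
				  unsigned int commited_databuf,
				  unsigned int revoke_blks)
{
	/* one descriptor header per MBUF_LIMIT metadata buffers (rounded up) */
	unsigned int metabufhdrs = (commited_buf + MBUF_LIMIT - 1) / MBUF_LIMIT;
	/* likewise for journaled data buffers */
	unsigned int databufhdrs = (commited_databuf + DBUF_LIMIT - 1) / DBUF_LIMIT;
	unsigned int reserved = commited_buf + metabufhdrs +
				commited_databuf + databufhdrs + revoke_blks;

	if (reserved)
		reserved++;	/* plus one block for the overall log header */
	return reserved;
}

int main(void)
{
	/* e.g. 600 metadata buffers, 100 data buffers, 1 block of revokes -> 705 */
	printf("%u journal blocks\n", calc_reserved(600, 100, 1));
	return 0;
}
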
435 
436 static unsigned int current_tail(struct gfs2_sbd *sdp)
437 {
438  struct gfs2_ail *ai;
439  unsigned int tail;
440 
441  spin_lock(&sdp->sd_ail_lock);
442 
443  if (list_empty(&sdp->sd_ail1_list)) {
444  tail = sdp->sd_log_head;
445  } else {
446  ai = list_entry(sdp->sd_ail1_list.prev, struct gfs2_ail, ai_list);
447  tail = ai->ai_first;
448  }
449 
450  spin_unlock(&sdp->sd_ail_lock);
451 
452  return tail;
453 }
454 
455 static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
456 {
457  unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
458 
459  ail2_empty(sdp, new_tail);
460 
461  atomic_add(dist, &sdp->sd_log_blks_free);
462  trace_gfs2_log_blocks(sdp, dist);
463  gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
464  sdp->sd_jdesc->jd_blocks);
465 
466  sdp->sd_log_tail = new_tail;
467 }
468 
469 
470 static void log_flush_wait(struct gfs2_sbd *sdp)
471 {
472  DEFINE_WAIT(wait);
473 
474  if (atomic_read(&sdp->sd_log_in_flight)) {
475  do {
476  prepare_to_wait(&sdp->sd_log_flush_wait, &wait,
477  TASK_UNINTERRUPTIBLE);
478  if (atomic_read(&sdp->sd_log_in_flight))
479  io_schedule();
480  } while(atomic_read(&sdp->sd_log_in_flight));
481  finish_wait(&sdp->sd_log_flush_wait, &wait);
482  }
483 }
484 
485 static int bd_cmp(void *priv, struct list_head *a, struct list_head *b)
486 {
487  struct gfs2_bufdata *bda, *bdb;
488 
489  bda = list_entry(a, struct gfs2_bufdata, bd_list);
490  bdb = list_entry(b, struct gfs2_bufdata, bd_list);
491 
492  if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
493  return -1;
494  if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr)
495  return 1;
496  return 0;
497 }
498 
499 static void gfs2_ordered_write(struct gfs2_sbd *sdp)
500 {
501  struct gfs2_bufdata *bd;
502  struct buffer_head *bh;
503  LIST_HEAD(written);
504 
505  gfs2_log_lock(sdp);
506  list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp);
507  while (!list_empty(&sdp->sd_log_le_ordered)) {
508  bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_list);
509  list_move(&bd->bd_list, &written);
510  bh = bd->bd_bh;
511  if (!buffer_dirty(bh))
512  continue;
513  get_bh(bh);
514  gfs2_log_unlock(sdp);
515  lock_buffer(bh);
516  if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
517  bh->b_end_io = end_buffer_write_sync;
518  submit_bh(WRITE_SYNC, bh);
519  } else {
520  unlock_buffer(bh);
521  brelse(bh);
522  }
523  gfs2_log_lock(sdp);
524  }
525  list_splice(&written, &sdp->sd_log_le_ordered);
526  gfs2_log_unlock(sdp);
527 }
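
gfs2_ordered_write() sorts the ordered-write buffers with list_sort() and bd_cmp() so they are submitted in ascending disk block order. A minimal userspace analogue of that ordering rule, expressed as a qsort() comparator over plain block numbers (illustrative only):

#include <stdio.h>
#include <stdlib.h>

/* Same ordering rule as bd_cmp(), expressed for qsort() over block numbers. */
static int blocknr_cmp(const void *a, const void *b)
{
	unsigned long long ba = *(const unsigned long long *)a;
	unsigned long long bb = *(const unsigned long long *)b;

	if (ba < bb)
		return -1;
	if (ba > bb)
		return 1;
	return 0;
}

int main(void)
{
	/* assumed buffer head block numbers, in submission order */
	unsigned long long blocknr[] = { 9042, 17, 5523, 5521, 880 };
	size_t n = sizeof(blocknr) / sizeof(blocknr[0]);
	size_t i;

	qsort(blocknr, n, sizeof(blocknr[0]), blocknr_cmp);

	for (i = 0; i < n; i++)	/* ascending disk order, as gfs2_ordered_write() wants */
		printf("%llu\n", blocknr[i]);
	return 0;
}
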
528 
529 static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
530 {
531  struct gfs2_bufdata *bd;
532  struct buffer_head *bh;
533 
534  gfs2_log_lock(sdp);
535  while (!list_empty(&sdp->sd_log_le_ordered)) {
536  bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_list);
537  bh = bd->bd_bh;
538  if (buffer_locked(bh)) {
539  get_bh(bh);
540  gfs2_log_unlock(sdp);
541  wait_on_buffer(bh);
542  brelse(bh);
543  gfs2_log_lock(sdp);
544  continue;
545  }
546  list_del_init(&bd->bd_list);
547  }
548  gfs2_log_unlock(sdp);
549 }
550 
558 static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
559 {
560  struct gfs2_log_header *lh;
561  unsigned int tail;
562  u32 hash;
563  int rw = WRITE_FLUSH_FUA | REQ_META;
564  struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
565  lh = page_address(page);
566  clear_page(lh);
567 
568  gfs2_ail1_empty(sdp);
569  tail = current_tail(sdp);
570 
571  lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
572  lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
573  lh->lh_header.__pad0 = cpu_to_be64(0);
574  lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
575  lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
576  lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
577  lh->lh_flags = cpu_to_be32(flags);
578  lh->lh_tail = cpu_to_be32(tail);
579  lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
580  hash = gfs2_disk_hash(page_address(page), sizeof(struct gfs2_log_header));
581  lh->lh_hash = cpu_to_be32(hash);
582 
583  if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
584  gfs2_ordered_wait(sdp);
585  log_flush_wait(sdp);
586  rw = WRITE_SYNC | REQ_META | REQ_PRIO;
587  }
588 
589  sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
590  gfs2_log_write_page(sdp, page);
591  gfs2_log_flush_bio(sdp, rw);
592  log_flush_wait(sdp);
593 
594  if (sdp->sd_log_tail != tail)
595  log_pull_tail(sdp, tail);
596 }
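
log_write_header() fills the header fields in big-endian on-disk byte order and computes lh_hash over the header while the hash field itself is still zero (the page was cleared first), so recovery can recompute the value and compare. A minimal userspace sketch of that pattern, with a simplified header layout and a stand-in checksum instead of the real gfs2_disk_hash() CRC:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-in for struct gfs2_log_header (illustrative fields only). */
struct log_header {
	uint32_t magic;
	uint32_t flags;
	uint32_t tail;
	uint32_t blkno;
	uint32_t hash;	/* must still be zero while the checksum is computed */
};

/* Stand-in checksum; the real code uses gfs2_disk_hash() (a CRC32). */
static uint32_t checksum(const void *data, size_t len)
{
	const uint8_t *p = data;
	uint32_t sum = 0;

	while (len--)
		sum = sum * 31 + *p++;
	return sum;
}

int main(void)
{
	struct log_header lh;

	memset(&lh, 0, sizeof(lh));	/* like clear_page(lh) */
	lh.magic = 0x01161970;		/* GFS2_MAGIC */
	lh.flags = 0;
	lh.tail  = 42;			/* assumed tail block  */
	lh.blkno = 100;			/* assumed flush head  */

	lh.hash = checksum(&lh, sizeof(lh));	/* hash computed with lh.hash == 0 */
	printf("lh_hash = 0x%08x\n", lh.hash);
	return 0;
}
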
597 
605 void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
606 {
607  struct gfs2_ail *ai;
608 
609  down_write(&sdp->sd_log_flush_lock);
610 
611  /* Log might have been flushed while we waited for the flush lock */
612  if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) {
613  up_write(&sdp->sd_log_flush_lock);
614  return;
615  }
616  trace_gfs2_log_flush(sdp, 1);
617 
618  ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
619  INIT_LIST_HEAD(&ai->ai_ail1_list);
620  INIT_LIST_HEAD(&ai->ai_ail2_list);
621 
622  if (sdp->sd_log_num_buf != sdp->sd_log_commited_buf) {
623  printk(KERN_INFO "GFS2: log buf %u %u\n", sdp->sd_log_num_buf,
624  sdp->sd_log_commited_buf);
625  gfs2_assert_withdraw(sdp, 0);
626  }
627  if (sdp->sd_log_num_databuf != sdp->sd_log_commited_databuf) {
628  printk(KERN_INFO "GFS2: log databuf %u %u\n",
629  sdp->sd_log_num_databuf, sdp->sd_log_commited_databuf);
630  gfs2_assert_withdraw(sdp, 0);
631  }
632  gfs2_assert_withdraw(sdp,
633  sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
634 
635  sdp->sd_log_flush_head = sdp->sd_log_head;
636  sdp->sd_log_flush_wrapped = 0;
637  ai->ai_first = sdp->sd_log_flush_head;
638 
639  gfs2_ordered_write(sdp);
640  lops_before_commit(sdp);
641  gfs2_log_flush_bio(sdp, WRITE);
642 
643  if (sdp->sd_log_head != sdp->sd_log_flush_head) {
644  log_write_header(sdp, 0);
645  } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
646  atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
647  trace_gfs2_log_blocks(sdp, -1);
648  log_write_header(sdp, 0);
649  }
650  lops_after_commit(sdp, ai);
651 
652  gfs2_log_lock(sdp);
653  sdp->sd_log_head = sdp->sd_log_flush_head;
654  sdp->sd_log_blks_reserved = 0;
655  sdp->sd_log_commited_buf = 0;
656  sdp->sd_log_commited_databuf = 0;
657  sdp->sd_log_commited_revoke = 0;
658 
659  spin_lock(&sdp->sd_ail_lock);
660  if (!list_empty(&ai->ai_ail1_list)) {
661  list_add(&ai->ai_list, &sdp->sd_ail1_list);
662  ai = NULL;
663  }
664  spin_unlock(&sdp->sd_ail_lock);
665  gfs2_log_unlock(sdp);
666  trace_gfs2_log_flush(sdp, 0);
667  up_write(&sdp->sd_log_flush_lock);
668 
669  kfree(ai);
670 }
671 
672 static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
673 {
674  unsigned int reserved;
675  unsigned int unused;
676 
677  gfs2_log_lock(sdp);
678 
679  sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
680  sdp->sd_log_commited_databuf += tr->tr_num_databuf_new -
681  tr->tr_num_databuf_rm;
682  gfs2_assert_withdraw(sdp, (((int)sdp->sd_log_commited_buf) >= 0) ||
683  (((int)sdp->sd_log_commited_databuf) >= 0));
684  sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
685  reserved = calc_reserved(sdp);
686  gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved);
687  unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved;
688  atomic_add(unused, &sdp->sd_log_blks_free);
689  trace_gfs2_log_blocks(sdp, unused);
690  gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
691  sdp->sd_jdesc->jd_blocks);
692  sdp->sd_log_blks_reserved = reserved;
693 
694  gfs2_log_unlock(sdp);
695 }
696 
712 void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
713 {
714  log_refund(sdp, tr);
715  up_read(&sdp->sd_log_flush_lock);
716 
717  if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
718  ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) >
719  atomic_read(&sdp->sd_log_thresh2)))
720  wake_up(&sdp->sd_logd_waitq);
721 }
722 
729 void gfs2_log_shutdown(struct gfs2_sbd *sdp)
730 {
731  down_write(&sdp->sd_log_flush_lock);
732 
738  gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
739 
740  sdp->sd_log_flush_head = sdp->sd_log_head;
741  sdp->sd_log_flush_wrapped = 0;
742 
743  log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT);
744 
745  gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks);
746  gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
747  gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
748 
749  sdp->sd_log_head = sdp->sd_log_flush_head;
750  sdp->sd_log_tail = sdp->sd_log_head;
751 
752  up_write(&sdp->sd_log_flush_lock);
753 }
754 
755 
762 void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
763 {
764  gfs2_log_flush(sdp, NULL);
765  for (;;) {
766  gfs2_ail1_start(sdp);
767  gfs2_ail1_wait(sdp);
768  if (gfs2_ail1_empty(sdp))
769  break;
770  }
771  gfs2_log_flush(sdp, NULL);
772 }
773 
774 static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
775 {
776  return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1));
777 }
778 
779 static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
780 {
781  unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
782  return used_blocks >= atomic_read(&sdp->sd_log_thresh2);
783 }
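
These two helpers drive gfs2_logd below: a journal flush is requested once the pinned-buffer count reaches sd_log_thresh1, and an AIL flush once the journal blocks in use (journal size minus free blocks) reach sd_log_thresh2. A tiny userspace sketch of those tests with assumed values:

#include <stdio.h>

/* Assumed values; the real thresholds live in the superblock (sd_log_thresh1/2). */
#define JD_BLOCKS 8192u
#define THRESH1    512u	/* pinned-buffer threshold       */
#define THRESH2   4096u	/* used-journal-block threshold  */

static int jrnl_flush_reqd(unsigned int pinned)
{
	return pinned >= THRESH1;
}

static int ail_flush_reqd(unsigned int blks_free)
{
	unsigned int used_blocks = JD_BLOCKS - blks_free;

	return used_blocks >= THRESH2;
}

int main(void)
{
	printf("jrnl flush: %d\n", jrnl_flush_reqd(600));	/* 1                 */
	printf("ail flush:  %d\n", ail_flush_reqd(3000));	/* 1: 5192 blocks used */
	return 0;
}
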
784 
793 int gfs2_logd(void *data)
794 {
795  struct gfs2_sbd *sdp = data;
796  unsigned long t = 1;
797  DEFINE_WAIT(wait);
798 
799  while (!kthread_should_stop()) {
800 
801  if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
802  gfs2_ail1_empty(sdp);
803  gfs2_log_flush(sdp, NULL);
804  }
805 
806  if (gfs2_ail_flush_reqd(sdp)) {
807  gfs2_ail1_start(sdp);
808  gfs2_ail1_wait(sdp);
809  gfs2_ail1_empty(sdp);
810  gfs2_log_flush(sdp, NULL);
811  }
812 
813  if (!gfs2_ail_flush_reqd(sdp))
814  wake_up(&sdp->sd_log_waitq);
815 
816  t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
817 
818  try_to_freeze();
819 
820  do {
821  prepare_to_wait(&sdp->sd_logd_waitq, &wait,
822  TASK_INTERRUPTIBLE);
823  if (!gfs2_ail_flush_reqd(sdp) &&
824  !gfs2_jrnl_flush_reqd(sdp) &&
825  !kthread_should_stop())
826  t = schedule_timeout(t);
827  } while(t && !gfs2_ail_flush_reqd(sdp) &&
828  !gfs2_jrnl_flush_reqd(sdp) &&
829  !kthread_should_stop());
830  finish_wait(&sdp->sd_logd_waitq, &wait);
831  }
832 
833  return 0;
834 }
835