Linux Kernel 3.7.1
jfs_logmgr.c
1 /*
2  * Copyright (C) International Business Machines Corp., 2000-2004
3  * Portions Copyright (C) Christoph Hellwig, 2001-2002
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13  * the GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  */
19 
20 /*
21  * jfs_logmgr.c: log manager
22  *
23  * for related information, see transaction manager (jfs_txnmgr.c), and
24  * recovery manager (jfs_logredo.c).
25  *
26  * note: for detail, RTFS.
27  *
28  * log buffer manager:
29  * special purpose buffer manager supporting log i/o requirements.
30  * per log serial pageout of logpage
31  * queuing i/o requests and redrive i/o at iodone
32  * maintain current logpage buffer
33  * no caching since append only
34  * appropriate jfs buffer cache buffers as needed
35  *
36  * group commit:
37  * transactions which wrote COMMIT records in the same in-memory
38  * log page during the pageout of previous/current log page(s) are
39  * committed together by the pageout of the page.
40  *
41  * TBD lazy commit:
42  * transactions are committed asynchronously when the log page
43  * containing its COMMIT record is paged out when it becomes full;
44  *
45  * serialization:
46  * . a per log lock serializes log write.
47  * . a per log lock serializes group commit.
48  * . a per log lock serializes log open/close;
49  *
50  * TBD log integrity:
51  * careful-write (ping-pong) of last logpage to recover from crash
52  * in overwrite.
53  * detection of split (out-of-order) write of physical sectors
54  * of last logpage via timestamp at end of each sector
55  * (with its mirror data array at the trailer).
56  *
57  * alternatives:
58  * lsn - 64-bit monotonically increasing integer vs
59  * 32-bit lspn and page eor.
60  */
61 
62 #include <linux/fs.h>
63 #include <linux/blkdev.h>
64 #include <linux/interrupt.h>
65 #include <linux/completion.h>
66 #include <linux/kthread.h>
67 #include <linux/buffer_head.h> /* for sync_blockdev() */
68 #include <linux/bio.h>
69 #include <linux/freezer.h>
70 #include <linux/export.h>
71 #include <linux/delay.h>
72 #include <linux/mutex.h>
73 #include <linux/seq_file.h>
74 #include <linux/slab.h>
75 #include "jfs_incore.h"
76 #include "jfs_filsys.h"
77 #include "jfs_metapage.h"
78 #include "jfs_superblock.h"
79 #include "jfs_txnmgr.h"
80 #include "jfs_debug.h"
81 
82 
83 /*
84  * lbuf's ready to be redriven. Protected by log_redrive_lock (jfsIO thread)
85  */
86 static struct lbuf *log_redrive_list;
87 static DEFINE_SPINLOCK(log_redrive_lock);
88 
89 
90 /*
91  * log read/write serialization (per log)
92  */
93 #define LOG_LOCK_INIT(log) mutex_init(&(log)->loglock)
94 #define LOG_LOCK(log) mutex_lock(&((log)->loglock))
95 #define LOG_UNLOCK(log) mutex_unlock(&((log)->loglock))
96 
97 
98 /*
99  * log group commit serialization (per log)
100  */
101 
102 #define LOGGC_LOCK_INIT(log) spin_lock_init(&(log)->gclock)
103 #define LOGGC_LOCK(log) spin_lock_irq(&(log)->gclock)
104 #define LOGGC_UNLOCK(log) spin_unlock_irq(&(log)->gclock)
105 #define LOGGC_WAKEUP(tblk) wake_up_all(&(tblk)->gcwait)
106 
107 /*
108  * log sync serialization (per log)
109  */
110 #define LOGSYNC_DELTA(logsize) min((logsize)/8, 128*LOGPSIZE)
111 #define LOGSYNC_BARRIER(logsize) ((logsize)/4)
112 /*
113 #define LOGSYNC_DELTA(logsize) min((logsize)/4, 256*LOGPSIZE)
114 #define LOGSYNC_BARRIER(logsize) ((logsize)/2)
115 */
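/*
 * worked example (assuming LOGPSIZE = 4096, per jfs_filsys.h): for a
 * 16 MB log, LOGSYNC_DELTA = min(16 MB / 8, 128 * 4 KB) = 512 KB, so a
 * new syncpt is triggered roughly every 128 log pages, while
 * LOGSYNC_BARRIER = 4 MB is the written-since-syncpt level at which
 * lmLogSync() stalls new transactions behind a sync barrier.
 */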
116 
117 
118 /*
119  * log buffer cache synchronization
120  */
121 static DEFINE_SPINLOCK(jfsLCacheLock);
122 
123 #define LCACHE_LOCK(flags) spin_lock_irqsave(&jfsLCacheLock, flags)
124 #define LCACHE_UNLOCK(flags) spin_unlock_irqrestore(&jfsLCacheLock, flags)
125 
126 /*
127  * See __SLEEP_COND in jfs_locks.h
128  */
129 #define LCACHE_SLEEP_COND(wq, cond, flags) \
130 do { \
131  if (cond) \
132  break; \
133  __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
134 } while (0)
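/*
 * usage note: callers hold jfsLCacheLock (via LCACHE_LOCK) when they
 * invoke LCACHE_SLEEP_COND; if the condition already holds, the macro
 * falls through without sleeping, otherwise __SLEEP_COND drops the
 * lock around the sleep and retakes it before re-testing, so the
 * condition is always evaluated under the lock (see lbmAllocate()
 * and lbmIOWait() below).
 */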
135 
136 #define LCACHE_WAKEUP(event) wake_up(event)
137 
138 
139 /*
140  * lbuf buffer cache (lCache) control
141  */
142 /* log buffer manager pageout control (cumulative, inclusive) */
143 #define lbmREAD 0x0001
144 #define lbmWRITE 0x0002 /* enqueue at tail of write queue;
145  * init pageout if at head of queue;
146  */
147 #define lbmRELEASE 0x0004 /* remove from write queue
148  * at completion of pageout;
149  * do not free/recycle it yet:
150  * caller will free it;
151  */
152 #define lbmSYNC 0x0008 /* do not return to freelist
153  * when removed from write queue;
154  */
155 #define lbmFREE 0x0010 /* return to freelist
156  * at completion of pageout;
157  * the buffer may be recycled;
158  */
159 #define lbmDONE 0x0020
160 #define lbmERROR 0x0040
161 #define lbmGC 0x0080 /* lbmIODone to perform post-GC processing
162  * of log page
163  */
164 #define lbmDIRECT 0x0100
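/*
 * flag combinations used later in this file:
 * lbmWRITE | lbmRELEASE | lbmFREE - full-page pageout: the buffer
 * leaves the write queue and returns to the freelist at iodone
 * (lmNextPage());
 * lbmWRITE | lbmSYNC - caller blocks in lbmIOWait() until iodone
 * (log superblock and last-page writes);
 * lbmWRITE | lbmGC - group commit pageout: lbmIODone() hands the
 * buffer to lmPostGC() instead of releasing it (lmGCwrite()).
 */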
165 
166 /*
167  * Global list of active external journals
168  */
169 static LIST_HEAD(jfs_external_logs);
170 static struct jfs_log *dummy_log = NULL;
171 static DEFINE_MUTEX(jfs_log_mutex);
172 
173 /*
174  * forward references
175  */
176 static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
177  struct lrd * lrd, struct tlock * tlck);
178 
179 static int lmNextPage(struct jfs_log * log);
180 static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
181  int activate);
182 
183 static int open_inline_log(struct super_block *sb);
184 static int open_dummy_log(struct super_block *sb);
185 static int lbmLogInit(struct jfs_log * log);
186 static void lbmLogShutdown(struct jfs_log * log);
187 static struct lbuf *lbmAllocate(struct jfs_log * log, int);
188 static void lbmFree(struct lbuf * bp);
189 static void lbmfree(struct lbuf * bp);
190 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
191 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
192 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
193 static int lbmIOWait(struct lbuf * bp, int flag);
194 static bio_end_io_t lbmIODone;
195 static void lbmStartIO(struct lbuf * bp);
196 static void lmGCwrite(struct jfs_log * log, int cant_block);
197 static int lmLogSync(struct jfs_log * log, int hard_sync);
198 
199 
200 
201 /*
202  * statistics
203  */
204 #ifdef CONFIG_JFS_STATISTICS
205 static struct lmStat {
206  uint commit; /* # of commit */
207  uint pagedone; /* # of page written */
208  uint submitted; /* # of pages submitted */
209  uint full_page; /* # of full pages submitted */
210  uint partial_page; /* # of partial pages submitted */
211 } lmStat;
212 #endif
213 
214 static void write_special_inodes(struct jfs_log *log,
215  int (*writer)(struct address_space *))
216 {
217  struct jfs_sb_info *sbi;
218 
219  list_for_each_entry(sbi, &log->sb_list, log_list) {
220  writer(sbi->ipbmap->i_mapping);
221  writer(sbi->ipimap->i_mapping);
222  writer(sbi->direct_inode->i_mapping);
223  }
224 }
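/*
 * write_special_inodes() pushes the block allocation map, inode
 * allocation map, and direct inode of every file system on this log;
 * it is called with filemap_fdatawrite (start writeback of all dirty
 * pages, hard sync) or filemap_flush (opportunistic flush) as the
 * writer - see lmLogSync() and jfs_flush_journal() below.
 */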
226 /*
227  * NAME: lmLog()
228  *
229  * FUNCTION: write a log record;
230  *
231  * PARAMETER:
232  *
233  * RETURN: lsn - offset to the next log record to write (end-of-log);
234  * -1 - error;
235  *
236  * note: todo: log error handler
237  */
238 int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
239  struct tlock * tlck)
240 {
241  int lsn;
242  int diffp, difft;
243  struct metapage *mp = NULL;
244  unsigned long flags;
245 
246  jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
247  log, tblk, lrd, tlck);
248 
249  LOG_LOCK(log);
250 
251  /* log by (out-of-transaction) JFS ? */
252  if (tblk == NULL)
253  goto writeRecord;
254 
255  /* log from page ? */
256  if (tlck == NULL ||
257  tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
258  goto writeRecord;
259 
260  /*
261  * initialize/update page/transaction recovery lsn
262  */
263  lsn = log->lsn;
264 
265  LOGSYNC_LOCK(log, flags);
266 
267  /*
268  * initialize page lsn if first log write of the page
269  */
270  if (mp->lsn == 0) {
271  mp->log = log;
272  mp->lsn = lsn;
273  log->count++;
274 
275  /* insert page at tail of logsynclist */
276  list_add_tail(&mp->synclist, &log->synclist);
277  }
278 
279  /*
280  * initialize/update lsn of tblock of the page
281  *
282  * transaction inherits oldest lsn of pages associated
283  * with allocation/deallocation of resources (their
284  * log records are used to reconstruct allocation map
285  * at recovery time: inode for inode allocation map,
286  * B+-tree index of extent descriptors for block
287  * allocation map);
288  * allocation map pages inherit transaction lsn at
289  * commit time to allow forwarding log syncpt past log
290  * records associated with allocation/deallocation of
291  * resources only after the persistent maps of these map pages
292  * have been updated and propagated to home.
293  */
294  /*
295  * initialize transaction lsn:
296  */
297  if (tblk->lsn == 0) {
298  /* inherit lsn of its first page logged */
299  tblk->lsn = mp->lsn;
300  log->count++;
301 
302  /* insert tblock after the page on logsynclist */
303  list_add(&tblk->synclist, &mp->synclist);
304  }
305  /*
306  * update transaction lsn:
307  */
308  else {
309  /* inherit oldest/smallest lsn of page */
310  logdiff(diffp, mp->lsn, log);
311  logdiff(difft, tblk->lsn, log);
312  if (diffp < difft) {
313  /* update tblock lsn with page lsn */
314  tblk->lsn = mp->lsn;
315 
316  /* move tblock after page on logsynclist */
317  list_move(&tblk->synclist, &mp->synclist);
318  }
319  }
320 
321  LOGSYNC_UNLOCK(log, flags);
322 
323  /*
324  * write the log record
325  */
326  writeRecord:
327  lsn = lmWriteRecord(log, tblk, lrd, tlck);
328 
329  /*
330  * forward log syncpt if log reached next syncpt trigger
331  */
332  logdiff(diffp, lsn, log);
333  if (diffp >= log->nextsync)
334  lsn = lmLogSync(log, 0);
335 
336  /* update end-of-log lsn */
337  log->lsn = lsn;
338 
339  LOG_UNLOCK(log);
340 
341  /* return end-of-log address */
342  return lsn;
343 }
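/*
 * note on logdiff(): a minimal sketch of the macro (defined in
 * jfs_logmgr.h) that the syncpt test above relies on; it computes the
 * byte distance from the last syncpt to an lsn, modulo the log size,
 * so the comparison stays correct after the log wraps:
 *
 *	diff = (lsn) - (log)->syncpt;
 *	if (diff < 0)
 *		diff += (log)->logsize;
 */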
344 
345 /*
346  * NAME: lmWriteRecord()
347  *
348  * FUNCTION: move the log record to current log page
349  *
350  * PARAMETER: cd - commit descriptor
351  *
352  * RETURN: end-of-log address
353  *
354  * serialization: LOG_LOCK() held on entry/exit
355  */
356 static int
357 lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
358  struct tlock * tlck)
359 {
360  int lsn = 0; /* end-of-log address */
361  struct lbuf *bp; /* dst log page buffer */
362  struct logpage *lp; /* dst log page */
363  caddr_t dst; /* destination address in log page */
364  int dstoffset; /* end-of-log offset in log page */
365  int freespace; /* free space in log page */
366  caddr_t p; /* src meta-data page */
367  caddr_t src;
368  int srclen;
369  int nbytes; /* number of bytes to move */
370  int i;
371  int len;
372  struct linelock *linelock;
373  struct lv *lv;
374  struct lvd *lvd;
375  int l2linesize;
376 
377  len = 0;
378 
379  /* retrieve destination log page to write */
380  bp = (struct lbuf *) log->bp;
381  lp = (struct logpage *) bp->l_ldata;
382  dstoffset = log->eor;
383 
384  /* any log data to write ? */
385  if (tlck == NULL)
386  goto moveLrd;
387 
388  /*
389  * move log record data
390  */
391  /* retrieve source meta-data page to log */
392  if (tlck->flag & tlckPAGELOCK) {
393  p = (caddr_t) (tlck->mp->data);
394  linelock = (struct linelock *) & tlck->lock;
395  }
396  /* retrieve source in-memory inode to log */
397  else if (tlck->flag & tlckINODELOCK) {
398  if (tlck->type & tlckDTREE)
399  p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
400  else
401  p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
402  linelock = (struct linelock *) & tlck->lock;
403  }
404 #ifdef _JFS_WIP
405  else if (tlck->flag & tlckINLINELOCK) {
406 
407  inlinelock = (struct inlinelock *) & tlck;
408  p = (caddr_t) & inlinelock->pxd;
409  linelock = (struct linelock *) & tlck;
410  }
411 #endif /* _JFS_WIP */
412  else {
413  jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
414  return 0; /* Probably should trap */
415  }
416  l2linesize = linelock->l2linesize;
417 
418  moveData:
419  ASSERT(linelock->index <= linelock->maxcnt);
420 
421  lv = linelock->lv;
422  for (i = 0; i < linelock->index; i++, lv++) {
423  if (lv->length == 0)
424  continue;
425 
426  /* is page full ? */
427  if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
428  /* page becomes full: move on to next page */
429  lmNextPage(log);
430 
431  bp = log->bp;
432  lp = (struct logpage *) bp->l_ldata;
433  dstoffset = LOGPHDRSIZE;
434  }
435 
436  /*
437  * move log vector data
438  */
439  src = (u8 *) p + (lv->offset << l2linesize);
440  srclen = lv->length << l2linesize;
441  len += srclen;
442  while (srclen > 0) {
443  freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
444  nbytes = min(freespace, srclen);
445  dst = (caddr_t) lp + dstoffset;
446  memcpy(dst, src, nbytes);
447  dstoffset += nbytes;
448 
449  /* is page not full ? */
450  if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
451  break;
452 
453  /* page becomes full: move on to next page */
454  lmNextPage(log);
455 
456  bp = (struct lbuf *) log->bp;
457  lp = (struct logpage *) bp->l_ldata;
458  dstoffset = LOGPHDRSIZE;
459 
460  srclen -= nbytes;
461  src += nbytes;
462  }
463 
464  /*
465  * move log vector descriptor
466  */
467  len += 4;
468  lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
469  lvd->offset = cpu_to_le16(lv->offset);
470  lvd->length = cpu_to_le16(lv->length);
471  dstoffset += 4;
472  jfs_info("lmWriteRecord: lv offset:%d length:%d",
473  lv->offset, lv->length);
474  }
475 
476  if ((i = linelock->next)) {
477  linelock = (struct linelock *) lid_to_tlock(i);
478  goto moveData;
479  }
480 
481  /*
482  * move log record descriptor
483  */
484  moveLrd:
485  lrd->length = cpu_to_le16(len);
486 
487  src = (caddr_t) lrd;
488  srclen = LOGRDSIZE;
489 
490  while (srclen > 0) {
491  freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
492  nbytes = min(freespace, srclen);
493  dst = (caddr_t) lp + dstoffset;
494  memcpy(dst, src, nbytes);
495 
496  dstoffset += nbytes;
497  srclen -= nbytes;
498 
499  /* are there more to move than freespace of page ? */
500  if (srclen)
501  goto pageFull;
502 
503  /*
504  * end of log record descriptor
505  */
506 
507  /* update last log record eor */
508  log->eor = dstoffset;
509  bp->l_eor = dstoffset;
510  lsn = (log->page << L2LOGPSIZE) + dstoffset;
511 
512  if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
513  tblk->clsn = lsn;
514  jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
515  bp->l_eor);
516 
517  INCREMENT(lmStat.commit); /* # of commit */
518 
519  /*
520  * enqueue tblock for group commit:
521  *
522  * enqueue tblock of non-trivial/synchronous COMMIT
523  * at tail of group commit queue
524  * (trivial/asynchronous COMMITs are ignored by
525  * group commit.)
526  */
527  LOGGC_LOCK(log);
528 
529  /* init tblock gc state */
530  tblk->flag = tblkGC_QUEUE;
531  tblk->bp = log->bp;
532  tblk->pn = log->page;
533  tblk->eor = log->eor;
534 
535  /* enqueue transaction to commit queue */
536  list_add_tail(&tblk->cqueue, &log->cqueue);
537 
538  LOGGC_UNLOCK(log);
539  }
540 
541  jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
542  le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
543 
544  /* page not full ? */
545  if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
546  return lsn;
547 
548  pageFull:
549  /* page becomes full: move on to next page */
550  lmNextPage(log);
551 
552  bp = (struct lbuf *) log->bp;
553  lp = (struct logpage *) bp->l_ldata;
554  dstoffset = LOGPHDRSIZE;
555  src += nbytes;
556  }
557 
558  return lsn;
559 }
560 
561 
562 /*
563  * NAME: lmNextPage()
564  *
565  * FUNCTION: write current page and allocate next page.
566  *
567  * PARAMETER: log
568  *
569  * RETURN: 0
570  *
571  * serialization: LOG_LOCK() held on entry/exit
572  */
573 static int lmNextPage(struct jfs_log * log)
574 {
575  struct logpage *lp;
576  int lspn; /* log sequence page number */
577  int pn; /* current page number */
578  struct lbuf *bp;
579  struct lbuf *nextbp;
580  struct tblock *tblk;
581 
582  /* get current log page number and log sequence page number */
583  pn = log->page;
584  bp = log->bp;
585  lp = (struct logpage *) bp->l_ldata;
586  lspn = le32_to_cpu(lp->h.page);
587 
588  LOGGC_LOCK(log);
589 
590  /*
591  * write or queue the full page at the tail of write queue
592  */
593  /* get the tail tblk on commit queue */
594  if (list_empty(&log->cqueue))
595  tblk = NULL;
596  else
597  tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
598 
599  /* every tblk that has a COMMIT record on the current page,
600  * and has not been committed, must be on the commit queue,
601  * since the tblk is queued at the commit queue at the time
602  * of writing its COMMIT record on the page before
603  * page becomes full (even though the tblk thread
604  * who wrote COMMIT record may have been suspended
605  * currently);
606  */
607 
608  /* is page bound with outstanding tail tblk ? */
609  if (tblk && tblk->pn == pn) {
610  /* mark tblk for end-of-page */
611  tblk->flag |= tblkGC_EOP;
612 
613  if (log->cflag & logGC_PAGEOUT) {
614  /* if page is not already on write queue,
615  * just enqueue (no lbmWRITE to prevent redrive)
616  * buffer to wqueue to ensure correct serial order
617  * of the pages since log pages will be added
618  * continuously
619  */
620  if (bp->l_wqnext == NULL)
621  lbmWrite(log, bp, 0, 0);
622  } else {
623  /*
624  * No current GC leader, initiate group commit
625  */
626  log->cflag |= logGC_PAGEOUT;
627  lmGCwrite(log, 0);
628  }
629  }
630  /* page is not bound with outstanding tblk:
631  * init write or mark it to be redriven (lbmWRITE)
632  */
633  else {
634  /* finalize the page */
635  bp->l_ceor = bp->l_eor;
636  lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
637  lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
638  }
639  LOGGC_UNLOCK(log);
640 
641  /*
642  * allocate/initialize next page
643  */
644  /* if log wraps, the first data page of log is 2
645  * (0 never used, 1 is superblock).
646  */
647  log->page = (pn == log->size - 1) ? 2 : pn + 1;
648  log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */
649 
650  /* allocate/initialize next log page buffer */
651  nextbp = lbmAllocate(log, log->page);
652  nextbp->l_eor = log->eor;
653  log->bp = nextbp;
654 
655  /* initialize next log page */
656  lp = (struct logpage *) nextbp->l_ldata;
657  lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
658  lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
659 
660  return 0;
661 }
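/*
 * page wrap example: log pages 2 .. log->size - 1 hold log data
 * (page 0 is never used, page 1 is the log superblock), so with
 * log->size == 1024 the sequence of log->page values is
 * 2, 3, ..., 1023, 2, 3, ...; lspn, by contrast, keeps increasing
 * monotonically, which lets recovery tell new data from old after
 * a wrap.
 */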
662 
663 
664 /*
665  * NAME: lmGroupCommit()
666  *
667  * FUNCTION: group commit
668  * initiate pageout of the pages with COMMIT in the order of
669  * page number - redrive pageout of the page at the head of
670  * pageout queue until full page has been written.
671  *
672  * RETURN:
673  *
674  * NOTE:
675  * LOGGC_LOCK serializes log group commit queue, and
676  * transaction blocks on the commit queue.
677  * N.B. LOG_LOCK is NOT held during lmGroupCommit().
678  */
679 int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
680 {
681  int rc = 0;
682 
683  LOGGC_LOCK(log);
684 
685  /* group committed already ? */
686  if (tblk->flag & tblkGC_COMMITTED) {
687  if (tblk->flag & tblkGC_ERROR)
688  rc = -EIO;
689 
690  LOGGC_UNLOCK(log);
691  return rc;
692  }
693  jfs_info("lmGroupCommit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
694 
695  if (tblk->xflag & COMMIT_LAZY)
696  tblk->flag |= tblkGC_LAZY;
697 
698  if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
699  (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
700  || jfs_tlocks_low)) {
701  /*
702  * No pageout in progress
703  *
704  * start group commit as its group leader.
705  */
706  log->cflag |= logGC_PAGEOUT;
707 
708  lmGCwrite(log, 0);
709  }
710 
711  if (tblk->xflag & COMMIT_LAZY) {
712  /*
713  * Lazy transactions can leave now
714  */
715  LOGGC_UNLOCK(log);
716  return 0;
717  }
718 
719  /* lmGCwrite gives up LOGGC_LOCK, check again */
720 
721  if (tblk->flag & tblkGC_COMMITTED) {
722  if (tblk->flag & tblkGC_ERROR)
723  rc = -EIO;
724 
725  LOGGC_UNLOCK(log);
726  return rc;
727  }
728 
729  /* upcount transaction waiting for completion
730  */
731  log->gcrtc++;
732  tblk->flag |= tblkGC_READY;
733 
734  __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
735  LOGGC_LOCK(log), LOGGC_UNLOCK(log));
736 
737  /* removed from commit queue */
738  if (tblk->flag & tblkGC_ERROR)
739  rc = -EIO;
740 
741  LOGGC_UNLOCK(log);
742  return rc;
743 }
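/*
 * leader/follower protocol in brief: the first committer to find no
 * pageout in progress sets logGC_PAGEOUT and calls lmGCwrite() as the
 * group leader; later committers simply sleep on tblk->gcwait (their
 * tblk is already on log->cqueue from lmWriteRecord()) until
 * lmPostGC() marks them tblkGC_COMMITTED, so a single page write can
 * commit many transactions.
 */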
744 
745 /*
746  * NAME: lmGCwrite()
747  *
748  * FUNCTION: group commit write
749  * initiate write of log page, building a group of all transactions
750  * with commit records on that page.
751  *
752  * RETURN: None
753  *
754  * NOTE:
755  * LOGGC_LOCK must be held by caller.
756  * N.B. LOG_LOCK is NOT held during lmGroupCommit().
757  */
758 static void lmGCwrite(struct jfs_log * log, int cant_write)
759 {
760  struct lbuf *bp;
761  struct logpage *lp;
762  int gcpn; /* group commit page number */
763  struct tblock *tblk;
764  struct tblock *xtblk = NULL;
765 
766  /*
767  * build the commit group of a log page
768  *
769  * scan commit queue and make a commit group of all
770  * transactions with COMMIT records on the same log page.
771  */
772  /* get the head tblk on the commit queue */
773  gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
774 
775  list_for_each_entry(tblk, &log->cqueue, cqueue) {
776  if (tblk->pn != gcpn)
777  break;
778 
779  xtblk = tblk;
780 
781  /* state transition: (QUEUE, READY) -> COMMIT */
782  tblk->flag |= tblkGC_COMMIT;
783  }
784  tblk = xtblk; /* last tblk of the page */
785 
786  /*
787  * pageout to commit transactions on the log page.
788  */
789  bp = (struct lbuf *) tblk->bp;
790  lp = (struct logpage *) bp->l_ldata;
791  /* is page already full ? */
792  if (tblk->flag & tblkGC_EOP) {
793  /* mark page to free at end of group commit of the page */
794  tblk->flag &= ~tblkGC_EOP;
795  tblk->flag |= tblkGC_FREE;
796  bp->l_ceor = bp->l_eor;
797  lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
798  lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
799  cant_write);
800  INCREMENT(lmStat.full_page);
801  }
802  /* page is not yet full */
803  else {
804  bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */
805  lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
806  lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
807  INCREMENT(lmStat.partial_page);
808  }
809 }
810 
811 /*
812  * NAME: lmPostGC()
813  *
814  * FUNCTION: group commit post-processing
815  * Processes transactions after their commit records have been written
816  * to disk, redriving log I/O if necessary.
817  *
818  * RETURN: None
819  *
820  * NOTE:
821  * This routine is called at interrupt time by lbmIODone
822  */
823 static void lmPostGC(struct lbuf * bp)
824 {
825  unsigned long flags;
826  struct jfs_log *log = bp->l_log;
827  struct logpage *lp;
828  struct tblock *tblk, *temp;
829 
830  //LOGGC_LOCK(log);
831  spin_lock_irqsave(&log->gclock, flags);
832  /*
833  * current pageout of group commit completed.
834  *
835  * remove/wakeup transactions from commit queue who were
836  * group committed with the current log page
837  */
838  list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
839  if (!(tblk->flag & tblkGC_COMMIT))
840  break;
841  /* if transaction was marked GC_COMMIT then
842  * it has been shipped in the current pageout
843  * and made it to disk - it is committed.
844  */
845 
846  if (bp->l_flag & lbmERROR)
847  tblk->flag |= tblkGC_ERROR;
848 
849  /* remove it from the commit queue */
850  list_del(&tblk->cqueue);
851  tblk->flag &= ~tblkGC_QUEUE;
852 
853  if (tblk == log->flush_tblk) {
854  /* we can stop flushing the log now */
855  clear_bit(log_FLUSH, &log->flag);
856  log->flush_tblk = NULL;
857  }
858 
859  jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
860  tblk->flag);
861 
862  if (!(tblk->xflag & COMMIT_FORCE))
863  /*
864  * Hand tblk over to lazy commit thread
865  */
866  txLazyUnlock(tblk);
867  else {
868  /* state transition: COMMIT -> COMMITTED */
869  tblk->flag |= tblkGC_COMMITTED;
870 
871  if (tblk->flag & tblkGC_READY)
872  log->gcrtc--;
873 
874  LOGGC_WAKEUP(tblk);
875  }
876 
877  /* was page full before pageout ?
878  * (and this is the last tblk bound with the page)
879  */
880  if (tblk->flag & tblkGC_FREE)
881  lbmFree(bp);
882  /* did page become full after pageout ?
883  * (and this is the last tblk bound with the page)
884  */
885  else if (tblk->flag & tblkGC_EOP) {
886  /* finalize the page */
887  lp = (struct logpage *) bp->l_ldata;
888  bp->l_ceor = bp->l_eor;
889  lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
890  jfs_info("lmPostGC: calling lbmWrite");
891  lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
892  1);
893  }
894 
895  }
896 
897  /* are there any transactions that have entered lmGroupCommit()
898  * (whose COMMITs are after that of the last log page written).
899  * They are waiting for new group commit (above at (SLEEP 1))
900  * or lazy transactions are on a full (queued) log page,
901  * select the latest ready transaction as new group leader and
902  * wake her up to lead her group.
903  */
904  if ((!list_empty(&log->cqueue)) &&
905  ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
906  test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
907  /*
908  * Call lmGCwrite with new group leader
909  */
910  lmGCwrite(log, 1);
911 
912  /* no transactions are ready yet (transactions are only just
913  * queued (GC_QUEUE) and not entered for group commit yet).
914  * the first transaction entering group commit
915  * will elect herself as new group leader.
916  */
917  else
918  log->cflag &= ~logGC_PAGEOUT;
919 
920  //LOGGC_UNLOCK(log);
921  spin_unlock_irqrestore(&log->gclock, flags);
922  return;
923 }
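/*
 * tblk group commit state transitions driven above:
 * tblkGC_QUEUE (lmWriteRecord) -> tblkGC_COMMIT (lmGCwrite) ->
 * tblkGC_COMMITTED (lmPostGC); tblkGC_READY marks a sleeper counted
 * in log->gcrtc, while tblkGC_EOP and tblkGC_FREE record whether the
 * log page filled up and whether the buffer may be recycled once the
 * group completes.
 */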
924 
925 /*
926  * NAME: lmLogSync()
927  *
928  * FUNCTION: write log SYNCPT record for specified log
929  * if new sync address is available
930  * (normally the case if sync() is executed by a background
931  * process).
932  * calculate new value of i_nextsync which determines when
933  * this code is called again.
934  *
935  * PARAMETERS: log - log structure
936  * hard_sync - 1 to force all metadata to be written
937  *
938  * RETURN: 0
939  *
940  * serialization: LOG_LOCK() held on entry/exit
941  */
942 static int lmLogSync(struct jfs_log * log, int hard_sync)
943 {
944  int logsize;
945  int written; /* written since last syncpt */
946  int free; /* free space left available */
947  int delta; /* additional delta to write normally */
948  int more; /* additional write granted */
949  struct lrd lrd;
950  int lsn;
951  struct logsyncblk *lp;
952  unsigned long flags;
953 
954  /* push dirty metapages out to disk */
955  if (hard_sync)
956  write_special_inodes(log, filemap_fdatawrite);
957  else
958  write_special_inodes(log, filemap_flush);
959 
960  /*
961  * forward syncpt
962  */
963  /* if last sync is same as last syncpt,
964  * invoke sync point forward processing to update sync.
965  */
966 
967  if (log->sync == log->syncpt) {
968  LOGSYNC_LOCK(log, flags);
969  if (list_empty(&log->synclist))
970  log->sync = log->lsn;
971  else {
972  lp = list_entry(log->synclist.next,
973  struct logsyncblk, synclist);
974  log->sync = lp->lsn;
975  }
976  LOGSYNC_UNLOCK(log, flags);
977 
978  }
979 
980  /* if sync is different from last syncpt,
981  * write a SYNCPT record with syncpt = sync.
982  * reset syncpt = sync
983  */
984  if (log->sync != log->syncpt) {
985  lrd.logtid = 0;
986  lrd.backchain = 0;
987  lrd.type = cpu_to_le16(LOG_SYNCPT);
988  lrd.length = 0;
989  lrd.log.syncpt.sync = cpu_to_le32(log->sync);
990  lsn = lmWriteRecord(log, NULL, &lrd, NULL);
991 
992  log->syncpt = log->sync;
993  } else
994  lsn = log->lsn;
995 
996  /*
997  * setup next syncpt trigger (SWAG)
998  */
999  logsize = log->logsize;
1000 
1001  logdiff(written, lsn, log);
1002  free = logsize - written;
1003  delta = LOGSYNC_DELTA(logsize);
1004  more = min(free / 2, delta);
1005  if (more < 2 * LOGPSIZE) {
1006  jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
1007  /*
1008  * log wrapping
1009  *
1010  * option 1 - panic ? No.!
1011  * option 2 - shutdown file systems
1012  * associated with log ?
1013  * option 3 - extend log ?
1014  * option 4 - second chance
1015  *
1016  * mark log wrapped, and continue.
1017  * when all active transactions are completed,
1018  * mark log valid for recovery.
1019  * if crashed during invalid state, log state
1020  * implies invalid log, forcing fsck().
1021  */
1022  /* mark log state log wrap in log superblock */
1023  /* log->state = LOGWRAP; */
1024 
1025  /* reset sync point computation */
1026  log->syncpt = log->sync = lsn;
1027  log->nextsync = delta;
1028  } else
1029  /* next syncpt trigger = written + more */
1030  log->nextsync = written + more;
1031 
1032  /* if number of bytes written from last sync point is more
1033  * than 1/4 of the log size, stop new transactions from
1034  * starting until all current transactions are completed
1035  * by setting syncbarrier flag.
1036  */
1037  if (!test_bit(log_SYNCBARRIER, &log->flag) &&
1038  (written > LOGSYNC_BARRIER(logsize)) && log->active) {
1039  set_bit(log_SYNCBARRIER, &log->flag);
1040  jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1041  log->syncpt);
1042  /*
1043  * We may have to initiate group commit
1044  */
1045  jfs_flush_journal(log, 0);
1046  }
1047 
1048  return lsn;
1049 }
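/*
 * trigger arithmetic, worked example: with logsize = 4 MB,
 * delta = min(4 MB / 8, 512 KB) = 512 KB; if written = 3 MB since the
 * last syncpt, then free = 1 MB, more = min(512 KB, 512 KB) = 512 KB,
 * and nextsync = 3.5 MB. Only when more would drop below 2 * LOGPSIZE
 * (8 KB, assuming 4 KB log pages) is the log declared wrapped and the
 * sync point computation reset.
 */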
1050 
1051 /*
1052  * NAME: jfs_syncpt
1053  *
1054  * FUNCTION: write log SYNCPT record for specified log
1055  *
1056  * PARAMETERS: log - log structure
1057  * hard_sync - set to 1 to force metadata to be written
1058  */
1059 void jfs_syncpt(struct jfs_log *log, int hard_sync)
1060 { LOG_LOCK(log);
1061  lmLogSync(log, hard_sync);
1062  LOG_UNLOCK(log);
1063 }
1064 
1065 /*
1066  * NAME: lmLogOpen()
1067  *
1068  * FUNCTION: open the log on first open;
1069  * insert filesystem in the active list of the log.
1070  *
1071  * PARAMETER: ipmnt - file system mount inode
1072  * iplog - log inode (out)
1073  *
1074  * RETURN:
1075  *
1076  * serialization:
1077  */
1078 int lmLogOpen(struct super_block *sb)
1079 {
1080  int rc;
1081  struct block_device *bdev;
1082  struct jfs_log *log;
1083  struct jfs_sb_info *sbi = JFS_SBI(sb);
1084 
1085  if (sbi->flag & JFS_NOINTEGRITY)
1086  return open_dummy_log(sb);
1087 
1088  if (sbi->mntflag & JFS_INLINELOG)
1089  return open_inline_log(sb);
1090 
1091  mutex_lock(&jfs_log_mutex);
1092  list_for_each_entry(log, &jfs_external_logs, journal_list) {
1093  if (log->bdev->bd_dev == sbi->logdev) {
1094  if (memcmp(log->uuid, sbi->loguuid,
1095  sizeof(log->uuid))) {
1096  jfs_warn("wrong uuid on JFS journal\n");
1097  mutex_unlock(&jfs_log_mutex);
1098  return -EINVAL;
1099  }
1100  /*
1101  * add file system to log active file system list
1102  */
1103  if ((rc = lmLogFileSystem(log, sbi, 1))) {
1104  mutex_unlock(&jfs_log_mutex);
1105  return rc;
1106  }
1107  goto journal_found;
1108  }
1109  }
1110 
1111  if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
1112  mutex_unlock(&jfs_log_mutex);
1113  return -ENOMEM;
1114  }
1115  INIT_LIST_HEAD(&log->sb_list);
1116  init_waitqueue_head(&log->syncwait);
1117 
1118  /*
1119  * external log as separate logical volume
1120  *
1121  * file systems to log may have n-to-1 relationship;
1122  */
1123 
1124  bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1125  log);
1126  if (IS_ERR(bdev)) {
1127  rc = PTR_ERR(bdev);
1128  goto free;
1129  }
1130 
1131  log->bdev = bdev;
1132  memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid));
1133 
1134  /*
1135  * initialize log:
1136  */
1137  if ((rc = lmLogInit(log)))
1138  goto close;
1139 
1140  list_add(&log->journal_list, &jfs_external_logs);
1141 
1142  /*
1143  * add file system to log active file system list
1144  */
1145  if ((rc = lmLogFileSystem(log, sbi, 1)))
1146  goto shutdown;
1147 
1148 journal_found:
1149  LOG_LOCK(log);
1150  list_add(&sbi->log_list, &log->sb_list);
1151  sbi->log = log;
1152  LOG_UNLOCK(log);
1153 
1154  mutex_unlock(&jfs_log_mutex);
1155  return 0;
1156 
1157  /*
1158  * unwind on error
1159  */
1160  shutdown: /* unwind lbmLogInit() */
1161  list_del(&log->journal_list);
1162  lbmLogShutdown(log);
1163 
1164  close: /* close external log device */
1165  blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1166 
1167  free: /* free log descriptor */
1168  mutex_unlock(&jfs_log_mutex);
1169  kfree(log);
1170 
1171  jfs_warn("lmLogOpen: exit(%d)", rc);
1172  return rc;
1173 }
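/*
 * summary: a mount ends up on exactly one of three journal flavors -
 * an inline log inside the volume itself (JFS_INLINELOG), an external
 * log device that may be shared by several file systems (looked up on
 * jfs_external_logs above), or the global dummy_log when integrity is
 * disabled (JFS_NOINTEGRITY).
 */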
1174 
1175 static int open_inline_log(struct super_block *sb)
1176 {
1177  struct jfs_log *log;
1178  int rc;
1179 
1180  if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1181  return -ENOMEM;
1182  INIT_LIST_HEAD(&log->sb_list);
1183  init_waitqueue_head(&log->syncwait);
1184 
1185  set_bit(log_INLINELOG, &log->flag);
1186  log->bdev = sb->s_bdev;
1187  log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1188  log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1189  (L2LOGPSIZE - sb->s_blocksize_bits);
1190  log->l2bsize = sb->s_blocksize_bits;
1191  ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1192 
1193  /*
1194  * initialize log.
1195  */
1196  if ((rc = lmLogInit(log))) {
1197  kfree(log);
1198  jfs_warn("lmLogOpen: exit(%d)", rc);
1199  return rc;
1200  }
1201 
1202  list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
1203  JFS_SBI(sb)->log = log;
1204 
1205  return rc;
1206 }
1207 
1208 static int open_dummy_log(struct super_block *sb)
1209 {
1210  int rc;
1211 
1212  mutex_lock(&jfs_log_mutex);
1213  if (!dummy_log) {
1214  dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
1215  if (!dummy_log) {
1216  mutex_unlock(&jfs_log_mutex);
1217  return -ENOMEM;
1218  }
1219  INIT_LIST_HEAD(&dummy_log->sb_list);
1220  init_waitqueue_head(&dummy_log->syncwait);
1221  dummy_log->no_integrity = 1;
1222  /* Make up some stuff */
1223  dummy_log->base = 0;
1224  dummy_log->size = 1024;
1225  rc = lmLogInit(dummy_log);
1226  if (rc) {
1227  kfree(dummy_log);
1228  dummy_log = NULL;
1229  mutex_unlock(&jfs_log_mutex);
1230  return rc;
1231  }
1232  }
1233 
1234  LOG_LOCK(dummy_log);
1235  list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
1236  JFS_SBI(sb)->log = dummy_log;
1237  LOG_UNLOCK(dummy_log);
1238  mutex_unlock(&jfs_log_mutex);
1239 
1240  return 0;
1241 }
1242 
1243 /*
1244  * NAME: lmLogInit()
1245  *
1246  * FUNCTION: log initialization at first log open.
1247  *
1248  * logredo() (or logformat()) should have been run previously.
1249  * initialize the log from log superblock.
1250  * set the log state in the superblock to LOGMOUNT and
1251  * write SYNCPT log record.
1252  *
1253  * PARAMETER: log - log structure
1254  *
1255  * RETURN: 0 - if ok
1256  * -EINVAL - bad log magic number or superblock dirty
1257  * error returned from logwait()
1258  *
1259  * serialization: single first open thread
1260  */
1261 int lmLogInit(struct jfs_log * log)
1262 {
1263  int rc = 0;
1264  struct lrd lrd;
1265  struct logsuper *logsuper;
1266  struct lbuf *bpsuper;
1267  struct lbuf *bp;
1268  struct logpage *lp;
1269  int lsn = 0;
1270 
1271  jfs_info("lmLogInit: log:0x%p", log);
1272 
1273  /* initialize the group commit serialization lock */
1274  LOGGC_LOCK_INIT(log);
1275 
1276  /* allocate/initialize the log write serialization lock */
1277  LOG_LOCK_INIT(log);
1278 
1279  LOGSYNC_LOCK_INIT(log);
1280 
1281  INIT_LIST_HEAD(&log->synclist);
1282 
1283  INIT_LIST_HEAD(&log->cqueue);
1284  log->flush_tblk = NULL;
1285 
1286  log->count = 0;
1287 
1288  /*
1289  * initialize log i/o
1290  */
1291  if ((rc = lbmLogInit(log)))
1292  return rc;
1293 
1294  if (!test_bit(log_INLINELOG, &log->flag))
1295  log->l2bsize = L2LOGPSIZE;
1296 
1297  /* check for disabled journaling to disk */
1298  if (log->no_integrity) {
1299  /*
1300  * Journal pages will still be filled. When the time comes
1301  * to actually do the I/O, the write is not done, and the
1302  * endio routine is called directly.
1303  */
1304  bp = lbmAllocate(log, 0);
1305  log->bp = bp;
1306  bp->l_pn = bp->l_eor = 0;
1307  } else {
1308  /*
1309  * validate log superblock
1310  */
1311  if ((rc = lbmRead(log, 1, &bpsuper)))
1312  goto errout10;
1313 
1314  logsuper = (struct logsuper *) bpsuper->l_ldata;
1315 
1316  if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1317  jfs_warn("*** Log Format Error ! ***");
1318  rc = -EINVAL;
1319  goto errout20;
1320  }
1321 
1322  /* logredo() should have been run successfully. */
1323  if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1324  jfs_warn("*** Log Is Dirty ! ***");
1325  rc = -EINVAL;
1326  goto errout20;
1327  }
1328 
1329  /* initialize log from log superblock */
1330  if (test_bit(log_INLINELOG,&log->flag)) {
1331  if (log->size != le32_to_cpu(logsuper->size)) {
1332  rc = -EINVAL;
1333  goto errout20;
1334  }
1335  jfs_info("lmLogInit: inline log:0x%p base:0x%Lx "
1336  "size:0x%x", log,
1337  (unsigned long long) log->base, log->size);
1338  } else {
1339  if (memcmp(logsuper->uuid, log->uuid, 16)) {
1340  jfs_warn("wrong uuid on JFS log device");
1341  goto errout20;
1342  }
1343  log->size = le32_to_cpu(logsuper->size);
1344  log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1345  jfs_info("lmLogInit: external log:0x%p base:0x%Lx "
1346  "size:0x%x", log,
1347  (unsigned long long) log->base, log->size);
1348  }
1349 
1350  log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1351  log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1352 
1353  /*
1354  * initialize for log append write mode
1355  */
1356  /* establish current/end-of-log page/buffer */
1357  if ((rc = lbmRead(log, log->page, &bp)))
1358  goto errout20;
1359 
1360  lp = (struct logpage *) bp->l_ldata;
1361 
1362  jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1363  le32_to_cpu(logsuper->end), log->page, log->eor,
1364  le16_to_cpu(lp->h.eor));
1365 
1366  log->bp = bp;
1367  bp->l_pn = log->page;
1368  bp->l_eor = log->eor;
1369 
1370  /* if current page is full, move on to next page */
1371  if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1372  lmNextPage(log);
1373 
1374  /*
1375  * initialize log syncpoint
1376  */
1377  /*
1378  * write the first SYNCPT record with syncpoint = 0
1379  * (i.e., log redo up to HERE !);
1380  * remove current page from lbm write queue at end of pageout
1381  * (to write log superblock update), but do not release to
1382  * freelist;
1383  */
1384  lrd.logtid = 0;
1385  lrd.backchain = 0;
1386  lrd.type = cpu_to_le16(LOG_SYNCPT);
1387  lrd.length = 0;
1388  lrd.log.syncpt.sync = 0;
1389  lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1390  bp = log->bp;
1391  bp->l_ceor = bp->l_eor;
1392  lp = (struct logpage *) bp->l_ldata;
1393  lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1394  lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1395  if ((rc = lbmIOWait(bp, 0)))
1396  goto errout30;
1397 
1398  /*
1399  * update/write superblock
1400  */
1401  logsuper->state = cpu_to_le32(LOGMOUNT);
1402  log->serial = le32_to_cpu(logsuper->serial) + 1;
1403  logsuper->serial = cpu_to_le32(log->serial);
1404  lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1405  if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1406  goto errout30;
1407  }
1408 
1409  /* initialize logsync parameters */
1410  log->logsize = (log->size - 2) << L2LOGPSIZE;
1411  log->lsn = lsn;
1412  log->syncpt = lsn;
1413  log->sync = log->syncpt;
1414  log->nextsync = LOGSYNC_DELTA(log->logsize);
1415 
1416  jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1417  log->lsn, log->syncpt, log->sync);
1418 
1419  /*
1420  * initialize for lazy/group commit
1421  */
1422  log->clsn = lsn;
1423 
1424  return 0;
1425 
1426  /*
1427  * unwind on error
1428  */
1429  errout30: /* release log page */
1430  log->wqueue = NULL;
1431  bp->l_wqnext = NULL;
1432  lbmFree(bp);
1433 
1434  errout20: /* release log superblock */
1435  lbmFree(bpsuper);
1436 
1437  errout10: /* unwind lbmLogInit() */
1438  lbmLogShutdown(log);
1439 
1440  jfs_warn("lmLogInit: exit(%d)", rc);
1441  return rc;
1442 }
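/*
 * logsize example: log->logsize = (log->size - 2) << L2LOGPSIZE
 * excludes the never-used page 0 and the superblock page 1; e.g. a
 * log->size of 1024 pages yields 1022 * 4 KB (assuming 4 KB log
 * pages) of usable, circular log space.
 */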
1443 
1445 /*
1446  * NAME: lmLogClose()
1447  *
1448  * FUNCTION: remove file system <ipmnt> from active list of log <iplog>
1449  * and close it on last close.
1450  *
1451  * PARAMETER: sb - superblock
1452  *
1453  * RETURN: errors from subroutines
1454  *
1455  * serialization:
1456  */
1457 int lmLogClose(struct super_block *sb)
1458 {
1459  struct jfs_sb_info *sbi = JFS_SBI(sb);
1460  struct jfs_log *log = sbi->log;
1461  struct block_device *bdev;
1462  int rc = 0;
1463 
1464  jfs_info("lmLogClose: log:0x%p", log);
1465 
1466  mutex_lock(&jfs_log_mutex);
1467  LOG_LOCK(log);
1468  list_del(&sbi->log_list);
1469  LOG_UNLOCK(log);
1470  sbi->log = NULL;
1471 
1472  /*
1473  * We need to make sure all of the "written" metapages
1474  * actually make it to disk
1475  */
1476  sync_blockdev(sb->s_bdev);
1477 
1478  if (test_bit(log_INLINELOG, &log->flag)) {
1479  /*
1480  * in-line log in host file system
1481  */
1482  rc = lmLogShutdown(log);
1483  kfree(log);
1484  goto out;
1485  }
1486 
1487  if (!log->no_integrity)
1488  lmLogFileSystem(log, sbi, 0);
1489 
1490  if (!list_empty(&log->sb_list))
1491  goto out;
1492 
1493  /*
1494  * TODO: ensure that the dummy_log is in a state to allow
1495  * lbmLogShutdown to deallocate all the buffers and call
1496  * kfree against dummy_log. For now, leave dummy_log & its
1497  * buffers in memory, and reuse them if another no-integrity mount
1498  * is requested.
1499  */
1500  if (log->no_integrity)
1501  goto out;
1502 
1503  /*
1504  * external log as separate logical volume
1505  */
1506  list_del(&log->journal_list);
1507  bdev = log->bdev;
1508  rc = lmLogShutdown(log);
1509 
1510  blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1511 
1512  kfree(log);
1513 
1514  out:
1515  mutex_unlock(&jfs_log_mutex);
1516  jfs_info("lmLogClose: exit(%d)", rc);
1517  return rc;
1518 }
1519 
1520 
1521 /*
1522  * NAME: jfs_flush_journal()
1523  *
1524  * FUNCTION: initiate write of any outstanding transactions to the journal
1525  * and optionally wait until they are all written to disk
1526  *
1527  * wait == 0 flush until latest txn is committed, don't wait
1528  * wait == 1 flush until latest txn is committed, wait
1529  * wait > 1 flush until all txn's are complete, wait
1530  */
1531 void jfs_flush_journal(struct jfs_log *log, int wait)
1532 {
1533  int i;
1534  struct tblock *target = NULL;
1535 
1536  /* jfs_write_inode may call us during read-only mount */
1537  if (!log)
1538  return;
1539 
1540  jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);
1541 
1542  LOGGC_LOCK(log);
1543 
1544  if (!list_empty(&log->cqueue)) {
1545  /*
1546  * This ensures that we will keep writing to the journal as long
1547  * as there are unwritten commit records
1548  */
1549  target = list_entry(log->cqueue.prev, struct tblock, cqueue);
1550 
1551  if (test_bit(log_FLUSH, &log->flag)) {
1552  /*
1553  * We're already flushing.
1554  * if flush_tblk is NULL, we are flushing everything,
1555  * so leave it that way. Otherwise, update it to the
1556  * latest transaction
1557  */
1558  if (log->flush_tblk)
1559  log->flush_tblk = target;
1560  } else {
1561  /* Only flush until latest transaction is committed */
1562  log->flush_tblk = target;
1563  set_bit(log_FLUSH, &log->flag);
1564 
1565  /*
1566  * Initiate I/O on outstanding transactions
1567  */
1568  if (!(log->cflag & logGC_PAGEOUT)) {
1569  log->cflag |= logGC_PAGEOUT;
1570  lmGCwrite(log, 0);
1571  }
1572  }
1573  }
1574  if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
1575  /* Flush until all activity complete */
1576  set_bit(log_FLUSH, &log->flag);
1577  log->flush_tblk = NULL;
1578  }
1579 
1580  if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
1581  DECLARE_WAITQUEUE(__wait, current);
1582 
1583  add_wait_queue(&target->gcwait, &__wait);
1584  set_current_state(TASK_UNINTERRUPTIBLE);
1585  LOGGC_UNLOCK(log);
1586  schedule();
1587  __set_current_state(TASK_RUNNING);
1588  LOGGC_LOCK(log);
1589  remove_wait_queue(&target->gcwait, &__wait);
1590  }
1591  LOGGC_UNLOCK(log);
1592 
1593  if (wait < 2)
1594  return;
1595 
1596  write_special_inodes(log, filemap_fdatawrite);
1597 
1598  /*
1599  * If there was recent activity, we may need to wait
1600  * for the lazycommit thread to catch up
1601  */
1602  if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
1603  for (i = 0; i < 200; i++) { /* Too much? */
1604  msleep(250);
1605  write_special_inodes(log, filemap_fdatawrite);
1606  if (list_empty(&log->cqueue) &&
1607  list_empty(&log->synclist))
1608  break;
1609  }
1610  }
1611  assert(list_empty(&log->cqueue));
1612 
1613 #ifdef CONFIG_JFS_DEBUG
1614  if (!list_empty(&log->synclist)) {
1615  struct logsyncblk *lp;
1616 
1617  printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
1618  list_for_each_entry(lp, &log->synclist, synclist) {
1619  if (lp->xflag & COMMIT_PAGE) {
1620  struct metapage *mp = (struct metapage *)lp;
1621  print_hex_dump(KERN_ERR, "metapage: ",
1622  DUMP_PREFIX_ADDRESS, 16, 4,
1623  mp, sizeof(struct metapage), 0);
1624  print_hex_dump(KERN_ERR, "page: ",
1625  DUMP_PREFIX_ADDRESS, 16,
1626  sizeof(long), mp->page,
1627  sizeof(struct page), 0);
1628  } else
1629  print_hex_dump(KERN_ERR, "tblock:",
1630  DUMP_PREFIX_ADDRESS, 16, 4,
1631  lp, sizeof(struct tblock), 0);
1632  }
1633  }
1634 #else
1635  WARN_ON(!list_empty(&log->synclist));
1636 #endif
1637  clear_bit(log_FLUSH, &log->flag);
1638 }
1639 
1640 /*
1641  * NAME: lmLogShutdown()
1642  *
1643  * FUNCTION: log shutdown at last LogClose().
1644  *
1645  * write log syncpt record.
1646  * update super block to set redone flag to 0.
1647  *
1648  * PARAMETER: log - log inode
1649  *
1650  * RETURN: 0 - success
1651  *
1652  * serialization: single last close thread
1653  */
1654 int lmLogShutdown(struct jfs_log * log)
1655 {
1656  int rc;
1657  struct lrd lrd;
1658  int lsn;
1659  struct logsuper *logsuper;
1660  struct lbuf *bpsuper;
1661  struct lbuf *bp;
1662  struct logpage *lp;
1663 
1664  jfs_info("lmLogShutdown: log:0x%p", log);
1665 
1666  jfs_flush_journal(log, 2);
1667 
1668  /*
1669  * write the last SYNCPT record with syncpoint = 0
1670  * (i.e., log redo up to HERE !)
1671  */
1672  lrd.logtid = 0;
1673  lrd.backchain = 0;
1674  lrd.type = cpu_to_le16(LOG_SYNCPT);
1675  lrd.length = 0;
1676  lrd.log.syncpt.sync = 0;
1677 
1678  lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1679  bp = log->bp;
1680  lp = (struct logpage *) bp->l_ldata;
1681  lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1682  lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
1683  lbmIOWait(log->bp, lbmFREE);
1684  log->bp = NULL;
1685 
1686  /*
1687  * synchronous update log superblock
1688  * mark log state as shutdown cleanly
1689  * (i.e., Log does not need to be replayed).
1690  */
1691  if ((rc = lbmRead(log, 1, &bpsuper)))
1692  goto out;
1693 
1694  logsuper = (struct logsuper *) bpsuper->l_ldata;
1695  logsuper->state = cpu_to_le32(LOGREDONE);
1696  logsuper->end = cpu_to_le32(lsn);
1697  lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1698  rc = lbmIOWait(bpsuper, lbmFREE);
1699 
1700  jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
1701  lsn, log->page, log->eor);
1702 
1703  out:
1704  /*
1705  * shutdown per log i/o
1706  */
1707  lbmLogShutdown(log);
1708 
1709  if (rc) {
1710  jfs_warn("lmLogShutdown: exit(%d)", rc);
1711  }
1712  return rc;
1713 }
1714 
1715 
1716 /*
1717  * NAME: lmLogFileSystem()
1718  *
1719  * FUNCTION: insert (<activate> = true)/remove (<activate> = false)
1720  * file system into/from log active file system list.
1721  *
1722  * PARAMETER: log - pointer to log's inode.
1723  * fsdev - kdev_t of filesystem.
1724  * serial - pointer to returned log serial number
1725  * activate - insert/remove device from active list.
1726  *
1727  * RETURN: 0 - success
1728  * errors returned by vms_iowait().
1729  */
1730 static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
1731  int activate)
1732 {
1733  int rc = 0;
1734  int i;
1735  struct logsuper *logsuper;
1736  struct lbuf *bpsuper;
1737  char *uuid = sbi->uuid;
1738 
1739  /*
1740  * insert/remove file system device to log active file system list.
1741  */
1742  if ((rc = lbmRead(log, 1, &bpsuper)))
1743  return rc;
1744 
1745  logsuper = (struct logsuper *) bpsuper->l_ldata;
1746  if (activate) {
1747  for (i = 0; i < MAX_ACTIVE; i++)
1748  if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) {
1749  memcpy(logsuper->active[i].uuid, uuid, 16);
1750  sbi->aggregate = i;
1751  break;
1752  }
1753  if (i == MAX_ACTIVE) {
1754  jfs_warn("Too many file systems sharing journal!");
1755  lbmFree(bpsuper);
1756  return -EMFILE; /* Is there a better rc? */
1757  }
1758  } else {
1759  for (i = 0; i < MAX_ACTIVE; i++)
1760  if (!memcmp(logsuper->active[i].uuid, uuid, 16)) {
1761  memcpy(logsuper->active[i].uuid, NULL_UUID, 16);
1762  break;
1763  }
1764  if (i == MAX_ACTIVE) {
1765  jfs_warn("Somebody stomped on the journal!");
1766  lbmFree(bpsuper);
1767  return -EIO;
1768  }
1769 
1770  }
1771 
1772  /*
1773  * synchronous write log superblock:
1774  *
1775  * write sidestream bypassing write queue:
1776  * at file system mount, log super block is updated for
1777  * activation of the file system before any log record
1778  * (MOUNT record) of the file system, and at file system
1779  * unmount, all meta data for the file system has been
1780  * flushed before log super block is updated for deactivation
1781  * of the file system.
1782  */
1783  lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1784  rc = lbmIOWait(bpsuper, lbmFREE);
1785 
1786  return rc;
1787 }
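/*
 * note: the active[] uuid table in the log superblock caps the number
 * of file systems sharing one external journal at MAX_ACTIVE; a slot
 * is claimed at mount (activate) and cleared back to NULL_UUID at
 * unmount, so after a crash the surviving entries identify the file
 * systems that still need log replay.
 */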
1788 
1789 /*
1790  * log buffer manager (lbm)
1791  * ------------------------
1792  *
1793  * special purpose buffer manager supporting log i/o requirements.
1794  *
1795  * per log write queue:
1796  * log pageout occurs in serial order via a fifo write queue,
1797  * restricting to a single i/o in progress at any one time:
1798  * a circular singly-linked list
1799  * (log->wqueue points to the tail, and buffers are linked via
1800  * the bp->l_wqnext field), which
1801  * maintains each log page in pageout or waiting for pageout, in serial order.
1802  */
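/*
 * write queue shape, from the fields used by lbmWrite()/lbmIODone():
 *
 *	log->wqueue --> [tail] --l_wqnext--> [head] --> ... --> [tail]
 *
 * log->wqueue names the tail of a circular singly-linked list and
 * tail->l_wqnext is the head; only the head buffer may have i/o in
 * progress, which is what keeps log pageout strictly serial.
 */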
1803 
1804 /*
1805  * lbmLogInit()
1806  *
1807  * initialize per log I/O setup at lmLogInit()
1808  */
1809 static int lbmLogInit(struct jfs_log * log)
1810 { /* log inode */
1811  int i;
1812  struct lbuf *lbuf;
1813 
1814  jfs_info("lbmLogInit: log:0x%p", log);
1815 
1816  /* initialize current buffer cursor */
1817  log->bp = NULL;
1818 
1819  /* initialize log device write queue */
1820  log->wqueue = NULL;
1821 
1822  /*
1823  * Each log has its own buffer pages allocated to it. These are
1824  * not managed by the page cache. This ensures that a transaction
1825  * writing to the log does not block trying to allocate a page from
1826  * the page cache (for the log). This would be bad, since page
1827  * allocation waits on the kswapd thread that may be committing inodes
1828  * which would cause log activity. Was that clear? I'm trying to
1829  * avoid deadlock here.
1830  */
1831  init_waitqueue_head(&log->free_wait);
1832 
1833  log->lbuf_free = NULL;
1834 
1835  for (i = 0; i < LOGPAGES;) {
1836  char *buffer;
1837  uint offset;
1838  struct page *page;
1839 
1840  buffer = (char *) get_zeroed_page(GFP_KERNEL);
1841  if (buffer == NULL)
1842  goto error;
1843  page = virt_to_page(buffer);
1844  for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
1845  lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1846  if (lbuf == NULL) {
1847  if (offset == 0)
1848  free_page((unsigned long) buffer);
1849  goto error;
1850  }
1851  if (offset) /* we already have one reference */
1852  get_page(page);
1853  lbuf->l_offset = offset;
1854  lbuf->l_ldata = buffer + offset;
1855  lbuf->l_page = page;
1856  lbuf->l_log = log;
1857  init_waitqueue_head(&lbuf->l_ioevent);
1858 
1859  lbuf->l_freelist = log->lbuf_free;
1860  log->lbuf_free = lbuf;
1861  i++;
1862  }
1863  }
1864 
1865  return (0);
1866 
1867  error:
1868  lbmLogShutdown(log);
1869  return -ENOMEM;
1870 }
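/*
 * buffer carving example: each get_zeroed_page() is split into
 * PAGE_SIZE / LOGPSIZE lbufs; with 4 KB pages and 4 KB log pages that
 * is one lbuf per page, while on larger-page architectures several
 * lbufs share one page, get_page() taking an extra reference for each
 * lbuf after the first so that the __free_page() calls in
 * lbmLogShutdown() balance exactly.
 */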
1871 
1872 
1873 /*
1874  * lbmLogShutdown()
1875  *
1876  * finalize per log I/O setup at lmLogShutdown()
1877  */
1878 static void lbmLogShutdown(struct jfs_log * log)
1879 {
1880  struct lbuf *lbuf;
1881 
1882  jfs_info("lbmLogShutdown: log:0x%p", log);
1883 
1884  lbuf = log->lbuf_free;
1885  while (lbuf) {
1886  struct lbuf *next = lbuf->l_freelist;
1887  __free_page(lbuf->l_page);
1888  kfree(lbuf);
1889  lbuf = next;
1890  }
1891 }
1892 
1893 
1894 /*
1895  * lbmAllocate()
1896  *
1897  * allocate an empty log buffer
1898  */
1899 static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1900 {
1901  struct lbuf *bp;
1902  unsigned long flags;
1903 
1904  /*
1905  * recycle from log buffer freelist if any
1906  */
1907  LCACHE_LOCK(flags);
1908  LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1909  log->lbuf_free = bp->l_freelist;
1910  LCACHE_UNLOCK(flags);
1911 
1912  bp->l_flag = 0;
1913 
1914  bp->l_wqnext = NULL;
1915  bp->l_freelist = NULL;
1916 
1917  bp->l_pn = pn;
1918  bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1919  bp->l_ceor = 0;
1920 
1921  return bp;
1922 }
1923 
1924 
1925 /*
1926  * lbmFree()
1927  *
1928  * release a log buffer to freelist
1929  */
1930 static void lbmFree(struct lbuf * bp)
1931 {
1932  unsigned long flags;
1933 
1934  LCACHE_LOCK(flags);
1935 
1936  lbmfree(bp);
1937 
1938  LCACHE_UNLOCK(flags);
1939 }
1940 
1941 static void lbmfree(struct lbuf * bp)
1942 {
1943  struct jfs_log *log = bp->l_log;
1944 
1945  assert(bp->l_wqnext == NULL);
1946 
1947  /*
1948  * return the buffer to head of freelist
1949  */
1950  bp->l_freelist = log->lbuf_free;
1951  log->lbuf_free = bp;
1952 
1953  wake_up(&log->free_wait);
1954  return;
1955 }
1956 
1957 
1958 /*
1959  * NAME: lbmRedrive
1960  *
1961  * FUNCTION: add a log buffer to the log redrive list
1962  *
1963  * PARAMETER:
1964  * bp - log buffer
1965  *
1966  * NOTES:
1967  * Takes log_redrive_lock.
1968  */
1969 static inline void lbmRedrive(struct lbuf *bp)
1970 {
1971  unsigned long flags;
1972 
1973  spin_lock_irqsave(&log_redrive_lock, flags);
1974  bp->l_redrive_next = log_redrive_list;
1975  log_redrive_list = bp;
1976  spin_unlock_irqrestore(&log_redrive_lock, flags);
1977 
1978  wake_up_process(jfsIOthread);
1979 }
1980 
1981 
1982 /*
1983  * lbmRead()
1984  */
1985 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1986 {
1987  struct bio *bio;
1988  struct lbuf *bp;
1989 
1990  /*
1991  * allocate a log buffer
1992  */
1993  *bpp = bp = lbmAllocate(log, pn);
1994  jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1995 
1996  bp->l_flag |= lbmREAD;
1997 
1998  bio = bio_alloc(GFP_NOFS, 1);
1999 
2000  bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
2001  bio->bi_bdev = log->bdev;
2002  bio->bi_io_vec[0].bv_page = bp->l_page;
2003  bio->bi_io_vec[0].bv_len = LOGPSIZE;
2004  bio->bi_io_vec[0].bv_offset = bp->l_offset;
2005 
2006  bio->bi_vcnt = 1;
2007  bio->bi_idx = 0;
2008  bio->bi_size = LOGPSIZE;
2009 
2010  bio->bi_end_io = lbmIODone;
2011  bio->bi_private = bp;
2012  submit_bio(READ_SYNC, bio);
2013 
2014  wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
2015 
2016  return 0;
2017 }
2018 
2019 
2020 /*
2021  * lbmWrite()
2022  *
2023  * buffer at head of pageout queue stays after completion of
2024  * partial-page pageout and redriven by explicit initiation of
2025  * pageout by caller until full-page pageout is completed and
2026  * released.
2027  *
2028  * device driver i/o done redrives pageout of new buffer at
2029  * head of pageout queue when current buffer at head of pageout
2030  * queue is released at the completion of its full-page pageout.
2031  *
2032  * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2033  * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2034  */
2035 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2036  int cant_block)
2037 {
2038  struct lbuf *tail;
2039  unsigned long flags;
2040 
2041  jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
2042 
2043  /* map the logical block address to physical block address */
2044  bp->l_blkno =
2045  log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2046 
2047  LCACHE_LOCK(flags); /* disable+lock */
2048 
2049  /*
2050  * initialize buffer for device driver
2051  */
2052  bp->l_flag = flag;
2053 
2054  /*
2055  * insert bp at tail of write queue associated with log
2056  *
2057  * (request is either for bp already/currently at head of queue
2058  * or new bp to be inserted at tail)
2059  */
2060  tail = log->wqueue;
2061 
2062  /* is buffer not already on write queue ? */
2063  if (bp->l_wqnext == NULL) {
2064  /* insert at tail of wqueue */
2065  if (tail == NULL) {
2066  log->wqueue = bp;
2067  bp->l_wqnext = bp;
2068  } else {
2069  log->wqueue = bp;
2070  bp->l_wqnext = tail->l_wqnext;
2071  tail->l_wqnext = bp;
2072  }
2073 
2074  tail = bp;
2075  }
2076 
2077  /* is buffer at head of wqueue and for write ? */
2078  if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
2079  LCACHE_UNLOCK(flags); /* unlock+enable */
2080  return;
2081  }
2082 
2083  LCACHE_UNLOCK(flags); /* unlock+enable */
2084 
2085  if (cant_block)
2086  lbmRedrive(bp);
2087  else if (flag & lbmSYNC)
2088  lbmStartIO(bp);
2089  else {
2090  LOGGC_UNLOCK(log);
2091  lbmStartIO(bp);
2092  LOGGC_LOCK(log);
2093  }
2094 }
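/*
 * Write-queue shape, sketched: log->wqueue points at the TAIL of a
 * circular, singly-linked list, and tail->l_wqnext is the HEAD, so
 * both the tail insertion above and the head redrive in lbmIODone()
 * are O(1).  With buffers A (head), B, C (tail) queued:
 *
 *	log->wqueue -> C -> A -> B -> C -> ...
 *
 * Only the head buffer has I/O in flight; the others wait until
 * lbmIODone() releases the head and redrives the next one.
 */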
2095 
2096 
2097 /*
2098  * lbmDirectWrite()
2099  *
2100  * initiate pageout, bypassing the write queue, for a sidestream
2101  * (e.g., log superblock) write;
2102  */
2103 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2104 {
2105  jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2106  bp, flag, bp->l_pn);
2107 
2108  /*
2109  * initialize buffer for device driver
2110  */
2111  bp->l_flag = flag | lbmDIRECT;
2112 
2113  /* map the logical block address to physical block address */
2114  bp->l_blkno =
2115  log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2116 
2117  /*
2118  * initiate pageout of the page
2119  */
2120  lbmStartIO(bp);
2121 }
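/*
 * Usage sketch, modeled on the log superblock rewrite in
 * lmLogShutdown(): a sidestream write is started here and then
 * waited for (and the buffer freed) via lbmIOWait() below:
 */
#if 0	/* sketch only */
	logsuper->state = cpu_to_le32(LOGREDONE);
	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
	rc = lbmIOWait(bpsuper, lbmFREE);	/* wait, then free lbuf */
#endif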
2122 
2123 
2124 /*
2125  * NAME: lbmStartIO()
2126  *
2127  * FUNCTION: Interface to DD strategy routine
2128  *
2129  * RETURN: none
2130  *
2131  * serialization: LCACHE_LOCK() is NOT held during log i/o;
2132  */
2133 static void lbmStartIO(struct lbuf * bp)
2134 {
2135  struct bio *bio;
2136  struct jfs_log *log = bp->l_log;
2137 
2138  jfs_info("lbmStartIO\n");
2139 
2140  bio = bio_alloc(GFP_NOFS, 1);
2141  bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
2142  bio->bi_bdev = log->bdev;
2143  bio->bi_io_vec[0].bv_page = bp->l_page;
2144  bio->bi_io_vec[0].bv_len = LOGPSIZE;
2145  bio->bi_io_vec[0].bv_offset = bp->l_offset;
2146 
2147  bio->bi_vcnt = 1;
2148  bio->bi_idx = 0;
2149  bio->bi_size = LOGPSIZE;
2150 
2151  bio->bi_end_io = lbmIODone;
2152  bio->bi_private = bp;
2153 
2154  /* check if journaling to disk has been disabled */
2155  if (log->no_integrity) {
2156  bio->bi_size = 0;
2157  lbmIODone(bio, 0);
2158  } else {
2159  submit_bio(WRITE_SYNC, bio);
2160  INCREMENT(lmStat.submitted);
2161  }
2162 }
2163 
2164 
2165 /*
2166  * lbmIOWait()
2167  */
2168 static int lbmIOWait(struct lbuf * bp, int flag)
2169 {
2170  unsigned long flags;
2171  int rc = 0;
2172 
2173  jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2174 
2175  LCACHE_LOCK(flags); /* disable+lock */
2176 
2177  LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2178 
2179  rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2180 
2181  if (flag & lbmFREE)
2182  lbmfree(bp);
2183 
2184  LCACHE_UNLOCK(flags); /* unlock+enable */
2185 
2186  jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2187  return rc;
2188 }
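/*
 * Note: passing lbmFREE releases the buffer under the same
 * LCACHE_LOCK hold that observed lbmDONE, so no other thread can
 * touch the lbuf between the wakeup and its return to the freelist.
 */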
2189 
2190 /*
2191  * lbmIODone()
2192  *
2193  * executed at INTIODONE level
2194  */
2195 static void lbmIODone(struct bio *bio, int error)
2196 {
2197  struct lbuf *bp = bio->bi_private;
2198  struct lbuf *nextbp, *tail;
2199  struct jfs_log *log;
2200  unsigned long flags;
2201 
2202  /*
2203  * get back jfs buffer bound to the i/o buffer
2204  */
2205  jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
2206 
2207  LCACHE_LOCK(flags); /* disable+lock */
2208 
2209  bp->l_flag |= lbmDONE;
2210 
2211  if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
2212  bp->l_flag |= lbmERROR;
2213 
2214  jfs_err("lbmIODone: I/O error in JFS log");
2215  }
2216 
2217  bio_put(bio);
2218 
2219  /*
2220  * pagein completion
2221  */
2222  if (bp->l_flag & lbmREAD) {
2223  bp->l_flag &= ~lbmREAD;
2224 
2225  LCACHE_UNLOCK(flags); /* unlock+enable */
2226 
2227  /* wakeup I/O initiator */
2228  LCACHE_WAKEUP(&bp->l_ioevent);
2229 
2230  return;
2231  }
2232 
2233  /*
2234  * pageout completion
2235  *
2236  * the bp at the head of write queue has completed pageout.
2237  *
2238  * if single-commit/full-page pageout, remove the current buffer
2239  * from head of pageout queue, and redrive pageout with
2240  * the new buffer at head of pageout queue;
2241  * otherwise, the partial-page pageout buffer stays at
2242  * the head of pageout queue to be redriven for pageout
2243  * by lmGroupCommit() until full-page pageout is completed.
2244  */
2245  bp->l_flag &= ~lbmWRITE;
2246  INCREMENT(lmStat.pagedone);
2247 
2248  /* update committed lsn */
2249  log = bp->l_log;
2250  log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
2251 
2252  if (bp->l_flag & lbmDIRECT) {
2253  LCACHE_WAKEUP(&bp->l_ioevent);
2254  LCACHE_UNLOCK(flags);
2255  return;
2256  }
2257 
2258  tail = log->wqueue;
2259 
2260  /* single element queue */
2261  if (bp == tail) {
2262  /* remove head buffer of full-page pageout
2263  * from log device write queue
2264  */
2265  if (bp->l_flag & lbmRELEASE) {
2266  log->wqueue = NULL;
2267  bp->l_wqnext = NULL;
2268  }
2269  }
2270  /* multi element queue */
2271  else {
2272  /* remove head buffer of full-page pageout
2273  * from log device write queue
2274  */
2275  if (bp->l_flag & lbmRELEASE) {
2276  nextbp = tail->l_wqnext = bp->l_wqnext;
2277  bp->l_wqnext = NULL;
2278 
2279  /*
2280  * redrive pageout of the next page at the head of the
2281  * write queue: either a page without any bound tblk
2282  * (i.e., a page with no COMMIT records), or the first
2283  * page of a new group commit that was queued after the
2284  * current page (subsequent pageout is performed
2285  * synchronously, except for pages without any COMMITs)
2286  * by lmGroupCommit(), as indicated by the lbmWRITE
2287  * flag;
2288  */
2289  if (nextbp->l_flag & lbmWRITE) {
2290  /*
2291  * We can't do the I/O at interrupt time.
2292  * The jfsIO thread can do it
2293  */
2294  lbmRedrive(nextbp);
2295  }
2296  }
2297  }
2298 
2299  /*
2300  * synchronous pageout:
2301  *
2302  * buffer has not necessarily been removed from write queue
2303  * (e.g., synchronous write of partial-page with COMMIT):
2304  * leave buffer for i/o initiator to dispose
2305  */
2306  if (bp->l_flag & lbmSYNC) {
2307  LCACHE_UNLOCK(flags); /* unlock+enable */
2308 
2309  /* wakeup I/O initiator */
2310  LCACHE_WAKEUP(&bp->l_ioevent);
2311  }
2312 
2313  /*
2314  * Group Commit pageout:
2315  */
2316  else if (bp->l_flag & lbmGC) {
2317  LCACHE_UNLOCK(flags);
2318  lmPostGC(bp);
2319  }
2320 
2321  /*
2322  * asynchronous pageout:
2323  *
2324  * buffer must have been removed from write queue:
2325  * insert buffer at head of freelist where it can be recycled
2326  */
2327  else {
2328  assert(bp->l_flag & lbmRELEASE);
2329  assert(bp->l_flag & lbmFREE);
2330  lbmfree(bp);
2331 
2332  LCACHE_UNLOCK(flags); /* unlock+enable */
2333  }
2334 }
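/*
 * The three pageout dispositions above, summarized:
 *
 *	lbmSYNC		wake the waiter in lbmIOWait(), which
 *			disposes of the buffer itself
 *	lbmGC		hand the buffer to lmPostGC() for
 *			group-commit processing
 *	otherwise	asynchronous write: the buffer carries
 *			lbmRELEASE|lbmFREE and is recycled here
 */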
2335 
2336 int jfsIOWait(void *arg)
2337 {
2338  struct lbuf *bp;
2339 
2340  do {
2341  spin_lock_irq(&log_redrive_lock);
2342  while ((bp = log_redrive_list)) {
2343  log_redrive_list = bp->l_redrive_next;
2344  bp->l_redrive_next = NULL;
2345  spin_unlock_irq(&log_redrive_lock);
2346  lbmStartIO(bp);
2347  spin_lock_irq(&log_redrive_lock);
2348  }
2349 
2350  if (freezing(current)) {
2351  spin_unlock_irq(&log_redrive_lock);
2352  try_to_freeze();
2353  } else {
2354  set_current_state(TASK_INTERRUPTIBLE);
2355  spin_unlock_irq(&log_redrive_lock);
2356  schedule();
2357  __set_current_state(TASK_RUNNING);
2358  }
2359  } while (!kthread_should_stop());
2360 
2361  jfs_info("jfsIOWait being killed!");
2362  return 0;
2363 }
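/*
 * Sketch: jfsIOWait() is the body of the jfsIO kernel thread, started
 * at module init time roughly as follows (see jfs_super.c):
 */
#if 0	/* sketch only */
	jfsIOthread = kthread_run(jfsIOWait, NULL, "jfsIO");
	if (IS_ERR(jfsIOthread)) {
		rc = PTR_ERR(jfsIOthread);
		/* bail out: the log manager cannot run without it */
	}
#endif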
2364 
2365 /*
2366  * NAME: lmLogFormat()/jfs_logform()
2367  *
2368  * FUNCTION: format file system log
2369  *
2370  * PARAMETERS:
2371  * log - volume log
2372  * logAddress - start address of log space in FS block
2373  * logSize - length of log space in FS block;
2374  *
2375  * RETURN: 0 - success
2376  * -EIO - i/o error
2377  *
2378  * XXX: We're synchronously writing one page at a time. This needs to
2379  * be improved by writing multiple pages at once.
2380  */
2381 int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2382 {
2383  int rc = -EIO;
2384  struct jfs_sb_info *sbi;
2385  struct logsuper *logsuper;
2386  struct logpage *lp;
2387  int lspn; /* log sequence page number */
2388  struct lrd *lrd_ptr;
2389  int npages = 0;
2390  struct lbuf *bp;
2391 
2392  jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
2393  (long long)logAddress, logSize);
2394 
2395  sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);
2396 
2397  /* allocate a log buffer */
2398  bp = lbmAllocate(log, 1);
2399 
2400  npages = logSize >> sbi->l2nbperpage;
2401 
2402  /*
2403  * log space:
2404  *
2405  * page 0 - reserved;
2406  * page 1 - log superblock;
2407  * page 2 - log data page: a SYNCPT log record is written
2408  * into this page at logform time;
2409  * pages 3-N - log data page: set to empty log data pages;
2410  */
2411  /*
2412  * init log superblock: log page 1
2413  */
2414  logsuper = (struct logsuper *) bp->l_ldata;
2415 
2416  logsuper->magic = cpu_to_le32(LOGMAGIC);
2417  logsuper->version = cpu_to_le32(LOGVERSION);
2418  logsuper->state = cpu_to_le32(LOGREDONE);
2419  logsuper->flag = cpu_to_le32(sbi->mntflag); /* ? */
2420  logsuper->size = cpu_to_le32(npages);
2421  logsuper->bsize = cpu_to_le32(sbi->bsize);
2422  logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
2423  logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
2424 
2425  bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2426  bp->l_blkno = logAddress + sbi->nbperpage;
2427  lbmStartIO(bp);
2428  if ((rc = lbmIOWait(bp, 0)))
2429  goto exit;
2430 
2431  /*
2432  * init pages 2 to npages-1 as log data pages:
2433  *
2434  * log page sequence number (lpsn) initialization:
2435  *
2436  *	pn:    0     1     2     3                 n-1
2437  *	       +-----+-----+=====+=====+===.....===+=====+
2438  *	lspn:              N-1   0     1           N-2
2439  *	                   <--- N page circular file ---->
2440  *
2441  * the N (= npages-2) data pages of the log are maintained as
2442  * a circular file for the log records;
2443  * lpsn grows by 1 monotonically as each log page is written
2444  * to the circular file of the log;
2445  * and setLogpage() will not reset the page number even if
2446  * the eor is equal to LOGPHDRSIZE. In order for the binary
2447  * search to still work in the find-log-end process, we have
2448  * to simulate the log wrap situation at log format time.
2449  * The 1st log page written will have the highest lpsn. Then
2450  * the succeeding log pages will have ascending lpsn's,
2451  * starting from 0, ..., (N-2).
2452  */
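	/*
	 * Worked example (illustrative): with npages = 16 there are
	 * N = 14 data pages (pn 2..15).  The first data page written
	 * (pn 2) gets lpsn N-1 = 13, and pn 3..15 then get lpsn
	 * 0..12, so the sequence already "wraps" exactly once, as it
	 * would after a full pass over a live circular log.
	 */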
2453  lp = (struct logpage *) bp->l_ldata;
2454  /*
2455  * initialize 1st log page to be written: lpsn = N - 1,
2456  * a SYNCPT log record is written into this page
2457  */
2458  lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
2459  lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
2460 
2461  lrd_ptr = (struct lrd *) &lp->data;
2462  lrd_ptr->logtid = 0;
2463  lrd_ptr->backchain = 0;
2464  lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
2465  lrd_ptr->length = 0;
2466  lrd_ptr->log.syncpt.sync = 0;
2467 
2468  bp->l_blkno += sbi->nbperpage;
2469  bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2470  lbmStartIO(bp);
2471  if ((rc = lbmIOWait(bp, 0)))
2472  goto exit;
2473 
2474  /*
2475  * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
2476  */
2477  for (lspn = 0; lspn < npages - 3; lspn++) {
2478  lp->h.page = lp->t.page = cpu_to_le32(lspn);
2479  lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
2480 
2481  bp->l_blkno += sbi->nbperpage;
2482  bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2483  lbmStartIO(bp);
2484  if ((rc = lbmIOWait(bp, 0)))
2485  goto exit;
2486  }
2487 
2488  rc = 0;
2489 exit:
2490  /*
2491  * finalize log
2492  */
2493  /* release the buffer */
2494  lbmFree(bp);
2495 
2496  return rc;
2497 }
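/*
 * Usage sketch: the kernel-side caller is the volume-resize path,
 * which reformats the inline log after relocating it, roughly as in
 * jfs_extendfs() (jfs_resize.c):
 */
#if 0	/* sketch only */
	if ((rc = lmLogFormat(log, newLogAddress, newLogSize)))
		goto error_out;
#endif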
2498 
2499 #ifdef CONFIG_JFS_STATISTICS
2500 static int jfs_lmstats_proc_show(struct seq_file *m, void *v)
2501 {
2502  seq_printf(m,
2503  "JFS Logmgr stats\n"
2504  "================\n"
2505  "commits = %d\n"
2506  "writes submitted = %d\n"
2507  "writes completed = %d\n"
2508  "full pages submitted = %d\n"
2509  "partial pages submitted = %d\n",
2510  lmStat.commit,
2511  lmStat.submitted,
2512  lmStat.pagedone,
2513  lmStat.full_page,
2514  lmStat.partial_page);
2515  return 0;
2516 }
2517 
2518 static int jfs_lmstats_proc_open(struct inode *inode, struct file *file)
2519 {
2520  return single_open(file, jfs_lmstats_proc_show, NULL);
2521 }
2522 
2523 const struct file_operations jfs_lmstats_proc_fops = {
2524  .owner = THIS_MODULE,
2525  .open = jfs_lmstats_proc_open,
2526  .read = seq_read,
2527  .llseek = seq_lseek,
2528  .release = single_release,
2529 };
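/*
 * Note: jfs_proc_init() (jfs_debug.c) registers these fops as
 * "lmstats" under /proc/fs/jfs, so the counters can be inspected
 * with, e.g.:
 *
 *	$ cat /proc/fs/jfs/lmstats
 */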
2530 #endif /* CONFIG_JFS_STATISTICS */