Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
jfs_logmgr.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) International Business Machines Corp., 2000-2004
3  * Portions Copyright (C) Christoph Hellwig, 2001-2002
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13  * the GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  */
19 #ifndef _H_JFS_LOGMGR
20 #define _H_JFS_LOGMGR
21 
22 #include "jfs_filsys.h"
23 #include "jfs_lock.h"
24 
/*
 *	log manager configuration parameters
 */

/*
 * log page size in bytes and its base-2 logarithm;
 * the two must be kept in step: (1 << L2LOGPSIZE) == LOGPSIZE
 */
#define	LOGPSIZE	4096
#define	L2LOGPSIZE	12

#define LOGPAGES	16	/* Log pages per mounted file system */
34 
/*
 *	log logical volume
 *
 * a log is used to make the commit operation on journalled
 * files within the same logical volume group atomic.
 * a log is implemented with a logical volume.
 * there is one log per logical volume group.
 *
 * block 0 of the log logical volume is not used (ipl etc).
 * block 1 contains a log "superblock" and is used by logFormat(),
 * lmLogInit(), lmLogShutdown(), and logRedo() to record status
 * of the log but is not otherwise used during normal processing.
 * blocks 2 - (N-1) are used to contain log records.
 *
 * when a volume group is varied-on-line, logRedo() must have
 * been executed before the file systems (logical volumes) in
 * the volume group can be mounted.
 */
/*
 *	log superblock (block 1 of logical volume)
 */
#define	LOGSUPER_B	1	/* block holding the log superblock */
#define	LOGSTART_B	2	/* first block holding log records */

#define	LOGMAGIC	0x87654321	/* value of logsuper.magic */
#define	LOGVERSION	1		/* value of logsuper.version */

#define MAX_ACTIVE	128	/* Max active file systems sharing log */
63 
64 struct logsuper {
65  __le32 magic; /* 4: log lv identifier */
66  __le32 version; /* 4: version number */
67  __le32 serial; /* 4: log open/mount counter */
68  __le32 size; /* 4: size in number of LOGPSIZE blocks */
69  __le32 bsize; /* 4: logical block size in byte */
70  __le32 l2bsize; /* 4: log2 of bsize */
71 
72  __le32 flag; /* 4: option */
73  __le32 state; /* 4: state - see below */
74 
75  __le32 end; /* 4: addr of last log record set by logredo */
76  char uuid[16]; /* 16: 128-bit journal uuid */
77  char label[16]; /* 16: journal label */
78  struct {
79  char uuid[16];
80  } active[MAX_ACTIVE]; /* 2048: active file systems list */
81 };
82 
/*
 * 16 zero bytes (plus the implicit string terminator), the same length as
 * the uuid[16] fields in struct logsuper.
 * NOTE(review): presumably marks an unused active[] slot - confirm against
 * the callers.
 */
#define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
84 
/* log flag: commit option (see jfs_filsys.h) */

/* log state */
#define	LOGMOUNT	0	/* log mounted by lmLogInit() */
#define LOGREDONE	1	/* log shutdown by lmLogShutdown().
				 * log redo completed by logredo().
				 */
#define LOGWRAP		2	/* log wrapped */
#define LOGREADERR	3	/* log read error detected in logredo() */
95 
96 /*
97  * log logical page
98  *
99  * (this comment should be rewritten !)
100  * the header and trailer structures (h,t) will normally have
101  * the same page and eor value.
102  * An exception to this occurs when a complete page write is not
103  * accomplished on a power failure. Since the hardware may "split write"
104  * sectors in the page, any out of order sequence may occur during powerfail
105  * and needs to be recognized during log replay. The xor value is
106  * an "exclusive or" of all log words in the page up to eor. This
107  * 32 bit eor is stored with the top 16 bits in the header and the
108  * bottom 16 bits in the trailer. logredo can easily recognize pages
109  * that were not completed by reconstructing this eor and checking
110  * the log page.
111  *
112  * Previous versions of the operating system did not allow split
113  * writes and detected partially written records in logredo by
114  * ordering the updates to the header, trailer, and the move of data
115  * into the logdata area. The order: (1) data is moved (2) header
116  * is updated (3) trailer is updated. In logredo, when the header
117  * differed from the trailer, the header and trailer were reconciled
118  * as follows: if h.page != t.page they were set to the smaller of
119  * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
120  * h.eor != t.eor they were set to the smaller of their two values.
121  */
122 struct logpage {
123  struct { /* header */
124  __le32 page; /* 4: log sequence page number */
125  __le16 rsrvd; /* 2: */
126  __le16 eor; /* 2: end-of-log offset of lasrt record write */
127  } h;
128 
129  __le32 data[LOGPSIZE / 4 - 4]; /* log record area */
130 
131  struct { /* trailer */
132  __le32 page; /* 4: normally the same as h.page */
133  __le16 rsrvd; /* 2: */
134  __le16 eor; /* 2: normally the same as h.eor */
135  } t;
136 };
/* sizes in bytes of the h and t members of struct logpage */
#define LOGPHDRSIZE	8	/* log page header size */
#define LOGPTLRSIZE	8	/* log page trailer size */
140 
141 
/*
 *	log record
 *
 * (this comment should be rewritten !)
 * jfs uses only "after" log records (only a single writer is allowed
 * in a page, pages are written to temporary paging space
 * if they must be written to disk before commit, and i/o is
 * scheduled for modified pages to their home location after
 * the log records containing the after values and the commit
 * record is written to the log on disk, undo discards the copy
 * in main-memory.)
 *
 * a log record consists of a data area of variable length followed by
 * a descriptor of fixed size LOGRDSIZE bytes.
 * the data area is rounded up to an integral number of 4-bytes and
 * must be no longer than LOGPSIZE.
 * the descriptor is of size of multiple of 4-bytes and aligned on a
 * 4-byte boundary.
 * records are packed one after the other in the data area of log pages.
 * (sometimes a DUMMY record is inserted so that at least one record ends
 * on every page or the longest record is placed on at most two pages).
 * the field eor in page header/trailer points to the byte following
 * the last record on a page.
 */

/*
 * The three groups below are stored in different fields of struct lrd,
 * which is why their numeric values may overlap from one group to another.
 */

/* log record types (lrd.type) */
#define LOG_COMMIT		0x8000
#define LOG_SYNCPT		0x4000
#define LOG_MOUNT		0x2000
#define LOG_REDOPAGE		0x0800
#define LOG_NOREDOPAGE		0x0080
#define LOG_NOREDOINOEXT	0x0040
#define LOG_UPDATEMAP		0x0008
#define LOG_NOREDOFILE		0x0001

/* REDOPAGE/NOREDOPAGE log record data type */
#define	LOG_INODE		0x0001
#define	LOG_XTREE		0x0002
#define	LOG_DTREE		0x0004
#define	LOG_BTROOT		0x0010
#define	LOG_EA			0x0020
#define	LOG_ACL			0x0040
#define	LOG_DATA		0x0080
#define	LOG_NEW			0x0100
#define	LOG_EXTEND		0x0200
#define LOG_RELOCATE		0x0400
#define LOG_DIR_XTREE		0x0800	/* Xtree is in directory inode */

/* UPDATEMAP log record descriptor type */
#define	LOG_ALLOCXADLIST	0x0080
#define	LOG_ALLOCPXDLIST	0x0040
#define	LOG_ALLOCXAD		0x0020
#define	LOG_ALLOCPXD		0x0010
#define	LOG_FREEXADLIST		0x0008
#define	LOG_FREEPXDLIST		0x0004
#define	LOG_FREEXAD		0x0002
#define	LOG_FREEPXD		0x0001
200 
201 struct lrd {
202  /*
203  * type independent area
204  */
205  __le32 logtid; /* 4: log transaction identifier */
206  __le32 backchain; /* 4: ptr to prev record of same transaction */
207  __le16 type; /* 2: record type */
208  __le16 length; /* 2: length of data in record (in byte) */
209  __le32 aggregate; /* 4: file system lv/aggregate */
210  /* (16) */
211 
212  /*
213  * type dependent area (20)
214  */
215  union {
216 
217  /*
218  * COMMIT: commit
219  *
220  * transaction commit: no type-dependent information;
221  */
222 
223  /*
224  * REDOPAGE: after-image
225  *
226  * apply after-image;
227  *
228  * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
229  */
230  struct {
231  __le32 fileset; /* 4: fileset number */
232  __le32 inode; /* 4: inode number */
233  __le16 type; /* 2: REDOPAGE record type */
234  __le16 l2linesize; /* 2: log2 of line size */
235  pxd_t pxd; /* 8: on-disk page pxd */
236  } redopage; /* (20) */
237 
238  /*
239  * NOREDOPAGE: the page is freed
240  *
241  * do not apply after-image records which precede this record
242  * in the log with the same page block number to this page.
243  *
244  * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
245  */
246  struct {
247  __le32 fileset; /* 4: fileset number */
248  __le32 inode; /* 4: inode number */
249  __le16 type; /* 2: NOREDOPAGE record type */
250  __le16 rsrvd; /* 2: reserved */
251  pxd_t pxd; /* 8: on-disk page pxd */
252  } noredopage; /* (20) */
253 
254  /*
255  * UPDATEMAP: update block allocation map
256  *
257  * either in-line PXD,
258  * or out-of-line XADLIST;
259  *
260  * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
261  */
262  struct {
263  __le32 fileset; /* 4: fileset number */
264  __le32 inode; /* 4: inode number */
265  __le16 type; /* 2: UPDATEMAP record type */
266  __le16 nxd; /* 2: number of extents */
267  pxd_t pxd; /* 8: pxd */
268  } updatemap; /* (20) */
269 
270  /*
271  * NOREDOINOEXT: the inode extent is freed
272  *
273  * do not apply after-image records which precede this
274  * record in the log with the any of the 4 page block
275  * numbers in this inode extent.
276  *
277  * NOTE: The fileset and pxd fields MUST remain in
278  * the same fields in the REDOPAGE record format.
279  *
280  */
281  struct {
282  __le32 fileset; /* 4: fileset number */
283  __le32 iagnum; /* 4: IAG number */
284  __le32 inoext_idx; /* 4: inode extent index */
285  pxd_t pxd; /* 8: on-disk page pxd */
286  } noredoinoext; /* (20) */
287 
288  /*
289  * SYNCPT: log sync point
290  *
291  * replay log up to syncpt address specified;
292  */
293  struct {
294  __le32 sync; /* 4: syncpt address (0 = here) */
295  } syncpt;
296 
297  /*
298  * MOUNT: file system mount
299  *
300  * file system mount: no type-dependent information;
301  */
302 
303  /*
304  * ? FREEXTENT: free specified extent(s)
305  *
306  * free specified extent(s) from block allocation map
307  * N.B.: nextents should be length of data/sizeof(xad_t)
308  */
309  struct {
310  __le32 type; /* 4: FREEXTENT record type */
311  __le32 nextent; /* 4: number of extents */
312 
313  /* data: PXD or XAD list */
314  } freextent;
315 
316  /*
317  * ? NOREDOFILE: this file is freed
318  *
319  * do not apply records which precede this record in the log
320  * with the same inode number.
321  *
322  * NOREDOFILE must be the first to be written at commit
323  * (last to be read in logredo()) - it prevents
324  * replay of preceding updates of all preceding generations
325  * of the inumber esp. the on-disk inode itself.
326  */
327  struct {
328  __le32 fileset; /* 4: fileset number */
329  __le32 inode; /* 4: inode number */
330  } noredofile;
331 
332  /*
333  * ? NEWPAGE:
334  *
335  * metadata type dependent
336  */
337  struct {
338  __le32 fileset; /* 4: fileset number */
339  __le32 inode; /* 4: inode number */
340  __le32 type; /* 4: NEWPAGE record type */
341  pxd_t pxd; /* 8: on-disk page pxd */
342  } newpage;
343 
344  /*
345  * ? DUMMY: filler
346  *
347  * no type-dependent information
348  */
349  } log;
350 }; /* (36) */
351 
352 #define LOGRDSIZE (sizeof(struct lrd))
353 
354 /*
355  * line vector descriptor
356  */
357 struct lvd {
358  __le16 offset;
359  __le16 length;
360 };
361 
362 
363 /*
364  * log logical volume
365  */
366 struct jfs_log {
367 
368  struct list_head sb_list;/* This is used to sync metadata
369  * before writing syncpt.
370  */
371  struct list_head journal_list; /* Global list */
372  struct block_device *bdev; /* 4: log lv pointer */
373  int serial; /* 4: log mount serial number */
375  s64 base; /* @8: log extent address (inline log ) */
376  int size; /* 4: log size in log page (in page) */
377  int l2bsize; /* 4: log2 of bsize */
378 
379  unsigned long flag; /* 4: flag */
381  struct lbuf *lbuf_free; /* 4: free lbufs */
382  wait_queue_head_t free_wait; /* 4: */
384  /* log write */
385  int logtid; /* 4: log tid */
386  int page; /* 4: page number of eol page */
387  int eor; /* 4: eor of last record in eol page */
388  struct lbuf *bp; /* 4: current log page buffer */
389 
390  struct mutex loglock; /* 4: log write serialization lock */
392  /* syncpt */
393  int nextsync; /* 4: bytes to write before next syncpt */
394  int active; /* 4: */
395  wait_queue_head_t syncwait; /* 4: */
397  /* commit */
398  uint cflag; /* 4: */
399  struct list_head cqueue; /* FIFO commit queue */
400  struct tblock *flush_tblk; /* tblk we're waiting on for flush */
401  int gcrtc; /* 4: GC_READY transaction count */
402  struct tblock *gclrt; /* 4: latest GC_READY transaction */
403  spinlock_t gclock; /* 4: group commit lock */
404  int logsize; /* 4: log data area size in byte */
405  int lsn; /* 4: end-of-log */
406  int clsn; /* 4: clsn */
407  int syncpt; /* 4: addr of last syncpt record */
408  int sync; /* 4: addr from last logsync() */
409  struct list_head synclist; /* 8: logsynclist anchor */
410  spinlock_t synclock; /* 4: synclist lock */
411  struct lbuf *wqueue; /* 4: log pageout queue */
412  int count; /* 4: count */
413  char uuid[16]; /* 16: 128-bit uuid of log device */
414 
415  int no_integrity; /* 3: flag to disable journaling to disk */
416 };
417 
/*
 * Log flag
 *
 * NOTE(review): these are consecutive integers, not single-bit masks, so
 * they are presumably bit numbers within jfs_log.flag - confirm against
 * the users before combining them with bitwise OR.
 */
#define log_INLINELOG	1
#define log_SYNCBARRIER	2
#define log_QUIESCE	3
#define log_FLUSH	4
425 
/*
 * group commit flag
 */
/* jfs_log */
#define logGC_PAGEOUT	0x00000001

/* tblock/lbuf (single-bit masks, may be combined) */
#define tblkGC_QUEUE		0x0001
#define tblkGC_READY		0x0002
#define tblkGC_COMMIT		0x0004
#define tblkGC_COMMITTED	0x0008
#define tblkGC_EOP		0x0010
#define tblkGC_FREE		0x0020
#define tblkGC_LEADER		0x0040
#define tblkGC_ERROR		0x0080
#define tblkGC_LAZY		0x0100	/* D230860 */
#define tblkGC_UNLOCKED		0x0200	/* D230860 */
443 
444 /*
445  * log cache buffer header
446  */
447 struct lbuf {
448  struct jfs_log *l_log; /* 4: log associated with buffer */
449 
450  /*
451  * data buffer base area
452  */
453  uint l_flag; /* 4: pageout control flags */
455  struct lbuf *l_wqnext; /* 4: write queue link */
456  struct lbuf *l_freelist; /* 4: freelistlink */
458  int l_pn; /* 4: log page number */
459  int l_eor; /* 4: log record eor */
460  int l_ceor; /* 4: committed log record eor */
462  s64 l_blkno; /* 8: log page block number */
463  caddr_t l_ldata; /* 4: data page */
464  struct page *l_page; /* The page itself */
465  uint l_offset; /* Offset of l_ldata within the page */
466 
467  wait_queue_head_t l_ioevent; /* 4: i/o done event */
468 };
/*
 * Reuse l_freelist for redrive list: l_redrive_next is only an alias, so a
 * struct lbuf cannot be linked on the free list and on the redrive list at
 * the same time.
 */
#define l_redrive_next l_freelist
472 
473 /*
474  * logsynclist block
475  *
476  * common logsyncblk prefix for jbuf_t and tblock
477  */
478 struct logsyncblk {
479  u16 xflag; /* flags */
480  u16 flag; /* only meaninful in tblock */
481  lid_t lid; /* lock id */
482  s32 lsn; /* log sequence number */
483  struct list_head synclist; /* log sync list link */
484 };
485 
/*
 *	logsynclist serialization (per log)
 *
 * Thin wrappers that apply the spinlock primitives to (log)->synclock.
 * @log is a pointer to the struct jfs_log; @flags is handed through
 * unchanged to spin_lock_irqsave() / spin_unlock_irqrestore(), so the same
 * variable must be given to LOGSYNC_LOCK and the matching LOGSYNC_UNLOCK.
 */

#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
#define LOGSYNC_UNLOCK(log, flags) \
	spin_unlock_irqrestore(&(log)->synclock, flags)
/*
 * logdiff - compute the difference in bytes of lsn from sync point
 * @diff: lvalue of a signed integer type; receives the result
 * @lsn:  log address to measure
 * @log:  pointer to an object providing ->syncpt and ->logsize
 *
 * diff is set to lsn - log->syncpt; a negative result is folded back into
 * range by adding log->logsize once.
 *
 * The body is wrapped in do { } while (0) so that "logdiff(...);" is exactly
 * one statement and can be used in an unbraced if/else.  @diff and @log are
 * evaluated more than once and must not have side effects.
 */
#define logdiff(diff, lsn, log)\
do {\
	(diff) = (lsn) - (log)->syncpt;\
	if ((diff) < 0)\
		(diff) += (log)->logsize;\
} while (0)
502 
503 extern int lmLogOpen(struct super_block *sb);
504 extern int lmLogClose(struct super_block *sb);
505 extern int lmLogShutdown(struct jfs_log * log);
506 extern int lmLogInit(struct jfs_log * log);
507 extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
508 extern int lmGroupCommit(struct jfs_log *, struct tblock *);
509 extern int jfsIOWait(void *);
510 extern void jfs_flush_journal(struct jfs_log * log, int wait);
511 extern void jfs_syncpt(struct jfs_log *log, int hard_sync);
512 
513 #endif /* _H_JFS_LOGMGR */