xref: /linux/fs/jfs/jfs_logmgr.h (revision 490cc3c5e724502667a104a4e818dc071faf5e77)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  *   Copyright (C) International Business Machines Corp., 2000-2004
4  *   Portions Copyright (C) Christoph Hellwig, 2001-2002
5  */
6 #ifndef	_H_JFS_LOGMGR
7 #define _H_JFS_LOGMGR
8 
9 #include <linux/uuid.h>
10 
11 #include "jfs_filsys.h"
12 #include "jfs_lock.h"
13 
14 /*
15  *	log manager configuration parameters
16  */
17 
18 /* log page size in bytes; LOGPSIZE == 1 << L2LOGPSIZE */
19 #define	LOGPSIZE	4096
20 #define	L2LOGPSIZE	12	/* log2 of LOGPSIZE */
21 
22 #define LOGPAGES	16	/* Log pages per mounted file system */
23 
24 /*
25  *	log logical volume
26  *
27  * a log is used to make the commit operation on journalled
28  * files within the same logical volume group atomic.
29  * a log is implemented with a logical volume.
30  * there is one log per logical volume group.
31  *
32  * block 0 of the log logical volume is not used (ipl etc).
33  * block 1 contains a log "superblock" and is used by logFormat(),
34  * lmLogInit(), lmLogShutdown(), and logRedo() to record status
35  * of the log but is not otherwise used during normal processing.
36  * blocks 2 - (N-1) are used to contain log records.
37  *
38  * when a volume group is varied-on-line, logRedo() must have
39  * been executed before the file systems (logical volumes) in
40  * the volume group can be mounted.
41  */
42 /*
43  *	log superblock (block 1 of logical volume)
44  */
45 #define	LOGSUPER_B	1	/* block number of the log superblock */
46 #define	LOGSTART_B	2	/* first block that holds log records */
47 
/* NOTE(review): presumably the expected values of logsuper.magic / .version */
48 #define	LOGMAGIC	0x87654321
49 #define	LOGVERSION	1
50 
51 #define MAX_ACTIVE	128	/* Max active file systems sharing log */
52 
/*
 * Log superblock layout. Integer fields are little-endian (__le32).
 * The "N:" prefix in each field comment is the field size in bytes.
 */
53 struct logsuper {
54 	__le32 magic;		/* 4: log lv identifier */
55 	__le32 version;		/* 4: version number */
56 	__le32 serial;		/* 4: log open/mount counter */
57 	__le32 size;		/* 4: size in number of LOGPSIZE blocks */
58 	__le32 bsize;		/* 4: logical block size in bytes */
59 	__le32 l2bsize;		/* 4: log2 of bsize */
60 
61 	__le32 flag;		/* 4: commit option (see jfs_filsys.h) */
62 	__le32 state;		/* 4: log state - see LOGMOUNT etc. below */
63 
64 	__le32 end;		/* 4: addr of last log record set by logredo */
65 	uuid_t uuid;		/* 16: 128-bit journal uuid */
66 	char label[16];		/* 16: journal label */
67 	struct {
68 		uuid_t uuid;	/* 16: uuid of one active file system */
69 	} active[MAX_ACTIVE];	/* 2048: active file systems list (MAX_ACTIVE * 16) */
70 };
71 
72 /* log flag: commit option (see jfs_filsys.h) */
73 
74 /* log state */
/* NOTE(review): presumably the values stored in logsuper.state - confirm */
75 #define	LOGMOUNT	0	/* log mounted by lmLogInit() */
76 #define LOGREDONE	1	/* log shutdown by lmLogShutdown().
77 				 * log redo completed by logredo().
78 				 */
79 #define LOGWRAP		2	/* log wrapped */
80 #define LOGREADERR	3	/* log read error detected in logredo() */
81 
82 
83 /*
84  *	log logical page
85  *
86  * (this comment should be rewritten !)
87  * the header and trailer structures (h,t) will normally have
88  * the same page and eor value.
89  * An exception to this occurs when a complete page write is not
90  * accomplished on a power failure. Since the hardware may "split write"
91  * sectors in the page, any out of order sequence may occur during powerfail
92  * and needs to be recognized during log replay.  The xor value is
93  * an "exclusive or" of all log words in the page up to eor.  This
94  * 32 bit eor is stored with the top 16 bits in the header and the
95  * bottom 16 bits in the trailer.  logredo can easily recognize pages
96  * that were not completed by reconstructing this eor and checking
97  * the log page.
98  *
99  * Previous versions of the operating system did not allow split
100  * writes and detected partially written records in logredo by
101  * ordering the updates to the header, trailer, and the move of data
102  * into the logdata area.  The order: (1) data is moved (2) header
103  * is updated (3) trailer is updated.  In logredo, when the header
104  * differed from the trailer, the header and trailer were reconciled
105  * as follows: if h.page != t.page they were set to the smaller of
106  * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
107  * h.eor != t.eor they were set to the smaller of their two values.
108  */
/*
 * One log page: 8-byte header + 4080-byte record area + 8-byte trailer,
 * i.e. exactly LOGPSIZE (4096) bytes.
 */
109 struct logpage {
110 	struct {		/* header */
111 		__le32 page;	/* 4: log sequence page number */
112 		__le16 rsrvd;	/* 2: reserved */
113 		__le16 eor;	/* 2: end-of-log offset of last record write */
114 	} h;
115 
116 	__le32 data[LOGPSIZE / 4 - 4];	/* 4080: log record area (1020 words) */
117 
118 	struct {		/* trailer */
119 		__le32 page;	/* 4: normally the same as h.page */
120 		__le16 rsrvd;	/* 2: reserved */
121 		__le16 eor;	/* 2: normally the same as h.eor */
122 	} t;
123 };
124 
125 #define LOGPHDRSIZE	8	/* log page header size in bytes (logpage.h) */
126 #define LOGPTLRSIZE	8	/* log page trailer size in bytes (logpage.t) */
127 
128 
129 /*
130  *	log record
131  *
132  * (this comment should be rewritten !)
133  * jfs uses only "after" log records (only a single writer is allowed
134  * in a page, pages are written to temporary paging space if
135  * they must be written to disk before commit, and i/o is
136  * scheduled for modified pages to their home location after
137  * the log records containing the after values and the commit
138  * record is written to the log on disk, undo discards the copy
139  * in main-memory.)
140  *
141  * a log record consists of a data area of variable length followed by
142  * a descriptor of fixed size LOGRDSIZE bytes.
143  * the data area is rounded up to a multiple of 4 bytes and
144  * must be no longer than LOGPSIZE.
145  * the descriptor size is a multiple of 4 bytes, and the descriptor is
146  * aligned on a 4-byte boundary.
147  * records are packed one after the other in the data area of log pages.
148  * (sometimes a DUMMY record is inserted so that at least one record ends
149  * on every page or the longest record is placed on at most two pages).
150  * the field eor in page header/trailer points to the byte following
151  * the last record on a page.
152  */
153 
154 /* log record types */
/*
 * The three groups below reuse the same numeric values (e.g. 0x0080 is
 * LOG_NOREDOPAGE, LOG_DATA and LOG_ALLOCXADLIST), so each group is only
 * meaningful for its own field.
 * NOTE(review): presumably lrd.type, log.redopage/noredopage.type and
 * log.updatemap.type respectively - confirm against the users.
 */
155 #define LOG_COMMIT		0x8000
156 #define LOG_SYNCPT		0x4000
157 #define LOG_MOUNT		0x2000
158 #define LOG_REDOPAGE		0x0800
159 #define LOG_NOREDOPAGE		0x0080
160 #define LOG_NOREDOINOEXT	0x0040
161 #define LOG_UPDATEMAP		0x0008
162 #define LOG_NOREDOFILE		0x0001
163 
164 /* REDOPAGE/NOREDOPAGE log record data type */
165 #define	LOG_INODE		0x0001
166 #define	LOG_XTREE		0x0002
167 #define	LOG_DTREE		0x0004
168 #define	LOG_BTROOT		0x0010
169 #define	LOG_EA			0x0020
170 #define	LOG_ACL			0x0040
171 #define	LOG_DATA		0x0080
172 #define	LOG_NEW			0x0100
173 #define	LOG_EXTEND		0x0200
174 #define LOG_RELOCATE		0x0400
175 #define LOG_DIR_XTREE		0x0800	/* Xtree is in directory inode */
176 
177 /* UPDATEMAP log record descriptor type */
178 #define	LOG_ALLOCXADLIST	0x0080
179 #define	LOG_ALLOCPXDLIST	0x0040
180 #define	LOG_ALLOCXAD		0x0020
181 #define	LOG_ALLOCPXD		0x0010
182 #define	LOG_FREEXADLIST		0x0008
183 #define	LOG_FREEPXDLIST		0x0004
184 #define	LOG_FREEXAD		0x0002
185 #define	LOG_FREEPXD		0x0001
186 
187 
/*
 * Log record descriptor: a 16-byte type-independent area followed by a
 * 20-byte type-dependent union, 36 bytes in total. Integer fields are
 * little-endian. The "N:" prefix in field comments is the size in bytes.
 */
188 struct lrd {
189 	/*
190 	 * type independent area
191 	 */
192 	__le32 logtid;		/* 4: log transaction identifier */
193 	__le32 backchain;	/* 4: ptr to prev record of same transaction */
194 	__le16 type;		/* 2: record type */
195 	__le16 length;		/* 2: length of data in record (in bytes) */
196 	__le32 aggregate;	/* 4: file system lv/aggregate */
197 	/* (16) */
198 
199 	/*
200 	 * type dependent area (20)
201 	 */
202 	union {
203 
204 		/*
205 		 *	COMMIT: commit
206 		 *
207 		 * transaction commit: no type-dependent information;
208 		 */
209 
210 		/*
211 		 *	REDOPAGE: after-image
212 		 *
213 		 * apply after-image;
214 		 *
215 		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
216 		 */
217 		struct {
218 			__le32 fileset;	/* 4: fileset number */
219 			__le32 inode;	/* 4: inode number */
220 			__le16 type;	/* 2: REDOPAGE record type */
221 			__le16 l2linesize;	/* 2: log2 of line size */
222 			pxd_t pxd;	/* 8: on-disk page pxd */
223 		} redopage;	/* (20) */
224 
225 		/*
226 		 *	NOREDOPAGE: the page is freed
227 		 *
228 		 * do not apply after-image records which precede this record
229 		 * in the log with the same page block number to this page.
230 		 *
231 		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
232 		 */
233 		struct {
234 			__le32 fileset;	/* 4: fileset number */
235 			__le32 inode;	/* 4: inode number */
236 			__le16 type;	/* 2: NOREDOPAGE record type */
237 			__le16 rsrvd;	/* 2: reserved */
238 			pxd_t pxd;	/* 8: on-disk page pxd */
239 		} noredopage;	/* (20) */
240 
241 		/*
242 		 *	UPDATEMAP: update block allocation map
243 		 *
244 		 * either in-line PXD,
245 		 * or     out-of-line  XADLIST;
246 		 *
247 		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
248 		 */
249 		struct {
250 			__le32 fileset;	/* 4: fileset number */
251 			__le32 inode;	/* 4: inode number */
252 			__le16 type;	/* 2: UPDATEMAP record type */
253 			__le16 nxd;	/* 2: number of extents */
254 			pxd_t pxd;	/* 8: pxd */
255 		} updatemap;	/* (20) */
256 
257 		/*
258 		 *	NOREDOINOEXT: the inode extent is freed
259 		 *
260 		 * do not apply after-image records which precede this
261 		 * record in the log with any of the 4 page block
262 		 * numbers in this inode extent.
263 		 *
264 		 * NOTE: The fileset and pxd fields MUST remain at the
265 		 *       same offsets as in the REDOPAGE record format.
266 		 *
267 		 */
268 		struct {
269 			__le32 fileset;	/* 4: fileset number */
270 			__le32 iagnum;	/* 4: IAG number     */
271 			__le32 inoext_idx;	/* 4: inode extent index */
272 			pxd_t pxd;	/* 8: on-disk page pxd */
273 		} noredoinoext;	/* (20) */
274 
275 		/*
276 		 *	SYNCPT: log sync point
277 		 *
278 		 * replay log up to syncpt address specified;
279 		 */
280 		struct {
281 			__le32 sync;	/* 4: syncpt address (0 = here) */
282 		} syncpt;	/* (4) */
283 
284 		/*
285 		 *	MOUNT: file system mount
286 		 *
287 		 * file system mount: no type-dependent information;
288 		 */
289 
290 		/*
291 		 *	? FREEXTENT: free specified extent(s)
292 		 *
293 		 * free specified extent(s) from block allocation map
294 		 * N.B.: nextent should be length of data/sizeof(xad_t)
295 		 */
296 		struct {
297 			__le32 type;	/* 4: FREEXTENT record type */
298 			__le32 nextent;	/* 4: number of extents */
299 
300 			/* data: PXD or XAD list */
301 		} freextent;	/* (8) */
302 
303 		/*
304 		 *	? NOREDOFILE: this file is freed
305 		 *
306 		 * do not apply records which precede this record in the log
307 		 * with the same inode number.
308 		 *
309 		 * NOREDOFILE must be the first to be written at commit
310 		 * (last to be read in logredo()) - it prevents
311 		 * replay of preceding updates of all preceding generations
312 		 * of the inumber esp. the on-disk inode itself.
313 		 */
314 		struct {
315 			__le32 fileset;	/* 4: fileset number */
316 			__le32 inode;	/* 4: inode number */
317 		} noredofile;	/* (8) */
318 
319 		/*
320 		 *	? NEWPAGE:
321 		 *
322 		 * metadata type dependent
323 		 */
324 		struct {
325 			__le32 fileset;	/* 4: fileset number */
326 			__le32 inode;	/* 4: inode number */
327 			__le32 type;	/* 4: NEWPAGE record type */
328 			pxd_t pxd;	/* 8: on-disk page pxd */
329 		} newpage;	/* (20) */
330 
331 		/*
332 		 *	? DUMMY: filler
333 		 *
334 		 * no type-dependent information
335 		 */
336 	} log;
337 };					/* (36) */
338 
/* size in bytes of the fixed-size log record descriptor */
339 #define	LOGRDSIZE	(sizeof(struct lrd))
340 
341 /*
342  *	line vector descriptor
343  */
/* NOTE(review): units of offset/length are not visible in this header - confirm */
344 struct lvd {
345 	__le16 offset;	/* 2: little-endian offset of the line vector */
346 	__le16 length;	/* 2: little-endian length of the line vector */
347 };
348 
349 
350 /*
351  *	log logical volume
352  */
/*
 * In-memory state of one log.
 *
 * NOTE(review): the "N:" prefixes in the field comments are historical
 * byte-size annotations; they mark every pointer, lock and wait queue
 * as 4 bytes and should not be relied on.
 */
353 struct jfs_log {
354 
355 	struct list_head sb_list;/*  This is used to sync metadata
356 				 *    before writing syncpt.
357 				 */
358 	struct list_head journal_list; /* Global list */
359 	struct bdev_handle *bdev_handle; /* 4: log lv pointer */
360 	int serial;		/* 4: log mount serial number */
361 
362 	s64 base;		/* @8: log extent address (inline log ) */
363 	int size;		/* 4: log size in log pages */
364 	int l2bsize;		/* 4: log2 of bsize */
365 
366 	unsigned long flag;	/* 4: flag (see "Log flag" log_* values) */
367 
368 	struct lbuf *lbuf_free;	/* 4: free lbufs */
369 	wait_queue_head_t free_wait;	/* 4: */
370 
371 	/* log write */
372 	int logtid;		/* 4: log tid */
373 	int page;		/* 4: page number of eol page */
374 	int eor;		/* 4: eor of last record in eol page */
375 	struct lbuf *bp;	/* 4: current log page buffer */
376 
377 	struct mutex loglock;	/* 4: log write serialization lock */
378 
379 	/* syncpt */
380 	int nextsync;		/* 4: bytes to write before next syncpt */
381 	int active;		/* 4: */
382 	wait_queue_head_t syncwait;	/* 4: */
383 
384 	/* commit */
385 	uint cflag;		/* 4: */
386 	struct list_head cqueue; /* FIFO commit queue */
387 	struct tblock *flush_tblk; /* tblk we're waiting on for flush */
388 	int gcrtc;		/* 4: GC_READY transaction count */
389 	struct tblock *gclrt;	/* 4: latest GC_READY transaction */
390 	spinlock_t gclock;	/* 4: group commit lock */
391 	int logsize;		/* 4: log data area size in bytes */
392 	int lsn;		/* 4: end-of-log */
393 	int clsn;		/* 4: clsn */
394 	int syncpt;		/* 4: addr of last syncpt record */
395 	int sync;		/* 4: addr from last logsync() */
396 	struct list_head synclist;	/* 8: logsynclist anchor */
397 	spinlock_t synclock;	/* 4: synclist lock (see LOGSYNC_LOCK) */
398 	struct lbuf *wqueue;	/* 4: log pageout queue */
399 	int count;		/* 4: count */
400 	uuid_t uuid;		/* 16: 128-bit uuid of log device */
401 
402 	int no_integrity;	/* 4: flag to disable journaling to disk */
403 };
404 
405 /*
406  * Log flag
407  */
/*
 * NOTE(review): these are consecutive integers, not bit masks; presumably
 * bit numbers within jfs_log.flag - confirm against the users.
 */
408 #define log_INLINELOG	1
409 #define log_SYNCBARRIER	2
410 #define log_QUIESCE	3
411 #define log_FLUSH	4
412 
413 /*
414  * group commit flag
415  */
416 /* jfs_log (NOTE(review): presumably a mask for jfs_log.cflag - confirm) */
417 #define logGC_PAGEOUT	0x00000001
418 
419 /* tblock/lbuf: single-bit masks, so they can be OR-ed together */
420 #define tblkGC_QUEUE		0x0001
421 #define tblkGC_READY		0x0002
422 #define tblkGC_COMMIT		0x0004
423 #define tblkGC_COMMITTED	0x0008
424 #define tblkGC_EOP		0x0010
425 #define tblkGC_FREE		0x0020
426 #define tblkGC_LEADER		0x0040
427 #define tblkGC_ERROR		0x0080
428 #define tblkGC_LAZY		0x0100	// D230860
429 #define tblkGC_UNLOCKED		0x0200	// D230860
430 
431 /*
432  *		log cache buffer header
433  */
/*
 * In-memory header describing one cached log page buffer.
 *
 * NOTE(review): the "N:" prefixes in the field comments are historical
 * byte-size annotations (pointers are marked as 4 bytes).
 */
434 struct lbuf {
435 	struct jfs_log *l_log;	/* 4: log associated with buffer */
436 
437 	/*
438 	 * data buffer base area
439 	 */
440 	uint l_flag;		/* 4: pageout control flags */
441 
442 	struct lbuf *l_wqnext;	/* 4: write queue link */
443 	struct lbuf *l_freelist;	/* 4: freelist link (also l_redrive_next) */
444 
445 	int l_pn;		/* 4: log page number */
446 	int l_eor;		/* 4: log record eor */
447 	int l_ceor;		/* 4: committed log record eor */
448 
449 	s64 l_blkno;		/* 8: log page block number */
450 	caddr_t l_ldata;	/* 4: data page */
451 	struct page *l_page;	/* The page itself */
452 	uint l_offset;		/* Offset of l_ldata within the page */
453 
454 	wait_queue_head_t l_ioevent;	/* 4: i/o done event */
455 };
456 
457 /* Reuse l_freelist for redrive list: both names alias the same member */
458 #define l_redrive_next l_freelist
459 
460 /*
461  *	logsynclist block
462  *
463  * common logsyncblk prefix for jbuf_t and tblock
464  */
465 struct logsyncblk {
466 	u16 xflag;		/* flags */
467 	u16 flag;		/* only meaningful in tblock */
468 	lid_t lid;		/* lock id */
469 	s32 lsn;		/* log sequence number */
470 	struct list_head synclist;	/* log sync list link */
471 };
472 
473 /*
474  *	logsynclist serialization (per log)
475  */
476 
/*
 * Thin wrappers around the synclock spinlock of the given log.
 * LOGSYNC_LOCK uses spin_lock_irqsave() and LOGSYNC_UNLOCK uses
 * spin_unlock_irqrestore(); pass the same 'flags' variable to both.
 */
477 #define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
478 #define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
479 #define LOGSYNC_UNLOCK(log, flags) \
480 	spin_unlock_irqrestore(&(log)->synclock, flags)
481 
/*
 * logdiff - compute the difference in bytes of lsn from the sync point
 * @diff: signed integer lvalue that receives the result
 * @lsn:  log address to measure
 * @log:  pointer to an object with 'syncpt' and 'logsize' members
 *
 * Stores (lsn - log->syncpt) in @diff. A negative difference means lsn
 * lies before the sync point, so log->logsize is added to wrap it around.
 *
 * Wrapped in do { } while (0) so that "logdiff(...);" is a single
 * statement and is safe in an unbraced if/else. @diff and @log are
 * evaluated more than once, so pass expressions without side effects.
 */
#define logdiff(diff, lsn, log)\
do {\
	(diff) = (lsn) - (log)->syncpt;\
	if ((diff) < 0)\
		(diff) += (log)->logsize;\
} while (0)
489 
/*
 * Log manager entry points; the definitions are not in this header.
 * Per the comments earlier in this file, lmLogInit() marks the log as
 * mounted (LOGMOUNT) and lmLogShutdown() marks it LOGREDONE.
 * NOTE(review): the int return values are presumably 0 or an error code -
 * confirm against the definitions.
 */
490 extern int lmLogOpen(struct super_block *sb);
491 extern int lmLogClose(struct super_block *sb);
492 extern int lmLogShutdown(struct jfs_log * log);
493 extern int lmLogInit(struct jfs_log * log);
494 extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
495 extern int lmGroupCommit(struct jfs_log *, struct tblock *);
496 extern int jfsIOWait(void *);
497 extern void jfs_flush_journal(struct jfs_log * log, int wait);
498 extern void jfs_syncpt(struct jfs_log *log, int hard_sync);
499 
500 #endif				/* _H_JFS_LOGMGR */
501