xref: /titanic_41/usr/src/uts/common/sys/lvm/md_trans.h (revision 88f8b78a88cbdc6d8c1af5c3e54bc49d25095c98)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef _SYS_MD_TRANS_H
28 #define	_SYS_MD_TRANS_H
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/lvm/mdvar.h>
33 #include <sys/buf.h>
34 #include <sys/fs/ufs_trans.h>
35 #include <sys/lvm/md_rename.h>
36 
37 #ifdef	__cplusplus
38 extern "C" {
39 #endif
40 
/*
 * Log device layout constants.
 *
 * LDL_USABLE_BSIZE is what remains of a DEV_BSIZE sector once the
 * sect_trailer_t is subtracted.  NB_LEFT_IN_SECTOR(off) subtracts from
 * that the distance between `off' and dbtob(btodb(off)) (presumably
 * `off' rounded down to a sector boundary -- confirm against the
 * dbtob/btodb definitions).  Note that `off' is evaluated twice.
 */
41 #define	LDL_META_SBLK		(16)
42 
43 #define	LDL_MINLOGSIZE		(1024*1024)
44 #define	LDL_MAXLOGSIZE		(1024*1024*1024)
45 #define	LDL_MINBUFSIZE		(32*1024)
46 #define	LDL_USABLE_BSIZE	(DEV_BSIZE - sizeof (sect_trailer_t))
47 #define	NB_LEFT_IN_SECTOR(off) 	(LDL_USABLE_BSIZE - ((off) - dbtob(btodb(off))))
48 
/*
 * Fixed-width (32-bit) image of a circular buffer descriptor: the
 * pointer members of cirbuf_ic_t appear here as caddr32_t and the lock
 * as three uint_t words.  Embedded in ml_unit32_od_t as xx_un_rdbuf and
 * xx_un_wrbuf.  NOTE(review): the xx_ prefix presumably marks fields
 * kept only to preserve the layout -- confirm.
 */
49 typedef struct cirbuf32 {
50 	caddr32_t	xx_cb_bp;	/* buf's with space in circular buf */
51 	caddr32_t	xx_cb_dirty;	/* filling this buffer for log write */
52 	caddr32_t	xx_cb_free;	/* free bufs list */
53 	caddr32_t	xx_cb_va;	/* address of circular buffer */
54 	uint_t		xx_cb_nb;	/* size of circular buffer */
55 	uint_t		xx_cb_rwlock[3]; /* r/w lock to protect list mgmt. */
56 } cirbuf32_t;
57 
/*
 * In-core circular buffer descriptor, using native pointers and a real
 * md_krwlock_t.  ml_unit_t carries two of these: un_rdbuf and un_wrbuf.
 */
58 typedef struct cirbuf_ic {
59 	buf_t		*cb_bp;		/* buf's with space in circular buf */
60 	buf_t		*cb_dirty;	/* filling this buffer for log write */
61 	buf_t		*cb_free;	/* free bufs list */
62 	caddr_t		cb_va;		/* address of circular buffer */
63 	size_t		cb_nb;		/* size of circular buffer */
64 	md_krwlock_t	cb_rwlock;	/* r/w lock to protect list mgmt. */
65 } cirbuf_ic_t;
66 
67 
/*
 * Log unit.  The members up to and including un_spare[] form the
 * leading portion whose size is computed by ML_UNIT_ONDSZ; everything
 * after un_spare[] (list links, map pointer, read/write buffers and the
 * log mutex) is in-core only and must stay at the end.
 */
68 typedef struct ml_unit {
69 	uint_t		un_revision;	/* revision number */
70 	/*
71 	 * mdd infrastructure stuff
72 	 */
73 	mddb_recid_t	un_recid;	/* db record id */
74 	mdkey_t		un_key;		/* namespace key */
75 	md_dev64_t	un_dev;		/* device number */
76 	uint_t		un_opencnt;	/* open count */
77 
78 	/*
79 	 * metatrans infrastructure stuff
80 	 */
81 	uint_t		un_transcnt;	/* #open metatrans devices */
82 
83 	/*
84 	 * log specific stuff
85 	 */
86 	off32_t		un_head_lof;	/* byte offset of head */
87 	uint_t		un_head_ident;	/* head sector id # */
88 	off32_t		un_tail_lof;	/* byte offset of tail */
89 	uint_t		un_tail_ident;	/* tail sector id # */
90 	off32_t		un_bol_lof;	/* byte offset of begin of log */
91 	off32_t		un_eol_lof;	/* byte offset of end of log */
92 	daddr32_t	un_nblks;	/* total blocks of log space */
93 	daddr32_t	un_tblks;	/* total blocks in log device */
94 	uint_t		un_maxtransfer;	/* max transfer in bytes */
95 	uint_t		un_status;	/* status bits (LDL_* flags) */
96 	uint_t		un_maxresv;	/* maximum reservable space */
97 	daddr32_t	un_pwsblk;	/* block number of prewrite area */
98 	ulong_t		un_devbsize;	/* device bsize */
99 	uint_t		un_resv;	/* reserved byte count for this trans */
100 	uint_t		un_resv_wantin;	/* reserved byte count for next trans */
101 	mt_l_error_t	un_error;	/* error state */
102 	uint_t		un_tid;		/* used during logscan */
103 	uint_t		un_head_tid;	/* used for logscan; set at sethead */
104 	struct timeval32 un_timestamp;	/* time of last state change */
105 	/*
106 	 * spares
107 	 */
108 	uint_t		un_spare[16];
109 	/*
110 	 * The following are in-core only elements.
111 	 * In-core elements must always be at the end
112 	 * of this data structure.
113 	 */
114 	struct ml_unit	*un_next;	/* next log unit */
115 	struct mt_unit	*un_utlist;	/* list of metatrans devices */
116 	struct mt_map	*un_logmap;	/* address of logmap */
117 	cirbuf_ic_t	un_rdbuf;	/* read buffer space */
118 	cirbuf_ic_t	un_wrbuf;	/* write buffer space */
119 	kmutex_t	un_log_mutex;	/* allows one log write at a time */
120 } ml_unit_t;
121 
122 
/*
 * 32-bit layout of the log unit.  Where ml_unit_t has in-core pointers,
 * buffers and a mutex at its end, this layout has fixed-width xx_
 * placeholder members interleaved with the data members.
 *
 * The structure is packed to 4 bytes when the native long long
 * alignment is 8 but the 32-bit one is 4.  NOTE(review): presumably so
 * that both kernels agree on the member offsets -- confirm.
 */
123 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
124 #pragma pack(4)
125 #endif
126 typedef struct ml_unit32_od {
127 	uint_t		un_revision;	/* revision number */
128 	/*
129 	 * mdd infrastructure stuff
130 	 */
131 	caddr32_t	xx_un_next;	/* next log unit struct */
132 	mddb_recid_t	un_recid;	/* db record id */
133 	mdkey_t		un_key;		/* namespace key */
134 	dev32_t		un_dev;		/* device number */
135 	uint_t		un_opencnt;	/* open count */
136 
137 	/*
138 	 * metatrans infrastructure stuff
139 	 */
140 	uint_t		un_transcnt;	/* #open metatrans devices */
141 	caddr32_t	xx_un_utlist;	/* list of metatrans devices */
142 	caddr32_t	xx_un_logmap;	/* address of logmap */
143 
144 	/*
145 	 * log specific stuff
146 	 */
147 	off32_t		un_head_lof;	/* byte offset of head */
148 	uint_t		un_head_ident;	/* head sector id # */
149 	off32_t		un_tail_lof;	/* byte offset of tail */
150 	uint_t		un_tail_ident;	/* tail sector id # */
151 	off32_t		un_bol_lof;	/* byte offset of begin of log */
152 	off32_t		un_eol_lof;	/* byte offset of end of log */
153 	daddr32_t	un_nblks;	/* total blocks of log space */
154 	daddr32_t	un_tblks;	/* total blocks in log device */
155 	uint_t		un_maxtransfer;	/* max transfer in bytes */
156 	uint_t		un_status;	/* status bits (LDL_* flags) */
157 	uint_t		un_maxresv;	/* maximum reservable space */
158 	daddr32_t	un_pwsblk;	/* block number of prewrite area */
159 	uint_t		un_devbsize;	/* device bsize */
160 	uint_t		un_resv;	/* reserved byte count for this trans */
161 	uint_t		un_resv_wantin;	/* reserved byte count for next trans */
162 	mt_l_error_t	un_error;	/* error state */
163 	uint_t		un_tid;		/* used during logscan */
164 	uint_t		un_head_tid;	/* used for logscan; set at sethead */
165 	cirbuf32_t	xx_un_rdbuf;	/* read buffer space */
166 	cirbuf32_t	xx_un_wrbuf;	/* write buffer space */
167 	int		xx_un_log_mutex[2]; /* allows one log write at a time */
168 	struct timeval32 un_timestamp;	/* time of last state change */
169 	/*
170 	 * spares
171 	 */
172 	uint_t		un_spare[16];
173 } ml_unit32_od_t;
174 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
175 #pragma pack()
176 #endif
177 
178 
179 
/*
 * Size in bytes of the leading part of ml_unit_t, i.e. everything up to
 * and including the last element of un_spare[]: the offset of
 * un_spare[15] (taken from a null base pointer) plus one uint_t.  The
 * in-core members that follow un_spare[] are not counted.
 */
180 #define	ML_UNIT_ONDSZ	((size_t)((caddr_t)&((ml_unit_t *)0)->un_spare[15] +\
181 				sizeof (uint_t)))
182 
183 
184 /*
185  *	un_status -- bit flags; each value is a distinct single bit so
186  *	they may be OR-ed together.
187  */
187 #define	LDL_BEING_RESET	0x0001	/* delete the log record at snarf */
188 #define	LDL_FIND_TAIL	0x0002	/* find tail of the log */
189 #define	LDL_SCAN_ACTIVE	0x0004	/* log scan in progress */
190 #define	LDL_METADEVICE	0x0008	/* underlying device is metadevice */
191 #define	LDL_PWVALID	0x0010	/* prewrite area is valid */
192 #define	LDL_INFO	0x0020	/* prewrite state is valid */
193 
/*
 * Per-sector trailer.  Its size is subtracted from DEV_BSIZE to obtain
 * LDL_USABLE_BSIZE, the usable bytes of a log sector.
 */
194 typedef struct sect_trailer {
195 	uint_t		st_tid;		/* transaction id */
196 	uint_t		st_ident;	/* unique sector id */
197 } sect_trailer_t;
198 
199 
200 /*
201  * ioctls
202  *
203  * Each command is MDIOC_MISC OR-ed with a small index.  The indices in
204  * use are not contiguous; do not assume the gaps are free without
205  * checking the other users of MDIOC_MISC.
206  */
203 #define	MD_IOCGET_LOG		(MDIOC_MISC|0)
204 #define	MD_IOC_DEBUG		(MDIOC_MISC|4)
205 #define	MD_IOCGET_TRANSSTATS	(MDIOC_MISC|5)
206 #define	MD_IOC_TSD		(MDIOC_MISC|6)
207 #define	MD_IOC_TRYGETBLK	(MDIOC_MISC|7)
208 #define	MD_IOC_TRYPAGE		(MDIOC_MISC|8)
209 #define	MD_IOC_SETSHADOW	(MDIOC_MISC|11)
210 #define	MD_IOC_INJECTERRORS	(MDIOC_MISC|13)
211 #define	MD_IOC_STOPERRORS	(MDIOC_MISC|14)
212 #define	MD_IOC_UFSERROR		(MDIOC_MISC|15)
213 #define	MD_IOC_ISDEBUG		(MDIOC_MISC|17)
214 
215 #define	MD_IOC_TRANS_DETACH	(MDIOC_MISC|32)
216 
217 /*
218  * The following bits are used in the status word in the common section
219  * of the unit structure.
220  */
221 #define	MD_UN_LOG_DELETED	(0x00010000)	/* don't need to del @snarf */
222 
223 /*
224  * map block
225  *
226  * MAPBLOCKSIZE equals 1 << MAPBLOCKSHIFT.  MAPBLOCKOFF masks the byte
227  * offset within a map block; MAPBLOCKMASK masks the block-aligned base.
228  */
226 #define	MAPBLOCKSIZE	(8192)
227 #define	MAPBLOCKSHIFT	(13)
228 #define	MAPBLOCKOFF	(MAPBLOCKSIZE-1)
229 #define	MAPBLOCKMASK	(~MAPBLOCKOFF)
230 
231 /*
232  * delta header
233  *
234  * Embedded in every mapentry as me_delta and reached through the
235  * me_mof/me_nb/me_dev/me_dt accessor macros.  The range macros treat
236  * d_mof as a starting offset and d_nb as a length.
237  */
234 struct delta {
235 	offset_t	d_mof;		/* offset of the delta */
236 	off32_t		d_nb;		/* length of the delta */
237 	dev32_t		d_dev;		/* device */
238 	delta_t 	d_typ;		/* delta type */
239 };
240 
241 /*
242  * common map entry
243  *
244  * me_next/me_prev must stay first: mt_map_t begins with the same pair
245  * of pointers (mtm_next/mtm_prev) and anchors this list.
246  */
244 typedef struct mapentry	mapentry_t;
245 struct mapentry {
246 	/*
247 	 * doubly linked list of all mapentries in map -- MUST BE FIRST
248 	 */
249 	mapentry_t	*me_next;
250 	mapentry_t	*me_prev;
251 
252 	mapentry_t	*me_hash;	/* hash chain link (see ME_HASH) */
253 	mapentry_t	*me_agenext;	/* age list link (see ME_AGE) */
254 	mapentry_t	*me_cancel;	/* cancel list link (see ME_CANCEL) */
255 	int		(*me_func)();	/* callback; argument list unspecified */
256 	uintptr_t	me_arg;		/* NOTE(review): presumably me_func's arg */
257 	off_t		me_lof;		/* log offset */
258 	uint_t		me_flags;	/* ME_* flags */
259 	uint_t		me_tid;		/* transaction id */
260 	uint_t		me_age;		/* age tag (cf. mtm_age in mt_map_t) */
261 	struct delta	me_delta;	/* use the me_* accessors below */
262 };
263 
/* Shorthand accessors for the embedded delta header. */
264 #define	me_mof	me_delta.d_mof
265 #define	me_nb	me_delta.d_nb
266 #define	me_dt	me_delta.d_typ
267 #define	me_dev	me_delta.d_dev
268 
269 /*
270  * me_flags -- one bit per list a mapentry may be on
271  */
272 #define	ME_FREE		(0x0001)	/* on free   list */
273 #define	ME_HASH		(0x0002)	/* on hash   list */
274 #define	ME_CANCEL	(0x0004)	/* on cancel list */
275 #define	ME_AGE		(0x0008)	/* on age    list */
276 #define	ME_LIST		(0x0010)	/* on list   list */
277 #define	ME_ROLL		(0x0020)	/* on pseudo-roll list */
278 
279 /*
280  * TRANSACTION OPS STATS
281  * mtm_top_size_* should be 64bit but that would
282  * require test recompilations. It does not hurt the kernel
283  * so leave as 32 bit for now.
284  *
285  * The mtm_top_* arrays have TOP_MAX elements and mtm_delta_num has
286  * DT_MAX elements.  An instance hangs off mt_map_t via mtm_tops.
287  */
285 struct topstats {
286 	uint_t		mtm_top_num[TOP_MAX];
287 	uint_t		mtm_top_size_etot[TOP_MAX];
288 	uint_t		mtm_top_size_rtot[TOP_MAX];
289 	uint_t		mtm_top_size_max[TOP_MAX];
290 	uint_t		mtm_top_size_min[TOP_MAX];
291 	uint_t		mtm_delta_num[DT_MAX];
292 };
293 
294 /*
295  * MAP STATS (global struct that is not updated if compiled w/o DEBUG;
296  * see the TRANSSTATS* macros).
297  * Some members of transstats need to be 64bit. See the comment above.
298  *
299  * NOTE(review): the trailing `transstats' declarator below makes this a
300  * (tentative) definition of a global object in every file that includes
301  * this header, which only links when common symbols are merged.
302  * Consider declaring it `extern' here and defining it in one .c file.
303  */
298 struct transstats {
299 	/* trans.c */
300 	uint_t		ts_trans_zalloc;
301 	uint_t		ts_trans_zalloc_nosleep;
302 	uint_t		ts_trans_alloc;
303 	uint_t		ts_trans_alloc_nosleep;
304 	uint_t		ts_trans_free;
305 	uint_t		ts_trans_alloced;
306 	uint_t		ts_trans_freed;
307 	uint_t		ts_trans_write;
308 	uint_t		ts_trans_write_roll;
309 
310 	/* trans_delta.c */
311 	uint_t		ts_mapentry_alloc;
312 	uint_t		ts_mapentry_alloc_list;
313 	uint_t		ts_mapentry_free;
314 
315 	uint_t		ts_delta_add;
316 	uint_t		ts_delta_add_scan;
317 	uint_t		ts_delta_add_hit;
318 
319 	uint_t		ts_delta_remove;
320 	uint_t		ts_delta_remove_scan;
321 	uint_t		ts_delta_remove_hit;
322 
323 	uint_t		ts_delta_del;
324 	uint_t		ts_delta_del_scan;
325 
326 	uint_t		ts_delta_push;
327 
328 	uint_t		ts_overlap;
329 	uint_t		ts_overlap_scan;
330 	uint_t		ts_overlap_hit;
331 
332 	uint_t		ts_remove_roll;
333 	uint_t		ts_remove_roll_scan;
334 	uint_t		ts_remove_roll_hit;
335 	uint_t		ts_remove_roll_dolock;
336 	uint_t		ts_remove_roll_sud;
337 
338 	uint_t		ts_next_roll;
339 	uint_t		ts_next_roll_scan;
340 	uint_t		ts_next_roll_hit;
341 
342 	uint_t		ts_list_age;
343 	uint_t		ts_list_age_scan;
344 
345 	uint_t		ts_list_get;
346 	uint_t		ts_list_get_scan;
347 	uint_t		ts_list_get_hit;
348 	uint_t		ts_list_get_again;
349 
350 	uint_t		ts_list_put;
351 	uint_t		ts_list_put_scan;
352 
353 	uint_t		ts_read_mstr;
354 
355 	uint_t		ts_logmap_secmap_roll;
356 
357 	uint_t		ts_read_log;
358 
359 	uint_t		ts_logmap_abort;
360 	uint_t		ts_logmap_abort_hit;
361 
362 	uint_t		ts_list_add;
363 	uint_t		ts_list_add_scan;
364 	uint_t		ts_list_add_cancel;
365 	uint_t		ts_list_add_unhash;
366 
367 	uint_t		ts_free_cancel;
368 	uint_t		ts_free_cancel_again;
369 	uint_t		ts_free_cancel_scan;
370 	uint_t		ts_free_cancel_hit;
371 
372 	uint_t		ts_commit;
373 	uint_t		ts_commit_hit;
374 
375 	uint_t		ts_logmap_roll_dev;
376 	uint_t		ts_logmap_roll_dev_scan;
377 	uint_t		ts_logmap_roll_dev_hit;
378 
379 	uint_t		ts_logmap_roll_sud;
380 	uint_t		ts_logmap_roll_sud_hit;
381 
382 	uint_t		ts_logmap_ud_done;
383 	uint_t		ts_logmap_ud_done_scan;
384 
385 	uint_t		ts_logmap_ud_wait;
386 	uint_t		ts_logmap_ud_wait_hit;
387 
388 	uint_t		ts_logmap_ud_commit;
389 	uint_t		ts_logmap_ud_commit_scan;
390 
391 	uint_t		ts_logmap_cancel;
392 	uint_t		ts_logmap_cancel_scan;
393 	uint_t		ts_logmap_cancel_hit;
394 
395 	uint_t		ts_logmap_iscancel;
396 	uint_t		ts_logmap_iscancel_scan;
397 	uint_t		ts_logmap_iscancel_hit;
398 
399 	uint_t		ts_logscan;
400 	uint_t		ts_logscan_ud;
401 	uint_t		ts_logscan_delta;
402 	uint_t		ts_logscan_cancel;
403 	uint_t		ts_logscan_commit;
404 
405 	/* trans_thread.c */
406 	uint_t		ts_prewrite;
407 	uint_t		ts_prewrite_read;
408 	uint_t		ts_prewrite_write;
409 	uint_t		ts_trans_roll;
410 	uint_t		ts_trans_roll_wait;
411 	uint_t		ts_trans_roll_wait_nada;
412 	uint_t		ts_trans_roll_wait_slow;
413 	uint_t		ts_trans_roll_force;
414 	uint_t		ts_trans_roll_nsud;
415 	uint_t		ts_trans_roll_ref;
416 	uint_t		ts_trans_roll_full;
417 	uint_t		ts_trans_roll_logmap;
418 	uint_t		ts_trans_roll_read;
419 	uint_t		ts_trans_roll_reread;
420 	uint_t		ts_trans_roll_wait_inuse;
421 	uint_t		ts_trans_roll_prewrite;
422 	uint_t		ts_trans_roll_write;
423 
424 	/* trans_top.c */
425 	uint_t		ts_delta;
426 	uint_t		ts_ud_delta;
427 	uint_t		ts_ud_delta_log;
428 	uint_t		ts_cancel;
429 	uint_t		ts_iscancel;
430 	uint_t		ts_error;
431 	uint_t		ts_iserror;
432 	uint_t		ts_beginsync;
433 	uint_t		ts_active;
434 	uint_t		ts_activesync;
435 	uint_t		ts_beginasync;
436 	uint_t		ts_endsync;
437 	uint_t		ts_wantin;
438 	uint_t		ts_endasync;
439 	uint_t		ts_read;
440 	uint_t		ts_read_roll;
441 	uint_t		ts_readmt;
442 	uint_t		ts_write;
443 	uint_t		ts_writemt;
444 	uint_t		ts_writemt_done;
445 	uint_t		ts_log;
446 
447 	/* trans_log.c */
448 	uint_t		ts_logcommitdb;
449 
450 	uint_t		ts_push_dirty_bp;
451 	uint_t		ts_push_dirty_bp_extra;
452 	uint_t		ts_push_dirty_bp_fail;
453 
454 	uint_t		ts_alloc_bp;
455 	uint_t		ts_alloc_bp_free;
456 
457 	uint_t		ts_find_bp;
458 	uint_t		ts_find_bp_scan;
459 	uint_t		ts_find_bp_hit;
460 
461 	uint_t		ts_find_read_lof;
462 	uint_t		ts_find_read_lof_scan;
463 	uint_t		ts_find_read_lof_hit;
464 
465 	uint_t		ts_get_read_bp;
466 	uint_t		ts_get_read_bp_wr;
467 	uint_t		ts_get_read_bp_rd;
468 
469 	uint_t		ts_extend_write_bp;
470 	uint_t		ts_extend_write_bp_hit;
471 
472 	uint_t		ts_storebuf;
473 	uint_t		ts_fetchbuf;
474 	uint_t		ts_round_commit;
475 	uint_t		ts_push_commit;
476 
477 	uint_t		ts_inval_range;
478 	uint_t		ts_inval_range_scan;
479 	uint_t		ts_inval_range_hit;
480 
481 	uint_t		ts_writelog;
482 	uint_t		ts_writelog_max;
483 
484 	uint_t		ts_readlog;
485 	uint_t		ts_readlog_max;
486 
487 	uint_t		ts_get_write_bp;
488 	uint_t		ts_get_write_bp_steal;
489 
490 	uint_t		ts_writesync;
491 	uint_t		ts_writesync_log;
492 	uint_t		ts_writesync_nolog;
493 
494 	uint_t		ts_longmof_cnt;
495 
496 } transstats;
497 
/*
 * Statistics hooks for the global `transstats' counters.
 *
 *	TRANSSTATS(f)		increment counter f
 *	TRANSSTATSADD(f, n)	add n to counter f
 *	TRANSSTATSMAX(m, v)	raise high-water mark m to v if v is larger
 *				(v is evaluated twice; avoid side effects)
 *
 * All three expand to nothing unless DEBUG is defined, so they must be
 * used as complete statements terminated by `;'.
 *
 * TRANSSTATSMAX is wrapped in do { } while (0) so that it is a single
 * statement: a bare `if (...) stmt;' would swallow a following `else'
 * and, with the caller's own `;', break inside an if/else.
 */
#ifdef	DEBUG
#define	TRANSSTATS(f)		(transstats.f++)
#define	TRANSSTATSADD(f, n)	(transstats.f += (n))
#define	TRANSSTATSMAX(m, v)	\
	do { \
		if ((v) > transstats.m) \
			transstats.m = (v); \
	/* CONSTCOND */ \
	} while (0)
#else
#define	TRANSSTATS(f)
#define	TRANSSTATSADD(f, n)
#define	TRANSSTATSMAX(m, v)
#endif /* DEBUG */
509 
510 /*
511  * MAP TYPES
512  *
513  * Stored in mt_map_t as mtm_type.  mt_unit_t has a pointer for the
514  * delta, ud, log, mata and shadow maps.
515  */
513 enum maptypes	{
514 	deltamaptype, udmaptype, logmaptype, matamaptype, shadowmaptype
515 };
516 
/*
 * MAP
 *
 * DELTAMAP_NHASH and LOGMAP_NHASH are hash-anchor counts.
 *
 * MAP_INDEX(dev, mof, mtm) gives the hash bucket index for the map
 * block containing `mof' on `dev': the map block number plus the device
 * number, masked with mtm_nhash-1.  The mask only covers every bucket
 * when mtm_nhash is a power of two, as both counts above are.
 *
 * MAP_HASH(dev, mof, mtm) gives the address of that bucket in mtm_hash.
 *
 * Every argument is parenthesized in the expansion so that expressions
 * such as `&map' or `un->un_logmap' may be passed for mtm.
 */
#define	DELTAMAP_NHASH	(512)
#define	LOGMAP_NHASH	(2048)
#define	MAP_INDEX(dev, mof, mtm) \
	((((mof) >> MAPBLOCKSHIFT) + (dev)) & ((mtm)->mtm_nhash-1))
#define	MAP_HASH(dev, mof, mtm) \
	((mtm)->mtm_hash + MAP_INDEX(dev, mof, mtm))
526 
/*
 * A map: a doubly linked list of mapentries plus a hash over them (see
 * MAP_HASH), together with the per-log transaction state.
 *
 * mtm_next/mtm_prev must stay first; they mirror me_next/me_prev at the
 * start of mapentry_t.
 */
527 typedef struct mt_map {
528 	/*
529 	 * anchor doubly linked list this map's entries -- MUST BE FIRST
530 	 */
531 	mapentry_t	*mtm_next;
532 	mapentry_t	*mtm_prev;
533 
534 	int		mtm_flags;	/* generic flags */
535 	int		mtm_ref;	/* PTE like ref bit */
536 	uint_t		mtm_debug;	/* set at create time */
537 	uint_t		mtm_age;	/* mono-inc; tags mapentries */
538 	mapentry_t	*mtm_cancel;	/* to be canceled at commit */
539 	uint_t		mtm_nhash;	/* # of hash anchors */
540 	mapentry_t	**mtm_hash;	/* array of singly linked lists */
541 	struct topstats	*mtm_tops;	/* trans ops - enabled by an ioctl */
542 	int		mtm_nme;	/* # of mapentries */
543 	int		mtm_nmet;	/* # of mapentries this transaction */
544 	int		mtm_nud;	/* # of active userdata writes */
545 	int		mtm_nsud;	/* # of userdata scanned deltas */
546 	md_dev64_t	mtm_dev;	/* device identifying map */
547 
548 	/*
549 	 * the following are protected by the global map_mutex
550 	 */
551 	struct mt_map	*mtm_mapnext;	/* singly linked list of all maps */
552 	uint_t		mtm_refcnt;	/* reference count to this map */
553 	enum maptypes	mtm_type;	/* type of map */
554 
555 	/*
556 	 * used after logscan to set the log's tail
557 	 */
558 	off_t		mtm_tail_lof;
559 	size_t		mtm_tail_nb;
560 
561 	/*
562 	 * debug field for Scan test
563 	 */
564 	off_t		mtm_trimlof;	/* log was trimmed to this lof */
565 	off_t		mtm_trimtail;	/* tail lof before trimming */
566 	off_t		mtm_trimalof;	/* lof of last allocation delta */
567 	off_t		mtm_trimclof;	/* lof of last commit delta */
568 	off_t		mtm_trimrlof;	/* lof of last rolled delta */
569 	struct ml_unit	*mtm_ul;	/* log unit for this map */
570 
571 	/*
572 	 * moby trans stuff
573 	 */
574 	uint_t			mtm_tid;
575 	uint_t			mtm_committid;
576 	ushort_t		mtm_closed;
577 	ushort_t		mtm_seq;
578 	int			mtm_wantin;
579 	int			mtm_active;
580 	int			mtm_activesync;
581 	uint_t			mtm_dirty;
582 	kmutex_t		mtm_lock;
583 	kcondvar_t		mtm_cv_commit;
584 	kcondvar_t		mtm_cv_next;
585 	kcondvar_t		mtm_cv_eot;
586 
587 	/*
588 	 * mutex that protects all the fields in mt_map except
589 	 * mtm_mapnext and mtm_refcnt
590 	 */
591 	kmutex_t	mtm_mutex;
592 	kcondvar_t	mtm_cv;		/* generic conditional */
593 
594 	/*
595 	 * rw lock for the mapentry fields agenext and locnext
596 	 * NOTE(review): mapentry_t has me_agenext but no `locnext'
597 	 * member in this header -- this comment may be stale.
598 	 */
597 	md_krwlock_t	mtm_rwlock;
598 	/*
599 	 * DEBUG: runtestscan
600 	 */
601 	kmutex_t	mtm_scan_mutex;
602 } mt_map_t;
603 
604 /*
605  * mtm_flags -- single-bit flags; the names refer to the roll activity
606  * (see md_logmap_start_roll/kill_roll/forceroll).
607  */
607 #define	MTM_ROLL_EXIT		(0x00000001)
608 #define	MTM_ROLL_RUNNING	(0x00000002)
609 #define	MTM_FORCE_ROLL		(0x00000004)
610 
/*
 * Generic range checking macros
 *
 * A range is given as (offset, length) and covers [offset, offset+length).
 *
 *	OVERLAP(sof, snb, dof, dnb)	true if the two ranges share at
 *					least one byte
 *	WITHIN(sof, snb, dof, dnb)	true if range s lies entirely
 *					inside range d
 *
 * The DATA/ME forms compare a (mof, hnb) data range against the delta
 * range (me_mof, me_nb) of mapentry `me'.
 *
 * Every argument is parenthesized in the expansions so that expression
 * arguments (e.g. `a | b', `c ? x : y', `&entry') keep their meaning.
 * Arguments are evaluated more than once; avoid side effects.
 */
#define	OVERLAP(sof, snb, dof, dnb) \
	(((sof) >= (dof) && (sof) < ((dof) + (dnb))) || \
	((dof) >= (sof) && (dof) < ((sof) + (snb))))

#define	WITHIN(sof, snb, dof, dnb) \
	(((sof) >= (dof)) && (((sof) + (snb)) <= ((dof) + (dnb))))

#define	DATAoverlapME(mof, hnb, me) \
	(OVERLAP(mof, hnb, (me)->me_mof, (me)->me_nb))
#define	MEwithinDATA(me, mof, hnb) \
	(WITHIN((me)->me_mof, (me)->me_nb, mof, hnb))
#define	DATAwithinME(mof, hnb, me) \
	(WITHIN(mof, hnb, (me)->me_mof, (me)->me_nb))
623 
624 
/*
 * Metatrans unit.  Starts with the common mdc_unit header and names a
 * master device (un_m_*) and a log device (un_l_*).  The members after
 * un_spare[] are in-core only and must stay at the end.
 */
625 typedef struct mt_unit {
626 	struct mdc_unit	c;		/* common stuff */
627 	/*
628 	 * infrastructure
629 	 */
630 	mt_flags_t	un_flags;
631 	/*
632 	 * log and master device
633 	 */
634 	mdkey_t		un_m_key;
635 	md_dev64_t	un_m_dev;
636 	mdkey_t		un_l_key;
637 	md_dev64_t	un_l_dev;
638 	daddr32_t	un_l_sblk;	/* start block */
639 	daddr32_t	un_l_pwsblk;	/* prewrite start block */
640 	daddr32_t	un_l_nblks;	/* # of usable log blocks */
641 	daddr32_t	un_l_tblks;	/* total log blocks */
642 	daddr32_t	un_l_head;	/* sector offset of log head */
643 	daddr32_t	un_l_tail;	/* sector offset of log tail */
644 	uint_t		un_l_resv;	/* current log reservations */
645 	uint_t		un_l_maxresv;	/* max log reservations */
646 	uint_t		un_l_maxtransfer; /* maximum transfer at init */
647 	mddb_recid_t	un_l_recid;	/* database id */
648 	mt_l_error_t	un_l_error;	/* error state */
649 	struct timeval32 un_l_timestamp; /* time of last log state chg */
650 	md_dev64_t	un_s_dev;	/* shadow device for testing only */
651 	mt_debug_t	un_debug;	/* debug flags; set at create */
652 	md_dev64_t	un_dev;		/* this metatrans device */
653 	int		un_logreset;	/* part of _FIOLOGRESET ioctl stuff */
654 	struct timeval32 un_timestamp;	/* time of last trans state change */
655 	/*
656 	 * spares
657 	 */
658 	ulong_t		un_spare[16];
659 	/*
660 	 * The following are in-core only elements.
661 	 * In-core elements must always be at the end
662 	 * of this data structure.
663 	 */
664 	struct mt_unit	*un_next;	/* anchored in log unit */
665 	struct ml_unit	*un_l_unit;	/* log device unit struct */
666 	struct ufstrans *un_ut;		/* ufstrans struct */
667 	mt_map_t	*un_deltamap;	/* deltamap */
668 	mt_map_t	*un_udmap;	/* userdata map */
669 	mt_map_t	*un_logmap;	/* logmap includes moby trans stuff */
670 	mt_map_t	*un_matamap;	/* optional - matamap */
671 	mt_map_t	*un_shadowmap;	/* optional - shadowmap */
672 } mt_unit_t;
673 
674 
/*
 * 32-bit layout of the metatrans unit: device numbers are dev32_t and
 * the pointers of mt_unit_t appear as caddr32_t xx_ placeholders
 * interleaved with the data members.
 *
 * NOTE(review): unlike ml_unit32_od_t this structure is not wrapped in
 * the conditional #pragma pack(4) -- confirm that is intentional.
 */
675 typedef struct mt_unit32_od {
676 	mdc_unit32_od_t	c;		/* common stuff */
677 	/*
678 	 * infrastructure
679 	 */
680 	mt_flags_t	un_flags;
681 	caddr32_t	xx_un_next;	/* anchored in log unit */
682 	/*
683 	 * log and master device
684 	 */
685 	mdkey_t		un_m_key;
686 	dev32_t		un_m_dev;
687 	mdkey_t		un_l_key;
688 	dev32_t		un_l_dev;
689 	daddr32_t	un_l_sblk;	/* start block */
690 	daddr32_t	un_l_pwsblk;	/* prewrite start block */
691 	daddr32_t	un_l_nblks;	/* # of usable log blocks */
692 	daddr32_t	un_l_tblks;	/* total log blocks */
693 	daddr32_t	un_l_head;	/* sector offset of log head */
694 	daddr32_t	un_l_tail;	/* sector offset of log tail */
695 	uint_t		un_l_resv;	/* current log reservations */
696 	uint_t		un_l_maxresv;	/* max log reservations */
697 	uint_t		un_l_maxtransfer; /* maximum transfer at init */
698 	mddb_recid_t	un_l_recid;	/* database id */
699 	caddr32_t	xx_un_l_unit;	/* log device unit struct */
700 	mt_l_error_t	un_l_error;	/* error state */
701 	struct timeval32 un_l_timestamp;	/* time of last log state chg */
702 	dev32_t		un_s_dev;	/* shadow device for testing only */
703 
704 	mt_debug_t	un_debug;	/* debug flags; set at create */
705 	caddr32_t	xx_un_ut;	/* ufstrans struct */
706 	dev32_t		un_dev;		/* this metatrans device */
707 	caddr32_t	xx_un_deltamap;	/* deltamap */
708 	caddr32_t	xx_un_udmap;	/* userdata map */
709 	caddr32_t	xx_un_logmap;	/* logmap includes moby trans stuff */
710 	caddr32_t	xx_un_matamap;	/* optional - matamap */
711 	caddr32_t	xx_un_shadowmap; /* optional - shadowmap */
712 	int		un_logreset;	/* part of _FIOLOGRESET ioctl stuff */
713 	struct timeval32 un_timestamp;	/* time of last trans state change */
714 	/*
715 	 * spares
716 	 */
717 	uint_t		un_spare[16];
718 } mt_unit32_od_t;
719 
720 /*
721  * prewrite info (per buf); stored as array at beginning of prewrite area
722  *
723  * pw_secmap is a 16-bit bitmap with one bit per sector of the buffer.
724  */
723 struct prewrite {
724 	int		pw_bufsize;	/* every buffer is this size */
725 	daddr32_t	pw_blkno;	/* block number */
726 	dev32_t		pw_dev;		/* device to write to */
727 	ushort_t	pw_secmap;	/* bitmap 	*/
728 					/* 1's write this sector in the buf */
729 	ushort_t	pw_flags;	/* PW_* flags */
730 };
731 /*
732  * pw_flags
733  */
734 #define	PW_INUSE	0x0001	/* this prewrite buf is in use */
735 #define	PW_WAIT		0x0002	/* write in progress; wait for completion */
736 #define	PW_REM		0x0004	/* remove deltas */
737 
738 /*
739  * log state
740  *
741  * All members are fixed-width 32-bit types.  The ls_bol_lof, ls_eol_lof,
742  * ls_maxtransfer and ls_pwsblk members exist for the dumplog tool.
743  */
741 struct  logstate {
742 	off32_t		ls_head_lof;	/* log head */
743 	uint_t		ls_head_ident;	/* log head ident */
744 	uint_t		ls_head_tid;	/* log head tid */
745 	uint_t		ls_chksum;	/* checksum of structure */
746 	off32_t		ls_bol_lof;	/* needed for TS_Tools/dumplog.c */
747 	off32_t		ls_eol_lof;	/* needed for TS_Tools/dumplog.c */
748 	uint_t		ls_maxtransfer;	/* needed for TS_Tools/dumplog.c */
749 	daddr32_t	ls_pwsblk;	/* needed for TS_Tools/dumplog.c */
750 };
751 
752 /*
753  * log state defines
754  */
755 #define	LS_SECTORS	(2)	/* number of sectors used by state area */
756 
757 /*
758  * un_debug
759  *	MT_TRANSACT		- keep per thread accounting of transactions
760  *	MT_MATAMAP		- double check deltas and ops against matamap
761  *	MT_WRITE_CHECK		- check master+deltas against metadata write
762  *	MT_LOG_WRITE_CHECK	- read after write for log writes
763  *	MT_CHECK_MAP		- check map after every insert/delete
764  *	MT_TRACE		- trace transactions (used with MT_TRANSACT)
765  *	MT_SIZE			- fail on size errors (used with MT_TRANSACT)
766  *	MT_NOASYNC		- force every op to be sync
767  *	MT_FORCEROLL		- forcibly roll the log after every commit
768  *	MT_SCAN			- running runtestscan; special case as needed
769  *	MT_SHADOW		- copy metatrans device writes to shadow dev.
770  *	MT_PREWRITE		- process prewrite area every roll
771  */
772 #define	MT_TRANSACT		(0x00000001)
773 #define	MT_MATAMAP		(0x00000002)
774 #define	MT_WRITE_CHECK		(0x00000004)
775 #define	MT_LOG_WRITE_CHECK	(0x00000008)
776 #define	MT_CHECK_MAP		(0x00000010)
777 #define	MT_TRACE		(0x00000020)
778 #define	MT_SIZE			(0x00000040)
779 #define	MT_NOASYNC		(0x00000080)
780 #define	MT_FORCEROLL		(0x00000100)
781 #define	MT_SCAN			(0x00000200)
782 #define	MT_SHADOW		(0x00000400)
783 #define	MT_PREWRITE		(0x00000800)
784 
785 /* Type 2 trans records: one code for the trans unit, one for the log */
786 #define	TRANS_REC	1
787 #define	LOG_REC		2
788 
789 #ifdef _KERNEL
790 
/*
 * Per-request state saved for the parent buf of a trans I/O.  Begins
 * with DAEMON_QUEUE; NOTE(review): presumably so it can be placed on an
 * md daemon queue -- confirm against the DAEMON_QUEUE definition.
 */
791 typedef struct md_tps {			/* trans parent save */
792 	DAEMON_QUEUE
793 	struct mt_unit	*ps_un;		/* trans unit of the request */
794 	mdi_unit_t	*ps_ui;		/* its in-core unit info */
795 	buf_t		*ps_bp;		/* the parent buf */
796 	size_t		ps_count;	/* Used for testing only. */
797 	kmutex_t	ps_mx;		/* protects ps_count. */
798 } md_tps_t;
799 
800 /*
801  * Log layer protos -- trans_log.c
802  *
803  * NOTE(review): ldl_snarf_done() is declared with an empty parameter
804  * list, which leaves its arguments unchecked; use (void) if the
805  * definition takes none.
806  */
803 extern void		_init_ldl(void);
804 extern void		_fini_ldl(void);
805 extern void		md_ldl_round_commit(mt_unit_t *);
806 extern void		md_ldl_push_commit(mt_unit_t *);
807 extern int		md_ldl_need_commit(ml_unit_t *);
808 extern int		md_ldl_has_space(ml_unit_t *, mapentry_t *);
809 extern void		md_ldl_write(mt_unit_t *, caddr_t, offset_t,
810 					mapentry_t *);
811 extern void		md_ldl_waito(ml_unit_t *);
812 extern int		md_ldl_read(ml_unit_t *, caddr_t, offset_t, off_t,
813 					mapentry_t *);
814 extern void		md_ldl_sethead(ml_unit_t *, off_t, uint_t,
815 					struct buf *);
816 extern void		md_ldl_settail(ml_unit_t *, off_t, off_t,
817 					struct buf *);
818 extern void		ldl_setpwvalid(ml_unit_t *);
819 extern int		ldl_build_incore(ml_unit_t *, int);
820 extern ml_unit_t	*ldl_findlog(mddb_recid_t);
821 extern mddb_recid_t	ldl_create(mdkey_t, mt_unit_t *);
822 extern void		ldl_utadd(mt_unit_t *);
823 extern int		ldl_open_dev(mt_unit_t *, ml_unit_t *);
824 extern void		ldl_close_dev(ml_unit_t *);
825 extern int		ldl_snarf(void);
826 extern void		ldl_logscan_seterror(ml_unit_t *);
827 extern void		ldl_logscan_saverror(ml_unit_t *);
828 extern size_t		md_ldl_logscan_nbcommit(off_t);
829 extern int		md_ldl_logscan_read(ml_unit_t *, off_t *, size_t,
830 					caddr_t);
831 extern void		md_ldl_logscan_begin(ml_unit_t *, daddr_t);
832 extern void		md_ldl_logscan_end(ml_unit_t *);
833 extern int		md_ldl_need_roll(ml_unit_t *);
834 extern int		md_ldl_empty(ml_unit_t *);
835 extern int		ldl_pwvalid(ml_unit_t *);
836 extern void		ldl_waitscan(ml_unit_t *);
837 extern void		ldl_errorbp(set_t, buf_t *, char *);
838 extern void		md_ldl_seterror(ml_unit_t *);
839 extern int		ldl_isherror(ml_unit_t *);
840 extern int		ldl_iserror(ml_unit_t *);
841 extern int		ldl_isanyerror(ml_unit_t *);
842 extern void		ldl_start_scan(mt_unit_t *);
843 extern void		ldl_opened_trans(mt_unit_t *, int);
844 extern void		ldl_open_trans(mt_unit_t *, int);
845 extern int		ldl_logreset(mt_unit_t *, buf_t *);
846 extern void		ldl_close_trans(mt_unit_t *);
847 extern size_t		md_ldl_bufsize(ml_unit_t *);
848 extern void		ldl_open_underlying(mt_unit_t *);
849 extern void		ldl_snarf_done();
850 extern int		ldl_reset(mt_unit_t *, int, int);
851 extern void		ldl_cleanup(ml_unit_t *);
852 
853 /*
854  * trans driver layer -- mdtrans.c
855  *
856  * Allocation wrappers (md_trans_*alloc*, md_trans_free), buf completion
857  * routines taking a struct buf, and unit open/close/attach/detach/reset
858  * entry points.
859  */
856 extern kmem_cache_t	*trans_child_cache;
857 extern void		*md_trans_zalloc(size_t);
858 extern void		*md_trans_zalloc_nosleep(size_t);
859 extern void		*md_trans_alloc(size_t);
860 extern void		*md_trans_alloc_nosleep(size_t);
861 extern void		md_trans_free(void *, size_t);
862 extern int		md_trans_not_wait(struct buf *cb);
863 extern int		md_trans_not_done(struct buf *cb);
864 extern int		md_trans_wait(struct buf *cb);
865 extern int		trans_done(struct buf *cb);
866 extern int		trans_done_shadow(struct buf *cb);
867 extern void		trans_child_init(struct buf *bp);
868 extern void		trans_close_all_devs(mt_unit_t *);
869 extern int		trans_open_all_devs(mt_unit_t *);
870 extern int		trans_build_incore(void *, int);
871 extern void		trans_commit(mt_unit_t *, int);
872 extern int		trans_detach(mt_unit_t *, int);
873 extern void		trans_attach(mt_unit_t *, int);
874 extern int		trans_reset(mt_unit_t *, minor_t, int, int);
875 
876 /*
877  * transaction ioctl -- trans_ioctl.c
878  */
879 
880 /*
881  * rename named service functions -- these are function declarations
882  * written with the md_ren_*_svc_t types (NOTE(review): presumably
883  * function typedefs from <sys/lvm/md_rename.h> -- confirm).
884  */
881 md_ren_list_svc_t	trans_rename_listkids;
882 md_ren_svc_t		trans_rename_check;
883 md_ren_roleswap_svc_t	trans_renexch_update_kids;
884 md_ren_roleswap_svc_t	trans_rename_update_self;
885 md_ren_roleswap_svc_t	trans_exchange_parent_update_to;
886 md_ren_roleswap_svc_t	trans_exchange_self_update_from_down;
887 
888 /*
889  * transaction op layer -- trans_top.c
890  *
891  * top_read and top_write share one signature.
892  */
891 extern void	_init_md_top(void);
892 extern void	_fini_top(void);
893 extern void	top_read(struct buf *, char *, mt_unit_t *, int, void *);
894 extern void	md_top_read_roll(struct buf *, mt_unit_t *, ushort_t *);
895 extern void	top_build_incore(mt_unit_t *);
896 extern void	top_reset(mt_unit_t *, int, int);
897 extern void	top_write(struct buf *, char *, mt_unit_t *, int, void *);
898 
899 /*
900  * map layer -- trans_delta.c
901  *
902  * Ranges are passed as an (offset_t, off_t) pair, preceded by a
903  * md_dev64_t in the logmap routines.
904  *
905  * NOTE(review): md_logmap_ud_wait() and the callback parameter of
906  * md_deltamap_add() are declared with empty parameter lists, so their
907  * arguments are not type checked.
908  */
902 extern void		md_map_free_entries(mt_map_t *);
903 extern int		md_matamap_overlap(mt_map_t *, offset_t, off_t);
904 extern int		md_matamap_within(mt_map_t *, offset_t, off_t);
905 extern int		md_deltamap_need_commit(mt_map_t *);
906 extern void		md_deltamap_add(mt_map_t *, offset_t, off_t, delta_t,
907 				int (*)(), uintptr_t);
908 extern mapentry_t	*md_deltamap_remove(mt_map_t *, offset_t, off_t);
909 extern void		md_deltamap_del(mt_map_t *, offset_t, off_t);
910 extern void		md_deltamap_push(mt_unit_t *);
911 extern int		md_logmap_need_commit(mt_map_t *);
912 extern int		md_logmap_need_roll_async(mt_map_t *);
913 extern int		md_logmap_need_roll_sync(mt_map_t *);
914 extern int		md_logmap_need_roll(mt_map_t *);
915 extern void		md_logmap_start_roll(mt_unit_t *);
916 extern void		md_logmap_kill_roll(mt_map_t *);
917 extern void		md_logmap_forceroll(mt_map_t *);
918 extern int		md_logmap_overlap(mt_map_t *, md_dev64_t, offset_t,
919 				off_t);
920 extern void		md_logmap_remove_roll(mt_map_t *, md_dev64_t, offset_t,
921 				off_t);
922 extern int		md_logmap_next_roll(mt_map_t *, offset_t *,
923 				md_dev64_t *);
924 extern void		md_logmap_list_get(mt_map_t *, md_dev64_t, offset_t,
925 				off_t, mapentry_t **);
926 extern void		md_logmap_list_get_roll(mt_map_t *, md_dev64_t,
927 				offset_t, off_t, mapentry_t **);
928 extern void		md_logmap_list_put(mt_map_t *, mapentry_t *);
929 extern void		md_logmap_read_mstr(ml_unit_t *, struct buf *, int,
930 				void *);
931 extern void		md_logmap_secmap_roll(mapentry_t *, offset_t,
932 				ushort_t *);
933 extern int		logmap_read_log(ml_unit_t *, char *, offset_t, off_t,
934 				mapentry_t *);
935 extern void		md_logmap_make_space(mt_map_t *, ml_unit_t *,
936 				mapentry_t *);
937 extern void		md_logmap_add(mt_unit_t *, md_dev64_t, char *, offset_t,
938 				mapentry_t *);
939 extern void		md_logmap_add_ud(mt_unit_t *, md_dev64_t, char *,
940 				offset_t, mapentry_t *);
941 extern void		md_logmap_commit(mt_unit_t *);
942 extern void		md_logmap_sethead(mt_map_t *, ml_unit_t *,
943 				struct buf *);
944 extern void		md_logmap_roll_dev(mt_map_t *, ml_unit_t *ul,
945 				md_dev64_t);
946 extern void		md_logmap_roll_sud(mt_map_t *, ml_unit_t *ul,
947 				md_dev64_t, offset_t, off_t);
948 extern int		md_logmap_ud_done(struct buf *);
949 extern void		md_logmap_ud_wait();
950 extern void		md_logmap_cancel(mt_unit_t *, md_dev64_t, offset_t,
951 				off_t);
952 extern int		md_logmap_iscancel(mt_map_t *, md_dev64_t, offset_t,
953 				off_t);
954 extern void		md_logmap_logscan(mt_unit_t *, daddr_t);
955 extern void		map_build_incore(mt_unit_t *);
956 extern void		map_reset(mt_unit_t *, int, int);
957 extern void		_init_md_map(void);
958 extern void		_fini_map(void);
959 
960 /*
961  * scan and roll threads -- trans_thread.c
962  *
963  * The roll entry points take the log unit (ml_unit_t); the scan entry
964  * point takes the trans unit (mt_unit_t).
965  */
963 extern void	md_trans_roll(ml_unit_t *);
964 extern void	trans_scan(mt_unit_t *);
965 extern void	trans_roll_prewrite(ml_unit_t *);
966 
967 #endif	/* _KERNEL */
968 
969 #ifdef	__cplusplus
970 }
971 #endif
972 
973 #endif	/* _SYS_MD_TRANS_H */
974