xref: /illumos-gate/usr/src/cmd/th_tools/th_define.c (revision b77a2dc4455ca028e52fdf96385a530a2d168316)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/time_impl.h>
27 #include <sys/wait.h>
28 #include <stdio.h>
29 #include <stdio_ext.h>
30 #include <stdlib.h>
31 #include <stdarg.h>
32 #include <ctype.h>
33 #include <time.h>
34 #include <fcntl.h>
35 #include <sys/stat.h>
36 #include <sys/resource.h>
37 #include <limits.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <errno.h>
41 #include <signal.h>
42 #include <libdevinfo.h>
43 #define	_KERNEL
44 #include <sys/dditypes.h>
45 #include <sys/sunddi.h>
46 #include <sys/bofi.h>
47 
/*
 * Device path used to reach the bofi driver.
 * NOTE(review): presumably the driver's control node - confirm against the
 * open() call elsewhere in this file.
 */
#define	BOFI_DEV	"/devices/pseudo/bofi@0:bofi,ctl"

/*
 * Allocate room for num objects of type s, aligned to the size of a pointer.
 * Evaluates to the memalign() result cast to (s *); the memory is not
 * initialised and the result may be NULL, so callers must check it.
 */
#define	GETSTRUCT(s, num)	\
	((s *) memalign(sizeof (void*), (num) * sizeof (s)))

#define	MAXEDEFS	(0x64)		/* controls max no of concurrent edefs */
#define	DFLTLOGSZ	(0x4000)	/* default size of an access log */
#define	DFLT_NONPIO_LOGSZ	(0x400)	/* default size of a log */
#define	MAXALRMCALL	(0x1000ull)	/* alarm does not permit big values */
#define	MIN_REPORT_TIME	(5)		/* min time to wait for edef status */
#define	DISTRIB_CUTOFF	(3)		/* useful when reducing a log */
#define	myLLMAX		(0x7fffffffffffffffll)	/* largest signed 64-bit value */
#define	myULLMAX	(0xffffffffffffffffull)	/* all 64 bits set */

/*
 * default interval to wait between kicking off workload and injecting fault
 */
#define	DEFAULT_EDEF_SLEEP 3
/*
 * When generating dma corruptions, it is best to corrupt each double word
 * individually for control areas - however for data areas this can be
 * excessive and would generate so many cases we would never finish the run.
 * So set a cut-off value where we switch from corrupting each double word
 * separately to corrupting the lot in one go. 0x100 bytes seems a good value
 * on the drivers we have seen so far.
 */
#define	DMA_INDIVIDUAL_CORRUPT_CUTOFF 0x100
75 
/*
 * Everything kept for one access-logging errdef: the criteria handed to the
 * driver, the most recent status and the buffer of logged accesses.
 */
struct collector_def {
	struct bofi_errdef ed;		/* definition of the log criteria */
	struct bofi_errstate es;	/* the current status of the log */
	struct acc_log_elem *lp;	/* array of logged accesses */
	/* NOTE(review): presumably the process servicing this log - confirm */
	pid_t pid;
};
82 
/*
 * Bitmask of the *POLICY values below. It selects how logged accesses are
 * turned into errdefs: reduce_log() consults the size and
 * common/uncommon/median bits, define_nerrs() consults MAXIMALPOLICY and
 * OPERATORSPOLICY. The ptypes[] table maps user-visible names onto these
 * same bit values.
 */
static uint16_t policy;

/*
 * The low four bits double as the transfer size in bytes (1, 2, 4, 8):
 * reduce_log() tests them directly against the size of a logged access.
 */
#define	BYTEPOLICY	(0xf)
#define	MULTIPOLICY	(0x10)
#define	SIZEPOLICY	(BYTEPOLICY|MULTIPOLICY)
#define	UNBIASEDPOLICY	0x20
#define	UNCOMMONPOLICY	0x40
#define	COMMONPOLICY	0x80
#define	MEDIANPOLICY	0x100
#define	MAXIMALPOLICY	0x200
#define	OPERATORSPOLICY	0x400
#define	VALIDPOLICY	(0x7ff)	/* the union of every policy bit above */
95 
/*
 * A (name, code) pair. Tables of these are terminated by a {0, 0} entry and
 * are searched by name (str_to_bm()) or by code (define_nerrs()).
 */
typedef
struct coding {
	char	*str;
	uint_t	code;
} coding_t;

/*
 * Policy names; the codes are the *POLICY bit values.
 */
static coding_t ptypes[] = {
	{"onebyte", 0x1}, {"twobyte", 0x2},
	{"fourbyte", 0x4}, {"eightbyte", 0x8},
	{"multibyte", 0x10}, {"unbiased", 0x20}, {"uncommon", 0x40},
	{"common", 0x80}, {"median", 0x100}, {"maximal", 0x200},
	{"operators", 0x400},  {0, 0}
};
/*
 * Access type names, accepted in lower and upper case. define_nerrs() maps
 * a code back to a name by taking the first match, i.e. the lower case form.
 */
static coding_t atypes[] = {
	{"pio_r", BOFI_PIO_R}, {"pio_w", BOFI_PIO_W},
	{"dma_r", BOFI_DMA_R}, {"dma_w", BOFI_DMA_W},
	{"pio", BOFI_PIO_RW}, {"dma", BOFI_DMA_RW},
	{"log", BOFI_LOG}, {"intr", BOFI_INTR},
	{"PIO_R", BOFI_PIO_R}, {"PIO_W", BOFI_PIO_W},
	{"DMA_R", BOFI_DMA_R}, {"DMA_W", BOFI_DMA_W},
	{"PIO", BOFI_PIO_RW}, {"DMA", BOFI_DMA_RW},
	{"LOG", BOFI_LOG}, {"INTR", BOFI_INTR}, {0, 0}
};
/*
 * All operators. The order matters: define_nerrs() uses optypes[3] as its
 * "XOR" fallback.
 */
static coding_t optypes[] = {
	{"EQ", BOFI_EQUAL}, {"AND", BOFI_AND}, {"OR", BOFI_OR},
	{"XOR", BOFI_XOR}, {"NO", BOFI_NO_TRANSFER},
	{"DELAY", BOFI_DELAY_INTR}, {"LOSE", BOFI_LOSE_INTR},
	{"EXTRA", BOFI_EXTRA_INTR}, {0, 0}
};
/*
 * Operators applied to data accesses. define_nerrs() cycles through these
 * by index modulo 4, so the table must keep exactly four real entries.
 */
static coding_t doptypes[] = {
	{"EQ", BOFI_EQUAL}, {"AND", BOFI_AND}, {"OR", BOFI_OR},
	{"XOR", BOFI_XOR}, {0, 0}
};
/*
 * Operators applied to interrupts. define_nerrs() cycles through these by
 * index modulo 3, so the table must keep exactly three real entries.
 */
static coding_t ioptypes[] = {
	{"DELAY", BOFI_DELAY_INTR}, {"LOSE", BOFI_LOSE_INTR},
	{"EXTRA", BOFI_EXTRA_INTR}, {0, 0}
};
133 
/* -1ull wraps to the largest unsigned long long value */
static const unsigned long long	DFLTLOGTIME	= -1ull; /* log forever */

/*
 * This global controls the generation of errdefs for PIO_W. The default should
 * be to only perform an access check errdef but not to corrupt writes - this
 * may trash non-FT platforms.
 */
static uint_t atype_is_default;	/* do not corrupt PIO_W by default */
static uint_t lsize_is_default;	/* set when the user has not given a size */

/*
 * Operand handed to the next "EQ" errdef built by define_op_err(); it is
 * replaced with a fresh value built from two lrand48() calls after each use.
 */
static uint64_t random_operand = 0xdeadbeafdeadbeafull;
#define	NPIO_DEFAULTS	(3)	/* number of default corruption values */
/*
 * Extra operands: every "EQ" errdef is followed by one further errdef per
 * value in this table.
 */
static longlong_t pio_default_values[NPIO_DEFAULTS] = {
	0x0ull,			/* corresponds to a line going high/low */
	0x32f1f03232f1f032ull,	/* the value returned when the fake ta is set */
	(longlong_t)(~0)	/* corresponds to a line going high/low */
};

/* msg() and dump_log() only print when their level is <= dbglvl */
static uint_t dbglvl		= 0;	/* debug this program */
/*
 * Flags raised asynchronously by kill_sighandler(): alarmed on SIGALRM,
 * killed on any other handled signal. They are written from signal context,
 * so they must be volatile sig_atomic_t for the main flow to observe the
 * update reliably.
 */
static volatile sig_atomic_t alarmed	= 0;
static volatile sig_atomic_t killed	= 0;
155 
/*
 * name of a script to call before offlining a driver being tested
 */
static char **fixup_script = 0;
/* NOTE(review): presumably the number of arguments in fixup_script - confirm */
static int	scriptargs = 0;
/* NOTE(review): presumably saved copies of main()'s argv/argc - confirm */
static char **pargv;
static int	pargc;

/*
 * Upper bound that define_nerrs() applies to the wait time it generates for
 * an errdef; its first clamp treats 0 as "no limit".
 */
static int	max_edef_wait = 0;
/*
 * When non-zero, define_nerrs() uses this in place of DEFAULT_EDEF_SLEEP as
 * the interval it writes after "-w".
 */
static int	edef_sleep = 0;
static int	do_status = 0;	/* report edef status in parsable format */
static char *user_comment = 0;

static char *Progname;
static FILE *errfile;	/* stream that msg() writes to */
static FILE *outfile;
172 
173 /*
174  * The th_define utility provides an interface to the bus_ops fault injection
175  * bofi device driver for defining error injection specifications (referred to
176  * as errdefs). An errdef corresponds to a specification of how to corrupt a
177  * device driver's accesses to its hardware. The command line arguments
178  * determine the precise nature of the fault to be injected. If the supplied
179  * arguments define a consistent errdef, the th_define process will store the
180  * errdef with the bofi driver and suspend itself until the criteria given by
181  * the errdef become satisfied (in practice, this will occur when the access
182  * counts go to zero).
183  *
184  * When the resulting errdef is activated using the th_manage(1M) user command
185  * utility, the bofi driver will act upon the errdef by matching the number of
186  * hardware accesses - specified in count, that are of the type specified in
187  * acc_types, made by instance number instance - of the driver whose name is
 * name (or by the driver instance specified by path), to the register set
189  * (or DMA handle) specified by rnumber, that lie within the range offset to
190  * offset + length from the beginning of the register set or DMA handle. It then
191  * applies operator and operand to the next failcount matching accesses.
192  *
193  * If acc_types includes LOG, th_define runs in automatic test script generation
194  * mode, and a set of test scripts (written in the Korn shell) is created and
195  * placed in a sub-directory of the current directory with the name
196  * driver.test.<id>. A separate, executable script is generated for each access
197  * handle that matches the logging criteria. The log of accesses is placed at
198  * the top of each script as a record of the session. If the current directory
199  * is not writable, file output is written to standard output. The base name of
200  * each test file is the driver name, and the extension is a number that
201  * discriminates between different access handles. A control script (with the
202  * same name as the created test directory) is generated that will run all the
203  * test scripts sequentially.
204  *
205  * Executing the scripts will install, and then activate, the resulting error
206  * definitions. Error definitions are activated sequentially and the driver
207  * instance under test is taken offline and brought back online before each test
208  * (refer to the -e option for more information). By default, logging will apply
209  * to all PIO accesses, interrupts and DMA accesses to and from areas mapped
210  * for both reading and writing, but it can be constrained by specifying
211  * additional acc_types, rnumber, offset and length. Logging will continue for
212  * count matching accesses, with an optional time limit of collect_time seconds.
213  *
214  * Either the -n or -P option must be provided. The other options are optional.
215  * If an option (other than the -a option) is specified multiple times, only
216  * the final value for the option is used. If an option is not specified, its
217  * associated value is set to an appropriate default, which will provide
218  * maximal error coverage as described below.
219  */
220 
221 /*PRINTFLIKE2*/
222 static void
223 msg(uint_t lvl, char *msg, ...)
224 {
225 #define	BUFSZ	128
226 
227 	if (lvl <= dbglvl) {
228 		int count;
229 		va_list args;
230 		char buf[BUFSZ];
231 		int	pos = 0;
232 
233 		va_start(args, msg);
234 		count = vsnprintf(buf, BUFSZ, msg, args);
235 		va_end(args);
236 		if (count > 0) {
237 			count += pos;
238 			if (count >= sizeof (buf))
239 				count = BUFSZ - 1;
240 			buf[count] = '\0';
241 			(void) fprintf(errfile, "%s", buf);
242 		}
243 	}
244 }
245 
246 static void
247 kill_sighandler(int sig)
248 {
249 	switch (sig) {
250 		case SIGALRM:
251 			alarmed = 1;
252 			break;
253 		default:
254 			killed = 1;
255 			break;
256 	}
257 }
258 
259 static void
260 set_handler(int sig)
261 {
262 	struct sigaction sa;
263 
264 	(void) sigfillset(&(sa.sa_mask));
265 	sa.sa_flags = 0;
266 	sa.sa_handler = kill_sighandler;
267 	if (sigaction(sig, &sa, NULL) != 0)
268 		/* install handler */
269 		msg(0, "bad sigaction: %s\n", strerror(errno));
270 }
271 
272 /*
273  * Compare two driver access handles
274  */
275 static int
276 hdl_cmp(const void *p1, const void *p2)
277 {
278 	struct handle_info *e1 = (struct handle_info *)p1;
279 	struct handle_info *e2 = (struct handle_info *)p2;
280 
281 	if (e1->instance < e2->instance)
282 		return (-1);
283 	else if (e1->instance > e2->instance)
284 		return (1);
285 	else if (e1->access_type < e2->access_type)
286 		return (-1);
287 	else if (e1->access_type > e2->access_type)
288 		return (1);
289 	else if (e1->rnumber < e2->rnumber)
290 		return (-1);
291 	else if (e1->rnumber > e2->rnumber)
292 		return (1);
293 	else if (e1->len < e2->len)
294 		return (-1);
295 	else if (e1->len > e2->len)
296 		return (1);
297 	else if (e1->offset < e2->offset)
298 		return (-1);
299 	else if (e1->offset > e2->offset)
300 		return (1);
301 	else if (e1->addr_cookie < e2->addr_cookie)
302 		return (-1);
303 	else if (e1->addr_cookie > e2->addr_cookie)
304 		return (1);
305 	else
306 		return (0);
307 }
308 
309 /*
310  * Compare two hardware accesses.
311  */
312 static int
313 elem_cmp(const void *p1, const void *p2)
314 {
315 	struct acc_log_elem *e1 = (struct acc_log_elem *)p1;
316 	struct acc_log_elem *e2 = (struct acc_log_elem *)p2;
317 
318 	if (e1->access_type < e2->access_type)
319 		return (-1);
320 	else if (e1->access_type > e2->access_type)
321 		return (1);
322 	else if (e1->offset < e2->offset)
323 		return (-1);
324 	else if (e1->offset > e2->offset)
325 		return (1);
326 	else if (e1->size < e2->size)
327 		return (-1);
328 	else if (e1->size > e2->size)
329 		return (1);
330 	else
331 		return (0);
332 }
333 
334 /*
335  * Another way of comparing two hardware accesses.
336  */
337 static int
338 log_cmp(const void *p1, const void *p2)
339 {
340 	struct acc_log_elem *e1 = (struct acc_log_elem *)p1;
341 	struct acc_log_elem *e2 = (struct acc_log_elem *)p2;
342 
343 	int rval = elem_cmp(p1, p2);
344 
345 	if (rval == 0)
346 		if (e1->repcount < e2->repcount)
347 			return (-1);
348 		else if (e1->repcount > e2->repcount)
349 			return (1);
350 		else
351 			return (0);
352 	else
353 		return (rval);
354 }
355 
356 /*
357  * And a final way of sorting a log (by access type followed by repcount).
358  */
359 static int
360 log_cmp2(const void *p1, const void *p2)
361 {
362 	struct acc_log_elem *e1 = (struct acc_log_elem *)p1;
363 	struct acc_log_elem *e2 = (struct acc_log_elem *)p2;
364 
365 	if (e1->access_type < e2->access_type)
366 		return (-1);
367 	else if (e1->access_type > e2->access_type)
368 		return (1);
369 	else if (e1->repcount < e2->repcount)
370 		return (-1);
371 	else if (e1->repcount > e2->repcount)
372 		return (1);
373 	else
374 		return (0);
375 }
376 
377 static void
378 dump_log(uint_t lvl, FILE *fp, struct acc_log_elem *items,
379     size_t nitems, uint_t logflags)
380 {
381 	if (lvl <= dbglvl) {
382 		int i;
383 		uint_t offset, allthesame = 1;
384 
385 		if (logflags & BOFI_LOG_TIMESTAMP &&
386 		    getenv("DUMP_FULL_LOG") != 0)
387 			allthesame = 0;
388 		else
389 			for (i = 1; i < nitems; i++)
390 				if (elem_cmp(items+i, items) != 0)
391 					allthesame = 0;
392 		if (fp != 0)
393 			(void) fprintf(fp,
394 			    "# Logged Accesses:\n# %-4s\t%-12s\t%-4s\t%-18s"
395 			    " (%-1s)\t%-10s\n\n", "type",
396 			    (items->access_type & BOFI_DMA_RW) ?
397 			    "address" : "offset",
398 			    "size", "value", "repcnt", "time");
399 
400 		for (i = 0; i < nitems; i++, items++) {
401 			offset = items->offset;
402 			if (fp != 0) {
403 				(void) fprintf(fp,
404 				    "# 0x%-2x\t0x%-10x\t%-4d\t0x%-16llx"
405 				    " (0x%-1x)\t%-8llu\n",
406 				    items->access_type, offset, items->size,
407 				    items->value, items->repcount,
408 				    (logflags & BOFI_LOG_TIMESTAMP) ?
409 				    items->access_time : 0ull);
410 
411 				if (allthesame) {
412 					(void) fprintf(fp,
413 					    "# Access duplicated %d times\n",
414 					    nitems);
415 					break;
416 				}
417 			} else
418 				msg(lvl, "# 0x%x 0x%x %d 0x%llx(0x%x) %llu\n",
419 				    items->access_type, offset, items->size,
420 				    items->value, items->repcount,
421 				    (logflags & BOFI_LOG_TIMESTAMP) ?
422 				    items->access_time : 0ull);
423 		}
424 	}
425 }
426 
427 static int
428 str_to_bm(char *optarg, coding_t *c, uint_t *bm)
429 {
430 	char *str;
431 	char *s = "\t\n ";
432 	int err = EINVAL;
433 
434 	msg(2, "str_to_bm: optarg %s\n", optarg);
435 	if (optarg != NULL && (str = strtok(optarg, s))) {
436 		msg(2, "str_to_bm: str %s\n", str);
437 		do {
438 			for (; c->str != 0; c++)
439 				if (strcmp(str, c->str) == 0) {
440 					*bm |= c->code;
441 					msg(2, "str_to_bm: %s matches\n",
442 					    c->str);
443 					err = 0;
444 					break;
445 				}
446 		} while ((str = strtok(NULL, s)));
447 	} else
448 		return (EINVAL);
449 	msg(2, "str_to_bm: done 0x%x\n", *bm);
450 	return (err);
451 }
452 
453 
454 /*
455  * Generic routine for commands that apply to a particular instance of
456  * a driver under test (e.g. activate all errdefs defined on an instance).
457  */
458 static int
459 manage_instance(int fd, char *namep, int instance, int cmd)
460 {
461 	struct bofi_errctl errctl;
462 
463 	errctl.namesize = strlen(namep);
464 	(void) strncpy(errctl.name, namep, MAXNAMELEN);
465 	errctl.instance = instance;
466 
467 	msg(8, "manage_instance: %s %d\n", namep, instance);
468 	if (ioctl(fd, cmd, &errctl) == -1) {
469 		msg(0, "bofi ioctl %d failed: %s\n", cmd, strerror(errno));
470 		return (-1);
471 	}
472 	return (0);
473 }
474 
475 
476 static int
477 define_one_error(
478 	FILE *fp,
479 	struct bofi_errdef *edp,
480 	struct acc_log_elem *item,
481 	ulong_t	nttime,
482 	ulong_t interval,
483 	char	*type,
484 	int fon,	/* corrupt after this many accesses */
485 	size_t fcnt,	/* and then fail it fcnt times */
486 	uint_t	acc_chk,
487 	char	*opname,
488 	uint64_t	operand)
489 {
490 	(void) fprintf(fp,
491 	    "-n %s -i %d -r %d -l 0x%llx 0x%x -a %s -c %d %d -f %d"
492 	    " -o %s 0x%llx",
493 	    (char *)edp->name,
494 	    edp->instance,
495 	    edp->rnumber,
496 	    edp->offset + item->offset,	/* offset into the regset */
497 	    item->size,	/* corrupt addrs from offset to offset+size */
498 	    type,
499 	    fon,	/* corrupt after this many accesses */
500 	    fcnt,	/* and then fail it fcnt times */
501 	    acc_chk,
502 	    opname,
503 	    operand);
504 
505 	(void) fprintf(fp, " -w %lu %lu\n", nttime, interval);
506 	return (0);
507 }
508 
509 static void
510 define_op_err(FILE *fp, int *ecnt, struct bofi_errdef *edp,
511     struct acc_log_elem *item, ulong_t nttime, ulong_t interval, char *type,
512     int fon, size_t fcnt)
513 {
514 	coding_t *ct;
515 	char	*opname;
516 	uint_t	op;
517 	uint64_t	operand;
518 	int k, save_size;
519 	uint64_t save_offset;
520 
521 	if (item->access_type & BOFI_INTR)
522 		ct = &ioptypes[0];
523 	else
524 		ct = &doptypes[0];
525 
526 	/*
527 	 * errdefs for dma accesses are too numerous so assume that dma writes
528 	 * (DDI_DMA_SYNC_FORDEV) create less exposure to potential errors than
529 	 * do dma reads (DDI_DMA_SYNC_FORCPU).
530 	 *
531 	 * also by default do not corrupt PIO_W - it may hang a non-FT platform.
532 	 */
533 	if (item->access_type != BOFI_DMA_W &&
534 	    ((item->access_type & BOFI_PIO_W) == 0 || !atype_is_default)) {
535 		/*
536 		 * user has asked for PIO_W
537 		 */
538 		for (; ct->str != 0; ct++) {
539 			op = ct->code;
540 			opname = ct->str;
541 			switch (op) {
542 			case BOFI_EQUAL:
543 				operand = random_operand; /* a random value */
544 				random_operand = lrand48() | ((uint64_t)
545 				    (lrand48()) << 32);
546 				break;
547 			case BOFI_AND:
548 				operand = 0xaddedabadb00bull;
549 				break;
550 			case BOFI_OR:
551 				operand = 0x1;
552 				break;
553 			case BOFI_XOR:
554 			default:
555 				operand = myULLMAX;
556 				break;
557 			case BOFI_DELAY_INTR: /* delay for 1 msec */
558 				operand = 1000000;
559 				break;
560 			case BOFI_LOSE_INTR: /* op not applicable */
561 				operand = 0;
562 				break;
563 			case BOFI_EXTRA_INTR: /* extra intrs */
564 				operand = 0xfff;
565 				break;
566 			}
567 			*ecnt = *ecnt + 1;
568 
569 			if ((item->access_type == BOFI_DMA_W ||
570 			    item->access_type == BOFI_DMA_R) &&
571 			    item->size > sizeof (uint64_t) && item->size <
572 			    DMA_INDIVIDUAL_CORRUPT_CUTOFF) {
573 				save_size = item->size;
574 				save_offset = item->offset;
575 				for (k = (item->size +
576 				    sizeof (uint64_t) - 1) &
577 				    ~(sizeof (uint64_t) - 1);
578 				    k > 0; k -= sizeof (uint64_t)) {
579 					item->size = sizeof (uint64_t);
580 					(void) define_one_error(fp, edp,
581 					    item, nttime, interval, type, fon,
582 					    fcnt, edp->acc_chk, opname,
583 					    operand);
584 					item->offset += sizeof (uint64_t);
585 				}
586 				item->size = save_size;
587 				item->offset = save_offset;
588 			} else {
589 				(void) define_one_error(fp, edp, item,
590 				    nttime, interval, type, fon, fcnt,
591 				    edp->acc_chk, opname, operand);
592 			}
593 
594 			if (op == BOFI_EQUAL) {
595 				uint_t cnt;
596 				for (cnt = 0; cnt < NPIO_DEFAULTS;
597 				    cnt++, *ecnt = *ecnt + 1) {
598 					if ((item->access_type == BOFI_DMA_W ||
599 					    item->access_type == BOFI_DMA_R) &&
600 					    item->size > sizeof (uint64_t) &&
601 					    item->size <
602 					    DMA_INDIVIDUAL_CORRUPT_CUTOFF) {
603 						save_size = item->size;
604 						save_offset = item->offset;
605 						for (k = (item->size +
606 						    sizeof (uint64_t) - 1) &
607 						    ~(sizeof (uint64_t) - 1);
608 						    k > 0;
609 						    k -= sizeof (uint64_t)) {
610 							item->size =
611 							    sizeof (uint64_t);
612 							(void) define_one_error(
613 							    fp, edp, item,
614 							    nttime, interval,
615 							    type, fon, fcnt,
616 							    edp->acc_chk,
617 							    opname,
618 							    pio_default_values
619 							    [cnt]);
620 							item->offset +=
621 							    sizeof (uint64_t);
622 						}
623 						item->size = save_size;
624 						item->offset = save_offset;
625 					} else {
626 						(void) define_one_error(fp,
627 						    edp, item, nttime, interval,
628 						    type, fon, fcnt,
629 						    edp->acc_chk, opname,
630 						    pio_default_values[cnt]);
631 					}
632 				}
633 			}
634 		}
635 	}
636 
637 	if ((item->access_type & BOFI_PIO_W) && !atype_is_default) {
638 		/*
639 		 * user has asked for PIO_W
640 		 */
641 		(void) define_one_error(fp, edp, item, nttime, interval,
642 		    type, fon, fcnt, edp->acc_chk, "NO", 0);
643 		*ecnt = *ecnt + 1;
644 	}
645 
646 	/*
647 	 * and finally an access check errdef
648 	 */
649 	if (item->access_type & BOFI_PIO_RW)
650 		(void) define_one_error(fp, edp, item, nttime, interval,
651 		    type, fon, fcnt, 1, "OR", 0);
652 
653 	if (item->access_type & BOFI_DMA_RW)
654 		(void) define_one_error(fp, edp, item, nttime, interval,
655 		    type, fon, fcnt, 2, "OR", 0);
656 
657 }
658 
659 /*
660  * Convert a collection of log entries into error definitions.
661  */
662 /* ARGSUSED */
663 static int
664 define_nerrs(int fd, FILE *fp, int *ecnt, struct bofi_errdef *edp,
665     struct acc_log_elem *items, size_t nitems, uint_t naccess, uint_t minac,
666     uint_t maxac, ulong_t logtime, ulong_t logsize)
667 {
668 	char	*type;
669 	uint_t	at;
670 	int	i;
671 	struct acc_log_elem	*item;
672 	char	*opname;
673 	uint_t	op;
674 	uint64_t	operand;
675 	int	cycleiops, cycledops;
676 	int	intrs = 0;
677 	ulong_t	ttime, nttime, interval;
678 
679 	op = edp->optype;
680 	operand = edp->operand;
681 	msg(3, "define_nerrs: nitems %d (ac %d at 0x%x): (%d %d)"
682 	    " (op 0x%x 0x%llx)\n\n", nitems, naccess, items->access_type,
683 	    minac, maxac, op, operand);
684 
685 	/*
686 	 * all items are guaranteed have values in the two element set {0, at}
687 	 * where at is a valid access type (so find the value of at)
688 	 */
689 	for (i = 0, item = items, at = 0; i < nitems; i++, item++)
690 		if (item->access_type != 0) {
691 			at = item->access_type;
692 			break;
693 		}
694 	if (at == 0)
695 		return (-1);
696 
697 	/*
698 	 * find the string form of the access type
699 	 */
700 	for (i = 0, type = 0; atypes[i].str != 0; i++) {
701 		if (atypes[i].code == at) {
702 			type = atypes[i].str;
703 			break;
704 		}
705 	}
706 	if (type == 0) {
707 		msg(0, "Unknown access type returned from bofi\n\t");
708 		dump_log(0, 0, item, 1, BOFI_LOG_TIMESTAMP);
709 		msg(1, "0x%x 0x%x 0x%x 0x%x\n", BOFI_LOG, BOFI_INTR,
710 		    BOFI_DMA_RW, BOFI_PIO_RW);
711 		return (-1);
712 	}
713 
714 	msg(1, "define_n: at = 0x%d (%s)\n", at, type == 0 ? "null" : type);
715 	/*
716 	 * find the string form of the operator
717 	 */
718 	for (i = 0, opname = 0; optypes[i].str != 0; i++) {
719 		if (op == optypes[i].code) {
720 			opname = optypes[i].str;
721 			break;
722 		}
723 	}
724 
725 	/*
726 	 * if not found or inconsistent default to XOR
727 	 */
728 	if (opname == 0 ||
729 	    (op == BOFI_NO_TRANSFER &&
730 	    (at & (BOFI_DMA_RW|BOFI_PIO_R))) ||
731 	    (op >= BOFI_DELAY_INTR && (at & BOFI_INTR) == 0)) {
732 		opname = optypes[3].str;	/* "XOR" */
733 		operand = myULLMAX;
734 		op = optypes[3].code;
735 	}
736 
737 	/*
738 	 * if operator and access type are inconsistent choose a sensible
739 	 * default
740 	 */
741 	cycleiops = 0;
742 	if (at & BOFI_INTR)
743 		if (op < BOFI_DELAY_INTR)
744 			cycleiops = 1;
745 		else if (op == BOFI_LOSE_INTR)
746 			operand = 0;
747 
748 	cycledops = 0;
749 	if (nitems == 1 && (at & BOFI_DMA_RW))
750 		cycledops = 1;
751 	/*
752 	 * for each access in the list define one or more error definitions
753 	 */
754 	for (i = 0, item = items; i < nitems; i++, item++) {
755 		size_t acnt, fcnt;
756 		int j, fon;
757 
758 		if (item->access_type == 0)
759 			continue;
760 
761 		/*
762 		 * base number of errors to inject on 3% of number of
763 		 * similar accesses seen during LOG phase
764 		 */
765 		acnt = item->repcount / 10 + 1; /* 10% */
766 		fcnt = (acnt >= 3) ? acnt / 3 : 1; /* 3% */
767 
768 		/*
769 		 * wait for twice the time it took during LOG phase
770 		 */
771 		if ((ttime = (item->access_time * 2)) < MIN_REPORT_TIME)
772 			ttime = MIN_REPORT_TIME;
773 		else if (max_edef_wait != 0 && ttime > max_edef_wait)
774 			ttime = max_edef_wait;
775 		/*
776 		 * if edef_sleep set (-w) the use that, otherwise use default
777 		 */
778 		interval = edef_sleep ? edef_sleep : DEFAULT_EDEF_SLEEP;
779 
780 		msg(10,
781 		    "define_n: item %d limit %d step %d (intr %d) tt(%lu)\n",
782 		    i, item->repcount, acnt, intrs, ttime);
783 
784 		for (j = 0, fon = 1, nttime = ttime; j < item->repcount;
785 		    j += acnt) {
786 			if (policy & OPERATORSPOLICY) {
787 				define_op_err(fp, ecnt, edp, item,
788 				    nttime, interval, type, fon, fcnt);
789 			} else {
790 				if (cycleiops) {
791 					op = ioptypes[intrs].code;
792 					opname = ioptypes[intrs++].str;
793 					switch (op) {
794 					case BOFI_DELAY_INTR:
795 						/* delay for 1 sec */
796 						operand = 1000000;
797 						break;
798 					case BOFI_LOSE_INTR:
799 						/* op not applicable */
800 						operand = 0;
801 						break;
802 					case BOFI_EXTRA_INTR:
803 					default:
804 						/* generate 2 extra intrs */
805 						operand = 0xfff;
806 						break;
807 					}
808 					intrs %= 3;
809 				} else if (cycledops) {
810 					op = doptypes[intrs].code;
811 					opname = doptypes[intrs++].str;
812 					switch (op) {
813 					case BOFI_EQUAL:
814 						random_operand = lrand48() |
815 						    ((uint64_t)
816 						    (lrand48()) << 32);
817 						break; /* a random value */
818 					case BOFI_AND:
819 						operand = 0xaddedabadb00bull;
820 						break;
821 					case BOFI_OR:
822 						operand = 0xd1ab011c0af1a5c0ull;
823 						break;
824 					case BOFI_XOR:
825 					default:
826 						operand = myULLMAX;
827 						break;
828 					}
829 					intrs %= 4;
830 				}
831 				(void) define_one_error(fp, edp, item,
832 				    nttime, interval, type, fon,
833 				    fcnt, edp->acc_chk, opname, operand);
834 				*ecnt = *ecnt + 1;
835 				if (op == BOFI_EQUAL) {
836 					uint_t cnt;
837 					for (cnt = 0; cnt < NPIO_DEFAULTS;
838 					    cnt++, *ecnt = *ecnt + 1)
839 						(void) define_one_error(fp,
840 						    edp, item, nttime,
841 						    interval, type, fon, fcnt,
842 						    edp->acc_chk, opname,
843 						    pio_default_values[cnt]);
844 				}
845 			}
846 
847 			/*
848 			 * all non maximal policies should only generate
849 			 * a single error definition set per access.
850 			 */
851 			if (!(policy & MAXIMALPOLICY))
852 				break;
853 
854 			nttime = (logtime - item->access_time) *
855 			    (j + acnt + fcnt - 1) / logsize;
856 			if (nttime < MIN_REPORT_TIME)
857 				nttime = MIN_REPORT_TIME;
858 			else if (nttime > max_edef_wait)
859 				nttime = max_edef_wait;
860 
861 			msg(11, "define_nerrs: %lu %d %d %d %llu\n", nttime,
862 			    max_edef_wait, fon, fcnt, item->access_time);
863 
864 			if (item->access_type != BOFI_INTR)
865 				fon += j;
866 		}
867 	}
868 
869 	return (0);
870 }
871 
872 static int
873 reduce_log(uint16_t pol, struct acc_log *log,		/* input args */
874     struct acc_log_elem **llp, size_t *cntp)		/* output args */
875 {
876 	ulong_t logtime;
877 	struct acc_log_elem *items, *item, *elem;
878 	int cnt, nitems, acnt;
879 	int i, j, k, lb, ub, mina, maxa, cutoff[2], mean;
880 
881 	if (llp == 0 || cntp == 0)	/* subroutine interface violated */
882 		return (-1);
883 
884 	if (*llp == 0) {
885 		items = (void *)log->logbase;
886 		nitems = log->entries;
887 	} else {
888 		items = *llp;	/* outputs double up as inputs */
889 		nitems = *cntp;
890 	}
891 	/* has the utc time wrapped over ULMAX - unlikely so fix it at 10 */
892 	logtime = (log->stop_time >= log->start_time) ?
893 	    log->stop_time - log->start_time : 10ul;
894 
895 	msg(1, "reduce %d: logtime %lu\n", nitems, logtime);
896 	/*
897 	 * Sort the log by access type - do not remove duplicates yet (but do
898 	 * remove access that do not match the requested log -> errdef policy
899 	 * (defined by union pu pol). Set the repcount field of each entry to a
900 	 * unique value (in the control statement of the for loop) - this
901 	 * ensures that the qsort (following the for loop) will not remove any
902 	 * entries.
903 	 */
904 	for (i = 0, cnt = 0, elem = items; i < nitems;
905 	    elem->repcount = i, i++, elem++) {
906 		/*
907 		 * If interested in the I/O transfer size and this access
908 		 * does not match the requested size then ignore the access
909 		 */
910 		if ((pol & SIZEPOLICY) &&
911 		    (!(pol & MULTIPOLICY) || elem->repcount == 1) &&
912 		    /* req for DMA / ddi_rep */
913 		    (pol & elem->size) == 0)
914 			elem->access_type = 0;
915 			/* these will end up sorted at the head */
916 		else {
917 			cnt += 1;
918 			elem->size *= elem->repcount;
919 			if (log->flags & BOFI_LOG_TIMESTAMP)
920 				/* real access time */
921 				elem->access_time -= log->start_time;
922 			else
923 				/* linear fit */
924 				elem->access_time = logtime * (i + 1) / nitems;
925 		}
926 	}
927 
928 	qsort((void *)items, nitems, sizeof (*items), log_cmp);
929 
930 	msg(5, "qsorted log raw (nitems %d cnt %d:\n", nitems, cnt);
931 	dump_log(14, 0, items, nitems, log->flags);
932 
933 	if (cnt != nitems) {	/* some items should be ignored */
934 		items += (nitems - cnt);	/* ignore these ones */
935 		if ((nitems = cnt) == 0) {
936 			*cntp = 0;
937 			*llp = 0;
938 			return (0);
939 			/* the chosen policy has ignored everything */
940 		}
941 
942 	}
943 	/*
944 	 * Now remove duplicate entries based on access type, address and size.
945 	 * Reuse the repcount field to store the no. of duplicate accesses.
946 	 * Store the average access time in the single remaining
947 	 * representative of the duplicate set.
948 	 */
949 
950 	for (i = 1, cnt = 1, elem = items, elem->repcount = 1, item = elem + 1;
951 	    i < nitems; i++, item++) {
952 		if (elem_cmp(elem, item) == 0) {
953 			elem->access_time += item->access_time;
954 			elem->repcount++;
955 		} else {	/* not a duplicate */
956 			elem->access_time = logtime / elem->repcount;
957 			elem++;
958 			*elem = *item;
959 			cnt++;
960 			elem->repcount = 1;
961 		}
962 	}
963 	elem->access_time = logtime / elem->repcount;
964 
965 	/*
966 	 * The log is sorted by access type - now resort to order by frequency
967 	 * of accesses (ie for a given access type uncommon access will come
968 	 * first.
969 	 */
970 
971 	qsort((void *)items, cnt, sizeof (*items), log_cmp2);
972 	msg(4, "qsorted log2: cnt is %d\n", cnt);
973 	dump_log(4, 0, items, cnt, log->flags);
974 
975 	for (i = 0; i < cnt; i = j) {
976 
977 		/*
978 		 * Pick out the set [i, j) consisting of elements with the same
979 		 * access type
980 		 */
981 		for (j = i + 1, acnt = items[i].repcount; j < cnt &&
982 		    items[j].access_type == items[i].access_type; j++)
983 			acnt += items[j].repcount;
984 
985 		if (j - i == 1)	/* never ignore solo accesses of a given type */
986 			continue;
987 		/*
988 		 * Now determine what constitutes uncommon and common accesses:
989 		 */
990 		mina = items[i].repcount;
991 		maxa = items[j-1].repcount;
992 		mean = acnt / (j - i); /* mean value */
993 
994 		if (pol & (UNCOMMONPOLICY|MEDIANPOLICY)) {
995 			cutoff[0] = (mean - mina) / DISTRIB_CUTOFF + mina;
996 
997 			for (ub = i; ub < j; ub++)
998 				if (items[ub].repcount > cutoff[0])
999 					break;
1000 			lb = j - 1;
1001 		} else {
1002 			lb = i;
1003 			ub = j-1;
1004 		}
1005 
1006 		if (pol & (COMMONPOLICY|MEDIANPOLICY)) {
1007 			cutoff[1] = maxa - (maxa - mean) / DISTRIB_CUTOFF;
1008 			for (lb = j - 1; lb >= i; lb--)
1009 				if (items[lb].repcount < cutoff[1])
1010 					break;
1011 			if (!(pol & (UNCOMMONPOLICY|MEDIANPOLICY)))
1012 				ub = i;
1013 		}
1014 
1015 		msg(3, "reduce_log: p 0x%x at %d:0x%x %d:0x%x acnt mina maxa"
1016 		    " (%d %d %d)"
1017 		    " mean %d cutoffs(%d %d) bnds(%d, %d)\n",
1018 		    pol, i, items[i].access_type, j, items[j].access_type,
1019 		    acnt, mina, maxa, mean, cutoff[0], cutoff[1], lb, ub);
1020 
1021 		if (ub <= lb)
1022 			if (!(pol & MEDIANPOLICY))
1023 				/* delete all the mid accesses */
1024 				for (k = ub; k <= lb; k++)
1025 					items[k].access_type = 0;
1026 			else {
1027 				if (!(pol & UNCOMMONPOLICY))
1028 					/* delete uncommon accesses */
1029 					for (k = i; k < ub; k++)
1030 						items[k].access_type = 0;
1031 				if (!(pol & COMMONPOLICY))
1032 					/* delete common accesses */
1033 					for (k = lb+1; k < j; k++)
1034 						items[k].access_type = 0;
1035 			}
1036 	}
1037 	msg(4, "reduce_log: returning %d items\n", cnt);
1038 	dump_log(5, 0, items, cnt, log->flags);
1039 	*cntp = cnt;
1040 	*llp = items;
1041 	return (0);
1042 }
1043 
1044 static void
1045 log2errdefs(int fd, struct bofi_errdef *edp, struct acc_log *log,
1046     char *devpath)
1047 {
1048 	struct acc_log_elem	*items;
1049 	size_t			nitems;
1050 	int			i, j;
1051 	uint_t			acc_cnt;
1052 	char			fname[_POSIX_PATH_MAX];
1053 	FILE			*fp = 0;
1054 	time_t			utc = time(NULL);
1055 	int			ecnt = 0;
1056 	int			err;
1057 	ulong_t			logtime;
1058 	char			*buffer;
1059 	struct stat		statbuf;
1060 
1061 	items = (void *)log->logbase;
1062 	nitems = log->entries;
1063 	logtime = (log->stop_time >= log->start_time) ?
1064 	    log->stop_time - log->start_time : 10ul;
1065 
1066 	if (nitems == 0)
1067 		return;
1068 
1069 	/* ensure that generated errdefs complete in bounded time */
1070 	if (max_edef_wait == 0)
1071 		max_edef_wait =
1072 		    logtime > MIN_REPORT_TIME ? logtime : MIN_REPORT_TIME * 2;
1073 
1074 	msg(4, "log2errdefs(0x%p, 0x%p, %d, 0x%x):\n",
1075 	    (void *) edp, (void *) items, nitems, policy);
1076 
1077 	(void) snprintf(fname, sizeof (fname), "%s.%d", (char *)edp->name,
1078 	    (int)getpid());
1079 	if ((fp = fopen(fname, "w")) == 0)
1080 		fp = outfile;
1081 
1082 	(void) fprintf(fp, "#!/bin/ksh -p\n\n");
1083 	(void) fprintf(fp, "# %-24s%s\n", "Script creation time:", ctime(&utc));
1084 	(void) fprintf(fp, "# %-24s%llu\n",
1085 	    "Activation time:", log->start_time);
1086 	(void) fprintf(fp, "# %-24s%llu\n",
1087 	    "Deactivation time:", log->stop_time);
1088 	(void) fprintf(fp, "# %-24s%d\n", "Log size:", nitems);
1089 	(void) fprintf(fp, "# %-24s", "Errdef policy:");
1090 	for (i = 0; ptypes[i].str != 0; i++)
1091 		if (policy & ptypes[i].code)
1092 			(void) fprintf(fp, "%s ", ptypes[i].str);
1093 	(void) fprintf(fp, "\n");
1094 	(void) fprintf(fp, "# %-24s%s\n", "Driver:", (char *)edp->name);
1095 	(void) fprintf(fp, "# %-24s%d\n", "Instance:", edp->instance);
1096 	if (edp->access_type & BOFI_PIO_RW) {
1097 		(void) fprintf(fp, "# %-24s%d\n",
1098 		    "Register set:", edp->rnumber);
1099 		(void) fprintf(fp, "# %-24s0x%llx\n", "Offset:", edp->offset);
1100 		(void) fprintf(fp, "# %-24s0x%llx\n", "Length:", edp->len);
1101 	} else if (edp->access_type & BOFI_DMA_RW) {
1102 		(void) fprintf(fp, "# %-24s%d\n", "DMA handle:", edp->rnumber);
1103 		(void) fprintf(fp, "# %-24s0x%llx\n", "Offset:", edp->offset);
1104 		(void) fprintf(fp, "# %-24s0x%llx\n", "Length:", edp->len);
1105 	} else if ((edp->access_type & BOFI_INTR) == 0) {
1106 		(void) fprintf(fp, "# %-24s%d\n",
1107 		    "Unknown Handle Type:", edp->rnumber);
1108 	}
1109 
1110 	(void) fprintf(fp, "# %-24s0x%x ( ", "Access type:",
1111 	    (edp->access_type & ~BOFI_LOG));
1112 	if (edp->access_type & BOFI_PIO_R)
1113 		(void) fprintf(fp, "%s ", "pio_r");
1114 	if (edp->access_type & BOFI_PIO_W)
1115 		(void) fprintf(fp, "%s ", "pio_w");
1116 	if (edp->access_type & BOFI_DMA_W)
1117 		(void) fprintf(fp, "%s ", "dma_w");
1118 	if (edp->access_type & BOFI_DMA_R)
1119 		(void) fprintf(fp, "%s ", "dma_r");
1120 	if (edp->access_type & BOFI_INTR)
1121 		(void) fprintf(fp, "%s ", "intr");
1122 	(void) fprintf(fp, ")\n\n");
1123 	if (user_comment)
1124 		(void) fprintf(fp, "# %-24s%s\n\n",
1125 		    "Test Comment:", user_comment);
1126 
1127 	dump_log(0, fp, items, nitems, log->flags);
1128 
1129 	items = 0;
1130 	if ((err = reduce_log(policy, log, &items, &nitems)) < 0 ||
1131 	    nitems == 0) {
1132 		msg(4, "log2errdefs: reduce_log err %d nitems %d\n",
1133 		    err, nitems);
1134 		return;
1135 	}
1136 	(void) fprintf(fp, "\nerror() { echo \""
1137 	    "${0##*/}: $@\""
1138 	    " >&2; exit 2; }\n");
1139 	(void) fprintf(fp,
1140 	    "trap ' ' 16\t# ignore - it is trapped by abort monitor_edef\n");
1141 
1142 	(void) fprintf(fp, "\nfixup_script()\n{\n");
1143 	if (scriptargs > 0) {
1144 		(void) fprintf(fp, "\tif [[ $1 -eq 1 ]]\n\tthen\n");
1145 		(void) fprintf(fp, "\t\t# Call a user defined workload\n");
1146 		(void) fprintf(fp, "\t\t# while injecting errors\n\t\t");
1147 		for (i = 0; i < scriptargs; i++)
1148 			(void) fprintf(fp, "%s ", fixup_script[i]);
1149 		(void) fprintf(fp, "\n\tfi\n");
1150 		(void) fprintf(fp, "\treturn 0\n");
1151 	} else {
1152 		(void) fprintf(fp, "\tif [[ $1 -eq 0 ]]\n\tthen\n");
1153 		(void) fprintf(fp,
1154 		    "\t\t# terminate any outstanding workload\n");
1155 		(void) fprintf(fp, "\t\tif [ $script_pid -gt 0 ]; then\n");
1156 		(void) fprintf(fp, "\t\t\tkill $script_pid\n");
1157 		(void) fprintf(fp, "\t\t\tscript_pid=0\n");
1158 		(void) fprintf(fp, "\t\tfi\n");
1159 		(void) fprintf(fp, "\tfi\n");
1160 		(void) fprintf(fp, "\treturn -1\n");
1161 	}
1162 	(void) fprintf(fp, "}\n\n");
1163 	(void) fprintf(fp, "devpath=/devices%s\n\n", devpath);
1164 	(void) fprintf(fp, "#\n");
1165 	(void) fprintf(fp, "# following text extracted from th_script\n");
1166 	(void) fprintf(fp, "#\n");
1167 	if (stat("/usr/lib/th_script", &statbuf) == -1) {
1168 		msg(0, "log2errdefs: stat of /usr/lib/th_script failed\n");
1169 		return;
1170 	}
1171 	fd = open("/usr/lib/th_script", O_RDONLY);
1172 	if (fd == -1) {
1173 		msg(0, "log2errdefs: open of /usr/lib/th_script failed\n");
1174 		return;
1175 	}
1176 	buffer = malloc(statbuf.st_size);
1177 	if (!buffer) {
1178 		msg(0, "log2errdefs: malloc for /usr/lib/th_script failed\n");
1179 		return;
1180 	}
1181 	if (read(fd, buffer, statbuf.st_size) != statbuf.st_size) {
1182 		msg(0, "log2errdefs: read of /usr/lib/th_script failed\n");
1183 		return;
1184 	}
1185 	(void) fwrite(buffer, statbuf.st_size, 1, fp);
1186 	(void) close(fd);
1187 	(void) fprintf(fp, "#\n");
1188 	(void) fprintf(fp, "# end of extracted text\n");
1189 	(void) fprintf(fp, "#\n");
1190 	(void) fprintf(fp, "run_subtest %s %d <<ERRDEFS\n",
1191 	    (char *)edp->name, edp->instance);
1192 
1193 	for (i = 0; i < nitems; i = j) {
1194 
1195 		acc_cnt = items[i].repcount;
1196 		for (j = i + 1;
1197 		    j < nitems && items[j].access_type == items[i].access_type;
1198 		    j++)
1199 			acc_cnt += items[j].repcount;
1200 		msg(1, "l2e: nitems %d i %d j %d at 0x%x\n",
1201 		    nitems, i, j, items[i].access_type);
1202 		if (items[i].access_type != 0)
1203 			(void) define_nerrs(fd, fp, &ecnt, edp, items+i, j-i,
1204 			    acc_cnt, items[i].repcount, items[j-1].repcount,
1205 			    logtime, log->entries);
1206 	}
1207 
1208 	(void) fprintf(fp, "ERRDEFS\n");
1209 	(void) fprintf(fp, "exit 0\n");
1210 
1211 	if (fp != stdout && fp != stderr) {
1212 		if (fchmod(fileno(fp), S_IRWXU|S_IRGRP|S_IROTH))
1213 			msg(0, "fchmod failed: %s\n", strerror(errno));
1214 		if (fclose(fp) != 0)
1215 			msg(0, "close of %s failed: %s\n", fname,
1216 			    strerror(errno));
1217 	}
1218 	msg(10, "log2errdefs: done\n");
1219 }
1220 
1221 #define	LLSZMASK (sizeof (longlong_t) -1)
1222 
1223 static int
1224 add_edef(
1225 	int fd,
1226 	struct bofi_errdef *errdef,	/* returned access criteria */
1227 	struct bofi_errstate *errstate,
1228 	struct handle_info *hdl,	/* handle to match against request */
1229 	struct bofi_errdef *edp)	/* requested access criteria */
1230 {
1231 	*errdef = *edp;
1232 	errdef->instance = hdl->instance;
1233 
1234 
1235 	if (hdl->access_type == 0)
1236 		return (EINVAL);
1237 
1238 	errdef->access_type =
1239 	    errdef->access_type & (hdl->access_type|BOFI_LOG);
1240 
1241 	/* use a big log for PIO and a small one otherwise */
1242 	if (lsize_is_default &&
1243 	    (errdef->access_type & BOFI_PIO_RW) == 0) {
1244 		errdef->access_count = DFLT_NONPIO_LOGSZ;
1245 		errdef->fail_count = 0;
1246 	}
1247 	errdef->log.logsize = errstate->log.logsize =
1248 	    errdef->access_count + errdef->fail_count - 1;
1249 	if (errdef->log.logsize == -1U) {
1250 		errdef->log.logsize = errstate->log.logsize = 0;
1251 	}
1252 	errdef->log.logbase = errstate->log.logbase =
1253 	    (caddr_t)GETSTRUCT(struct acc_log_elem, errdef->log.logsize);
1254 
1255 	if (errdef->log.logbase == 0)
1256 		return (EAGAIN);
1257 
1258 	errdef->rnumber = hdl->rnumber;
1259 	errdef->offset = hdl->offset;
1260 	errdef->len = hdl->len;
1261 
1262 	msg(4, "creating errdef: %d %s %d %d 0x%llx 0x%llx 0x%x 0x%x 0x%x"
1263 	    " 0x%x 0x%x 0x%llx\n",
1264 	    errdef->namesize, (char *)errdef->name,
1265 	    errdef->instance, errdef->rnumber,
1266 	    errdef->offset, errdef->len,
1267 	    errdef->access_type,
1268 	    errdef->access_count, errdef->fail_count,
1269 	    errdef->acc_chk, errdef->optype, errdef->operand);
1270 	if (ioctl(fd, BOFI_ADD_DEF, errdef) == -1) {
1271 		perror("th_define - adding errdef failed");
1272 		return (errno);
1273 	}
1274 	errdef->optype = edp->optype; /* driver clears it if fcnt is zero */
1275 	errstate->errdef_handle = errdef->errdef_handle;
1276 	return (0);
1277 }
1278 
1279 static void
1280 collect_state(int fd, int cmd, struct bofi_errstate *errstate,
1281     struct bofi_errdef *errdef, char *devpath)
1282 {
1283 	int rval;
1284 	size_t ls = errstate->log.logsize;
1285 
1286 	msg(2, "collect_state: pre: edp->access_type 0x%x (logsize %d)\n",
1287 	    errdef->access_type, errdef->log.logsize);
1288 
1289 	do {
1290 		errstate->log.logsize = 0; /* only copy the driver log once */
1291 
1292 		msg(10, "collecting state (lsize %d) ...\n",
1293 		    errstate->log.logsize);
1294 		errno = 0;
1295 
1296 		if (ioctl(fd, cmd, errstate) == -1 && errno != EINTR) {
1297 			perror("th_define (collect) -"
1298 			    " waiting for error report failed");
1299 			break;
1300 		}
1301 
1302 		(void) fprintf(outfile, "Logged %d out of %d accesses"
1303 		    " (%s %d %d 0x%x %d).\n",
1304 		    errstate->log.entries, ls,
1305 		    (char *)errdef->name, errdef->instance, errdef->rnumber,
1306 		    errdef->access_type, errstate->log.wrapcnt);
1307 
1308 		(void) msg(1, "\t(ac %d fc %d lf 0x%x wc %d).\n",
1309 		    errstate->access_count, errstate->fail_count,
1310 		    errstate->log.flags, errstate->log.wrapcnt);
1311 
1312 		rval = errno;
1313 		if ((errstate->log.flags & BOFI_LOG_WRAP) &&
1314 		    errstate->access_count > 0)
1315 			continue;
1316 		if (errstate->access_count <= 1 &&
1317 		    errstate->fail_count == 0 &&
1318 		    errstate->acc_chk == 0) {
1319 			msg(3, "collecting state complete entries %d\n",
1320 			    errstate->log.entries);
1321 			break;
1322 		}
1323 
1324 		msg(5, "still collecting state: %d, %d, %d\n",
1325 		    errstate->access_count, errstate->fail_count,
1326 		    errstate->acc_chk);
1327 		(void) msg(2, "Log: errno %d size %d entries %d "
1328 		    "(off 0x%llx len 0x%llx) ac %d\n", errno,
1329 		    errstate->log.logsize, errstate->log.entries,
1330 		    errdef->offset, errdef->len, errstate->access_count);
1331 
1332 	} while (rval == 0 && errstate->log.entries < ls);
1333 
1334 	/* now grab the log itself */
1335 	errstate->log.logsize = ls;
1336 	if (errstate->log.entries != 0) {
1337 		if (ioctl(fd, BOFI_CHK_STATE, errstate) == -1) {
1338 			msg(0,
1339 			    "%s: errorwhile retrieving %d log entries: %s\n",
1340 			    Progname, errstate->log.entries, strerror(errno));
1341 		} else {
1342 			msg(2, "collect_state: post: edp->access_type 0x%x"
1343 			    " (log entries %d %d) (%llu - %llu)\n",
1344 			    errdef->access_type,
1345 			    errstate->log.entries, errstate->access_count,
1346 			    errstate->log.start_time, errstate->log.stop_time);
1347 
1348 			log2errdefs(fd, errdef, &(errstate->log), devpath);
1349 		}
1350 	}
1351 }
1352 
1353 static void
1354 print_err_reports(FILE *fp, struct bofi_errstate *esp,
1355     char *fname, char *cmt, int id)
1356 {
1357 	if (fname != 0 && *fname != 0)
1358 		(void) fprintf(fp, "%sErrdef file %s definition %d:",
1359 		    cmt, fname, id);
1360 	else
1361 		(void) fprintf(fp, "%s", cmt);
1362 
1363 	if (esp->access_count != 0) {
1364 		(void) fprintf(fp, " (access count %d).\n", esp->access_count);
1365 	} else {
1366 		(void) fprintf(fp, "\n%s\tremaining fail count %d acc_chk %d\n",
1367 		    cmt, esp->fail_count, esp->acc_chk);
1368 		(void) fprintf(fp, "%s\tfail time 0x%llx error reported time"
1369 		    " 0x%llx errors reported %d\n", cmt,
1370 		    esp->fail_time, esp->msg_time,
1371 		    esp->errmsg_count);
1372 		if (esp->msg_time)
1373 			(void) fprintf(fp, "%s\tmessage \"%s\" severity 0x%x\n",
1374 			    cmt, esp->buffer, (uint_t)esp->severity);
1375 	}
1376 }
1377 
1378 static void
1379 thr_collect(void *arg, char *devpath)
1380 {
1381 	int fd;
1382 	struct collector_def *hi = (struct collector_def *)arg;
1383 
1384 	msg(4, "thr_collect: collecting %s inst %d rn %d at = 0x%x.\n",
1385 	    hi->ed.name, hi->ed.instance,
1386 	    hi->ed.rnumber, hi->ed.access_type);
1387 
1388 	if ((fd = open(BOFI_DEV, O_RDWR)) == -1) {
1389 		if (errno == EAGAIN)
1390 			msg(0, "Too many instances of bofi currently open\n");
1391 		else
1392 			msg(0, "Error while opening bofi driver: %s",
1393 			    strerror(errno));
1394 	} else {
1395 		/*
1396 		 * Activate the logging errdefs - then collect the results.
1397 		 */
1398 		(void) manage_instance(fd, hi->ed.name,
1399 		    hi->ed.instance, BOFI_START);
1400 		collect_state(fd, BOFI_CHK_STATE_W, &hi->es, &hi->ed, devpath);
1401 	}
1402 
1403 	/*
1404 	 * there is no more work to do on this access handle so clean up / exit.
1405 	 */
1406 	msg(3, "thr_collect: closing and broadcasting.\n");
1407 	exit(0);
1408 }
1409 
1410 /*
1411  * Given an access handle known to the bofi driver see if the user has
1412  * specified access criteria that match that handle. Note: this matching
1413  * algorithm should be kept consistent with the drivers alogorithm.
1414  */
1415 static int
1416 match_hinfo(struct handle_info *hp, int instance, uint_t access_type,
1417     int rnumber, offset_t offset, offset_t len)
1418 {
1419 
1420 	msg(9, "matching (%d %d) 0x%x %d offset (%llx, %llx) len (%llx %llx)\n",
1421 	    hp->instance, instance, access_type, rnumber,
1422 	    hp->offset, offset, hp->len, len);
1423 
1424 	if (instance != -1 && hp->instance != instance)
1425 		return (0);
1426 	if ((access_type & BOFI_DMA_RW) &&
1427 	    (hp->access_type & BOFI_DMA_RW) &&
1428 	    (rnumber == -1 || hp->rnumber == rnumber))
1429 		return (1);
1430 	else if ((access_type & BOFI_INTR) &&
1431 	    (hp->access_type & BOFI_INTR))
1432 		return (1);
1433 	else if ((access_type & BOFI_PIO_RW) &&
1434 	    (hp->access_type & BOFI_PIO_RW) &&
1435 	    (rnumber == -1 || hp->rnumber == rnumber) &&
1436 	    (len == 0 || hp->offset < offset + len) &&
1437 	    (hp->len == 0 || hp->offset + hp->len > offset))
1438 		return (1);
1439 	else
1440 		return (0);
1441 }
1442 
1443 /*
1444  * Obtain all the handles created by the driver specified by the name parameter
1445  * that match the remaining arguments. The output parameter nhdls indicates how
1446  * many of the structures pointed to by the output parameter hip match the
1447  * specification.
1448  *
1449  * It is the responsibility of the caller to free *hip when *nhdls != 0.
1450  */
1451 static int
1452 get_hinfo(int fd, char *name, struct handle_info **hip, size_t *nhdls,
1453     int instance, int atype, int rset, offset_t offset, offset_t len,
1454     int new_semantics)
1455 {
1456 	struct bofi_get_hdl_info hdli;
1457 	int command;
1458 
1459 	command = BOFI_GET_HANDLE_INFO;
1460 	hdli.namesize = strlen(name);
1461 	(void) strncpy(hdli.name, name, MAXNAMELEN);
1462 	/*
1463 	 * Initially ask for the number of access handles (not the structures)
1464 	 * in order to allocate memory
1465 	 */
1466 	hdli.hdli = 0;
1467 	*hip = 0;
1468 	hdli.count = 0;
1469 
1470 	/*
1471 	 * Ask the bofi driver for all handles created by the driver under test.
1472 	 */
1473 	if (ioctl(fd, command, &hdli) == -1) {
1474 		*nhdls = 0;
1475 		msg(0, "driver failed to return handles: %s\n",
1476 		    strerror(errno));
1477 		return (errno);
1478 	} else if ((*nhdls = hdli.count) == 0) {
1479 		msg(1, "get_hinfo: no registered handles\n");
1480 		return (0);	/* no handles */
1481 	} else if ((*hip = GETSTRUCT(struct handle_info, *nhdls)) == 0) {
1482 		return (EAGAIN);
1483 	} else {
1484 		struct handle_info *hp, **chosen;
1485 		int i;
1486 
1487 		/* Ask for *nhdls handles */
1488 		hdli.hdli = (caddr_t)*hip;
1489 		if (ioctl(fd, command, &hdli) == -1) {
1490 			int err = errno;
1491 
1492 			msg(0, "BOFI_GET_HANDLE_INFO ioctl returned error %d\n",
1493 			    err);
1494 			free(*hip);
1495 			return (err);
1496 		}
1497 
1498 		if (hdli.count < *nhdls)
1499 			*nhdls = hdli.count; /* some handles have gone away */
1500 
1501 		msg(4, "qsorting %d handles\n", *nhdls);
1502 		if (*nhdls > 1)
1503 			/* sort them naturally (NB ordering is not mandatory) */
1504 			qsort((void *)*hip, *nhdls, sizeof (**hip), hdl_cmp);
1505 
1506 		if ((chosen = malloc(sizeof (hp) * *nhdls)) != NULL) {
1507 			struct handle_info **ip;
1508 			/* the selected handles */
1509 			struct handle_info *prev = 0;
1510 			int scnt = 0;
1511 
1512 			for (i = 0, hp = *hip, ip = chosen; i < *nhdls;
1513 			    i++, hp++) {
1514 				/*
1515 				 * Remark: unbound handles never match
1516 				 * (access_type == 0)
1517 				 */
1518 				if (match_hinfo(hp, instance, atype, rset,
1519 				    offset&0x7fffffff, len&0x7fffffff)) {
1520 					msg(3, "match: 0x%x 0x%llx 0x%llx"
1521 					    " 0x%llx (0x%llx)\n",
1522 					    hp->access_type, hp->addr_cookie,
1523 					    hp->offset, hp->len,
1524 					    (hp->len & 0x7fffffff));
1525 					if (prev &&
1526 					    (prev->access_type & BOFI_DMA_RW) &&
1527 					    (hp->access_type & BOFI_DMA_RW) &&
1528 					    hp->instance == prev->instance &&
1529 					    hp->len == prev->len &&
1530 					    hp->addr_cookie ==
1531 					    prev->addr_cookie)
1532 						continue;
1533 
1534 					if ((hp->access_type & BOFI_DMA_RW) &&
1535 					    (atype & BOFI_DMA_RW) !=
1536 					    hp->access_type)
1537 						if (new_semantics)
1538 							continue;
1539 
1540 					if (prev)
1541 						msg(3, "match_hinfo: match:"
1542 						    " 0x%llx (%d %d) (%d %d)"
1543 						    " (0x%x 0x%x) (0x%llx,"
1544 						    " 0x%llx)\n",
1545 						    hp->addr_cookie,
1546 						    prev->instance,
1547 						    hp->instance, prev->rnumber,
1548 						    hp->rnumber,
1549 						    prev->access_type,
1550 						    hp->access_type, prev->len,
1551 						    hp->len);
1552 
1553 					/* it matches so remember it */
1554 					prev = *ip++ = hp;
1555 					scnt += 1;
1556 				}
1557 			}
1558 
1559 			if (*nhdls != scnt) {
1560 				/*
1561 				 * Reuse the alloc'ed memory to return
1562 				 * only those handles the user has asked for.
1563 				 * But first prune the handles to get rid of
1564 				 * overlapping ranges (they are ordered by
1565 				 * offset and length).
1566 				 */
1567 				*nhdls = scnt;
1568 				for (i = 0, hp = *hip, ip = chosen; i < scnt;
1569 				    i++, ip++, hp++)
1570 					if (hp != *ip)
1571 						(void) memcpy(hp, *ip,
1572 						    sizeof (*hp));
1573 			}
1574 			free(chosen);
1575 		}
1576 
1577 		for (i = 0, hp = *hip; i < *nhdls; i++, hp++) {
1578 			msg(4, "\t%d 0x%x %d 0x%llx 0x%llx 0x%llx\n",
1579 			    hp->instance, hp->access_type, hp->rnumber,
1580 			    hp->len, hp->offset, hp->addr_cookie);
1581 		}
1582 	}
1583 	if (*nhdls == 0 && *hip)
1584 		free(*hip);
1585 
1586 	msg(4, "get_info: %s got %d handles\n", name, *nhdls);
1587 	return (0);
1588 }
1589 
1590 static void
1591 init_sigs()
1592 {
1593 	struct sigaction sa;
1594 	int *ip, sigs[] = {SIGINT, SIGTERM, 0};
1595 
1596 	sa.sa_handler = kill_sighandler;
1597 	(void) sigemptyset(&sa.sa_mask);
1598 	for (ip = sigs; *ip; ip++)
1599 		(void) sigaddset(&sa.sa_mask, *ip);
1600 	sa.sa_flags = 0;
1601 	for (ip = sigs; *ip; ip++)
1602 		(void) sigaction(*ip, &sa, NULL);
1603 }
1604 
/*
 * Raise the soft limit of 'resource' to its hard limit.  'rname' is the
 * name of the resource used in messages.  Returns -1 when the current limit
 * cannot be obtained and 0 otherwise; a failure to set the new limit is
 * reported but still yields 0.
 */
static int
raise_rlimit(int resource, const char *rname)
{
	struct rlimit rl;

	if (getrlimit(resource, &rl) < 0) {
		msg(0, "failed to obtain %s: %s\n", rname, strerror(errno));
		return (-1);
	}

	/* the casts keep the arguments in step with the %lu conversions */
	msg(12, "%s\t %lu (%lu)\n", rname,
	    (unsigned long)rl.rlim_cur, (unsigned long)rl.rlim_max);
	rl.rlim_cur = rl.rlim_max;
	if (setrlimit(resource, &rl) < 0)
		msg(0, "failed to set %s: %s\n", rname, strerror(errno));
	return (0);
}

/*
 * Potentially hungry on resources so up them all to their maximums: the
 * limits on open files, data segment size and file size are raised to their
 * hard limits.  Extended FILE stdio is enabled once the open file limit
 * could be read.
 */
static void
up_resources()
{
	if (raise_rlimit(RLIMIT_NOFILE, "RLIMIT_NOFILE") == 0)
		(void) enable_extended_FILE_stdio(-1, -1);
	(void) raise_rlimit(RLIMIT_DATA, "RLIMIT_DATA");
	(void) raise_rlimit(RLIMIT_FSIZE, "RLIMIT_FSIZE");
}
1641 
1642 static FILE *
1643 create_test_file(char *drvname)
1644 {
1645 	char dirname[_POSIX_PATH_MAX];
1646 	char testname[_POSIX_PATH_MAX];
1647 	FILE *fp = 0;
1648 	time_t utc = time(NULL);
1649 
1650 	if (snprintf(dirname, sizeof (dirname), "%s.test.%lu",
1651 	    drvname, utc) == -1 ||
1652 	    snprintf(testname, sizeof (testname), "%s.test.%lu",
1653 	    drvname, utc) == -1)
1654 		return (0);
1655 
1656 	if (mkdir(dirname, S_IRWXU|S_IRGRP|S_IROTH)) {
1657 		msg(0, "Error creating %s: %s\n", dirname, strerror(errno));
1658 		return (0);
1659 	}
1660 	if (chdir(dirname)) {
1661 		(void) rmdir(dirname);
1662 		return (0);
1663 	}
1664 	if ((fp = fopen(testname, "w")) == 0)
1665 		return (0);	/* leave created directory intact */
1666 
1667 	return (fp);
1668 }
1669 
/*
 * Argument block shared between getpath() and walk_callback(): the driver
 * name and instance to look for, and the buffer that receives the devfs
 * path of the matching node.
 */
struct walk_arg {
	char *path;		/* result buffer; set to NULL by the */
				/* callback if the path is unobtainable */
	int instance;		/* driver instance to look for */
	char name[MAXPATHLEN];	/* driver name to look for */
	int pathlen;		/* max. no. of bytes copied into path */
};
1676 
1677 static int
1678 walk_callback(di_node_t node, void *arg)
1679 {
1680 	struct walk_arg *warg = (struct walk_arg *)arg;
1681 	char *driver_name;
1682 	char *path;
1683 
1684 	driver_name = di_driver_name(node);
1685 	if (driver_name != NULL) {
1686 		if (strcmp(driver_name, warg->name) == 0 &&
1687 		    di_instance(node) == warg->instance) {
1688 			path = di_devfs_path(node);
1689 			if (path == NULL)
1690 				warg->path = NULL;
1691 			else
1692 				(void) strncpy(warg->path, path, warg->pathlen);
1693 			return (DI_WALK_TERMINATE);
1694 		}
1695 	}
1696 	return (DI_WALK_CONTINUE);
1697 }
1698 
1699 static int
1700 getpath(char *path, int instance, char *name, int pathlen)
1701 {
1702 	di_node_t node;
1703 	struct walk_arg warg;
1704 
1705 	warg.instance = instance;
1706 	(void) strncpy(warg.name, name, MAXPATHLEN);
1707 	warg.path = path;
1708 	warg.pathlen = pathlen;
1709 	if ((node = di_init("/", DINFOSUBTREE)) == DI_NODE_NIL)
1710 		return (-1);
1711 	if (di_walk_node(node, DI_WALK_CLDFIRST, &warg, walk_callback) == -1) {
1712 		di_fini(node);
1713 		return (-1);
1714 	}
1715 	if (warg.path == NULL) {
1716 		di_fini(node);
1717 		return (-1);
1718 	}
1719 	di_fini(node);
1720 	return (0);
1721 }
1722 
1723 /*
1724  * Record logsize h/w accesses of type 'edp->access_type' made by instance
1725  * 'edp->instance' of driver 'edp->name' to the register set (or dma handle)
1726  * 'edp->rnumber' that lie within the range 'edp->offset' to
1727  * 'edp->offset' + 'edp->len'.
1728  * Access criteria may be mixed and matched:
1729  * -	access types may be combined (PIO read/write, DMA read write or intrs);
1730  * -	if 'edp->instance' is -1 all instances are checked for the criteria;
1731  * -	if 'edp->rnumber' is -1 all register sets and dma handles are matched;
1732  * -	'offset' and 'len' indicate that only PIO and DMA accesses within the
1733  *	range 'edp->offset' to 'edp->len' will be logged. Putting 'edp->offset'
1734  *      to zero and 'edp->len' to -1ull gives maximal coverage.
1735  *
1736  * 'collecttime' is the number of seconds used to log accesses
1737  *		(default is infinity).
1738  */
1739 static void
1740 test_driver(struct bofi_errdef *edp,
1741     unsigned long long collecttime)
1742 {
1743 	pid_t pid;
1744 	int statloc;
1745 	struct collector_def *cdefs, *cdp;
1746 	struct handle_info *hdls, *hdl;
1747 	int i, fd;
1748 	size_t cnt;
1749 	size_t nchildren;
1750 	unsigned long long timechunk;
1751 	FILE *sfp;	/* generated control test file */
1752 	char buf[MAXPATHLEN];
1753 	char devpath[MAXPATHLEN];
1754 	char *devpathp = "NULL";
1755 	int drv_inst;
1756 	int got_it = 0;
1757 
1758 	char *name = (char *)edp->name;
1759 	uint_t logsize = edp->access_count + edp->fail_count - 1;
1760 	int inst = edp->instance;
1761 	uint_t atype = edp->access_type;
1762 	int rset = edp->rnumber;
1763 	offset_t offset = edp->offset;
1764 	offset_t len = edp->len;
1765 
1766 	msg(4, "test_driver: %s %d inst %d 0x%x rset %d %llx %llx\n",
1767 	    name, logsize, inst, atype, rset, offset, len);
1768 
1769 	drv_inst = inst;
1770 	if (getpath(devpath, inst, name, MAXPATHLEN) != -1) {
1771 		devpathp = devpath;
1772 		got_it = 1;
1773 	}
1774 	if (logsize == -1U)
1775 		logsize = 0;
1776 	fd = open(BOFI_DEV, O_RDWR);
1777 	if (fd == -1) {
1778 		perror("get_hdl_info - bad open of bofi driver");
1779 		return;
1780 	}
1781 	if (got_it) {
1782 		(void) snprintf(buf, sizeof (buf),
1783 		    "th_manage /devices%s offline", devpathp);
1784 		(void) system(buf);
1785 		(void) snprintf(buf, sizeof (buf),
1786 		    "th_manage /devices%s online", devpathp);
1787 		(void) system(buf);
1788 		(void) snprintf(buf, sizeof (buf),
1789 		    "th_manage /devices%s getstate >/dev/null", devpathp);
1790 		(void) system(buf);
1791 	}
1792 	if (get_hinfo(fd, name, &hdls, &cnt,
1793 	    inst, atype, rset, offset, len, 1) != 0) {
1794 		msg(0, "driver_test: bad get_info for %d hdls\n", cnt);
1795 		return;
1796 	} else if (logsize == 0 || collecttime == 0 || cnt == 0) {
1797 		if (cnt == 0)
1798 			msg(1, "No matching handles.\n");
1799 		return;
1800 	}
1801 	if ((cdefs = GETSTRUCT(struct collector_def, cnt)) == 0) {
1802 		msg(0, "driver_test: can't get memory for %d cdefs\n", cnt);
1803 		return;
1804 	}
1805 	up_resources();
1806 	if (got_it) {
1807 		if (scriptargs > 0) {
1808 			(void) snprintf(buf, sizeof (buf),
1809 			    "DRIVER_PATH=/devices%s DRIVER_INSTANCE=%d"
1810 			    " DRIVER_UNCONFIGURE=0 DRIVER_CONFIGURE=1",
1811 			    devpathp, drv_inst);
1812 			for (i = 0; i < scriptargs; i++) {
1813 				(void) strcat(buf, " ");
1814 				(void) strcat(buf, fixup_script[i]);
1815 			}
1816 			(void) strcat(buf, " &");
1817 		} else {
1818 			(void) snprintf(buf, sizeof (buf),
1819 			    "while : ; do th_manage /devices%s online;"
1820 			    " th_manage /devices%s getstate >/dev/null;"
1821 			    " th_manage /devices%s offline;done &"
1822 			    " echo $! >/tmp/bofi.pid",
1823 			    devpathp, devpathp, devpathp);
1824 		}
1825 		(void) system(buf);
1826 		(void) snprintf(buf, sizeof (buf), "sleep %d",
1827 		    edef_sleep ? edef_sleep : DEFAULT_EDEF_SLEEP);
1828 		(void) system(buf);
1829 	}
1830 
1831 	(void) fprintf(outfile,
1832 	    "Logging accesses to instances ");
1833 	for (i = 0, inst = -1, hdl = hdls; i < cnt;
1834 	    i++, hdl++) {
1835 		if (inst != hdl->instance) {
1836 			inst = hdl->instance;
1837 			(void) fprintf(outfile, "%d ", inst);
1838 		}
1839 	}
1840 	(void) fprintf(outfile, " (%d logs of size 0x%x).\n\t"
1841 	    "(Use th_manage ... clear_errdefs to terminate"
1842 	    " logging)\n", cnt, logsize);
1843 
1844 	sfp = create_test_file(name);
1845 	/*
1846 	 * Install a logging errdef for each matching handle,
1847 	 * and then create a child to collect the log.
1848 	 * The child is responsible for activating the log.
1849 	 */
1850 	for (i = 0, cdp = cdefs, hdl = hdls, nchildren = 0;
1851 	    i < cnt; i++, cdp++, hdl++) {
1852 		if (add_edef(fd, &cdp->ed, &cdp->es, hdl, edp) != 0) {
1853 			cdp->lp = 0;
1854 			cdp->pid = 0;
1855 		} else {
1856 			cdp->lp = (void *)cdp->ed.log.logbase;
1857 			msg(1, "test_driver: thr_create:"
1858 			    " lsize 0x%x 0x%x at 0x%x\n",
1859 			    cdp->es.log.logsize,
1860 			    cdp->ed.log.logsize,
1861 			    cdp->ed.access_type);
1862 			if ((pid = fork()) == -1) {
1863 				msg(0, "fork failed for handle"
1864 				    " %d: %s\n", i, strerror(errno));
1865 				cdp->pid = 0;	/* ignore */
1866 			} else if (pid == 0) {
1867 				thr_collect(cdp, devpathp);
1868 			} else {
1869 				cdp->pid = pid;
1870 				nchildren += 1;
1871 			}
1872 		}
1873 	}
1874 
1875 	if (nchildren != 0) {
1876 		if (sfp) {
1877 			(void) fprintf(sfp, "#!/bin/ksh -p\n\n");
1878 			(void) fprintf(sfp,
1879 			    "\n# Test control script generated using:\n#");
1880 			for (i = 0; i < pargc; i++)
1881 				(void) fprintf(sfp, " %s", pargv[i]);
1882 			(void) fprintf(sfp, "\n\n");
1883 			(void) fprintf(sfp, "\nrun_tests()\n{\n");
1884 			for (i = 0, cdp = cdefs; i < cnt; i++, cdp++)
1885 				if (cdp->pid) {
1886 					(void) fprintf(sfp,
1887 					    "\tif [ -x ./%s.%d ]\n\tthen\n",
1888 					    name, (int)cdp->pid);
1889 					(void) fprintf(sfp,
1890 					    "\t\techo \"Starting test"
1891 					    " %d (id %d)\"\n",
1892 					    i, (int)cdp->pid);
1893 					(void) fprintf(sfp, "\t\t./%s.%d\n",
1894 					    name, (int)cdp->pid);
1895 					(void) fprintf(sfp, "\t\techo \""
1896 					    "Test %d (id %d) complete\"\n",
1897 					    i, (int)cdp->pid);
1898 					(void) fprintf(sfp, "\tfi\n");
1899 				}
1900 			(void) fprintf(sfp, "}\n\nrun_tests\n");
1901 			if (fchmod(fileno(sfp), S_IRWXU|S_IRGRP|S_IROTH))
1902 				msg(0, "fchmod on control script failed: %s\n",
1903 				    strerror(errno));
1904 			if (fclose(sfp) != 0)
1905 				msg(0, "Error closing control script: %s\n",
1906 				    strerror(errno));
1907 		}
1908 
1909 		set_handler(SIGALRM);	/* handle it */
1910 		/*
1911 		 * The user may want to terminate logging before the log fills
1912 		 * so use a timer to signal the logging children to handle this
1913 		 * case.
1914 		 */
1915 		timechunk = collecttime / MAXALRMCALL;
1916 		collecttime = collecttime - timechunk * MAXALRMCALL;
1917 
1918 		msg(2, "logging for (0x%llx 0x%llx)\n", timechunk, collecttime);
1919 
1920 		(void) alarm(collecttime); /* odd bit of collect time */
1921 
1922 		/* wait for the log to fill or deadline satisfied */
1923 		for (;;) {
1924 			pid = wait(&statloc);
1925 			for (i = 0, nchildren = 0, cdp = cdefs;
1926 			    i < cnt; i++, cdp++)
1927 				if (cdp->pid == pid)
1928 					cdp->pid = 0;
1929 			for (i = 0, nchildren = 0, cdp = cdefs;
1930 			    i < cnt; i++, cdp++)
1931 				if (cdp->pid)
1932 					nchildren++;
1933 			if (nchildren == 0)
1934 				break;
1935 			if (killed)
1936 				break;
1937 			if (alarmed) {
1938 				if (timechunk-- > 0) {
1939 					/*
1940 					 * prepare for the next timeslice by
1941 					 * rearming the clock
1942 					 */
1943 					if (alarm(MAXALRMCALL) == 0)
1944 						alarmed = 0;
1945 					else {
1946 						/*
1947 						 * must have been a user abort
1948 						 * (via SIGALRM)
1949 						 */
1950 						(void) alarm(0);
1951 						break;
1952 					}
1953 				} else
1954 					break;
1955 			}
1956 		}
1957 
1958 		(void) fprintf(outfile, "Logging complete.\n");
1959 	}
1960 	if (got_it) {
1961 		if (scriptargs > 0) {
1962 			(void) snprintf(buf, sizeof (buf),
1963 			    "DRIVER_PATH=/devices%s DRIVER_INSTANCE=%d"
1964 			    " DRIVER_UNCONFIGURE=1 DRIVER_CONFIGURE=0",
1965 			    devpathp, drv_inst);
1966 			for (i = 0; i < scriptargs; i++) {
1967 				(void) strcat(buf, " ");
1968 				(void) strcat(buf, fixup_script[i]);
1969 			}
1970 			(void) system(buf);
1971 		} else {
1972 			(void) system("kill `cat /tmp/bofi.pid`");
1973 		}
1974 	}
1975 	msg(2, "test_driver: terminating\n");
1976 }
1977 
1978 static int
1979 getnameinst(char *orig_path, int *instance, char *name, int namelen)
1980 {
1981 	di_node_t node;
1982 	char *binding_name;
1983 
1984 	if ((node = di_init(&orig_path[8], DINFOSUBTREE|DINFOMINOR)) ==
1985 	    DI_NODE_NIL)
1986 		return (-1);
1987 	if ((binding_name = di_driver_name(node)) == NULL)
1988 		return (-1);
1989 	*instance = di_instance(node);
1990 	(void) strncpy(name, binding_name, namelen);
1991 	di_fini(node);
1992 	return (0);
1993 }
1994 
/*
 * Usage text printed after "usage: <Progname> " by main().  The three
 * alternatives are: define an error injection, define an access log
 * (-a LOG), or request help.
 *
 * -C takes an operand (it is parsed as "C:" and stored as the user comment)
 * and every word following the -e script name is passed on to that script,
 * so both are shown with their operands.
 */
static char syntax[] =
	"          [ -n name [ -i instance ] | -P path ]\n"
	"          [ -a acc_types ] [ -r rnumber ]\n"
	"          [ -l offset [ length ] ] [ -c count [ failcount ] ]\n"
	"          [ -o operator [ operand ] ] [ -f acc_chk  ]\n"
	"          [ -w max_wait_period [ report_interval ] ]\n"
	"     or\n"
	"          [ -n name [ -i instance ] | -P path ]\n"
	"          -a  LOG  [  acc_types ]  [ -r rnumber]\n"
	"          [ -l offset [ length ] ] [ -c count [ failcount ] ]\n"
	"          [ -s collect_time ] [ -p policy ] [ -x flags ]\n"
	"          [ -C comment_string ] [ -e fixup_script [ args ] ]\n"
	"     or\n"
	"          -h";
2009 
2010 int
2011 main(int argc, char *argv[])
2012 {
2013 	extern char *optarg;
2014 	extern int optind;
2015 
2016 	char	c;		/* for parsing getopts */
2017 	int	nopts = 0;	/* for backward compatibility */
2018 	int	err = 0;
2019 
2020 	/* use a maximal set of defaults for logging or injecting */
2021 	struct bofi_errdef errdef = {
2022 		0,		/* length of driver name */
2023 		{0},		/* driver name */
2024 		-1,		/* monitor all instances */
2025 		-1,		/* monitor all register sets and DMA handles */
2026 		(offset_t)0,	/* monitor from start of reg. set or DMA hd */
2027 		myLLMAX,	/* monitor whole reg set or DMA hdl(no LLMAX) */
2028 		0,		/* qualify all */
2029 		DFLTLOGSZ,	/* default no. of accesses before corrupting */
2030 		0u,		/* default no. of accesses to corrupt */
2031 		0u,		/* no check access corruption */
2032 		BOFI_NOP,	/* no corruption operator by default */
2033 		myULLMAX,	/* default operand */
2034 		{0, 0, BOFI_LOG_TIMESTAMP, /* timestamp by default */
2035 		0, 0, 0, 0},	/* no logging by default */
2036 		0};
2037 
2038 
2039 	/* specify the default no of seconds for which to monitor */
2040 	unsigned long long	collecttime = DFLTLOGTIME;
2041 
2042 	char	*str;	/* temporary variable */
2043 	long	tmpl;	/* another one */
2044 	int		i;
2045 	uint_t	tmpui;
2046 
2047 	char buf[MAXPATHLEN];
2048 
2049 	Progname = (char *)strrchr(*argv, '/');
2050 	Progname = (Progname == NULL) ? *argv : Progname + 1;
2051 
2052 	errfile = stderr;
2053 	outfile = stdout;
2054 	policy = 0;
2055 	lsize_is_default = 1;
2056 	pargv = argv;
2057 	pargc = argc;
2058 
2059 	while ((c = getopt(argc, argv, "a:c:C:dD:e:f:h:i:l:n:o:p:P:r:s:tw:x"))
2060 	    != EOF) {
2061 		nopts++;
2062 		switch (c) {
2063 		case 'a':
2064 			msg(2, "option a: optarg %s optind %d argc %d\n",
2065 			    optarg, optind, argc);
2066 			if ((err = str_to_bm(optarg, atypes,
2067 			    &errdef.access_type)) == 0)
2068 				while (optind < argc && *argv[optind] != '-') {
2069 					if ((err = str_to_bm(argv[optind++],
2070 					    atypes, &errdef.access_type)))
2071 						break;
2072 				}
2073 			break;
2074 		case 'c':
2075 			lsize_is_default = 0;
2076 			/* zero is valid */
2077 			errdef.access_count = strtoul(optarg, &str, 0);
2078 			if (str == optarg)
2079 				err = EINVAL;
2080 			else if (optind < argc && (argv[optind][0] != '-' ||
2081 			    (strlen(argv[optind]) > 1 &&
2082 			    isdigit(argv[optind][1]))))
2083 				errdef.fail_count =
2084 				    strtoull(argv[optind++], 0, 0);
2085 			break;
2086 		case 'C':
2087 			user_comment = optarg;
2088 			if (optind < argc && argv[optind][0] != '-')
2089 				err = EINVAL;
2090 			break;
2091 		case 'D':
2092 			dbglvl = strtoul(optarg, &str, 0);
2093 			break;
2094 		case 'e':
2095 			fixup_script = 0;
2096 			scriptargs = 0;
2097 			fixup_script = &argv[optind - 1];
2098 			scriptargs += 1;
2099 			while (optind < argc) {
2100 				optind += 1;
2101 				scriptargs += 1;
2102 			}
2103 			break;
2104 		case 'f':
2105 			tmpl = strtol(optarg, &str, 0);
2106 
2107 			if (str != optarg)
2108 				errdef.acc_chk = tmpl;
2109 			else if (strcmp(optarg, "PIO") == 0)
2110 				errdef.acc_chk = 1;
2111 			else if (strcmp(optarg, "DMA") == 0)
2112 				errdef.acc_chk = 2;
2113 			else if (strcmp(optarg, "U4FT_ACC_NO_PIO") == 0)
2114 				errdef.acc_chk = 1;
2115 			else if (strcmp(optarg, "U4FT_ACC_NO_DMA") == 0)
2116 				errdef.acc_chk = 2;
2117 			else
2118 				err = EINVAL;
2119 			break;
2120 		case 'i':
2121 			if ((errdef.instance = strtol(optarg, &str, 0)) < 0)
2122 				errdef.instance = -1;
2123 			else if (str == optarg)
2124 				err = EINVAL;
2125 			break;
2126 		case 'l':
2127 			errdef.offset = strtoull(optarg, &str, 0);
2128 			if (str == optarg)
2129 				err = EINVAL;
2130 			else if (optind < argc &&
2131 			    (argv[optind][0] != '-' ||
2132 			    (strlen(argv[optind]) > 1 &&
2133 			    isdigit(argv[optind][1])))) {
2134 				/* -1 indicates the rest of register set */
2135 				errdef.len = strtoull(argv[optind++], 0, 0);
2136 			}
2137 			break;
2138 		case 'n':
2139 			(void) strncpy(errdef.name, optarg, MAXNAMELEN);
2140 			if ((errdef.namesize = strlen(errdef.name)) == 0)
2141 				err = EINVAL;
2142 			break;
2143 		case 'o':
2144 			for (i = 0; optypes[i].str != 0; i++)
2145 				if (strcmp(optarg, optypes[i].str) == 0) {
2146 					errdef.optype = optypes[i].code;
2147 					break;
2148 				}
2149 			if (optypes[i].str == 0)
2150 				err = EINVAL;
2151 			else if (optind < argc &&
2152 			    (argv[optind][0] != '-' ||
2153 			    (strlen(argv[optind]) > 1 &&
2154 			    isdigit(argv[optind][1]))))
2155 				errdef.operand =
2156 				    strtoull(argv[optind++], 0, 0);
2157 			break;
2158 		case 'p':
2159 			tmpui = 0x0u;
2160 			if ((err = str_to_bm(optarg, ptypes, &tmpui)) == 0) {
2161 				while (optind < argc && *argv[optind] != '-')
2162 					if ((err = str_to_bm(argv[optind++],
2163 					    ptypes, &tmpui)))
2164 						break;
2165 				policy = (uint16_t)tmpui;
2166 			}
2167 			if (err == 0 && (policy & BYTEPOLICY))
2168 				errdef.log.flags |= BOFI_LOG_REPIO;
2169 			break;
2170 		case 'P':
2171 			if (getnameinst(optarg, &errdef.instance, buf,
2172 			    MAXPATHLEN) == -1)
2173 				err = EINVAL;
2174 			else
2175 				(void) strncpy(errdef.name, buf, MAXNAMELEN);
2176 			break;
2177 		case 'r':
2178 			if ((errdef.rnumber = strtol(optarg, &str, 0)) < 0)
2179 				errdef.rnumber = -1;
2180 			if (str == optarg) err = EINVAL;
2181 			break;
2182 		case 's':
2183 			collecttime = strtoull(optarg, &str, 0);
2184 			if (str == optarg)
2185 				err = EINVAL;	/* zero is valid */
2186 			break;
2187 		case 'w':
2188 			do_status = 1;
2189 			max_edef_wait = strtoul(optarg, &str, 0);
2190 			/* zero is valid */
2191 			if (str == optarg)
2192 				err = EINVAL;
2193 			else if (optind < argc &&
2194 			    (argv[optind][0] != '-' ||
2195 			    (strlen(argv[optind]) > 1 &&
2196 			    isdigit(argv[optind][1]))))
2197 				edef_sleep = strtoull(argv[optind++], 0, 0);
2198 
2199 			break;
2200 		case 'x':
2201 			if ((optind < argc && *argv[optind] == '-') ||
2202 			    optind == argc)
2203 				errdef.log.flags |= BOFI_LOG_WRAP;
2204 			else {
2205 				if (strchr(argv[optind], 'w') != 0)
2206 					errdef.log.flags |= BOFI_LOG_WRAP;
2207 				if (strchr(argv[optind], 'r') != 0)
2208 					errdef.log.flags |= BOFI_LOG_REPIO;
2209 				if (strchr(argv[optind], 't') != 0)
2210 					errdef.log.flags |= BOFI_LOG_TIMESTAMP;
2211 				if (strstr(argv[optind], "~t") != 0)
2212 					errdef.log.flags &= ~BOFI_LOG_TIMESTAMP;
2213 				optind++;
2214 			}
2215 			break;
2216 		case 'h':
2217 			(void) fprintf(errfile, "usage: %s %s\n",
2218 			    Progname, syntax);
2219 			exit(0);
2220 			break;
2221 		case '?':	/* also picks up missing parameters */
2222 		default:
2223 			(void) fprintf(errfile, "usage: %s %s\n",
2224 			    Progname, syntax);
2225 			exit(2);
2226 		}
2227 
2228 		if (err) {
2229 			(void) fprintf(errfile, "usage: %s %s\n",
2230 			    Progname, syntax);
2231 			exit(2);
2232 		}
2233 		if (c == 'e')
2234 			break;	/* the -e option must be the final option */
2235 	}
2236 
2237 
2238 	if (errdef.name[0] == 0) {
2239 		msg(0, "%s - invalid name parameter\n", Progname);
2240 		exit(1);
2241 	}
2242 	errdef.namesize = strlen(errdef.name);
2243 
2244 	if (policy == 0) {
2245 		policy |= UNBIASEDPOLICY;
2246 		policy |= OPERATORSPOLICY;
2247 	}
2248 
2249 	if (errdef.optype == BOFI_NOP)
2250 		errdef.optype = BOFI_XOR;
2251 	if (errdef.access_type == BOFI_LOG) { /* qualify all accesses */
2252 		errdef.access_type =
2253 		    (BOFI_LOG|BOFI_DMA_RW|BOFI_PIO_RW|BOFI_INTR);
2254 		atype_is_default = 1;
2255 	} else if (errdef.access_type == 0) { /* qualify all accesses */
2256 		errdef.access_type =
2257 		    (BOFI_DMA_RW|BOFI_PIO_RW|BOFI_INTR);
2258 		atype_is_default = 1;
2259 	} else
2260 		atype_is_default = 0;
2261 
2262 	init_sigs();
2263 	if ((errdef.access_type & BOFI_LOG) == 0) {
2264 		int fd, i, instance;
2265 		size_t cnt;
2266 		struct handle_info *hdls, *hp;
2267 
2268 		if ((fd = open(BOFI_DEV, O_RDWR)) == -1) {
2269 			msg(0, "%s: error opening bofi driver: %s\n",
2270 			    Progname, strerror(errno));
2271 			exit(1);
2272 		}
2273 		if ((err = get_hinfo(fd, errdef.name, &hdls, &cnt,
2274 		    errdef.instance, errdef.access_type, errdef.rnumber,
2275 		    errdef.offset, errdef.len, 0)) != 0) {
2276 			msg(0, "%s: Bad lookup on bofi driver.\n", Progname);
2277 			(void) close(fd);
2278 			exit(1);
2279 		} else if (cnt == 0) {
2280 			msg(0,
2281 			    "%s: No handles match request access criteria.\n",
2282 			    Progname);
2283 			(void) close(fd);
2284 			exit(1);
2285 		}
2286 		if (errdef.instance == -1)
2287 			instance = -1;
2288 		else {
2289 			instance = hdls->instance;
2290 			for (i = 0, hp = hdls; i < cnt; i++, hp++) {
2291 				if (instance != hp->instance) {
2292 					instance = -1;
2293 					break;
2294 				}
2295 			}
2296 		}
2297 		if (instance == -1) {
2298 			msg(0, "Multiple instances match access criteria"
2299 			    " (only allowed when logging):\n");
2300 			msg(0, "\tinst\taccess\trnumber\toffset\tlength\n");
2301 			for (i = 0, hp = hdls; i < cnt; i++, hp++)
2302 				msg(0, "\t%d\t0x%x\t%d\t0x%llx\t0x%llx\n",
2303 				    hp->instance, hp->access_type,
2304 				    hp->rnumber, hp->offset, hp->len);
2305 		} else {
2306 			struct bofi_errstate es;
2307 			int timeleft = max_edef_wait;
2308 
2309 			if (ioctl(fd, BOFI_ADD_DEF, &errdef) == -1) {
2310 				perror("th_define - adding errdef failed");
2311 			} else {
2312 				es.errdef_handle = errdef.errdef_handle;
2313 				msg(4, "waiting for edef:"
2314 				    " %d %s %d %d 0x%llx 0x%llx 0x%x 0x%x"
2315 				    " 0x%x 0x%x 0x%x 0x%llx\n",
2316 				    errdef.namesize, errdef.name,
2317 				    errdef.instance, errdef.rnumber,
2318 				    errdef.offset, errdef.len,
2319 				    errdef.access_type, errdef.access_count,
2320 				    errdef.fail_count, errdef.acc_chk,
2321 				    errdef.optype, errdef.operand);
2322 
2323 				set_handler(SIGALRM);	/* handle it */
2324 
2325 				do {
2326 					if (do_status)
2327 						(void) alarm(edef_sleep);
2328 					if (ioctl(fd, BOFI_CHK_STATE_W,
2329 					    &es) == -1) {
2330 						if (errno != EINTR) {
2331 							perror("bad"
2332 							    " BOFI_CHK_STATE");
2333 							break;
2334 						} else if (!do_status) {
2335 							break;
2336 						}
2337 					}
2338 					if (do_status)
2339 						(void) fprintf(outfile,
2340 						    "%llu:%llu:%u:%u:%u:"
2341 						    "%u:%d:\"%s\"\n",
2342 						    es.fail_time, es.msg_time,
2343 						    es.access_count,
2344 						    es.fail_count,
2345 						    es.acc_chk, es.errmsg_count,
2346 						    (uint_t)es.severity,
2347 						    (es.msg_time) ?
2348 						    es.buffer : "");
2349 					if (es.acc_chk == 0 &&
2350 					    es.fail_count == 0 && !do_status)
2351 						print_err_reports(outfile,
2352 						    &es, "", "", -1);
2353 					else if (alarmed) {
2354 						alarmed = 0;
2355 						if ((timeleft -= edef_sleep) <=
2356 						    0) {
2357 							if (do_status)
2358 								break;
2359 							print_err_reports(
2360 							    outfile, &es, "",
2361 							    "", -1);
2362 							break;
2363 						}
2364 					} else if (!do_status)
2365 						print_err_reports(outfile,
2366 						    &es, "", "", -1);
2367 				} while (es.acc_chk != 0 || es.fail_count != 0);
2368 
2369 				msg(2, "done: acc_chk 0x%x fcnt %d\n",
2370 				    es.acc_chk, es.fail_count);
2371 			}
2372 
2373 			(void) close(fd);
2374 		}
2375 		free(hdls);
2376 		return (0);
2377 	}
2378 	test_driver(&errdef, collecttime);
2379 	return (0);
2380 }
2381