xref: /titanic_51/usr/src/cmd/th_tools/th_define.c (revision 6bbe05905a1c10a2703f95fb4912eb14b87f6670)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/time_impl.h>
29 #include <sys/wait.h>
30 #include <stdio.h>
31 #include <stdio_ext.h>
32 #include <stdlib.h>
33 #include <stdarg.h>
34 #include <ctype.h>
35 #include <time.h>
36 #include <fcntl.h>
37 #include <sys/stat.h>
38 #include <sys/resource.h>
39 #include <limits.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <errno.h>
43 #include <signal.h>
44 #include <libdevinfo.h>
45 #define	_KERNEL
46 #include <sys/dditypes.h>
47 #include <sys/sunddi.h>
48 #include <sys/bofi.h>
49 
50 #define	BOFI_DEV	"/devices/pseudo/bofi@0:bofi,ctl"
51 
52 #define	GETSTRUCT(s, num)	\
53 	((s *) memalign(sizeof (void*), (num) * sizeof (s)))
54 
55 #define	MAXEDEFS	(0x64)		/* controls max no of concurrent edefs */
56 #define	DFLTLOGSZ	(0x4000)	/* default size of an access log */
57 #define	DFLT_NONPIO_LOGSZ	(0x400)	/* default size of a log */
58 #define	MAXALRMCALL	(0x1000ull)	/* alarm does not permit big values */
59 #define	MIN_REPORT_TIME	(5)		/* min time to wait for edef status */
60 #define	DISTRIB_CUTOFF	(3)		/* useful when reducing a log */
61 #define	myLLMAX		(0x7fffffffffffffffll)
62 #define	myULLMAX	(0xffffffffffffffffull)
63 
64 /*
65  * default interval to wait between kicking off workload and injecting fault
66  */
67 #define	DEFAULT_EDEF_SLEEP 3
68 /*
69  * when generating dma corruptions, it is best to corrupt each double word
70  * individually for control areas - however for data areas this can be
71  * excessive and would generate so many cases we would never finish the run.
72  * So set a cut-off value where we switch from corrupting each double word
73  * separately to corrupting the lot in one go. 0x100 bytes seems a good value
74  * on the drivers we have seen so far.
75  */
76 #define	DMA_INDIVIDUAL_CORRUPT_CUTOFF 0x100
77 
78 struct collector_def {
79 	struct bofi_errdef ed;		/* definition of the log criteria */
80 	struct bofi_errstate es;	/* the current status of the log */
81 	struct acc_log_elem *lp;	/* array of logged accesses */
82 	pid_t pid;
83 };
84 
85 static uint16_t policy;
86 
87 #define	BYTEPOLICY	(0xf)
88 #define	MULTIPOLICY	(0x10)
89 #define	SIZEPOLICY	(BYTEPOLICY|MULTIPOLICY)
90 #define	UNBIASEDPOLICY	0x20
91 #define	UNCOMMONPOLICY	0x40
92 #define	COMMONPOLICY	0x80
93 #define	MEDIANPOLICY	0x100
94 #define	MAXIMALPOLICY	0x200
95 #define	OPERATORSPOLICY	0x400
96 #define	VALIDPOLICY	(0x7ff)
97 
98 typedef
99 struct coding {
100 	char	*str;
101 	uint_t	code;
102 } coding_t;
103 
104 static coding_t ptypes[] = {
105 	{"onebyte", 0x1}, {"twobyte", 0x2},
106 	{"fourbyte", 0x4}, {"eightbyte", 0x8},
107 	{"multibyte", 0x10}, {"unbiased", 0x20}, {"uncommon", 0x40},
108 	{"common", 0x80}, {"median", 0x100}, {"maximal", 0x200},
109 	{"operators", 0x400},  {0, 0}
110 };
111 static coding_t atypes[] = {
112 	{"pio_r", BOFI_PIO_R}, {"pio_w", BOFI_PIO_W},
113 	{"dma_r", BOFI_DMA_R}, {"dma_w", BOFI_DMA_W},
114 	{"pio", BOFI_PIO_RW}, {"dma", BOFI_DMA_RW},
115 	{"log", BOFI_LOG}, {"intr", BOFI_INTR},
116 	{"PIO_R", BOFI_PIO_R}, {"PIO_W", BOFI_PIO_W},
117 	{"DMA_R", BOFI_DMA_R}, {"DMA_W", BOFI_DMA_W},
118 	{"PIO", BOFI_PIO_RW}, {"DMA", BOFI_DMA_RW},
119 	{"LOG", BOFI_LOG}, {"INTR", BOFI_INTR}, {0, 0}
120 };
121 static coding_t optypes[] = {
122 	{"EQ", BOFI_EQUAL}, {"AND", BOFI_AND}, {"OR", BOFI_OR},
123 	{"XOR", BOFI_XOR}, {"NO", BOFI_NO_TRANSFER},
124 	{"DELAY", BOFI_DELAY_INTR}, {"LOSE", BOFI_LOSE_INTR},
125 	{"EXTRA", BOFI_EXTRA_INTR}, {0, 0}
126 };
127 static coding_t doptypes[] = {
128 	{"EQ", BOFI_EQUAL}, {"AND", BOFI_AND}, {"OR", BOFI_OR},
129 	{"XOR", BOFI_XOR}, {0, 0}
130 };
131 static coding_t ioptypes[] = {
132 	{"DELAY", BOFI_DELAY_INTR}, {"LOSE", BOFI_LOSE_INTR},
133 	{"EXTRA", BOFI_EXTRA_INTR}, {0, 0}
134 };
135 
136 static const unsigned long long	DFLTLOGTIME	= -1ull; /* log forever */
137 
138 /*
139  * This global controls the generation of errdefs for PIO_W. The default should
140  * be to only perform an access check errdef but not to corrupt writes - this
141  * may trash non-FT platforms.
142  */
143 static uint_t atype_is_default;	/* do not corrupt PIO_W by default */
144 static uint_t lsize_is_default;	/* set when the user has not given a size */
145 
146 static uint64_t random_operand = 0xdeadbeafdeadbeafull;
147 #define	NPIO_DEFAULTS	(3)	/* number of default corruption values */
148 static longlong_t pio_default_values[NPIO_DEFAULTS] = {
149 	0x0ull,			/* corresponds to a line going high/low */
150 	0x32f1f03232f1f032ull,	/* the value returned when the fake ta is set */
151 	(longlong_t)(~0)	/* corresponds to a line going high/low */
152 };
153 
154 static uint_t dbglvl		= 0;	/* debug this program */
/*
 * Flags raised by kill_sighandler(). They are written from signal context,
 * so they are volatile sig_atomic_t: the only object type the C standard
 * guarantees can be safely stored to from an asynchronous signal handler.
 */
static volatile sig_atomic_t alarmed	= 0;	/* a SIGALRM was delivered */
static volatile sig_atomic_t killed	= 0;	/* any other handled signal */
157 
158 /*
159  * name of a script to call before offlining a driver being tested
160  */
161 static char **fixup_script = 0;
162 static int	scriptargs = 0;
163 static char **pargv;
164 static int	pargc;
165 
166 static int	max_edef_wait = 0;
167 static int	edef_sleep = 0;
168 static int	do_status = 0;	/* report edef status in parsable format */
169 static char *user_comment = 0;
170 
171 static char *Progname;
172 static FILE *errfile;
173 static FILE *outfile;
174 
175 /*
176  * The th_define utility provides an interface to the bus_ops fault injection
177  * bofi device driver for defining error injection specifications (referred to
178  * as errdefs). An errdef corresponds to a specification of how to corrupt a
179  * device driver's accesses to its hardware. The command line arguments
180  * determine the precise nature of the fault to be injected. If the supplied
181  * arguments define a consistent errdef, the th_define process will store the
182  * errdef with the bofi driver and suspend itself until the criteria given by
183  * the errdef become satisfied (in practice, this will occur when the access
184  * counts go to zero).
185  *
186  * When the resulting errdef is activated using the th_manage(1M) user command
187  * utility, the bofi driver will act upon the errdef by matching the number of
188  * hardware accesses - specified in count, that are of the type specified in
189  * acc_types, made by instance number instance - of the driver whose name is
190  * name, (or by the driver instance specified by path) to the register set
191  * (or DMA handle) specified by rnumber, that lie within the range offset to
192  * offset + length from the beginning of the register set or DMA handle. It then
193  * applies operator and operand to the next failcount matching accesses.
194  *
195  * If acc_types includes LOG, th_define runs in automatic test script generation
196  * mode, and a set of test scripts (written in the Korn shell) is created and
197  * placed in a sub-directory of the current directory with the name
198  * driver.test.<id>. A separate, executable script is generated for each access
199  * handle that matches the logging criteria. The log of accesses is placed at
200  * the top of each script as a record of the session. If the current directory
201  * is not writable, file output is written to standard output. The base name of
202  * each test file is the driver name, and the extension is a number that
203  * discriminates between different access handles. A control script (with the
204  * same name as the created test directory) is generated that will run all the
205  * test scripts sequentially.
206  *
207  * Executing the scripts will install, and then activate, the resulting error
208  * definitions. Error definitions are activated sequentially and the driver
209  * instance under test is taken offline and brought back online before each test
210  * (refer to the -e option for more information). By default, logging will apply
211  * to all PIO accesses, interrupts and DMA accesses to and from areas mapped
212  * for both reading and writing, but it can be constrained by specifying
213  * additional acc_types, rnumber, offset and length. Logging will continue for
214  * count matching accesses, with an optional time limit of collect_time seconds.
215  *
216  * Either the -n or -P option must be provided. The other options are optional.
217  * If an option (other than the -a option) is specified multiple times, only
218  * the final value for the option is used. If an option is not specified, its
219  * associated value is set to an appropriate default, which will provide
220  * maximal error coverage as described below.
221  */
222 
223 /*PRINTFLIKE2*/
224 static void
225 msg(uint_t lvl, char *msg, ...)
226 {
227 #define	BUFSZ	128
228 
229 	if (lvl <= dbglvl) {
230 		int count;
231 		va_list args;
232 		char buf[BUFSZ];
233 		int	pos = 0;
234 
235 		va_start(args, msg);
236 		count = vsnprintf(buf, BUFSZ, msg, args);
237 		va_end(args);
238 		if (count > 0) {
239 			count += pos;
240 			if (count >= sizeof (buf))
241 				count = BUFSZ - 1;
242 			buf[count] = '\0';
243 			(void) fprintf(errfile, "%s", buf);
244 		}
245 	}
246 }
247 
248 static void
249 kill_sighandler(int sig)
250 {
251 	switch (sig) {
252 		case SIGALRM:
253 			alarmed = 1;
254 			break;
255 		default:
256 			killed = 1;
257 			break;
258 	}
259 }
260 
261 static void
262 set_handler(int sig)
263 {
264 	struct sigaction sa;
265 
266 	(void) sigfillset(&(sa.sa_mask));
267 	sa.sa_flags = 0;
268 	sa.sa_handler = kill_sighandler;
269 	if (sigaction(sig, &sa, NULL) != 0)
270 		/* install handler */
271 		msg(0, "bad sigaction: %s\n", strerror(errno));
272 }
273 
274 /*
275  * Compare two driver access handles
276  */
277 static int
278 hdl_cmp(const void *p1, const void *p2)
279 {
280 	struct handle_info *e1 = (struct handle_info *)p1;
281 	struct handle_info *e2 = (struct handle_info *)p2;
282 
283 	if (e1->instance < e2->instance)
284 		return (-1);
285 	else if (e1->instance > e2->instance)
286 		return (1);
287 	else if (e1->access_type < e2->access_type)
288 		return (-1);
289 	else if (e1->access_type > e2->access_type)
290 		return (1);
291 	else if (e1->rnumber < e2->rnumber)
292 		return (-1);
293 	else if (e1->rnumber > e2->rnumber)
294 		return (1);
295 	else if (e1->len < e2->len)
296 		return (-1);
297 	else if (e1->len > e2->len)
298 		return (1);
299 	else if (e1->offset < e2->offset)
300 		return (-1);
301 	else if (e1->offset > e2->offset)
302 		return (1);
303 	else if (e1->addr_cookie < e2->addr_cookie)
304 		return (-1);
305 	else if (e1->addr_cookie > e2->addr_cookie)
306 		return (1);
307 	else
308 		return (0);
309 }
310 
311 /*
312  * Compare two hardware accesses.
313  */
314 static int
315 elem_cmp(const void *p1, const void *p2)
316 {
317 	struct acc_log_elem *e1 = (struct acc_log_elem *)p1;
318 	struct acc_log_elem *e2 = (struct acc_log_elem *)p2;
319 
320 	if (e1->access_type < e2->access_type)
321 		return (-1);
322 	else if (e1->access_type > e2->access_type)
323 		return (1);
324 	else if (e1->offset < e2->offset)
325 		return (-1);
326 	else if (e1->offset > e2->offset)
327 		return (1);
328 	else if (e1->size < e2->size)
329 		return (-1);
330 	else if (e1->size > e2->size)
331 		return (1);
332 	else
333 		return (0);
334 }
335 
336 /*
337  * Another way of comparing two hardware accesses.
338  */
339 static int
340 log_cmp(const void *p1, const void *p2)
341 {
342 	struct acc_log_elem *e1 = (struct acc_log_elem *)p1;
343 	struct acc_log_elem *e2 = (struct acc_log_elem *)p2;
344 
345 	int rval = elem_cmp(p1, p2);
346 
347 	if (rval == 0)
348 		if (e1->repcount < e2->repcount)
349 			return (-1);
350 		else if (e1->repcount > e2->repcount)
351 			return (1);
352 		else
353 			return (0);
354 	else
355 		return (rval);
356 }
357 
358 /*
359  * And a final way of sorting a log (by access type followed by repcount).
360  */
361 static int
362 log_cmp2(const void *p1, const void *p2)
363 {
364 	struct acc_log_elem *e1 = (struct acc_log_elem *)p1;
365 	struct acc_log_elem *e2 = (struct acc_log_elem *)p2;
366 
367 	if (e1->access_type < e2->access_type)
368 		return (-1);
369 	else if (e1->access_type > e2->access_type)
370 		return (1);
371 	else if (e1->repcount < e2->repcount)
372 		return (-1);
373 	else if (e1->repcount > e2->repcount)
374 		return (1);
375 	else
376 		return (0);
377 }
378 
379 static void
380 dump_log(uint_t lvl, FILE *fp, struct acc_log_elem *items,
381 	size_t nitems, uint_t logflags)
382 {
383 	if (lvl <= dbglvl) {
384 		int i;
385 		uint_t offset, allthesame = 1;
386 
387 		if (logflags & BOFI_LOG_TIMESTAMP &&
388 		    getenv("DUMP_FULL_LOG") != 0)
389 			allthesame = 0;
390 		else
391 			for (i = 1; i < nitems; i++)
392 				if (elem_cmp(items+i, items) != 0)
393 					allthesame = 0;
394 		if (fp != 0)
395 			(void) fprintf(fp,
396 			    "# Logged Accesses:\n# %-4s\t%-12s\t%-4s\t%-18s"
397 			    " (%-1s)\t%-10s\n\n", "type",
398 			    (items->access_type & BOFI_DMA_RW) ?
399 			    "address" : "offset",
400 			    "size", "value", "repcnt", "time");
401 
402 		for (i = 0; i < nitems; i++, items++) {
403 			offset = items->offset;
404 			if (fp != 0) {
405 				(void) fprintf(fp,
406 				    "# 0x%-2x\t0x%-10x\t%-4d\t0x%-16llx"
407 				    " (0x%-1x)\t%-8llu\n",
408 				    items->access_type, offset, items->size,
409 				    items->value, items->repcount,
410 				    (logflags & BOFI_LOG_TIMESTAMP) ?
411 				    items->access_time : 0ull);
412 
413 				if (allthesame) {
414 					(void) fprintf(fp,
415 					    "# Access duplicated %d times\n",
416 					    nitems);
417 					break;
418 				}
419 			} else
420 				msg(lvl, "# 0x%x 0x%x %d 0x%llx(0x%x) %llu\n",
421 				    items->access_type, offset, items->size,
422 				    items->value, items->repcount,
423 				    (logflags & BOFI_LOG_TIMESTAMP) ?
424 				    items->access_time : 0ull);
425 		}
426 	}
427 }
428 
429 static int
430 str_to_bm(char *optarg, coding_t *c, uint_t *bm)
431 {
432 	char *str;
433 	char *s = "\t\n ";
434 	int err = EINVAL;
435 
436 	msg(2, "str_to_bm: optarg %s\n", optarg);
437 	if (optarg != NULL && (str = strtok(optarg, s))) {
438 		msg(2, "str_to_bm: str %s\n", str);
439 		do {
440 			for (; c->str != 0; c++)
441 				if (strcmp(str, c->str) == 0) {
442 					*bm |= c->code;
443 					msg(2, "str_to_bm: %s matches\n",
444 					    c->str);
445 					err = 0;
446 					break;
447 				}
448 		} while ((str = strtok(NULL, s)));
449 	} else
450 		return (EINVAL);
451 	msg(2, "str_to_bm: done 0x%x\n", *bm);
452 	return (err);
453 }
454 
455 
456 /*
457  * Generic routine for commands that apply to a particular instance of
458  * a driver under test (e.g. activate all errdefs defined on an instance).
459  */
460 static int
461 manage_instance(int fd, char *namep, int instance, int cmd)
462 {
463 	struct bofi_errctl errctl;
464 
465 	errctl.namesize = strlen(namep);
466 	(void) strncpy(errctl.name, namep, MAXNAMELEN);
467 	errctl.instance = instance;
468 
469 	msg(8, "manage_instance: %s %d\n", namep, instance);
470 	if (ioctl(fd, cmd, &errctl) == -1) {
471 		msg(0, "bofi ioctl %d failed: %s\n", cmd, strerror(errno));
472 		return (-1);
473 	}
474 	return (0);
475 }
476 
477 
478 static int
479 define_one_error(
480 	FILE *fp,
481 	struct bofi_errdef *edp,
482 	struct acc_log_elem *item,
483 	ulong_t	nttime,
484 	ulong_t interval,
485 	char	*type,
486 	int fon,	/* corrupt after this many accesses */
487 	size_t fcnt,	/* and then fail it fcnt times */
488 	uint_t	acc_chk,
489 	char	*opname,
490 	uint64_t	operand)
491 {
492 	(void) fprintf(fp,
493 	    "-n %s -i %d -r %d -l 0x%llx 0x%x -a %s -c %d %d -f %d"
494 	    " -o %s 0x%llx",
495 	    (char *)edp->name,
496 	    edp->instance,
497 	    edp->rnumber,
498 	    edp->offset + item->offset,	/* offset into the regset */
499 	    item->size,	/* corrupt addrs from offset to offset+size */
500 	    type,
501 	    fon,	/* corrupt after this many accesses */
502 	    fcnt,	/* and then fail it fcnt times */
503 	    acc_chk,
504 	    opname,
505 	    operand);
506 
507 	(void) fprintf(fp, " -w %lu %lu\n", nttime, interval);
508 	return (0);
509 }
510 
511 static void
512 define_op_err(FILE *fp, int *ecnt, struct bofi_errdef *edp,
513 	struct acc_log_elem *item, ulong_t nttime, ulong_t interval, char *type,
514 	int fon, size_t fcnt)
515 {
516 	coding_t *ct;
517 	char	*opname;
518 	uint_t	op;
519 	uint64_t	operand;
520 	int k, save_size;
521 	uint64_t save_offset;
522 
523 	if (item->access_type & BOFI_INTR)
524 		ct = &ioptypes[0];
525 	else
526 		ct = &doptypes[0];
527 
528 	/*
529 	 * errdefs for dma accesses are too numerous so assume that dma writes
530 	 * (DDI_DMA_SYNC_FORDEV) create less exposure to potential errors than
531 	 * do dma reads (DDI_DMA_SYNC_FORCPU).
532 	 *
533 	 * also by default do not corrupt PIO_W - it may hang a non-FT platform.
534 	 */
535 	if (item->access_type != BOFI_DMA_W &&
536 	    ((item->access_type & BOFI_PIO_W) == 0 || !atype_is_default)) {
537 		/*
538 		 * user has asked for PIO_W
539 		 */
540 		for (; ct->str != 0; ct++) {
541 			op = ct->code;
542 			opname = ct->str;
543 			switch (op) {
544 			case BOFI_EQUAL:
545 				operand = random_operand; /* a random value */
546 				random_operand = lrand48() | ((uint64_t)
547 				    (lrand48()) << 32);
548 				break;
549 			case BOFI_AND:
550 				operand = 0xaddedabadb00bull;
551 				break;
552 			case BOFI_OR:
553 				operand = 0x1;
554 				break;
555 			case BOFI_XOR:
556 			default:
557 				operand = myULLMAX;
558 				break;
559 			case BOFI_DELAY_INTR: /* delay for 1 msec */
560 				operand = 1000000;
561 				break;
562 			case BOFI_LOSE_INTR: /* op not applicable */
563 				operand = 0;
564 				break;
565 			case BOFI_EXTRA_INTR: /* extra intrs */
566 				operand = 0xfff;
567 				break;
568 			}
569 			*ecnt = *ecnt + 1;
570 
571 			if ((item->access_type == BOFI_DMA_W ||
572 			    item->access_type == BOFI_DMA_R) &&
573 			    item->size > sizeof (uint64_t) && item->size <
574 			    DMA_INDIVIDUAL_CORRUPT_CUTOFF) {
575 				save_size = item->size;
576 				save_offset = item->offset;
577 				for (k = (item->size +
578 				    sizeof (uint64_t) - 1) &
579 				    ~(sizeof (uint64_t) - 1);
580 				    k > 0; k -= sizeof (uint64_t)) {
581 					item->size = sizeof (uint64_t);
582 					(void) define_one_error(fp, edp,
583 					    item, nttime, interval, type, fon,
584 					    fcnt, edp->acc_chk, opname,
585 					    operand);
586 					item->offset += sizeof (uint64_t);
587 				}
588 				item->size = save_size;
589 				item->offset = save_offset;
590 			} else {
591 				(void) define_one_error(fp, edp, item,
592 				    nttime, interval, type, fon, fcnt,
593 				    edp->acc_chk, opname, operand);
594 			}
595 
596 			if (op == BOFI_EQUAL) {
597 				uint_t cnt;
598 				for (cnt = 0; cnt < NPIO_DEFAULTS;
599 				    cnt++, *ecnt = *ecnt + 1) {
600 					if ((item->access_type == BOFI_DMA_W ||
601 					    item->access_type == BOFI_DMA_R) &&
602 					    item->size > sizeof (uint64_t) &&
603 					    item->size <
604 					    DMA_INDIVIDUAL_CORRUPT_CUTOFF) {
605 						save_size = item->size;
606 						save_offset = item->offset;
607 						for (k = (item->size +
608 						    sizeof (uint64_t) - 1) &
609 						    ~(sizeof (uint64_t) - 1);
610 						    k > 0;
611 						    k -= sizeof (uint64_t)) {
612 							item->size =
613 							    sizeof (uint64_t);
614 							(void) define_one_error(
615 							    fp, edp, item,
616 							    nttime, interval,
617 							    type, fon, fcnt,
618 							    edp->acc_chk,
619 							    opname,
620 							    pio_default_values
621 							    [cnt]);
622 							item->offset +=
623 							    sizeof (uint64_t);
624 						}
625 						item->size = save_size;
626 						item->offset = save_offset;
627 					} else {
628 						(void) define_one_error(fp,
629 						    edp, item, nttime, interval,
630 						    type, fon, fcnt,
631 						    edp->acc_chk, opname,
632 						    pio_default_values[cnt]);
633 					}
634 				}
635 			}
636 		}
637 	}
638 
639 	if ((item->access_type & BOFI_PIO_W) && !atype_is_default) {
640 		/*
641 		 * user has asked for PIO_W
642 		 */
643 		(void) define_one_error(fp, edp, item, nttime, interval,
644 		    type, fon, fcnt, edp->acc_chk, "NO", 0);
645 		*ecnt = *ecnt + 1;
646 	}
647 
648 	/*
649 	 * and finally an access check errdef
650 	 */
651 	if (item->access_type & BOFI_PIO_RW)
652 		(void) define_one_error(fp, edp, item, nttime, interval,
653 		    type, fon, fcnt, 1, "OR", 0);
654 
655 	if (item->access_type & BOFI_DMA_RW)
656 		(void) define_one_error(fp, edp, item, nttime, interval,
657 		    type, fon, fcnt, 2, "OR", 0);
658 
659 }
660 
661 /*
662  * Convert a collection of log entries into error definitions.
663  */
664 /* ARGSUSED */
665 static int
666 define_nerrs(int fd, FILE *fp, int *ecnt, struct bofi_errdef *edp,
667 	struct acc_log_elem *items,
668 	size_t nitems,
669 	uint_t naccess,
670 	uint_t minac,
671 	uint_t maxac,
672 	ulong_t	logtime,
673 	ulong_t	logsize)
674 {
675 	char	*type;
676 	uint_t	at;
677 	int	i;
678 	struct acc_log_elem	*item;
679 	char	*opname;
680 	uint_t	op;
681 	uint64_t	operand;
682 	int	cycleiops, cycledops;
683 	int	intrs = 0;
684 	ulong_t	ttime, nttime, interval;
685 
686 	op = edp->optype;
687 	operand = edp->operand;
688 	msg(3, "define_nerrs: nitems %d (ac %d at 0x%x): (%d %d)"
689 	    " (op 0x%x 0x%llx)\n\n", nitems, naccess, items->access_type,
690 	    minac, maxac, op, operand);
691 
692 	/*
693 	 * all items are guaranteed have values in the two element set {0, at}
694 	 * where at is a valid access type (so find the value of at)
695 	 */
696 	for (i = 0, item = items, at = 0; i < nitems; i++, item++)
697 		if (item->access_type != 0) {
698 			at = item->access_type;
699 			break;
700 		}
701 	if (at == 0)
702 		return (-1);
703 
704 	/*
705 	 * find the string form of the access type
706 	 */
707 	for (i = 0, type = 0; atypes[i].str != 0; i++) {
708 		if (atypes[i].code == at) {
709 			type = atypes[i].str;
710 			break;
711 		}
712 	}
713 	if (type == 0) {
714 		msg(0, "Unknown access type returned from bofi\n\t");
715 		dump_log(0, 0, item, 1, BOFI_LOG_TIMESTAMP);
716 		msg(1, "0x%x 0x%x 0x%x 0x%x\n", BOFI_LOG, BOFI_INTR,
717 		    BOFI_DMA_RW, BOFI_PIO_RW);
718 		return (-1);
719 	}
720 
721 	msg(1, "define_n: at = 0x%d (%s)\n", at, type == 0 ? "null" : type);
722 	/*
723 	 * find the string form of the operator
724 	 */
725 	for (i = 0, opname = 0; optypes[i].str != 0; i++) {
726 		if (op == optypes[i].code) {
727 			opname = optypes[i].str;
728 			break;
729 		}
730 	}
731 
732 	/*
733 	 * if not found or inconsistent default to XOR
734 	 */
735 	if (opname == 0 ||
736 	    (op == BOFI_NO_TRANSFER &&
737 	    (at & (BOFI_DMA_RW|BOFI_PIO_R))) ||
738 	    (op >= BOFI_DELAY_INTR && (at & BOFI_INTR) == 0)) {
739 		opname = optypes[3].str;	/* "XOR" */
740 		operand = myULLMAX;
741 		op = optypes[3].code;
742 	}
743 
744 	/*
745 	 * if operator and access type are inconsistent choose a sensible
746 	 * default
747 	 */
748 	cycleiops = 0;
749 	if (at & BOFI_INTR)
750 		if (op < BOFI_DELAY_INTR)
751 			cycleiops = 1;
752 		else if (op == BOFI_LOSE_INTR)
753 			operand = 0;
754 
755 	cycledops = 0;
756 	if (nitems == 1 && (at & BOFI_DMA_RW))
757 		cycledops = 1;
758 	/*
759 	 * for each access in the list define one or more error definitions
760 	 */
761 	for (i = 0, item = items; i < nitems; i++, item++) {
762 		size_t acnt, fcnt;
763 		int j, fon;
764 
765 		if (item->access_type == 0)
766 			continue;
767 
768 		/*
769 		 * base number of errors to inject on 3% of number of
770 		 * similar accesses seen during LOG phase
771 		 */
772 		acnt = item->repcount / 10 + 1; /* 10% */
773 		fcnt = (acnt >= 3) ? acnt / 3 : 1; /* 3% */
774 
775 		/*
776 		 * wait for twice the time it took during LOG phase
777 		 */
778 		if ((ttime = (item->access_time * 2)) < MIN_REPORT_TIME)
779 			ttime = MIN_REPORT_TIME;
780 		else if (max_edef_wait != 0 && ttime > max_edef_wait)
781 			ttime = max_edef_wait;
782 		/*
783 		 * if edef_sleep set (-w) the use that, otherwise use default
784 		 */
785 		interval = edef_sleep ? edef_sleep : DEFAULT_EDEF_SLEEP;
786 
787 		msg(10,
788 		    "define_n: item %d limit %d step %d (intr %d) tt(%lu)\n",
789 		    i, item->repcount, acnt, intrs, ttime);
790 
791 		for (j = 0, fon = 1, nttime = ttime; j < item->repcount;
792 		    j += acnt) {
793 			if (policy & OPERATORSPOLICY) {
794 				define_op_err(fp, ecnt, edp, item,
795 				    nttime, interval, type, fon, fcnt);
796 			} else {
797 				if (cycleiops) {
798 					op = ioptypes[intrs].code;
799 					opname = ioptypes[intrs++].str;
800 					switch (op) {
801 					case BOFI_DELAY_INTR:
802 						/* delay for 1 sec */
803 						operand = 1000000;
804 						break;
805 					case BOFI_LOSE_INTR:
806 						/* op not applicable */
807 						operand = 0;
808 						break;
809 					case BOFI_EXTRA_INTR:
810 					default:
811 						/* generate 2 extra intrs */
812 						operand = 0xfff;
813 						break;
814 					}
815 					intrs %= 3;
816 				} else if (cycledops) {
817 					op = doptypes[intrs].code;
818 					opname = doptypes[intrs++].str;
819 					switch (op) {
820 					case BOFI_EQUAL:
821 						random_operand = lrand48() |
822 						    ((uint64_t)
823 						    (lrand48()) << 32);
824 						break; /* a random value */
825 					case BOFI_AND:
826 						operand = 0xaddedabadb00bull;
827 						break;
828 					case BOFI_OR:
829 						operand = 0xd1ab011c0af1a5c0ull;
830 						break;
831 					case BOFI_XOR:
832 					default:
833 						operand = myULLMAX;
834 						break;
835 					}
836 					intrs %= 4;
837 				}
838 				(void) define_one_error(fp, edp, item,
839 				    nttime, interval, type, fon,
840 				    fcnt, edp->acc_chk, opname, operand);
841 				*ecnt = *ecnt + 1;
842 				if (op == BOFI_EQUAL) {
843 					uint_t cnt;
844 					for (cnt = 0; cnt < NPIO_DEFAULTS;
845 					    cnt++, *ecnt = *ecnt + 1)
846 						(void) define_one_error(fp,
847 						    edp, item, nttime,
848 						    interval, type, fon, fcnt,
849 						    edp->acc_chk, opname,
850 						    pio_default_values[cnt]);
851 				}
852 			}
853 
854 			/*
855 			 * all non maximal policies should only generate
856 			 * a single error definition set per access.
857 			 */
858 			if (!(policy & MAXIMALPOLICY))
859 				break;
860 
861 			nttime = (logtime - item->access_time) *
862 			    (j + acnt + fcnt - 1) / logsize;
863 			if (nttime < MIN_REPORT_TIME)
864 				nttime = MIN_REPORT_TIME;
865 			else if (nttime > max_edef_wait)
866 				nttime = max_edef_wait;
867 
868 			msg(11, "define_nerrs: %lu %d %d %d %llu\n", nttime,
869 			    max_edef_wait, fon, fcnt, item->access_time);
870 
871 			if (item->access_type != BOFI_INTR)
872 				fon += j;
873 		}
874 	}
875 
876 	return (0);
877 }
878 
879 static int
880 reduce_log(uint16_t pol, struct acc_log *log,		/* input args */
881 	struct acc_log_elem **llp, size_t *cntp)	/* output args */
882 {
883 	ulong_t logtime;
884 	struct acc_log_elem *items, *item, *elem;
885 	int cnt, nitems, acnt;
886 	int i, j, k, lb, ub, mina, maxa, cutoff[2], mean;
887 
888 	if (llp == 0 || cntp == 0)	/* subroutine interface violated */
889 		return (-1);
890 
891 	if (*llp == 0) {
892 		items = (void *)log->logbase;
893 		nitems = log->entries;
894 	} else {
895 		items = *llp;	/* outputs double up as inputs */
896 		nitems = *cntp;
897 	}
898 	/* has the utc time wrapped over ULMAX - unlikely so fix it at 10 */
899 	logtime = (log->stop_time >= log->start_time) ?
900 	    log->stop_time - log->start_time : 10ul;
901 
902 	msg(1, "reduce %d: logtime %lu\n", nitems, logtime);
903 	/*
904 	 * Sort the log by access type - do not remove duplicates yet (but do
905 	 * remove access that do not match the requested log -> errdef policy
906 	 * (defined by union pu pol). Set the repcount field of each entry to a
907 	 * unique value (in the control statement of the for loop) - this
908 	 * ensures that the qsort (following the for loop) will not remove any
909 	 * entries.
910 	 */
911 	for (i = 0, cnt = 0, elem = items; i < nitems;
912 	    elem->repcount = i, i++, elem++) {
913 		/*
914 		 * If interested in the I/O transfer size and this access
915 		 * does not match the requested size then ignore the access
916 		 */
917 		if ((pol & SIZEPOLICY) &&
918 		    (!(pol & MULTIPOLICY) || elem->repcount == 1) &&
919 		    /* req for DMA / ddi_rep */
920 		    (pol & elem->size) == 0)
921 			elem->access_type = 0;
922 			/* these will end up sorted at the head */
923 		else {
924 			cnt += 1;
925 			elem->size *= elem->repcount;
926 			if (log->flags & BOFI_LOG_TIMESTAMP)
927 				/* real access time */
928 				elem->access_time -= log->start_time;
929 			else
930 				/* linear fit */
931 				elem->access_time = logtime * (i + 1) / nitems;
932 		}
933 	}
934 
935 	qsort((void *)items, nitems, sizeof (*items), log_cmp);
936 
937 	msg(5, "qsorted log raw (nitems %d cnt %d:\n", nitems, cnt);
938 	dump_log(14, 0, items, nitems, log->flags);
939 
940 	if (cnt != nitems) {	/* some items should be ignored */
941 		items += (nitems - cnt);	/* ignore these ones */
942 		if ((nitems = cnt) == 0) {
943 			*cntp = 0;
944 			*llp = 0;
945 			return (0);
946 			/* the chosen policy has ignored everything */
947 		}
948 
949 	}
950 	/*
951 	 * Now remove duplicate entries based on access type, address and size.
952 	 * Reuse the repcount field to store the no. of duplicate accesses.
953 	 * Store the average access time in the single remaining
954 	 * representative of the duplicate set.
955 	 */
956 
957 	for (i = 1, cnt = 1, elem = items, elem->repcount = 1, item = elem + 1;
958 	    i < nitems; i++, item++) {
959 		if (elem_cmp(elem, item) == 0) {
960 			elem->access_time += item->access_time;
961 			elem->repcount++;
962 		} else {	/* not a duplicate */
963 			elem->access_time = logtime / elem->repcount;
964 			elem++;
965 			*elem = *item;
966 			cnt++;
967 			elem->repcount = 1;
968 		}
969 	}
970 	elem->access_time = logtime / elem->repcount;
971 
972 	/*
973 	 * The log is sorted by access type - now resort to order by frequency
974 	 * of accesses (ie for a given access type uncommon access will come
975 	 * first.
976 	 */
977 
978 	qsort((void *)items, cnt, sizeof (*items), log_cmp2);
979 	msg(4, "qsorted log2: cnt is %d\n", cnt);
980 	dump_log(4, 0, items, cnt, log->flags);
981 
982 	for (i = 0; i < cnt; i = j) {
983 
984 		/*
985 		 * Pick out the set [i, j) consisting of elements with the same
986 		 * access type
987 		 */
988 		for (j = i + 1, acnt = items[i].repcount; j < cnt &&
989 		    items[j].access_type == items[i].access_type; j++)
990 			acnt += items[j].repcount;
991 
992 		if (j - i == 1)	/* never ignore solo accesses of a given type */
993 			continue;
994 		/*
995 		 * Now determine what constitutes uncommon and common accesses:
996 		 */
997 		mina = items[i].repcount;
998 		maxa = items[j-1].repcount;
999 		mean = acnt / (j - i); /* mean value */
1000 
1001 		if (pol & (UNCOMMONPOLICY|MEDIANPOLICY)) {
1002 			cutoff[0] = (mean - mina) / DISTRIB_CUTOFF + mina;
1003 
1004 			for (ub = i; ub < j; ub++)
1005 				if (items[ub].repcount > cutoff[0])
1006 					break;
1007 			lb = j - 1;
1008 		} else {
1009 			lb = i;
1010 			ub = j-1;
1011 		}
1012 
1013 		if (pol & (COMMONPOLICY|MEDIANPOLICY)) {
1014 			cutoff[1] = maxa - (maxa - mean) / DISTRIB_CUTOFF;
1015 			for (lb = j - 1; lb >= i; lb--)
1016 				if (items[lb].repcount < cutoff[1])
1017 					break;
1018 			if (!(pol & (UNCOMMONPOLICY|MEDIANPOLICY)))
1019 				ub = i;
1020 		}
1021 
1022 		msg(3, "reduce_log: p 0x%x at %d:0x%x %d:0x%x acnt mina maxa"
1023 		    " (%d %d %d)"
1024 		    " mean %d cutoffs(%d %d) bnds(%d, %d)\n",
1025 		    pol, i, items[i].access_type, j, items[j].access_type,
1026 		    acnt, mina, maxa, mean, cutoff[0], cutoff[1], lb, ub);
1027 
1028 		if (ub <= lb)
1029 			if (!(pol & MEDIANPOLICY))
1030 				/* delete all the mid accesses */
1031 				for (k = ub; k <= lb; k++)
1032 					items[k].access_type = 0;
1033 			else {
1034 				if (!(pol & UNCOMMONPOLICY))
1035 					/* delete uncommon accesses */
1036 					for (k = i; k < ub; k++)
1037 						items[k].access_type = 0;
1038 				if (!(pol & COMMONPOLICY))
1039 					/* delete common accesses */
1040 					for (k = lb+1; k < j; k++)
1041 						items[k].access_type = 0;
1042 			}
1043 	}
1044 	msg(4, "reduce_log: returning %d items\n", cnt);
1045 	dump_log(5, 0, items, cnt, log->flags);
1046 	*cntp = cnt;
1047 	*llp = items;
1048 	return (0);
1049 }
1050 
1051 static void
1052 log2errdefs(int fd, struct bofi_errdef *edp, struct acc_log *log,
1053 	char *devpath)
1054 {
1055 	struct acc_log_elem	*items;
1056 	size_t			nitems;
1057 	int			i, j;
1058 	uint_t			acc_cnt;
1059 	char			fname[_POSIX_PATH_MAX];
1060 	FILE			*fp = 0;
1061 	time_t			utc = time(NULL);
1062 	int			ecnt = 0;
1063 	int			err;
1064 	ulong_t			logtime;
1065 	char			*buffer;
1066 	struct stat		statbuf;
1067 
1068 	items = (void *)log->logbase;
1069 	nitems = log->entries;
1070 	logtime = (log->stop_time >= log->start_time) ?
1071 	    log->stop_time - log->start_time : 10ul;
1072 
1073 	if (nitems == 0)
1074 		return;
1075 
1076 	/* ensure that generated errdefs complete in bounded time */
1077 	if (max_edef_wait == 0)
1078 		max_edef_wait =
1079 		    logtime > MIN_REPORT_TIME ? logtime : MIN_REPORT_TIME * 2;
1080 
1081 	msg(4, "log2errdefs(0x%p, 0x%p, %d, 0x%x):\n",
1082 	    (void *) edp, (void *) items, nitems, policy);
1083 
1084 	(void) snprintf(fname, sizeof (fname), "%s.%d", (char *)edp->name,
1085 	    (int)getpid());
1086 	if ((fp = fopen(fname, "w")) == 0)
1087 		fp = outfile;
1088 
1089 	(void) fprintf(fp, "#!/bin/ksh -p\n\n");
1090 	(void) fprintf(fp, "# %-24s%s\n", "Script creation time:", ctime(&utc));
1091 	(void) fprintf(fp, "# %-24s%llu\n",
1092 	    "Activation time:", log->start_time);
1093 	(void) fprintf(fp, "# %-24s%llu\n",
1094 	    "Deactivation time:", log->stop_time);
1095 	(void) fprintf(fp, "# %-24s%d\n", "Log size:", nitems);
1096 	(void) fprintf(fp, "# %-24s", "Errdef policy:");
1097 	for (i = 0; ptypes[i].str != 0; i++)
1098 		if (policy & ptypes[i].code)
1099 			(void) fprintf(fp, "%s ", ptypes[i].str);
1100 	(void) fprintf(fp, "\n");
1101 	(void) fprintf(fp, "# %-24s%s\n", "Driver:", (char *)edp->name);
1102 	(void) fprintf(fp, "# %-24s%d\n", "Instance:", edp->instance);
1103 	if (edp->access_type & BOFI_PIO_RW) {
1104 		(void) fprintf(fp, "# %-24s%d\n",
1105 		    "Register set:", edp->rnumber);
1106 		(void) fprintf(fp, "# %-24s0x%llx\n", "Offset:", edp->offset);
1107 		(void) fprintf(fp, "# %-24s0x%llx\n", "Length:", edp->len);
1108 	} else if (edp->access_type & BOFI_DMA_RW) {
1109 		(void) fprintf(fp, "# %-24s%d\n", "DMA handle:", edp->rnumber);
1110 		(void) fprintf(fp, "# %-24s0x%llx\n", "Offset:", edp->offset);
1111 		(void) fprintf(fp, "# %-24s0x%llx\n", "Length:", edp->len);
1112 	} else if ((edp->access_type & BOFI_INTR) == 0) {
1113 		(void) fprintf(fp, "# %-24s%d\n",
1114 		    "Unknown Handle Type:", edp->rnumber);
1115 	}
1116 
1117 	(void) fprintf(fp, "# %-24s0x%x ( ", "Access type:",
1118 	    (edp->access_type & ~BOFI_LOG));
1119 	if (edp->access_type & BOFI_PIO_R)
1120 		(void) fprintf(fp, "%s ", "pio_r");
1121 	if (edp->access_type & BOFI_PIO_W)
1122 		(void) fprintf(fp, "%s ", "pio_w");
1123 	if (edp->access_type & BOFI_DMA_W)
1124 		(void) fprintf(fp, "%s ", "dma_w");
1125 	if (edp->access_type & BOFI_DMA_R)
1126 		(void) fprintf(fp, "%s ", "dma_r");
1127 	if (edp->access_type & BOFI_INTR)
1128 		(void) fprintf(fp, "%s ", "intr");
1129 	(void) fprintf(fp, ")\n\n");
1130 	if (user_comment)
1131 		(void) fprintf(fp, "# %-24s%s\n\n",
1132 		    "Test Comment:", user_comment);
1133 
1134 	dump_log(0, fp, items, nitems, log->flags);
1135 
1136 	items = 0;
1137 	if ((err = reduce_log(policy, log, &items, &nitems)) < 0 ||
1138 	    nitems == 0) {
1139 		msg(4, "log2errdefs: reduce_log err %d nitems %d\n",
1140 		    err, nitems);
1141 		return;
1142 	}
1143 	(void) fprintf(fp, "\nerror() { echo \""
1144 	    "${0##*/}: $@\""
1145 	    " >&2; exit 2; }\n");
1146 	(void) fprintf(fp,
1147 	    "trap ' ' 16\t# ignore - it is trapped by abort monitor_edef\n");
1148 
1149 	(void) fprintf(fp, "\nfixup_script()\n{\n");
1150 	if (scriptargs > 0) {
1151 		(void) fprintf(fp, "\tif [[ $1 -eq 1 ]]\n\tthen\n");
1152 		(void) fprintf(fp, "\t\t# Call a user defined workload\n");
1153 		(void) fprintf(fp, "\t\t# while injecting errors\n\t\t");
1154 		for (i = 0; i < scriptargs; i++)
1155 			(void) fprintf(fp, "%s ", fixup_script[i]);
1156 		(void) fprintf(fp, "\n\tfi\n");
1157 		(void) fprintf(fp, "\treturn 0\n");
1158 	} else {
1159 		(void) fprintf(fp, "\tif [[ $1 -eq 0 ]]\n\tthen\n");
1160 		(void) fprintf(fp,
1161 		    "\t\t# terminate any outstanding workload\n");
1162 		(void) fprintf(fp, "\t\tif [ $script_pid -gt 0 ]; then\n");
1163 		(void) fprintf(fp, "\t\t\tkill $script_pid\n");
1164 		(void) fprintf(fp, "\t\t\tscript_pid=0\n");
1165 		(void) fprintf(fp, "\t\tfi\n");
1166 		(void) fprintf(fp, "\tfi\n");
1167 		(void) fprintf(fp, "\treturn -1\n");
1168 	}
1169 	(void) fprintf(fp, "}\n\n");
1170 	(void) fprintf(fp, "devpath=/devices%s\n\n", devpath);
1171 	(void) fprintf(fp, "#\n");
1172 	(void) fprintf(fp, "# following text extracted from th_script\n");
1173 	(void) fprintf(fp, "#\n");
1174 	if (stat("/usr/lib/th_script", &statbuf) == -1) {
1175 		msg(0, "log2errdefs: stat of /usr/lib/th_script failed\n");
1176 		return;
1177 	}
1178 	fd = open("/usr/lib/th_script", O_RDONLY);
1179 	if (fd == -1) {
1180 		msg(0, "log2errdefs: open of /usr/lib/th_script failed\n");
1181 		return;
1182 	}
1183 	buffer = malloc(statbuf.st_size);
1184 	if (!buffer) {
1185 		msg(0, "log2errdefs: malloc for /usr/lib/th_script failed\n");
1186 		return;
1187 	}
1188 	if (read(fd, buffer, statbuf.st_size) != statbuf.st_size) {
1189 		msg(0, "log2errdefs: read of /usr/lib/th_script failed\n");
1190 		return;
1191 	}
1192 	(void) fwrite(buffer, statbuf.st_size, 1, fp);
1193 	(void) close(fd);
1194 	(void) fprintf(fp, "#\n");
1195 	(void) fprintf(fp, "# end of extracted text\n");
1196 	(void) fprintf(fp, "#\n");
1197 	(void) fprintf(fp, "run_subtest %s %d <<ERRDEFS\n",
1198 	    (char *)edp->name, edp->instance);
1199 
1200 	for (i = 0; i < nitems; i = j) {
1201 
1202 		acc_cnt = items[i].repcount;
1203 		for (j = i + 1;
1204 		    j < nitems && items[j].access_type == items[i].access_type;
1205 		    j++)
1206 			acc_cnt += items[j].repcount;
1207 		msg(1, "l2e: nitems %d i %d j %d at 0x%x\n",
1208 		    nitems, i, j, items[i].access_type);
1209 		if (items[i].access_type != 0)
1210 			(void) define_nerrs(fd, fp, &ecnt, edp, items+i, j-i,
1211 			    acc_cnt, items[i].repcount, items[j-1].repcount,
1212 			    logtime, log->entries);
1213 	}
1214 
1215 	(void) fprintf(fp, "ERRDEFS\n");
1216 	(void) fprintf(fp, "exit 0\n");
1217 
1218 	if (fp != stdout && fp != stderr) {
1219 		if (fchmod(fileno(fp), S_IRWXU|S_IRGRP|S_IROTH))
1220 			msg(0, "fchmod failed: %s\n", strerror(errno));
1221 		if (fclose(fp) != 0)
1222 			msg(0, "close of %s failed: %s\n", fname,
1223 			    strerror(errno));
1224 	}
1225 	msg(10, "log2errdefs: done\n");
1226 }
1227 
1228 #define	LLSZMASK (sizeof (longlong_t) -1)
1229 
1230 static int
1231 add_edef(int fd,
1232 	struct bofi_errdef *errdef,	/* returned access criteria */
1233 	struct bofi_errstate *errstate,
1234 	struct handle_info *hdl,	/* handle to match against request */
1235 	struct bofi_errdef *edp)	/* requested access criteria */
1236 {
1237 	*errdef = *edp;
1238 	errdef->instance = hdl->instance;
1239 
1240 
1241 	if (hdl->access_type == 0)
1242 		return (EINVAL);
1243 
1244 	errdef->access_type =
1245 	    errdef->access_type & (hdl->access_type|BOFI_LOG);
1246 
1247 	/* use a big log for PIO and a small one otherwise */
1248 	if (lsize_is_default &&
1249 	    (errdef->access_type & BOFI_PIO_RW) == 0) {
1250 		errdef->access_count = DFLT_NONPIO_LOGSZ;
1251 		errdef->fail_count = 0;
1252 	}
1253 	errdef->log.logsize = errstate->log.logsize =
1254 	    errdef->access_count + errdef->fail_count - 1;
1255 	if (errdef->log.logsize == -1U) {
1256 		errdef->log.logsize = errstate->log.logsize = 0;
1257 	}
1258 	errdef->log.logbase = errstate->log.logbase =
1259 	    (caddr_t)GETSTRUCT(struct acc_log_elem, errdef->log.logsize);
1260 
1261 	if (errdef->log.logbase == 0)
1262 		return (EAGAIN);
1263 
1264 	errdef->rnumber = hdl->rnumber;
1265 	errdef->offset = hdl->offset;
1266 	errdef->len = hdl->len;
1267 
1268 	msg(4, "creating errdef: %d %s %d %d 0x%llx 0x%llx 0x%x 0x%x 0x%x"
1269 	    " 0x%x 0x%x 0x%llx\n",
1270 	    errdef->namesize, (char *)errdef->name,
1271 	    errdef->instance, errdef->rnumber,
1272 	    errdef->offset, errdef->len,
1273 	    errdef->access_type,
1274 	    errdef->access_count, errdef->fail_count,
1275 	    errdef->acc_chk, errdef->optype, errdef->operand);
1276 	if (ioctl(fd, BOFI_ADD_DEF, errdef) == -1) {
1277 		perror("th_define - adding errdef failed");
1278 		return (errno);
1279 	}
1280 	errdef->optype = edp->optype; /* driver clears it if fcnt is zero */
1281 	errstate->errdef_handle = errdef->errdef_handle;
1282 	return (0);
1283 }
1284 
1285 static void
1286 collect_state(int fd, int cmd,
1287 	struct bofi_errstate *errstate,
1288 	struct bofi_errdef *errdef,
1289 	char *devpath)
1290 {
1291 	int rval;
1292 	size_t ls = errstate->log.logsize;
1293 
1294 	msg(2, "collect_state: pre: edp->access_type 0x%x (logsize %d)\n",
1295 	    errdef->access_type, errdef->log.logsize);
1296 
1297 	do {
1298 		errstate->log.logsize = 0; /* only copy the driver log once */
1299 
1300 		msg(10, "collecting state (lsize %d) ...\n",
1301 		    errstate->log.logsize);
1302 		errno = 0;
1303 
1304 		if (ioctl(fd, cmd, errstate) == -1 && errno != EINTR) {
1305 			perror("th_define (collect) -"
1306 			    " waiting for error report failed");
1307 			break;
1308 		}
1309 
1310 		(void) fprintf(outfile, "Logged %d out of %d accesses"
1311 		    " (%s %d %d 0x%x %d).\n",
1312 		    errstate->log.entries, ls,
1313 		    (char *)errdef->name, errdef->instance, errdef->rnumber,
1314 		    errdef->access_type, errstate->log.wrapcnt);
1315 
1316 		(void) msg(1, "\t(ac %d fc %d lf 0x%x wc %d).\n",
1317 		    errstate->access_count, errstate->fail_count,
1318 		    errstate->log.flags, errstate->log.wrapcnt);
1319 
1320 		rval = errno;
1321 		if ((errstate->log.flags & BOFI_LOG_WRAP) &&
1322 		    errstate->access_count > 0)
1323 			continue;
1324 		if (errstate->access_count <= 1 &&
1325 		    errstate->fail_count == 0 &&
1326 		    errstate->acc_chk == 0) {
1327 			msg(3, "collecting state complete entries %d\n",
1328 			    errstate->log.entries);
1329 			break;
1330 		}
1331 
1332 		msg(5, "still collecting state: %d, %d, %d\n",
1333 		    errstate->access_count, errstate->fail_count,
1334 		    errstate->acc_chk);
1335 		(void) msg(2, "Log: errno %d size %d entries %d "
1336 		    "(off 0x%llx len 0x%llx) ac %d\n", errno,
1337 		    errstate->log.logsize, errstate->log.entries,
1338 		    errdef->offset, errdef->len, errstate->access_count);
1339 
1340 	} while (rval == 0 && errstate->log.entries < ls);
1341 
1342 	/* now grab the log itself */
1343 	errstate->log.logsize = ls;
1344 	if (errstate->log.entries != 0) {
1345 		if (ioctl(fd, BOFI_CHK_STATE, errstate) == -1) {
1346 			msg(0,
1347 			    "%s: errorwhile retrieving %d log entries: %s\n",
1348 			    Progname, errstate->log.entries, strerror(errno));
1349 		} else {
1350 			msg(2, "collect_state: post: edp->access_type 0x%x"
1351 			    " (log entries %d %d) (%llu - %llu)\n",
1352 			    errdef->access_type,
1353 			    errstate->log.entries, errstate->access_count,
1354 			    errstate->log.start_time, errstate->log.stop_time);
1355 
1356 			log2errdefs(fd, errdef, &(errstate->log), devpath);
1357 		}
1358 	}
1359 }
1360 
1361 static void
1362 print_err_reports(FILE *fp, struct bofi_errstate *esp,
1363 	char *fname, char *cmt, int id)
1364 {
1365 	if (fname != 0 && *fname != 0)
1366 		(void) fprintf(fp, "%sErrdef file %s definition %d:",
1367 		    cmt, fname, id);
1368 	else
1369 		(void) fprintf(fp, "%s", cmt);
1370 
1371 	if (esp->access_count != 0) {
1372 		(void) fprintf(fp, " (access count %d).\n", esp->access_count);
1373 	} else {
1374 		(void) fprintf(fp, "\n%s\tremaining fail count %d acc_chk %d\n",
1375 		    cmt, esp->fail_count, esp->acc_chk);
1376 		(void) fprintf(fp, "%s\tfail time 0x%llx error reported time"
1377 		    " 0x%llx errors reported %d\n", cmt,
1378 		    esp->fail_time, esp->msg_time,
1379 		    esp->errmsg_count);
1380 		if (esp->msg_time)
1381 			(void) fprintf(fp, "%s\tmessage \"%s\" severity 0x%x\n",
1382 			    cmt, esp->buffer, (uint_t)esp->severity);
1383 	}
1384 }
1385 
1386 static void
1387 thr_collect(void *arg, char *devpath)
1388 {
1389 	int fd;
1390 	struct collector_def *hi = (struct collector_def *)arg;
1391 
1392 	msg(4, "thr_collect: collecting %s inst %d rn %d at = 0x%x.\n",
1393 	    hi->ed.name, hi->ed.instance,
1394 	    hi->ed.rnumber, hi->ed.access_type);
1395 
1396 	if ((fd = open(BOFI_DEV, O_RDWR)) == -1) {
1397 		if (errno == EAGAIN)
1398 			msg(0, "Too many instances of bofi currently open\n");
1399 		else
1400 			msg(0, "Error while opening bofi driver: %s",
1401 			    strerror(errno));
1402 	} else {
1403 		/*
1404 		 * Activate the logging errdefs - then collect the results.
1405 		 */
1406 		(void) manage_instance(fd, hi->ed.name,
1407 		    hi->ed.instance, BOFI_START);
1408 		collect_state(fd, BOFI_CHK_STATE_W, &hi->es, &hi->ed, devpath);
1409 	}
1410 
1411 	/*
1412 	 * there is no more work to do on this access handle so clean up / exit.
1413 	 */
1414 	msg(3, "thr_collect: closing and broadcasting.\n");
1415 	exit(0);
1416 }
1417 
1418 /*
1419  * Given an access handle known to the bofi driver see if the user has
1420  * specified access criteria that match that handle. Note: this matching
1421  * algorithm should be kept consistent with the drivers alogorithm.
1422  */
1423 static int
1424 match_hinfo(struct handle_info *hp, int instance, uint_t access_type,
1425 	int rnumber, offset_t offset, offset_t len)
1426 {
1427 
1428 	msg(9, "matching (%d %d) 0x%x %d offset (%llx, %llx) len (%llx %llx)\n",
1429 	    hp->instance, instance, access_type, rnumber,
1430 	    hp->offset, offset, hp->len, len);
1431 
1432 	if (instance != -1 && hp->instance != instance)
1433 		return (0);
1434 	if ((access_type & BOFI_DMA_RW) &&
1435 	    (hp->access_type & BOFI_DMA_RW) &&
1436 	    (rnumber == -1 || hp->rnumber == rnumber))
1437 		return (1);
1438 	else if ((access_type & BOFI_INTR) &&
1439 	    (hp->access_type & BOFI_INTR))
1440 		return (1);
1441 	else if ((access_type & BOFI_PIO_RW) &&
1442 	    (hp->access_type & BOFI_PIO_RW) &&
1443 	    (rnumber == -1 || hp->rnumber == rnumber) &&
1444 	    (len == 0 || hp->offset < offset + len) &&
1445 	    (hp->len == 0 || hp->offset + hp->len > offset))
1446 		return (1);
1447 	else
1448 		return (0);
1449 }
1450 
1451 /*
1452  * Obtain all the handles created by the driver specified by the name parameter
1453  * that match the remaining arguments. The output parameter nhdls indicates how
1454  * many of the structures pointed to by the output parameter hip match the
1455  * specification.
1456  *
1457  * It is the responsibility of the caller to free *hip when *nhdls != 0.
1458  */
1459 static int
1460 get_hinfo(int fd, char *name, struct handle_info **hip, size_t *nhdls,
1461     int instance, int atype, int rset, offset_t offset, offset_t len,
1462     int new_semantics)
1463 {
1464 	struct bofi_get_hdl_info hdli;
1465 	int command;
1466 
1467 	command = BOFI_GET_HANDLE_INFO;
1468 	hdli.namesize = strlen(name);
1469 	(void) strncpy(hdli.name, name, MAXNAMELEN);
1470 	/*
1471 	 * Initially ask for the number of access handles (not the structures)
1472 	 * in order to allocate memory
1473 	 */
1474 	hdli.hdli = 0;
1475 	*hip = 0;
1476 	hdli.count = 0;
1477 
1478 	/*
1479 	 * Ask the bofi driver for all handles created by the driver under test.
1480 	 */
1481 	if (ioctl(fd, command, &hdli) == -1) {
1482 		*nhdls = 0;
1483 		msg(0, "driver failed to return handles: %s\n",
1484 		    strerror(errno));
1485 		return (errno);
1486 	} else if ((*nhdls = hdli.count) == 0) {
1487 		msg(1, "get_hinfo: no registered handles\n");
1488 		return (0);	/* no handles */
1489 	} else if ((*hip = GETSTRUCT(struct handle_info, *nhdls)) == 0) {
1490 		return (EAGAIN);
1491 	} else {
1492 		struct handle_info *hp, **chosen;
1493 		int i;
1494 
1495 		/* Ask for *nhdls handles */
1496 		hdli.hdli = (caddr_t)*hip;
1497 		if (ioctl(fd, command, &hdli) == -1) {
1498 			int err = errno;
1499 
1500 			msg(0, "BOFI_GET_HANDLE_INFO ioctl returned error %d\n",
1501 			    err);
1502 			free(*hip);
1503 			return (err);
1504 		}
1505 
1506 		if (hdli.count < *nhdls)
1507 			*nhdls = hdli.count; /* some handles have gone away */
1508 
1509 		msg(4, "qsorting %d handles\n", *nhdls);
1510 		if (*nhdls > 1)
1511 			/* sort them naturally (NB ordering is not mandatory) */
1512 			qsort((void *)*hip, *nhdls, sizeof (**hip), hdl_cmp);
1513 
1514 		if ((chosen = malloc(sizeof (hp) * *nhdls)) != NULL) {
1515 			struct handle_info **ip;
1516 			/* the selected handles */
1517 			struct handle_info *prev = 0;
1518 			int scnt = 0;
1519 
1520 			for (i = 0, hp = *hip, ip = chosen; i < *nhdls;
1521 			    i++, hp++) {
1522 				/*
1523 				 * Remark: unbound handles never match
1524 				 * (access_type == 0)
1525 				 */
1526 				if (match_hinfo(hp, instance, atype, rset,
1527 				    offset&0x7fffffff, len&0x7fffffff)) {
1528 					msg(3, "match: 0x%x 0x%llx 0x%llx"
1529 					    " 0x%llx (0x%llx)\n",
1530 					    hp->access_type, hp->addr_cookie,
1531 					    hp->offset, hp->len,
1532 					    (hp->len & 0x7fffffff));
1533 					if (prev &&
1534 					    (prev->access_type & BOFI_DMA_RW) &&
1535 					    (hp->access_type & BOFI_DMA_RW) &&
1536 					    hp->instance == prev->instance &&
1537 					    hp->len == prev->len &&
1538 					    hp->addr_cookie ==
1539 					    prev->addr_cookie)
1540 						continue;
1541 
1542 					if ((hp->access_type & BOFI_DMA_RW) &&
1543 					    (atype & BOFI_DMA_RW) !=
1544 					    hp->access_type)
1545 						if (new_semantics)
1546 							continue;
1547 
1548 					if (prev)
1549 						msg(3, "match_hinfo: match:"
1550 						    " 0x%llx (%d %d) (%d %d)"
1551 						    " (0x%x 0x%x) (0x%llx,"
1552 						    " 0x%llx)\n",
1553 						    hp->addr_cookie,
1554 						    prev->instance,
1555 						    hp->instance, prev->rnumber,
1556 						    hp->rnumber,
1557 						    prev->access_type,
1558 						    hp->access_type, prev->len,
1559 						    hp->len);
1560 
1561 					/* it matches so remember it */
1562 					prev = *ip++ = hp;
1563 					scnt += 1;
1564 				}
1565 			}
1566 
1567 			if (*nhdls != scnt) {
1568 				/*
1569 				 * Reuse the alloc'ed memory to return
1570 				 * only those handles the user has asked for.
1571 				 * But first prune the handles to get rid of
1572 				 * overlapping ranges (they are ordered by
1573 				 * offset and length).
1574 				 */
1575 				*nhdls = scnt;
1576 				for (i = 0, hp = *hip, ip = chosen; i < scnt;
1577 				    i++, ip++, hp++)
1578 					if (hp != *ip)
1579 						(void) memcpy(hp, *ip,
1580 						    sizeof (*hp));
1581 			}
1582 			free(chosen);
1583 		}
1584 
1585 		for (i = 0, hp = *hip; i < *nhdls; i++, hp++) {
1586 			msg(4, "\t%d 0x%x %d 0x%llx 0x%llx 0x%llx\n",
1587 			    hp->instance, hp->access_type, hp->rnumber,
1588 			    hp->len, hp->offset, hp->addr_cookie);
1589 		}
1590 	}
1591 	if (*nhdls == 0 && *hip)
1592 		free(*hip);
1593 
1594 	msg(4, "get_info: %s got %d handles\n", name, *nhdls);
1595 	return (0);
1596 }
1597 
1598 static void
1599 init_sigs()
1600 {
1601 	struct sigaction sa;
1602 	int *ip, sigs[] = {SIGINT, SIGTERM, 0};
1603 
1604 	sa.sa_handler = kill_sighandler;
1605 	(void) sigemptyset(&sa.sa_mask);
1606 	for (ip = sigs; *ip; ip++)
1607 		(void) sigaddset(&sa.sa_mask, *ip);
1608 	sa.sa_flags = 0;
1609 	for (ip = sigs; *ip; ip++)
1610 		(void) sigaction(*ip, &sa, NULL);
1611 }
1612 
/*
 * Raise the soft limit of 'resource' to its hard limit.  'rname' is the
 * resource's name as used in diagnostics.  Returns -1 if the current limit
 * could not be read and 0 otherwise (a failure to set the new limit is
 * reported but still yields 0).
 */
static int
raise_rlimit(int resource, const char *rname)
{
	struct rlimit rl;

	if (getrlimit(resource, &rl) < 0) {
		msg(0, "failed to obtain %s: %s\n", rname, strerror(errno));
		return (-1);
	}
	/* rlim_t need not be unsigned long, so convert for %lu */
	msg(12, "%s\t %lu (%lu)\n", rname,
	    (unsigned long)rl.rlim_cur, (unsigned long)rl.rlim_max);
	rl.rlim_cur = rl.rlim_max;
	if (setrlimit(resource, &rl) < 0)
		msg(0, "failed to set %s: %s\n", rname, strerror(errno));
	return (0);
}

/*
 * Raise the open file, data segment and file size limits of this process
 * to their maximums, and enable extended FILE stdio once the open file
 * limit has been read.
 */
static void
up_resources()
{
	/* Potentially hungry on resources so up them all to their maximums */
	if (raise_rlimit(RLIMIT_NOFILE, "RLIMIT_NOFILE") == 0)
		(void) enable_extended_FILE_stdio(-1, -1);
	(void) raise_rlimit(RLIMIT_DATA, "RLIMIT_DATA");
	(void) raise_rlimit(RLIMIT_FSIZE, "RLIMIT_FSIZE");
}
1649 
1650 static FILE *
1651 create_test_file(char *drvname)
1652 {
1653 	char dirname[_POSIX_PATH_MAX];
1654 	char testname[_POSIX_PATH_MAX];
1655 	FILE *fp = 0;
1656 	time_t utc = time(NULL);
1657 
1658 	if (snprintf(dirname, sizeof (dirname), "%s.test.%lu",
1659 	    drvname, utc) == -1 ||
1660 	    snprintf(testname, sizeof (testname), "%s.test.%lu",
1661 	    drvname, utc) == -1)
1662 		return (0);
1663 
1664 	if (mkdir(dirname, S_IRWXU|S_IRGRP|S_IROTH)) {
1665 		msg(0, "Error creating %s: %s\n", dirname, strerror(errno));
1666 		return (0);
1667 	}
1668 	if (chdir(dirname)) {
1669 		(void) rmdir(dirname);
1670 		return (0);
1671 	}
1672 	if ((fp = fopen(testname, "w")) == 0)
1673 		return (0);	/* leave created directory intact */
1674 
1675 	return (fp);
1676 }
1677 
/*
 * Argument block handed through di_walk_node() to walk_callback() by
 * getpath().
 */
struct walk_arg {
	char *path;	/* buffer receiving the devfs path; NULL on failure */
	int instance;	/* instance number the node has to have */
	char name[MAXPATHLEN];	/* driver name the node has to be bound to */
	int pathlen;	/* number of bytes available at path */
};
1684 
1685 static int
1686 walk_callback(di_node_t node, void *arg)
1687 {
1688 	struct walk_arg *warg = (struct walk_arg *)arg;
1689 	char *driver_name;
1690 	char *path;
1691 
1692 	driver_name = di_driver_name(node);
1693 	if (driver_name != NULL) {
1694 		if (strcmp(driver_name, warg->name) == NULL &&
1695 		    di_instance(node) == warg->instance) {
1696 			path = di_devfs_path(node);
1697 			if (path == NULL)
1698 				warg->path = NULL;
1699 			else
1700 				(void) strncpy(warg->path, path, warg->pathlen);
1701 			return (DI_WALK_TERMINATE);
1702 		}
1703 	}
1704 	return (DI_WALK_CONTINUE);
1705 }
1706 
1707 static int
1708 getpath(char *path, int instance, char *name, int pathlen)
1709 {
1710 	di_node_t node;
1711 	struct walk_arg warg;
1712 
1713 	warg.instance = instance;
1714 	(void) strncpy(warg.name, name, MAXPATHLEN);
1715 	warg.path = path;
1716 	warg.pathlen = pathlen;
1717 	if ((node = di_init("/", DINFOSUBTREE)) == DI_NODE_NIL)
1718 		return (-1);
1719 	if (di_walk_node(node, DI_WALK_CLDFIRST, &warg, walk_callback) == -1) {
1720 		di_fini(node);
1721 		return (-1);
1722 	}
1723 	if (warg.path == NULL) {
1724 		di_fini(node);
1725 		return (-1);
1726 	}
1727 	di_fini(node);
1728 	return (0);
1729 }
1730 
1731 /*
1732  * Record logsize h/w accesses of type 'edp->access_type' made by instance
1733  * 'edp->instance' of driver 'edp->name' to the register set (or dma handle)
1734  * 'edp->rnumber' that lie within the range 'edp->offset' to
1735  * 'edp->offset' + 'edp->len'.
1736  * Access criteria may be mixed and matched:
1737  * -	access types may be combined (PIO read/write, DMA read write or intrs);
1738  * -	if 'edp->instance' is -1 all instances are checked for the criteria;
1739  * -	if 'edp->rnumber' is -1 all register sets and dma handles are matched;
1740  * -	'offset' and 'len' indicate that only PIO and DMA accesses within the
1741  *	range 'edp->offset' to 'edp->len' will be logged. Putting 'edp->offset'
1742  *      to zero and 'edp->len' to -1ull gives maximal coverage.
1743  *
1744  * 'collecttime' is the number of seconds used to log accesses
1745  *		(default is infinity).
1746  */
1747 static void
1748 test_driver(struct bofi_errdef *edp,
1749 	unsigned long long collecttime)
1750 {
1751 	pid_t pid;
1752 	int statloc;
1753 	struct collector_def *cdefs, *cdp;
1754 	struct handle_info *hdls, *hdl;
1755 	int i, fd;
1756 	size_t cnt;
1757 	size_t nchildren;
1758 	unsigned long long timechunk;
1759 	FILE *sfp;	/* generated control test file */
1760 	char buf[MAXPATHLEN];
1761 	char devpath[MAXPATHLEN];
1762 	char *devpathp = "NULL";
1763 	int drv_inst;
1764 	int got_it = 0;
1765 
1766 	char *name = (char *)edp->name;
1767 	uint_t logsize = edp->access_count + edp->fail_count - 1;
1768 	int inst = edp->instance;
1769 	uint_t atype = edp->access_type;
1770 	int rset = edp->rnumber;
1771 	offset_t offset = edp->offset;
1772 	offset_t len = edp->len;
1773 
1774 	msg(4, "test_driver: %s %d inst %d 0x%x rset %d %llx %llx\n",
1775 	    name, logsize, inst, atype, rset, offset, len);
1776 
1777 	drv_inst = inst;
1778 	if (getpath(devpath, inst, name, MAXPATHLEN) != -1) {
1779 		devpathp = devpath;
1780 		got_it = 1;
1781 	}
1782 	if (logsize == -1U)
1783 		logsize = 0;
1784 	fd = open(BOFI_DEV, O_RDWR);
1785 	if (fd == -1) {
1786 		perror("get_hdl_info - bad open of bofi driver");
1787 		return;
1788 	}
1789 	if (got_it) {
1790 		(void) snprintf(buf, sizeof (buf),
1791 		    "th_manage /devices%s offline", devpathp);
1792 		(void) system(buf);
1793 		(void) snprintf(buf, sizeof (buf),
1794 		    "th_manage /devices%s online", devpathp);
1795 		(void) system(buf);
1796 		(void) snprintf(buf, sizeof (buf),
1797 		    "th_manage /devices%s getstate >/dev/null", devpathp);
1798 		(void) system(buf);
1799 	}
1800 	if (get_hinfo(fd, name, &hdls, &cnt,
1801 	    inst, atype, rset, offset, len, 1) != 0) {
1802 		msg(0, "driver_test: bad get_info for %d hdls\n", cnt);
1803 		return;
1804 	} else if (logsize == 0 || collecttime == 0 || cnt == 0) {
1805 		if (cnt == 0)
1806 			msg(1, "No matching handles.\n");
1807 		return;
1808 	}
1809 	if ((cdefs = GETSTRUCT(struct collector_def, cnt)) == 0) {
1810 		msg(0, "driver_test: can't get memory for %d cdefs\n", cnt);
1811 		return;
1812 	}
1813 	up_resources();
1814 	if (got_it) {
1815 		if (scriptargs > 0) {
1816 			(void) snprintf(buf, sizeof (buf),
1817 			    "DRIVER_PATH=/devices%s DRIVER_INSTANCE=%d"
1818 			    " DRIVER_UNCONFIGURE=0 DRIVER_CONFIGURE=1",
1819 			    devpathp, drv_inst);
1820 			for (i = 0; i < scriptargs; i++) {
1821 				(void) strcat(buf, " ");
1822 				(void) strcat(buf, fixup_script[i]);
1823 			}
1824 			(void) strcat(buf, " &");
1825 		} else {
1826 			(void) snprintf(buf, sizeof (buf),
1827 			    "while : ; do th_manage /devices%s online;"
1828 			    " th_manage /devices%s getstate >/dev/null;"
1829 			    " th_manage /devices%s offline;done &"
1830 			    " echo $! >/tmp/bofi.pid",
1831 			    devpathp, devpathp, devpathp);
1832 		}
1833 		(void) system(buf);
1834 		(void) snprintf(buf, sizeof (buf), "sleep %d",
1835 		    edef_sleep ? edef_sleep : DEFAULT_EDEF_SLEEP);
1836 		(void) system(buf);
1837 	}
1838 
1839 	(void) fprintf(outfile,
1840 	    "Logging accesses to instances ");
1841 	for (i = 0, inst = -1, hdl = hdls; i < cnt;
1842 	    i++, hdl++) {
1843 		if (inst != hdl->instance) {
1844 			inst = hdl->instance;
1845 			(void) fprintf(outfile, "%d ", inst);
1846 		}
1847 	}
1848 	(void) fprintf(outfile, " (%d logs of size 0x%x).\n\t"
1849 	    "(Use th_manage ... clear_errdefs to terminate"
1850 	    " logging)\n", cnt, logsize);
1851 
1852 	sfp = create_test_file(name);
1853 	/*
1854 	 * Install a logging errdef for each matching handle,
1855 	 * and then create a child to collect the log.
1856 	 * The child is responsible for activating the log.
1857 	 */
1858 	for (i = 0, cdp = cdefs, hdl = hdls, nchildren = 0;
1859 	    i < cnt; i++, cdp++, hdl++) {
1860 		if (add_edef(fd, &cdp->ed, &cdp->es, hdl, edp) != 0) {
1861 			cdp->lp = 0;
1862 			cdp->pid = 0;
1863 		} else {
1864 			cdp->lp = (void *)cdp->ed.log.logbase;
1865 			msg(1, "test_driver: thr_create:"
1866 			    " lsize 0x%x 0x%x at 0x%x\n",
1867 			    cdp->es.log.logsize,
1868 			    cdp->ed.log.logsize,
1869 			    cdp->ed.access_type);
1870 			if ((pid = fork()) == -1) {
1871 				msg(0, "fork failed for handle"
1872 				    " %d: %s\n", i, strerror(errno));
1873 				cdp->pid = 0;	/* ignore */
1874 			} else if (pid == 0) {
1875 				thr_collect(cdp, devpathp);
1876 			} else {
1877 				cdp->pid = pid;
1878 				nchildren += 1;
1879 			}
1880 		}
1881 	}
1882 
1883 	if (nchildren != 0) {
1884 		if (sfp) {
1885 			(void) fprintf(sfp, "#!/bin/ksh -p\n\n");
1886 			(void) fprintf(sfp,
1887 			    "\n# Test control script generated using:\n#");
1888 			for (i = 0; i < pargc; i++)
1889 				(void) fprintf(sfp, " %s", pargv[i]);
1890 			(void) fprintf(sfp, "\n\n");
1891 			(void) fprintf(sfp, "\nrun_tests()\n{\n");
1892 			for (i = 0, cdp = cdefs; i < cnt; i++, cdp++)
1893 				if (cdp->pid) {
1894 					(void) fprintf(sfp,
1895 					    "\tif [ -x ./%s.%d ]\n\tthen\n",
1896 					    name, (int)cdp->pid);
1897 					(void) fprintf(sfp,
1898 					    "\t\techo \"Starting test"
1899 					    " %d (id %d)\"\n",
1900 					    i, (int)cdp->pid);
1901 					(void) fprintf(sfp, "\t\t./%s.%d\n",
1902 					    name, (int)cdp->pid);
1903 					(void) fprintf(sfp, "\t\techo \""
1904 					    "Test %d (id %d) complete\"\n",
1905 					    i, (int)cdp->pid);
1906 					(void) fprintf(sfp, "\tfi\n");
1907 				}
1908 			(void) fprintf(sfp, "}\n\nrun_tests\n");
1909 			if (fchmod(fileno(sfp), S_IRWXU|S_IRGRP|S_IROTH))
1910 				msg(0, "fchmod on control script failed: %s\n",
1911 				    strerror(errno));
1912 			if (fclose(sfp) != 0)
1913 				msg(0, "Error closing control script: %s\n",
1914 				    strerror(errno));
1915 		}
1916 
1917 		set_handler(SIGALRM);	/* handle it */
1918 		/*
1919 		 * The user may want to terminate logging before the log fills
1920 		 * so use a timer to signal the logging children to handle this
1921 		 * case.
1922 		 */
1923 		timechunk = collecttime / MAXALRMCALL;
1924 		collecttime = collecttime - timechunk * MAXALRMCALL;
1925 
1926 		msg(2, "logging for (0x%llx 0x%llx)\n", timechunk, collecttime);
1927 
1928 		(void) alarm(collecttime); /* odd bit of collect time */
1929 
1930 		/* wait for the log to fill or deadline satisfied */
1931 		for (;;) {
1932 			pid = wait(&statloc);
1933 			for (i = 0, nchildren = 0, cdp = cdefs;
1934 			    i < cnt; i++, cdp++)
1935 				if (cdp->pid == pid)
1936 					cdp->pid = 0;
1937 			for (i = 0, nchildren = 0, cdp = cdefs;
1938 			    i < cnt; i++, cdp++)
1939 				if (cdp->pid)
1940 					nchildren++;
1941 			if (nchildren == 0)
1942 				break;
1943 			if (killed)
1944 				break;
1945 			if (alarmed) {
1946 				if (timechunk-- > 0) {
1947 					/*
1948 					 * prepare for the next timeslice by
1949 					 * rearming the clock
1950 					 */
1951 					if (alarm(MAXALRMCALL) == 0)
1952 						alarmed = 0;
1953 					else {
1954 						/*
1955 						 * must have been a user abort
1956 						 * (via SIGALRM)
1957 						 */
1958 						(void) alarm(0);
1959 						break;
1960 					}
1961 				} else
1962 					break;
1963 			}
1964 		}
1965 
1966 		(void) fprintf(outfile, "Logging complete.\n");
1967 	}
1968 	if (got_it) {
1969 		if (scriptargs > 0) {
1970 			(void) snprintf(buf, sizeof (buf),
1971 			    "DRIVER_PATH=/devices%s DRIVER_INSTANCE=%d"
1972 			    " DRIVER_UNCONFIGURE=1 DRIVER_CONFIGURE=0",
1973 			    devpathp, drv_inst);
1974 			for (i = 0; i < scriptargs; i++) {
1975 				(void) strcat(buf, " ");
1976 				(void) strcat(buf, fixup_script[i]);
1977 			}
1978 			(void) system(buf);
1979 		} else {
1980 			(void) system("kill `cat /tmp/bofi.pid`");
1981 		}
1982 	}
1983 	msg(2, "test_driver: terminating\n");
1984 }
1985 
1986 static int
1987 getnameinst(char *orig_path, int *instance, char *name, int namelen)
1988 {
1989 	di_node_t node;
1990 	char *binding_name;
1991 
1992 	if ((node = di_init(&orig_path[8], DINFOSUBTREE|DINFOMINOR)) ==
1993 	    DI_NODE_NIL)
1994 		return (-1);
1995 	if ((binding_name = di_driver_name(node)) == NULL)
1996 		return (-1);
1997 	*instance = di_instance(node);
1998 	(void) strncpy(name, binding_name, namelen);
1999 	di_fini(node);
2000 	return (0);
2001 }
2002 
/*
 * Usage text, printed after "usage: <Progname> " for -h and whenever
 * option parsing fails.  It lists three forms: error injection, logging
 * (-a LOG) and help.
 *
 * -C is shown with an argument because main() stores optarg as the user
 * comment, and -e is shown with optional trailing arguments because main()
 * treats everything after the script name as arguments to that script.
 */
static char syntax[] =
	"          [ -n name [ -i instance ] | -P path ]\n"
	"          [ -a acc_types ] [ -r rnumber ]\n"
	"          [ -l offset [ length ] ] [ -c count [ failcount ] ]\n"
	"          [ -o operator [ operand ] ] [ -f acc_chk  ]\n"
	"          [ -w max_wait_period [ report_interval ] ]\n"
	"     or\n"
	"          [ -n name [ -i instance ] | -P path ]\n"
	"          -a  LOG  [  acc_types ]  [ -r rnumber]\n"
	"          [ -l offset [ length ] ] [ -c count [ failcount ] ]\n"
	"          [ -s collect_time ] [ -p policy ] [ -x flags ]\n"
	"          [ -C comment_string ] [ -e fixup_script [ args ] ]\n"
	"     or\n"
	"          -h";
2017 
2018 int
2019 main(int argc, char *argv[])
2020 {
2021 	extern char *optarg;
2022 	extern int optind;
2023 
2024 	char	c;		/* for parsing getopts */
2025 	int	nopts = 0;	/* for backward compatibility */
2026 	int	err = 0;
2027 
2028 	/* use a maximal set of defaults for logging or injecting */
2029 	struct bofi_errdef errdef = {
2030 		0,		/* length of driver name */
2031 		{0},		/* driver name */
2032 		-1,		/* monitor all instances */
2033 		-1,		/* monitor all register sets and DMA handles */
2034 		(offset_t)0,	/* monitor from start of reg. set or DMA hd */
2035 		myLLMAX,	/* monitor whole reg set or DMA hdl(no LLMAX) */
2036 		0,		/* qualify all */
2037 		DFLTLOGSZ,	/* default no. of accesses before corrupting */
2038 		0u,		/* default no. of accesses to corrupt */
2039 		0u,		/* no check access corruption */
2040 		BOFI_NOP,	/* no corruption operator by default */
2041 		myULLMAX,	/* default operand */
2042 		{0, 0, BOFI_LOG_TIMESTAMP, /* timestamp by default */
2043 		0, 0, 0, 0},	/* no logging by default */
2044 		0};
2045 
2046 
2047 	/* specify the default no of seconds for which to monitor */
2048 	unsigned long long	collecttime = DFLTLOGTIME;
2049 
2050 	char	*str;	/* temporary variable */
2051 	long	tmpl;	/* another one */
2052 	int		i;
2053 	uint_t	tmpui;
2054 
2055 	char buf[MAXPATHLEN];
2056 
2057 	Progname = (char *)strrchr(*argv, '/');
2058 	Progname = (Progname == NULL) ? *argv : Progname + 1;
2059 
2060 	errfile = stderr;
2061 	outfile = stdout;
2062 	policy = 0;
2063 	lsize_is_default = 1;
2064 	pargv = argv;
2065 	pargc = argc;
2066 
2067 	while ((c = getopt(argc, argv, "a:c:C:dD:e:f:h:i:l:n:o:p:P:r:s:tw:x"))
2068 	    != EOF) {
2069 		nopts++;
2070 		switch (c) {
2071 		case 'a':
2072 			msg(2, "option a: optarg %s optind %d argc %d\n",
2073 			    optarg, optind, argc);
2074 			if ((err = str_to_bm(optarg, atypes,
2075 			    &errdef.access_type)) == 0)
2076 				while (optind < argc && *argv[optind] != '-') {
2077 					if ((err = str_to_bm(argv[optind++],
2078 					    atypes, &errdef.access_type)))
2079 						break;
2080 				}
2081 			break;
2082 		case 'c':
2083 			lsize_is_default = 0;
2084 			/* zero is valid */
2085 			errdef.access_count = strtoul(optarg, &str, 0);
2086 			if (str == optarg)
2087 				err = EINVAL;
2088 			else if (optind < argc && (argv[optind][0] != '-' ||
2089 			    (strlen(argv[optind]) > 1 &&
2090 			    isdigit(argv[optind][1]))))
2091 				errdef.fail_count =
2092 				    strtoull(argv[optind++], 0, 0);
2093 			break;
2094 		case 'C':
2095 			user_comment = optarg;
2096 			if (optind < argc && argv[optind][0] != '-')
2097 				err = EINVAL;
2098 			break;
2099 		case 'D':
2100 			dbglvl = strtoul(optarg, &str, 0);
2101 			break;
2102 		case 'e':
2103 			fixup_script = 0;
2104 			scriptargs = 0;
2105 			fixup_script = &argv[optind - 1];
2106 			scriptargs += 1;
2107 			while (optind < argc) {
2108 				optind += 1;
2109 				scriptargs += 1;
2110 			}
2111 			break;
2112 		case 'f':
2113 			tmpl = strtol(optarg, &str, 0);
2114 
2115 			if (str != optarg)
2116 				errdef.acc_chk = tmpl;
2117 			else if (strcmp(optarg, "PIO") == NULL)
2118 				errdef.acc_chk = 1;
2119 			else if (strcmp(optarg, "DMA") == NULL)
2120 				errdef.acc_chk = 2;
2121 			else if (strcmp(optarg, "U4FT_ACC_NO_PIO") == NULL)
2122 				errdef.acc_chk = 1;
2123 			else if (strcmp(optarg, "U4FT_ACC_NO_DMA") == NULL)
2124 				errdef.acc_chk = 2;
2125 			else
2126 				err = EINVAL;
2127 			break;
2128 		case 'i':
2129 			if ((errdef.instance = strtol(optarg, &str, 0)) < 0)
2130 				errdef.instance = -1;
2131 			else if (str == optarg)
2132 				err = EINVAL;
2133 			break;
2134 		case 'l':
2135 			errdef.offset = strtoull(optarg, &str, 0);
2136 			if (str == optarg)
2137 				err = EINVAL;
2138 			else if (optind < argc &&
2139 			    (argv[optind][0] != '-' ||
2140 			    (strlen(argv[optind]) > 1 &&
2141 			    isdigit(argv[optind][1])))) {
2142 				/* -1 indicates the rest of register set */
2143 				errdef.len = strtoull(argv[optind++], 0, 0);
2144 			}
2145 			break;
2146 		case 'n':
2147 			(void) strncpy(errdef.name, optarg, MAXNAMELEN);
2148 			if ((errdef.namesize = strlen(errdef.name)) == 0)
2149 				err = EINVAL;
2150 			break;
2151 		case 'o':
2152 			for (i = 0; optypes[i].str != 0; i++)
2153 				if (strcmp(optarg, optypes[i].str) == 0) {
2154 					errdef.optype = optypes[i].code;
2155 					break;
2156 				}
2157 			if (optypes[i].str == 0)
2158 				err = EINVAL;
2159 			else if (optind < argc &&
2160 			    (argv[optind][0] != '-' ||
2161 			    (strlen(argv[optind]) > 1 &&
2162 			    isdigit(argv[optind][1]))))
2163 				errdef.operand =
2164 				    strtoull(argv[optind++], 0, 0);
2165 			break;
2166 		case 'p':
2167 			tmpui = 0x0u;
2168 			if ((err = str_to_bm(optarg, ptypes, &tmpui)) == 0) {
2169 				while (optind < argc && *argv[optind] != '-')
2170 					if ((err = str_to_bm(argv[optind++],
2171 					    ptypes, &tmpui)))
2172 						break;
2173 				policy = (uint16_t)tmpui;
2174 			}
2175 			if (err == 0 && (policy & BYTEPOLICY))
2176 				errdef.log.flags |= BOFI_LOG_REPIO;
2177 			break;
2178 		case 'P':
2179 			if (getnameinst(optarg, &errdef.instance, buf,
2180 			    MAXPATHLEN) == -1)
2181 				err = EINVAL;
2182 			else
2183 				(void) strncpy(errdef.name, buf, MAXNAMELEN);
2184 			break;
2185 		case 'r':
2186 			if ((errdef.rnumber = strtol(optarg, &str, 0)) < 0)
2187 				errdef.rnumber = -1;
2188 			if (str == optarg) err = EINVAL;
2189 			break;
2190 		case 's':
2191 			collecttime = strtoull(optarg, &str, 0);
2192 			if (str == optarg)
2193 				err = EINVAL;	/* zero is valid */
2194 			break;
2195 		case 'w':
2196 			do_status = 1;
2197 			max_edef_wait = strtoul(optarg, &str, 0);
2198 			/* zero is valid */
2199 			if (str == optarg)
2200 				err = EINVAL;
2201 			else if (optind < argc &&
2202 			    (argv[optind][0] != '-' ||
2203 			    (strlen(argv[optind]) > 1 &&
2204 			    isdigit(argv[optind][1]))))
2205 				edef_sleep = strtoull(argv[optind++], 0, 0);
2206 
2207 			break;
2208 		case 'x':
2209 			if ((optind < argc && *argv[optind] == '-') ||
2210 			    optind == argc)
2211 				errdef.log.flags |= BOFI_LOG_WRAP;
2212 			else {
2213 				if (strchr(argv[optind], 'w') != 0)
2214 					errdef.log.flags |= BOFI_LOG_WRAP;
2215 				if (strchr(argv[optind], 'r') != 0)
2216 					errdef.log.flags |= BOFI_LOG_REPIO;
2217 				if (strchr(argv[optind], 't') != 0)
2218 					errdef.log.flags |= BOFI_LOG_TIMESTAMP;
2219 				if (strstr(argv[optind], "~t") != 0)
2220 					errdef.log.flags &= ~BOFI_LOG_TIMESTAMP;
2221 				optind++;
2222 			}
2223 			break;
2224 		case 'h':
2225 			(void) fprintf(errfile, "usage: %s %s\n",
2226 			    Progname, syntax);
2227 			exit(0);
2228 			break;
2229 		case '?':	/* also picks up missing parameters */
2230 		default:
2231 			(void) fprintf(errfile, "usage: %s %s\n",
2232 			    Progname, syntax);
2233 			exit(2);
2234 		}
2235 
2236 		if (err) {
2237 			(void) fprintf(errfile, "usage: %s %s\n",
2238 			    Progname, syntax);
2239 			exit(2);
2240 		}
2241 		if (c == 'e')
2242 			break;	/* the -e option must be the final option */
2243 	}
2244 
2245 
2246 	if (errdef.name[0] == 0) {
2247 		msg(0, "%s - invalid name parameter\n", Progname);
2248 		exit(1);
2249 	}
2250 	errdef.namesize = strlen(errdef.name);
2251 
2252 	if (policy == 0) {
2253 		policy |= UNBIASEDPOLICY;
2254 		policy |= OPERATORSPOLICY;
2255 	}
2256 
2257 	if (errdef.optype == BOFI_NOP)
2258 		errdef.optype = BOFI_XOR;
2259 	if (errdef.access_type == BOFI_LOG) { /* qualify all accesses */
2260 		errdef.access_type =
2261 		    (BOFI_LOG|BOFI_DMA_RW|BOFI_PIO_RW|BOFI_INTR);
2262 		atype_is_default = 1;
2263 	} else if (errdef.access_type == 0) { /* qualify all accesses */
2264 		errdef.access_type =
2265 		    (BOFI_DMA_RW|BOFI_PIO_RW|BOFI_INTR);
2266 		atype_is_default = 1;
2267 	} else
2268 		atype_is_default = 0;
2269 
2270 	init_sigs();
2271 	if ((errdef.access_type & BOFI_LOG) == 0) {
2272 		int fd, i, instance;
2273 		size_t cnt;
2274 		struct handle_info *hdls, *hp;
2275 
2276 		if ((fd = open(BOFI_DEV, O_RDWR)) == -1) {
2277 			msg(0, "%s: error opening bofi driver: %s\n",
2278 			    Progname, strerror(errno));
2279 			exit(1);
2280 		}
2281 		if ((err = get_hinfo(fd, errdef.name, &hdls, &cnt,
2282 		    errdef.instance, errdef.access_type, errdef.rnumber,
2283 		    errdef.offset, errdef.len, 0)) != 0) {
2284 			msg(0, "%s: Bad lookup on bofi driver.\n", Progname);
2285 			(void) close(fd);
2286 			exit(1);
2287 		} else if (cnt == 0) {
2288 			msg(0,
2289 			    "%s: No handles match request access criteria.\n",
2290 			    Progname);
2291 			(void) close(fd);
2292 			exit(1);
2293 		}
2294 		if (errdef.instance == -1)
2295 			instance = -1;
2296 		else {
2297 			instance = hdls->instance;
2298 			for (i = 0, hp = hdls; i < cnt; i++, hp++) {
2299 				if (instance != hp->instance) {
2300 					instance = -1;
2301 					break;
2302 				}
2303 			}
2304 		}
2305 		if (instance == -1) {
2306 			msg(0, "Multiple instances match access criteria"
2307 			    " (only allowed when logging):\n");
2308 			msg(0, "\tinst\taccess\trnumber\toffset\tlength\n");
2309 			for (i = 0, hp = hdls; i < cnt; i++, hp++)
2310 				msg(0, "\t%d\t0x%x\t%d\t0x%llx\t0x%llx\n",
2311 				    hp->instance, hp->access_type,
2312 				    hp->rnumber, hp->offset, hp->len);
2313 		} else {
2314 			struct bofi_errstate es;
2315 			int timeleft = max_edef_wait;
2316 
2317 			if (ioctl(fd, BOFI_ADD_DEF, &errdef) == -1) {
2318 				perror("th_define - adding errdef failed");
2319 			} else {
2320 				es.errdef_handle = errdef.errdef_handle;
2321 				msg(4, "waiting for edef:"
2322 				    " %d %s %d %d 0x%llx 0x%llx 0x%x 0x%x"
2323 				    " 0x%x 0x%x 0x%x 0x%llx\n",
2324 				    errdef.namesize, errdef.name,
2325 				    errdef.instance, errdef.rnumber,
2326 				    errdef.offset, errdef.len,
2327 				    errdef.access_type, errdef.access_count,
2328 				    errdef.fail_count, errdef.acc_chk,
2329 				    errdef.optype, errdef.operand);
2330 
2331 				set_handler(SIGALRM);	/* handle it */
2332 
2333 				do {
2334 					if (do_status)
2335 						(void) alarm(edef_sleep);
2336 					if (ioctl(fd, BOFI_CHK_STATE_W,
2337 					    &es) == -1) {
2338 						if (errno != EINTR) {
2339 							perror("bad"
2340 							    " BOFI_CHK_STATE");
2341 							break;
2342 						} else if (!do_status) {
2343 							break;
2344 						}
2345 					}
2346 					if (do_status)
2347 						(void) fprintf(outfile,
2348 						    "%llu:%llu:%u:%u:%u:"
2349 						    "%u:%d:\"%s\"\n",
2350 						    es.fail_time, es.msg_time,
2351 						    es.access_count,
2352 						    es.fail_count,
2353 						    es.acc_chk, es.errmsg_count,
2354 						    (uint_t)es.severity,
2355 						    (es.msg_time) ?
2356 						    es.buffer : "");
2357 					if (es.acc_chk == 0 &&
2358 					    es.fail_count == 0 && !do_status)
2359 						print_err_reports(outfile,
2360 						    &es, "", "", -1);
2361 					else if (alarmed) {
2362 						alarmed = 0;
2363 						if ((timeleft -= edef_sleep) <=
2364 						    0) {
2365 							if (do_status)
2366 								break;
2367 							print_err_reports(
2368 							    outfile, &es, "",
2369 							    "", -1);
2370 							break;
2371 						}
2372 					} else if (!do_status)
2373 						print_err_reports(outfile,
2374 						    &es, "", "", -1);
2375 				} while (es.acc_chk != 0 || es.fail_count != 0);
2376 
2377 				msg(2, "done: acc_chk 0x%x fcnt %d\n",
2378 				    es.acc_chk, es.fail_count);
2379 			}
2380 
2381 			(void) close(fd);
2382 		}
2383 		free(hdls);
2384 		return (0);
2385 	}
2386 	test_driver(&errdef, collecttime);
2387 	return (0);
2388 }
2389