xref: /titanic_44/usr/src/cmd/th_tools/th_define.c (revision 84ab085a13f931bc78e7415e7ce921dbaa14fcb3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/time_impl.h>
30 #include <sys/wait.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <stdarg.h>
34 #include <ctype.h>
35 #include <time.h>
36 #include <fcntl.h>
37 #include <sys/stat.h>
38 #include <sys/resource.h>
39 #include <limits.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <errno.h>
43 #include <signal.h>
44 #include <libdevinfo.h>
45 #define	_KERNEL
46 #include <sys/dditypes.h>
47 #include <sys/sunddi.h>
48 #include <sys/bofi.h>
49 
#define	BOFI_DEV	"/devices/pseudo/bofi@0:bofi,ctl"

/*
 * Allocate room for num objects of type s with memalign(), aligned to the
 * size of a pointer. The result is not checked by the macro, so the caller
 * has to test it for NULL.
 */
#define	GETSTRUCT(s, num)	\
	((s *) memalign(sizeof (void*), (num) * sizeof (s)))

#define	MAXEDEFS	(0x64)		/* controls max no of concurrent edefs */
#define	DFLTLOGSZ	(0x4000)	/* default size of an access log */
#define	DFLT_NONPIO_LOGSZ	(0x400)	/* default size of a log */
#define	MAXALRMCALL	(0x1000ull)	/* alarm does not permit big values */
#define	MIN_REPORT_TIME	(5)		/* min time to wait for edef status */
#define	DISTRIB_CUTOFF	(3)		/* useful when reducing a log */
#define	myLLMAX		(0x7fffffffffffffffll)
#define	myULLMAX	(0xffffffffffffffffull)

/*
 * default interval to wait between kicking off workload and injecting fault
 */
#define	DEFAULT_EDEF_SLEEP 3
/*
 * when generating dma corruptions, it is best to corrupt each double word
 * individually for control areas - however for data areas this can be
 * excessive and would generate so many cases we would never finish the run.
 * So set a cut-off value where we switch from corrupting each double word
 * separately to corrupting the lot in one go. 0x100 bytes seems a good value
 * on the drivers we have seen so far.
 */
#define	DMA_INDIVIDUAL_CORRUPT_CUTOFF 0x100
77 
/*
 * Everything kept for one logging errdef: its definition, its status and
 * the accesses that were logged for it.
 */
struct collector_def {
	struct bofi_errdef ed;		/* definition of the log criteria */
	struct bofi_errstate es;	/* the current status of the log */
	struct acc_log_elem *lp;	/* array of logged accesses */
	pid_t pid;	/* NOTE(review): which process this is - confirm */
};
84 
/*
 * Bitmask built from the *POLICY values below. define_nerrs() tests it for
 * OPERATORSPOLICY and MAXIMALPOLICY; reduce_log() is handed a policy mask as
 * an argument and tests the size and distribution bits.
 */
static uint16_t policy;

/* bits 0x1/0x2/0x4/0x8: the access sizes named in ptypes[] */
#define	BYTEPOLICY	(0xf)
/* "multibyte" in ptypes[] */
#define	MULTIPOLICY	(0x10)
/* any size related policy bit */
#define	SIZEPOLICY	(BYTEPOLICY|MULTIPOLICY)
#define	UNBIASEDPOLICY	0x20
/* keep the rarely seen accesses when reducing a log */
#define	UNCOMMONPOLICY	0x40
/* keep the frequently seen accesses when reducing a log */
#define	COMMONPOLICY	0x80
#define	MEDIANPOLICY	0x100
/* generate more than one errdef set per access (see define_nerrs()) */
#define	MAXIMALPOLICY	0x200
/* generate an errdef for every operator (see define_op_err()) */
#define	OPERATORSPOLICY	0x400
/* all of the bits above */
#define	VALIDPOLICY	(0x7ff)
97 
/*
 * One keyword/value pair. The tables built from this type end with an entry
 * whose str is 0.
 */
typedef
struct coding {
	char	*str;	/* keyword as typed by the user */
	uint_t	code;	/* value the keyword stands for */
} coding_t;
103 
/*
 * Policy keywords; the codes are the *POLICY bit values.
 */
static coding_t ptypes[] = {
	{"onebyte", 0x1}, {"twobyte", 0x2},
	{"fourbyte", 0x4}, {"eightbyte", 0x8},
	{"multibyte", 0x10}, {"unbiased", 0x20}, {"uncommon", 0x40},
	{"common", 0x80}, {"median", 0x100}, {"maximal", 0x200},
	{"operators", 0x400},  {0, 0}
};
/*
 * Access type keywords, each in a lower case and an upper case spelling.
 * define_nerrs() searches this table by code and uses the first match, so
 * the lower case spellings are the ones written into generated errdefs.
 */
static coding_t atypes[] = {
	{"pio_r", BOFI_PIO_R}, {"pio_w", BOFI_PIO_W},
	{"dma_r", BOFI_DMA_R}, {"dma_w", BOFI_DMA_W},
	{"pio", BOFI_PIO_RW}, {"dma", BOFI_DMA_RW},
	{"log", BOFI_LOG}, {"intr", BOFI_INTR},
	{"PIO_R", BOFI_PIO_R}, {"PIO_W", BOFI_PIO_W},
	{"DMA_R", BOFI_DMA_R}, {"DMA_W", BOFI_DMA_W},
	{"PIO", BOFI_PIO_RW}, {"DMA", BOFI_DMA_RW},
	{"LOG", BOFI_LOG}, {"INTR", BOFI_INTR}, {0, 0}
};
/*
 * All operator keywords. define_nerrs() relies on entry 3 being "XOR".
 */
static coding_t optypes[] = {
	{"EQ", BOFI_EQUAL}, {"AND", BOFI_AND}, {"OR", BOFI_OR},
	{"XOR", BOFI_XOR}, {"NO", BOFI_NO_TRANSFER},
	{"DELAY", BOFI_DELAY_INTR}, {"LOSE", BOFI_LOSE_INTR},
	{"EXTRA", BOFI_EXTRA_INTR}, {0, 0}
};
/*
 * Operators applied to data accesses. define_nerrs() cycles through this
 * table with an index taken modulo 4, so it must keep four real entries.
 */
static coding_t doptypes[] = {
	{"EQ", BOFI_EQUAL}, {"AND", BOFI_AND}, {"OR", BOFI_OR},
	{"XOR", BOFI_XOR}, {0, 0}
};
/*
 * Operators applied to interrupts. define_nerrs() cycles through this table
 * with an index taken modulo 3, so it must keep three real entries.
 */
static coding_t ioptypes[] = {
	{"DELAY", BOFI_DELAY_INTR}, {"LOSE", BOFI_LOSE_INTR},
	{"EXTRA", BOFI_EXTRA_INTR}, {0, 0}
};
135 
static const unsigned long long	DFLTLOGTIME	= -1ull; /* log forever */

/*
 * This global controls the generation of errdefs for PIO_W. The default should
 * be to only perform an access check errdef but not to corrupt writes - this
 * may trash non-FT platforms.
 */
static uint_t atype_is_default;	/* do not corrupt PIO_W by default */
static uint_t lsize_is_default;	/* set when the user has not given a size */

/*
 * Operand handed to the next EQ errdef written by define_op_err(), which
 * then replaces it with a value built from two lrand48() results.
 */
static uint64_t random_operand = 0xdeadbeafdeadbeafull;
#define	NPIO_DEFAULTS	(3)	/* number of default corruption values */
/*
 * Additional operands: every EQ errdef is followed by one more EQ errdef
 * for each of these values.
 */
static longlong_t pio_default_values[NPIO_DEFAULTS] = {
	0x0ull,			/* corresponds to a line going high/low */
	0x32f1f03232f1f032ull,	/* the value returned when the fake ta is set */
	(longlong_t)(~0)	/* corresponds to a line going high/low */
};

/* msg() and dump_log() only produce output for levels <= dbglvl */
static uint_t dbglvl		= 0;	/* debug this program */
/*
 * Flags raised by kill_sighandler(): alarmed for SIGALRM, killed for every
 * other signal it is installed for. They are written from a signal handler,
 * so they are volatile sig_atomic_t rather than plain int.
 */
static volatile sig_atomic_t alarmed	= 0;
static volatile sig_atomic_t killed	= 0;
157 
/*
 * name of a script to call before offlining a driver being tested
 */
static char **fixup_script = 0;
static int	scriptargs = 0;	/* presumably the fixup_script arg count */
/* NOTE(review): look like saved copies of main()'s argv/argc - confirm */
static char **pargv;
static int	pargc;

/*
 * Upper bound applied by define_nerrs() to the time an errdef is given to
 * report; 0 leaves the initial estimate unbounded.
 */
static int	max_edef_wait = 0;
/* interval written into generated errdefs; 0 selects DEFAULT_EDEF_SLEEP */
static int	edef_sleep = 0;
static int	do_status = 0;	/* report edef status in parsable format */
static char *user_comment = 0;

static char *Progname;
static FILE *errfile;	/* msg() writes its output here */
static FILE *outfile;
174 
175 /*
176  * The th_define utility provides an interface to the bus_ops fault injection
177  * bofi device driver for defining error injection specifications (referred to
178  * as errdefs). An errdef corresponds to a specification of how to corrupt a
179  * device driver's accesses to its hardware. The command line arguments
180  * determine the precise nature of the fault to be injected. If the supplied
181  * arguments define a consistent errdef, the th_define process will store the
182  * errdef with the bofi driver and suspend itself until the criteria given by
183  * the errdef become satisfied (in practice, this will occur when the access
184  * counts go to zero).
185  *
186  * When the resulting errdef is activated using the th_manage(1M) user command
187  * utility, the bofi driver will act upon the errdef by matching the number of
188  * hardware accesses - specified in count, that are of the type specified in
189  * acc_types, made by instance number instance - of the driver whose name is
 * name, (or by the driver instance specified by path) to the register set
191  * (or DMA handle) specified by rnumber, that lie within the range offset to
192  * offset + length from the beginning of the register set or DMA handle. It then
193  * applies operator and operand to the next failcount matching accesses.
194  *
195  * If acc_types includes LOG, th_define runs in automatic test script generation
196  * mode, and a set of test scripts (written in the Korn shell) is created and
197  * placed in a sub-directory of the current directory with the name
198  * driver.test.<id>. A separate, executable script is generated for each access
199  * handle that matches the logging criteria. The log of accesses is placed at
200  * the top of each script as a record of the session. If the current directory
201  * is not writable, file output is written to standard output. The base name of
202  * each test file is the driver name, and the extension is a number that
203  * discriminates between different access handles. A control script (with the
204  * same name as the created test directory) is generated that will run all the
205  * test scripts sequentially.
206  *
207  * Executing the scripts will install, and then activate, the resulting error
208  * definitions. Error definitions are activated sequentially and the driver
209  * instance under test is taken offline and brought back online before each test
210  * (refer to the -e option for more information). By default, logging will apply
211  * to all PIO accesses, interrupts and DMA accesses to and from areas mapped
212  * for both reading and writing, but it can be constrained by specifying
213  * additional acc_types, rnumber, offset and length. Logging will continue for
214  * count matching accesses, with an optional time limit of collect_time seconds.
215  *
216  * Either the -n or -P option must be provided. The other options are optional.
217  * If an option (other than the -a option) is specified multiple times, only
218  * the final value for the option is used. If an option is not specified, its
219  * associated value is set to an appropriate default, which will provide
220  * maximal error coverage as described below.
221  */
222 
223 /*PRINTFLIKE2*/
224 static void
225 msg(uint_t lvl, char *msg, ...)
226 {
227 #define	BUFSZ	128
228 
229 	if (lvl <= dbglvl) {
230 		int count;
231 		va_list args;
232 		char buf[BUFSZ];
233 		int	pos = 0;
234 
235 		va_start(args, msg);
236 		count = vsnprintf(buf, BUFSZ, msg, args);
237 		va_end(args);
238 		if (count > 0) {
239 			count += pos;
240 			if (count >= sizeof (buf))
241 				count = BUFSZ - 1;
242 			buf[count] = '\0';
243 			(void) fprintf(errfile, "%s", buf);
244 		}
245 	}
246 }
247 
248 static void
249 kill_sighandler(int sig)
250 {
251 	switch (sig) {
252 		case SIGALRM:
253 			alarmed = 1;
254 			break;
255 		default:
256 			killed = 1;
257 			break;
258 	}
259 }
260 
261 static void
262 set_handler(int sig)
263 {
264 	struct sigaction sa;
265 
266 	(void) sigfillset(&(sa.sa_mask));
267 	sa.sa_flags = 0;
268 	sa.sa_handler = kill_sighandler;
269 	if (sigaction(sig, &sa, NULL) != 0)
270 		/* install handler */
271 		msg(0, "bad sigaction: %s\n", strerror(errno));
272 }
273 
274 /*
275  * Compare two driver access handles
276  */
277 static int
278 hdl_cmp(const void *p1, const void *p2)
279 {
280 	struct handle_info *e1 = (struct handle_info *)p1;
281 	struct handle_info *e2 = (struct handle_info *)p2;
282 
283 	if (e1->instance < e2->instance)
284 		return (-1);
285 	else if (e1->instance > e2->instance)
286 		return (1);
287 	else if (e1->access_type < e2->access_type)
288 		return (-1);
289 	else if (e1->access_type > e2->access_type)
290 		return (1);
291 	else if (e1->rnumber < e2->rnumber)
292 		return (-1);
293 	else if (e1->rnumber > e2->rnumber)
294 		return (1);
295 	else if (e1->len < e2->len)
296 		return (-1);
297 	else if (e1->len > e2->len)
298 		return (1);
299 	else if (e1->offset < e2->offset)
300 		return (-1);
301 	else if (e1->offset > e2->offset)
302 		return (1);
303 	else if (e1->addr_cookie < e2->addr_cookie)
304 		return (-1);
305 	else if (e1->addr_cookie > e2->addr_cookie)
306 		return (1);
307 	else
308 		return (0);
309 }
310 
311 /*
312  * Compare two hardware accesses.
313  */
314 static int
315 elem_cmp(const void *p1, const void *p2)
316 {
317 	struct acc_log_elem *e1 = (struct acc_log_elem *)p1;
318 	struct acc_log_elem *e2 = (struct acc_log_elem *)p2;
319 
320 	if (e1->access_type < e2->access_type)
321 		return (-1);
322 	else if (e1->access_type > e2->access_type)
323 		return (1);
324 	else if (e1->offset < e2->offset)
325 		return (-1);
326 	else if (e1->offset > e2->offset)
327 		return (1);
328 	else if (e1->size < e2->size)
329 		return (-1);
330 	else if (e1->size > e2->size)
331 		return (1);
332 	else
333 		return (0);
334 }
335 
336 /*
337  * Another way of comparing two hardware accesses.
338  */
339 static int
340 log_cmp(const void *p1, const void *p2)
341 {
342 	struct acc_log_elem *e1 = (struct acc_log_elem *)p1;
343 	struct acc_log_elem *e2 = (struct acc_log_elem *)p2;
344 
345 	int rval = elem_cmp(p1, p2);
346 
347 	if (rval == 0)
348 		if (e1->repcount < e2->repcount)
349 			return (-1);
350 		else if (e1->repcount > e2->repcount)
351 			return (1);
352 		else
353 			return (0);
354 	else
355 		return (rval);
356 }
357 
358 /*
359  * And a final way of sorting a log (by access type followed by repcount).
360  */
361 static int
362 log_cmp2(const void *p1, const void *p2)
363 {
364 	struct acc_log_elem *e1 = (struct acc_log_elem *)p1;
365 	struct acc_log_elem *e2 = (struct acc_log_elem *)p2;
366 
367 	if (e1->access_type < e2->access_type)
368 		return (-1);
369 	else if (e1->access_type > e2->access_type)
370 		return (1);
371 	else if (e1->repcount < e2->repcount)
372 		return (-1);
373 	else if (e1->repcount > e2->repcount)
374 		return (1);
375 	else
376 		return (0);
377 }
378 
379 static void
380 dump_log(uint_t lvl, FILE *fp, struct acc_log_elem *items,
381 	size_t nitems, uint_t logflags)
382 {
383 	if (lvl <= dbglvl) {
384 		int i;
385 		uint_t offset, allthesame = 1;
386 
387 		if (logflags & BOFI_LOG_TIMESTAMP &&
388 			getenv("DUMP_FULL_LOG") != 0)
389 			allthesame = 0;
390 		else
391 			for (i = 1; i < nitems; i++)
392 				if (elem_cmp(items+i, items) != 0)
393 					allthesame = 0;
394 		if (fp != 0)
395 			(void) fprintf(fp,
396 			    "# Logged Accesses:\n# %-4s\t%-12s\t%-4s\t%-18s"
397 			    " (%-1s)\t%-10s\n\n", "type",
398 			    (items->access_type & BOFI_DMA_RW) ?
399 			    "address" : "offset",
400 			    "size", "value", "repcnt", "time");
401 
402 		for (i = 0; i < nitems; i++, items++) {
403 			offset = items->offset;
404 			if (fp != 0) {
405 				(void) fprintf(fp,
406 				    "# 0x%-2x\t0x%-10x\t%-4d\t0x%-16llx"
407 				    " (0x%-1x)\t%-8llu\n",
408 				    items->access_type, offset, items->size,
409 				    items->value, items->repcount,
410 				    (logflags & BOFI_LOG_TIMESTAMP) ?
411 				    items->access_time : 0ull);
412 
413 				if (allthesame) {
414 					(void) fprintf(fp,
415 					    "# Access duplicated %d times\n",
416 					    nitems);
417 					break;
418 				}
419 			} else
420 				msg(lvl, "# 0x%x 0x%x %d 0x%llx(0x%x) %llu\n",
421 				    items->access_type, offset, items->size,
422 				    items->value, items->repcount,
423 				    (logflags & BOFI_LOG_TIMESTAMP) ?
424 				    items->access_time : 0ull);
425 		}
426 	}
427 }
428 
429 static int
430 str_to_bm(char *optarg, coding_t *c, uint_t *bm)
431 {
432 	char *str;
433 	char *s = "\t\n ";
434 	int err = EINVAL;
435 
436 	msg(2, "str_to_bm: optarg %s\n", optarg);
437 	if (optarg != NULL && (str = strtok(optarg, s))) {
438 		msg(2, "str_to_bm: str %s\n", str);
439 		do {
440 			for (; c->str != 0; c++)
441 				if (strcmp(str, c->str) == 0) {
442 					*bm |= c->code;
443 					msg(2, "str_to_bm: %s matches\n",
444 					    c->str);
445 					err = 0;
446 					break;
447 				}
448 		} while ((str = strtok(NULL, s)));
449 	} else
450 		return (EINVAL);
451 	msg(2, "str_to_bm: done 0x%x\n", *bm);
452 	return (err);
453 }
454 
455 
456 /*
457  * Generic routine for commands that apply to a particular instance of
458  * a driver under test (e.g. activate all errdefs defined on an instance).
459  */
460 static int
461 manage_instance(int fd, char *namep, int instance, int cmd)
462 {
463 	struct bofi_errctl errctl;
464 
465 	errctl.namesize = strlen(namep);
466 	(void) strncpy(errctl.name, namep, MAXNAMELEN);
467 	errctl.instance = instance;
468 
469 	msg(8, "manage_instance: %s %d\n", namep, instance);
470 	if (ioctl(fd, cmd, &errctl) == -1) {
471 		msg(0, "bofi ioctl %d failed: %s\n", cmd, strerror(errno));
472 		return (-1);
473 	}
474 	return (0);
475 }
476 
477 
478 static int
479 define_one_error(
480 	FILE *fp,
481 	struct bofi_errdef *edp,
482 	struct acc_log_elem *item,
483 	ulong_t	nttime,
484 	ulong_t interval,
485 	char	*type,
486 	int fon,	/* corrupt after this many accesses */
487 	size_t fcnt,	/* and then fail it fcnt times */
488 	uint_t	acc_chk,
489 	char	*opname,
490 	uint64_t	operand)
491 {
492 	(void) fprintf(fp,
493 	    "-n %s -i %d -r %d -l 0x%llx 0x%x -a %s -c %d %d -f %d"
494 	    " -o %s 0x%llx",
495 	    (char *)edp->name,
496 	    edp->instance,
497 	    edp->rnumber,
498 	    edp->offset + item->offset,	/* offset into the regset */
499 	    item->size,	/* corrupt addrs from offset to offset+size */
500 	    type,
501 	    fon,	/* corrupt after this many accesses */
502 	    fcnt,	/* and then fail it fcnt times */
503 	    acc_chk,
504 	    opname,
505 	    operand);
506 
507 	(void) fprintf(fp, " -w %lu %lu\n", nttime, interval);
508 	return (0);
509 }
510 
511 static void
512 define_op_err(FILE *fp, int *ecnt, struct bofi_errdef *edp,
513 	struct acc_log_elem *item, ulong_t nttime, ulong_t interval, char *type,
514 	int fon, size_t fcnt)
515 {
516 	coding_t *ct;
517 	char	*opname;
518 	uint_t	op;
519 	uint64_t	operand;
520 	int k, save_size;
521 	uint64_t save_offset;
522 
523 	if (item->access_type & BOFI_INTR)
524 		ct = &ioptypes[0];
525 	else
526 		ct = &doptypes[0];
527 
528 	/*
529 	 * errdefs for dma accesses are too numerous so assume that dma writes
530 	 * (DDI_DMA_SYNC_FORDEV) create less exposure to potential errors than
531 	 * do dma reads (DDI_DMA_SYNC_FORCPU).
532 	 *
533 	 * also by default do not corrupt PIO_W - it may hang a non-FT platform.
534 	 */
535 	if (item->access_type != BOFI_DMA_W &&
536 	    ((item->access_type & BOFI_PIO_W) == 0 || !atype_is_default)) {
537 		/*
538 		 * user has asked for PIO_W
539 		 */
540 		for (; ct->str != 0; ct++) {
541 			op = ct->code;
542 			opname = ct->str;
543 			switch (op) {
544 			case BOFI_EQUAL:
545 				operand = random_operand; /* a random value */
546 				random_operand = lrand48() | ((uint64_t)
547 				    (lrand48()) << 32);
548 				break;
549 			case BOFI_AND:
550 				operand = 0xaddedabadb00bull;
551 				break;
552 			case BOFI_OR:
553 				operand = 0x1;
554 				break;
555 			case BOFI_XOR:
556 			default:
557 				operand = myULLMAX;
558 				break;
559 			case BOFI_DELAY_INTR: /* delay for 1 msec */
560 				operand = 1000000;
561 				break;
562 			case BOFI_LOSE_INTR: /* op not applicable */
563 				operand = 0;
564 				break;
565 			case BOFI_EXTRA_INTR: /* extra intrs */
566 				operand = 0xfff;
567 				break;
568 			}
569 			*ecnt = *ecnt + 1;
570 
571 			if ((item->access_type == BOFI_DMA_W ||
572 			    item->access_type == BOFI_DMA_R) &&
573 			    item->size > sizeof (uint64_t) && item->size <
574 			    DMA_INDIVIDUAL_CORRUPT_CUTOFF) {
575 				save_size = item->size;
576 				save_offset = item->offset;
577 				for (k = (item->size +
578 				    sizeof (uint64_t) - 1) &
579 				    ~(sizeof (uint64_t) - 1);
580 				    k > 0; k -= sizeof (uint64_t)) {
581 					item->size = sizeof (uint64_t);
582 					(void) define_one_error(fp, edp,
583 					    item, nttime, interval, type, fon,
584 					    fcnt, edp->acc_chk, opname,
585 					    operand);
586 					item->offset += sizeof (uint64_t);
587 				}
588 				item->size = save_size;
589 				item->offset = save_offset;
590 			} else {
591 				(void) define_one_error(fp, edp, item,
592 				    nttime, interval, type, fon, fcnt,
593 				    edp->acc_chk, opname, operand);
594 			}
595 
596 			if (op == BOFI_EQUAL) {
597 				uint_t cnt;
598 				for (cnt = 0; cnt < NPIO_DEFAULTS;
599 				    cnt++, *ecnt = *ecnt + 1) {
600 					if ((item->access_type == BOFI_DMA_W ||
601 					    item->access_type == BOFI_DMA_R) &&
602 					    item->size > sizeof (uint64_t) &&
603 					    item->size <
604 					    DMA_INDIVIDUAL_CORRUPT_CUTOFF) {
605 						save_size = item->size;
606 						save_offset = item->offset;
607 						for (k = (item->size +
608 						    sizeof (uint64_t) - 1) &
609 						    ~(sizeof (uint64_t) - 1);
610 						    k > 0;
611 						    k -= sizeof (uint64_t)) {
612 							item->size =
613 							    sizeof (uint64_t);
614 							(void) define_one_error(
615 							    fp, edp, item,
616 							    nttime, interval,
617 							    type, fon, fcnt,
618 							    edp->acc_chk,
619 							    opname,
620 							    pio_default_values
621 							    [cnt]);
622 							item->offset +=
623 							    sizeof (uint64_t);
624 						}
625 						item->size = save_size;
626 						item->offset = save_offset;
627 					} else {
628 						(void) define_one_error(fp,
629 						    edp, item, nttime, interval,
630 						    type, fon, fcnt,
631 						    edp->acc_chk, opname,
632 						    pio_default_values[cnt]);
633 					}
634 				}
635 			}
636 		}
637 	}
638 
639 	if ((item->access_type & BOFI_PIO_W) && !atype_is_default) {
640 		/*
641 		 * user has asked for PIO_W
642 		 */
643 		(void) define_one_error(fp, edp, item, nttime, interval,
644 		    type, fon, fcnt, edp->acc_chk, "NO", 0);
645 		*ecnt = *ecnt + 1;
646 	}
647 
648 	/*
649 	 * and finally an access check errdef
650 	 */
651 	if (item->access_type & BOFI_PIO_RW)
652 		(void) define_one_error(fp, edp, item, nttime, interval,
653 		    type, fon, fcnt, 1, "OR", 0);
654 
655 	if (item->access_type & BOFI_DMA_RW)
656 		(void) define_one_error(fp, edp, item, nttime, interval,
657 		    type, fon, fcnt, 2, "OR", 0);
658 
659 }
660 
661 /*
662  * Convert a collection of log entries into error definitions.
663  */
664 /* ARGSUSED */
665 static int
666 define_nerrs(int fd, FILE *fp, int *ecnt, struct bofi_errdef *edp,
667 	struct acc_log_elem *items,
668 	size_t nitems,
669 	uint_t naccess,
670 	uint_t minac,
671 	uint_t maxac,
672 	ulong_t	logtime,
673 	ulong_t	logsize)
674 {
675 	char	*type;
676 	uint_t	at;
677 	int	i;
678 	struct acc_log_elem	*item;
679 	char	*opname;
680 	uint_t	op;
681 	uint64_t	operand;
682 	int	cycleiops, cycledops;
683 	int	intrs = 0;
684 	ulong_t	ttime, nttime, interval;
685 
686 	op = edp->optype;
687 	operand = edp->operand;
688 	msg(3, "define_nerrs: nitems %d (ac %d at 0x%x): (%d %d)"
689 	    " (op 0x%x 0x%llx)\n\n", nitems, naccess, items->access_type,
690 	    minac, maxac, op, operand);
691 
692 	/*
693 	 * all items are guaranteed have values in the two element set {0, at}
694 	 * where at is a valid access type (so find the value of at)
695 	 */
696 	for (i = 0, item = items, at = 0; i < nitems; i++, item++)
697 		if (item->access_type != 0) {
698 			at = item->access_type;
699 			break;
700 		}
701 	if (at == 0)
702 		return (-1);
703 
704 	/*
705 	 * find the string form of the access type
706 	 */
707 	for (i = 0, type = 0; atypes[i].str != 0; i++) {
708 		if (atypes[i].code == at) {
709 			type = atypes[i].str;
710 			break;
711 		}
712 	}
713 	if (type == 0) {
714 		msg(0, "Unknown access type returned from bofi\n\t");
715 		dump_log(0, 0, item, 1, BOFI_LOG_TIMESTAMP);
716 		msg(1, "0x%x 0x%x 0x%x 0x%x\n", BOFI_LOG, BOFI_INTR,
717 		    BOFI_DMA_RW, BOFI_PIO_RW);
718 		return (-1);
719 	}
720 
721 	msg(1, "define_n: at = 0x%d (%s)\n", at, type == 0 ? "null" : type);
722 	/*
723 	 * find the string form of the operator
724 	 */
725 	for (i = 0, opname = 0; optypes[i].str != 0; i++) {
726 		if (op == optypes[i].code) {
727 			opname = optypes[i].str;
728 			break;
729 		}
730 	}
731 
732 	/*
733 	 * if not found or inconsistent default to XOR
734 	 */
735 	if (opname == 0 ||
736 	    (op == BOFI_NO_TRANSFER &&
737 	    (at & (BOFI_DMA_RW|BOFI_PIO_R))) ||
738 	    (op >= BOFI_DELAY_INTR && (at & BOFI_INTR) == 0)) {
739 		opname = optypes[3].str;	/* "XOR" */
740 		operand = myULLMAX;
741 		op = optypes[3].code;
742 	}
743 
744 	/*
745 	 * if operator and access type are inconsistent choose a sensible
746 	 * default
747 	 */
748 	cycleiops = 0;
749 	if (at & BOFI_INTR)
750 		if (op < BOFI_DELAY_INTR)
751 			cycleiops = 1;
752 		else if (op == BOFI_LOSE_INTR)
753 			operand = 0;
754 
755 	cycledops = 0;
756 	if (nitems == 1 && (at & BOFI_DMA_RW))
757 		cycledops = 1;
758 	/*
759 	 * for each access in the list define one or more error definitions
760 	 */
761 	for (i = 0, item = items; i < nitems; i++, item++) {
762 		size_t acnt, fcnt;
763 		int j, fon;
764 
765 		if (item->access_type == 0)
766 			continue;
767 
768 		/*
769 		 * base number of errors to inject on 3% of number of
770 		 * similar accesses seen during LOG phase
771 		 */
772 		acnt = item->repcount / 10 + 1; /* 10% */
773 		fcnt = (acnt >= 3) ? acnt / 3 : 1; /* 3% */
774 
775 		/*
776 		 * wait for twice the time it took during LOG phase
777 		 */
778 		if ((ttime = (item->access_time * 2)) < MIN_REPORT_TIME)
779 			ttime = MIN_REPORT_TIME;
780 		else if (max_edef_wait != 0 && ttime > max_edef_wait)
781 			ttime = max_edef_wait;
782 		/*
783 		 * if edef_sleep set (-w) the use that, otherwise use default
784 		 */
785 		interval = edef_sleep ? edef_sleep : DEFAULT_EDEF_SLEEP;
786 
787 		msg(10,
788 		    "define_n: item %d limit %d step %d (intr %d) tt(%lu)\n",
789 		    i, item->repcount, acnt, intrs, ttime);
790 
791 		for (j = 0, fon = 1, nttime = ttime; j < item->repcount;
792 		    j += acnt) {
793 			if (policy & OPERATORSPOLICY) {
794 				define_op_err(fp, ecnt, edp, item,
795 				    nttime, interval, type, fon, fcnt);
796 			} else {
797 				if (cycleiops) {
798 					op = ioptypes[intrs].code;
799 					opname = ioptypes[intrs++].str;
800 					switch (op) {
801 					case BOFI_DELAY_INTR:
802 						/* delay for 1 sec */
803 						operand = 1000000;
804 						break;
805 					case BOFI_LOSE_INTR:
806 						/* op not applicable */
807 						operand = 0;
808 						break;
809 					case BOFI_EXTRA_INTR:
810 					default:
811 						/* generate 2 extra intrs */
812 						operand = 0xfff;
813 						break;
814 					}
815 					intrs %= 3;
816 				} else if (cycledops) {
817 					op = doptypes[intrs].code;
818 					opname = doptypes[intrs++].str;
819 					switch (op) {
820 					case BOFI_EQUAL:
821 						random_operand = lrand48() |
822 						    ((uint64_t)
823 						    (lrand48()) << 32);
824 						break; /* a random value */
825 					case BOFI_AND:
826 						operand = 0xaddedabadb00bull;
827 						break;
828 					case BOFI_OR:
829 						operand = 0xd1ab011c0af1a5c0ull;
830 						break;
831 					case BOFI_XOR:
832 					default:
833 						operand = myULLMAX;
834 						break;
835 					}
836 					intrs %= 4;
837 				}
838 				(void) define_one_error(fp, edp, item,
839 				    nttime, interval, type, fon,
840 				    fcnt, edp->acc_chk, opname, operand);
841 				*ecnt = *ecnt + 1;
842 				if (op == BOFI_EQUAL) {
843 					uint_t cnt;
844 					for (cnt = 0; cnt < NPIO_DEFAULTS;
845 					    cnt++, *ecnt = *ecnt + 1)
846 						(void) define_one_error(fp,
847 						    edp, item, nttime,
848 						    interval, type, fon, fcnt,
849 						    edp->acc_chk, opname,
850 						    pio_default_values[cnt]);
851 				}
852 			}
853 
854 			/*
855 			 * all non maximal policies should only generate
856 			 * a single error definition set per access.
857 			 */
858 			if (!(policy & MAXIMALPOLICY))
859 				break;
860 
861 			nttime = (logtime - item->access_time) *
862 			    (j + acnt + fcnt - 1) / logsize;
863 			if (nttime < MIN_REPORT_TIME)
864 				nttime = MIN_REPORT_TIME;
865 			else if (nttime > max_edef_wait)
866 				nttime = max_edef_wait;
867 
868 			msg(11, "define_nerrs: %lu %d %d %d %llu\n", nttime,
869 			    max_edef_wait, fon, fcnt, item->access_time);
870 
871 			if (item->access_type != BOFI_INTR)
872 				fon += j;
873 		}
874 	}
875 
876 	return (0);
877 }
878 
879 static int
880 reduce_log(uint16_t pol, struct acc_log *log,		/* input args */
881 	struct acc_log_elem **llp, size_t *cntp)	/* output args */
882 {
883 	ulong_t logtime;
884 	struct acc_log_elem *items, *item, *elem;
885 	int cnt, nitems, acnt;
886 	int i, j, k, lb, ub, mina, maxa, cutoff[2], mean;
887 
888 	if (llp == 0 || cntp == 0)	/* subroutine interface violated */
889 		return (-1);
890 
891 	if (*llp == 0) {
892 		items = (void *)log->logbase;
893 		nitems = log->entries;
894 	} else {
895 		items = *llp;	/* outputs double up as inputs */
896 		nitems = *cntp;
897 	}
898 	/* has the utc time wrapped over ULMAX - unlikely so fix it at 10 */
899 	logtime = (log->stop_time >= log->start_time) ?
900 	    log->stop_time - log->start_time : 10ul;
901 
902 	msg(1, "reduce %d: logtime %lu\n", nitems, logtime);
903 	/*
904 	 * Sort the log by access type - do not remove duplicates yet (but do
905 	 * remove access that do not match the requested log -> errdef policy
906 	 * (defined by union pu pol). Set the repcount field of each entry to a
907 	 * unique value (in the control statement of the for loop) - this
908 	 * ensures that the qsort (following the for loop) will not remove any
909 	 * entries.
910 	 */
911 	for (i = 0, cnt = 0, elem = items; i < nitems;
912 	    elem->repcount = i, i++, elem++) {
913 		/*
914 		 * If interested in the I/O transfer size and this access
915 		 * does not match the requested size then ignore the access
916 		 */
917 		if ((pol & SIZEPOLICY) &&
918 		    (!(pol & MULTIPOLICY) || elem->repcount == 1) &&
919 		    /* req for DMA / ddi_rep */
920 		    (pol & elem->size) == 0)
921 			elem->access_type = 0;
922 			/* these will end up sorted at the head */
923 		else {
924 			cnt += 1;
925 			elem->size *= elem->repcount;
926 			if (log->flags & BOFI_LOG_TIMESTAMP)
927 				/* real access time */
928 				elem->access_time -= log->start_time;
929 			else
930 				/* linear fit */
931 				elem->access_time = logtime * (i + 1) / nitems;
932 		}
933 	}
934 
935 	qsort((void *)items, nitems, sizeof (*items), log_cmp);
936 
937 	msg(5, "qsorted log raw (nitems %d cnt %d:\n", nitems, cnt);
938 	dump_log(14, 0, items, nitems, log->flags);
939 
940 	if (cnt != nitems) {	/* some items should be ignored */
941 		items += (nitems - cnt);	/* ignore these ones */
942 		if ((nitems = cnt) == 0) {
943 			*cntp = 0;
944 			*llp = 0;
945 			return (0);
946 			/* the chosen policy has ignored everything */
947 		}
948 
949 	}
950 	/*
951 	 * Now remove duplicate entries based on access type, address and size.
952 	 * Reuse the repcount field to store the no. of duplicate accesses.
953 	 * Store the average access time in the single remaining
954 	 * representative of the duplicate set.
955 	 */
956 
957 	for (i = 1, cnt = 1, elem = items, elem->repcount = 1, item = elem + 1;
958 	    i < nitems; i++, item++) {
959 		if (elem_cmp(elem, item) == 0) {
960 			elem->access_time += item->access_time;
961 			elem->repcount++;
962 		} else {	/* not a duplicate */
963 			elem->access_time = logtime / elem->repcount;
964 			elem++;
965 			*elem = *item;
966 			cnt++;
967 			elem->repcount = 1;
968 		}
969 	}
970 	elem->access_time = logtime / elem->repcount;
971 
972 	/*
973 	 * The log is sorted by access type - now resort to order by frequency
974 	 * of accesses (ie for a given access type uncommon access will come
975 	 * first.
976 	 */
977 
978 	qsort((void *)items, cnt, sizeof (*items), log_cmp2);
979 	msg(4, "qsorted log2: cnt is %d\n", cnt);
980 	dump_log(4, 0, items, cnt, log->flags);
981 
982 	for (i = 0; i < cnt; i = j) {
983 
984 		/*
985 		 * Pick out the set [i, j) consisting of elements with the same
986 		 * access type
987 		 */
988 		for (j = i + 1, acnt = items[i].repcount; j < cnt &&
989 		    items[j].access_type == items[i].access_type; j++)
990 			acnt += items[j].repcount;
991 
992 		if (j - i == 1)	/* never ignore solo accesses of a given type */
993 			continue;
994 		/*
995 		 * Now determine what constitutes uncommon and common accesses:
996 		 */
997 		mina = items[i].repcount;
998 		maxa = items[j-1].repcount;
999 		mean = acnt / (j - i); /* mean value */
1000 
1001 		if (pol & (UNCOMMONPOLICY|MEDIANPOLICY)) {
1002 			cutoff[0] = (mean - mina) / DISTRIB_CUTOFF + mina;
1003 
1004 			for (ub = i; ub < j; ub++)
1005 				if (items[ub].repcount > cutoff[0])
1006 					break;
1007 			lb = j - 1;
1008 		} else {
1009 			lb = i;
1010 			ub = j-1;
1011 		}
1012 
1013 		if (pol & (COMMONPOLICY|MEDIANPOLICY)) {
1014 			cutoff[1] = maxa - (maxa - mean) / DISTRIB_CUTOFF;
1015 			for (lb = j - 1; lb >= i; lb--)
1016 				if (items[lb].repcount < cutoff[1])
1017 					break;
1018 			if (!(pol & (UNCOMMONPOLICY|MEDIANPOLICY)))
1019 				ub = i;
1020 		}
1021 
1022 		msg(3, "reduce_log: p 0x%x at %d:0x%x %d:0x%x acnt mina maxa"
1023 		    " (%d %d %d)"
1024 		    " mean %d cutoffs(%d %d) bnds(%d, %d)\n",
1025 		    pol, i, items[i].access_type, j, items[j].access_type,
1026 		    acnt, mina, maxa, mean, cutoff[0], cutoff[1], lb, ub);
1027 
1028 		if (ub <= lb)
1029 			if (!(pol & MEDIANPOLICY))
1030 				/* delete all the mid accesses */
1031 				for (k = ub; k <= lb; k++)
1032 					items[k].access_type = 0;
1033 			else {
1034 				if (!(pol & UNCOMMONPOLICY))
1035 					/* delete uncommon accesses */
1036 					for (k = i; k < ub; k++)
1037 						items[k].access_type = 0;
1038 				if (!(pol & COMMONPOLICY))
1039 					/* delete common accesses */
1040 					for (k = lb+1; k < j; k++)
1041 						items[k].access_type = 0;
1042 			}
1043 	}
1044 	msg(4, "reduce_log: returning %d items\n", cnt);
1045 	dump_log(5, 0, items, cnt, log->flags);
1046 	*cntp = cnt;
1047 	*llp = items;
1048 	return (0);
1049 }
1050 
1051 static void
1052 log2errdefs(int fd, struct bofi_errdef *edp, struct acc_log *log,
1053 	char *devpath)
1054 {
1055 	struct acc_log_elem	*items;
1056 	size_t			nitems;
1057 	int			i, j;
1058 	uint_t			acc_cnt;
1059 	char			fname[_POSIX_PATH_MAX];
1060 	FILE			*fp = 0;
1061 	time_t			utc = time(NULL);
1062 	int			ecnt = 0;
1063 	int			err;
1064 	ulong_t			logtime;
1065 	char			*buffer;
1066 	struct stat		statbuf;
1067 
1068 	items = (void *)log->logbase;
1069 	nitems = log->entries;
1070 	logtime = (log->stop_time >= log->start_time) ?
1071 	    log->stop_time - log->start_time : 10ul;
1072 
1073 	if (nitems == 0)
1074 		return;
1075 
1076 	/* ensure that generated errdefs complete in bounded time */
1077 	if (max_edef_wait == 0)
1078 		max_edef_wait =
1079 		    logtime > MIN_REPORT_TIME ? logtime : MIN_REPORT_TIME * 2;
1080 
1081 	msg(4, "log2errdefs(0x%p, 0x%p, %d, 0x%x):\n",
1082 	    (void *) edp, (void *) items, nitems, policy);
1083 
1084 	(void) snprintf(fname, sizeof (fname), "%s.%d", (char *)edp->name,
1085 	    (int)getpid());
1086 	if ((fp = fopen(fname, "w")) == 0)
1087 		fp = outfile;
1088 
1089 	(void) fprintf(fp, "#!/bin/ksh -p\n\n");
1090 	(void) fprintf(fp, "# %-24s%s\n", "Script creation time:", ctime(&utc));
1091 	(void) fprintf(fp, "# %-24s%llu\n",
1092 	    "Activation time:", log->start_time);
1093 	(void) fprintf(fp, "# %-24s%llu\n",
1094 	    "Deactivation time:", log->stop_time);
1095 	(void) fprintf(fp, "# %-24s%d\n", "Log size:", nitems);
1096 	(void) fprintf(fp, "# %-24s", "Errdef policy:");
1097 	for (i = 0; ptypes[i].str != 0; i++)
1098 		if (policy & ptypes[i].code)
1099 			(void) fprintf(fp, "%s ", ptypes[i].str);
1100 	(void) fprintf(fp, "\n");
1101 	(void) fprintf(fp, "# %-24s%s\n", "Driver:", (char *)edp->name);
1102 	(void) fprintf(fp, "# %-24s%d\n", "Instance:", edp->instance);
1103 	if (edp->access_type & BOFI_PIO_RW) {
1104 		(void) fprintf(fp, "# %-24s%d\n",
1105 		    "Register set:", edp->rnumber);
1106 		(void) fprintf(fp, "# %-24s0x%llx\n", "Offset:", edp->offset);
1107 		(void) fprintf(fp, "# %-24s0x%llx\n", "Length:", edp->len);
1108 	} else if (edp->access_type & BOFI_DMA_RW) {
1109 		(void) fprintf(fp, "# %-24s%d\n", "DMA handle:", edp->rnumber);
1110 		(void) fprintf(fp, "# %-24s0x%llx\n", "Offset:", edp->offset);
1111 		(void) fprintf(fp, "# %-24s0x%llx\n", "Length:", edp->len);
1112 	} else if ((edp->access_type & BOFI_INTR) == 0) {
1113 		(void) fprintf(fp, "# %-24s%d\n",
1114 		    "Unknown Handle Type:", edp->rnumber);
1115 	}
1116 
1117 	(void) fprintf(fp, "# %-24s0x%x ( ", "Access type:",
1118 	    (edp->access_type & ~BOFI_LOG));
1119 	if (edp->access_type & BOFI_PIO_R)
1120 		(void) fprintf(fp, "%s ", "pio_r");
1121 	if (edp->access_type & BOFI_PIO_W)
1122 		(void) fprintf(fp, "%s ", "pio_w");
1123 	if (edp->access_type & BOFI_DMA_W)
1124 		(void) fprintf(fp, "%s ", "dma_w");
1125 	if (edp->access_type & BOFI_DMA_R)
1126 		(void) fprintf(fp, "%s ", "dma_r");
1127 	if (edp->access_type & BOFI_INTR)
1128 		(void) fprintf(fp, "%s ", "intr");
1129 	(void) fprintf(fp, ")\n\n");
1130 	if (user_comment)
1131 		(void) fprintf(fp, "# %-24s%s\n\n",
1132 		    "Test Comment:", user_comment);
1133 
1134 	dump_log(0, fp, items, nitems, log->flags);
1135 
1136 	items = 0;
1137 	if ((err = reduce_log(policy, log, &items, &nitems)) < 0 ||
1138 	    nitems == 0) {
1139 		msg(4, "log2errdefs: reduce_log err %d nitems %d\n",
1140 		    err, nitems);
1141 		return;
1142 	}
1143 	(void) fprintf(fp, "\nerror() { echo \""
1144 	    "${0##*/}: $@\""
1145 	    " >&2; exit 2; }\n");
1146 	(void) fprintf(fp,
1147 	    "trap ' ' 16\t# ignore - it is trapped by abort monitor_edef\n");
1148 
1149 	(void) fprintf(fp, "\nfixup_script()\n{\n");
1150 	if (scriptargs > 0) {
1151 		(void) fprintf(fp, "\tif [[ $1 -eq 1 ]]\n\tthen\n");
1152 		(void) fprintf(fp, "\t\t# Call a user defined workload\n");
1153 		(void) fprintf(fp, "\t\t# while injecting errors\n\t\t");
1154 		for (i = 0; i < scriptargs; i++)
1155 			(void) fprintf(fp, "%s ", fixup_script[i]);
1156 		(void) fprintf(fp, "\n\tfi\n");
1157 		(void) fprintf(fp, "\treturn 0\n");
1158 	} else {
1159 		(void) fprintf(fp, "\tif [[ $1 -eq 0 ]]\n\tthen\n");
1160 		(void) fprintf(fp,
1161 		    "\t\t# terminate any outstanding workload\n");
1162 		(void) fprintf(fp, "\t\tif [ $script_pid -gt 0 ]; then\n");
1163 		(void) fprintf(fp, "\t\t\tkill $script_pid\n");
1164 		(void) fprintf(fp, "\t\t\tscript_pid=0\n");
1165 		(void) fprintf(fp, "\t\tfi\n");
1166 		(void) fprintf(fp, "\tfi\n");
1167 		(void) fprintf(fp, "\treturn -1\n");
1168 	}
1169 	(void) fprintf(fp, "}\n\n");
1170 	(void) fprintf(fp, "devpath=/devices%s\n\n", devpath);
1171 	(void) fprintf(fp, "#\n");
1172 	(void) fprintf(fp, "# following text extracted from th_script\n");
1173 	(void) fprintf(fp, "#\n");
1174 	if (stat("/usr/lib/th_script", &statbuf) == -1) {
1175 		msg(0, "log2errdefs: stat of /usr/lib/th_script failed\n");
1176 		return;
1177 	}
1178 	fd = open("/usr/lib/th_script", O_RDONLY);
1179 	if (fd == -1) {
1180 		msg(0, "log2errdefs: open of /usr/lib/th_script failed\n");
1181 		return;
1182 	}
1183 	buffer = malloc(statbuf.st_size);
1184 	if (!buffer) {
1185 		msg(0, "log2errdefs: malloc for /usr/lib/th_script failed\n");
1186 		return;
1187 	}
1188 	if (read(fd, buffer, statbuf.st_size) != statbuf.st_size) {
1189 		msg(0, "log2errdefs: read of /usr/lib/th_script failed\n");
1190 		return;
1191 	}
1192 	(void) fwrite(buffer, statbuf.st_size, 1, fp);
1193 	(void) close(fd);
1194 	(void) fprintf(fp, "#\n");
1195 	(void) fprintf(fp, "# end of extracted text\n");
1196 	(void) fprintf(fp, "#\n");
1197 	(void) fprintf(fp, "run_subtest %s %d <<ERRDEFS\n",
1198 	    (char *)edp->name, edp->instance);
1199 
1200 	for (i = 0; i < nitems; i = j) {
1201 
1202 		acc_cnt = items[i].repcount;
1203 		for (j = i + 1;
1204 		    j < nitems && items[j].access_type == items[i].access_type;
1205 		    j++)
1206 			acc_cnt += items[j].repcount;
1207 		msg(1, "l2e: nitems %d i %d j %d at 0x%x\n",
1208 		    nitems, i, j, items[i].access_type);
1209 		if (items[i].access_type != 0)
1210 			(void) define_nerrs(fd, fp, &ecnt, edp, items+i, j-i,
1211 			    acc_cnt, items[i].repcount, items[j-1].repcount,
1212 			    logtime, log->entries);
1213 	}
1214 
1215 	(void) fprintf(fp, "ERRDEFS\n");
1216 	(void) fprintf(fp, "exit 0\n");
1217 
1218 	if (fp != stdout && fp != stderr) {
1219 		if (fchmod(fileno(fp), S_IRWXU|S_IRGRP|S_IROTH))
1220 			msg(0, "fchmod failed: %s\n", strerror(errno));
1221 		if (fclose(fp) != 0)
1222 			msg(0, "close of %s failed: %s\n", fname,
1223 			    strerror(errno));
1224 	}
1225 	msg(10, "log2errdefs: done\n");
1226 }
1227 
/* low-order bits of an address within a longlong_t-sized unit */
#define	LLSZMASK (sizeof (longlong_t) -1)
1229 
1230 static int
1231 add_edef(int fd,
1232 	struct bofi_errdef *errdef,	/* returned access criteria */
1233 	struct bofi_errstate *errstate,
1234 	struct handle_info *hdl,	/* handle to match against request */
1235 	struct bofi_errdef *edp)	/* requested access criteria */
1236 {
1237 	*errdef = *edp;
1238 	errdef->instance = hdl->instance;
1239 
1240 
1241 	if (hdl->access_type == 0)
1242 		return (EINVAL);
1243 
1244 	errdef->access_type =
1245 	    errdef->access_type & (hdl->access_type|BOFI_LOG);
1246 
1247 	/* use a big log for PIO and a small one otherwise */
1248 	if (lsize_is_default &&
1249 	    (errdef->access_type & BOFI_PIO_RW) == 0) {
1250 		errdef->access_count = DFLT_NONPIO_LOGSZ;
1251 		errdef->fail_count = 0;
1252 	}
1253 	errdef->log.logsize = errstate->log.logsize =
1254 	    errdef->access_count + errdef->fail_count - 1;
1255 	if (errdef->log.logsize == -1U) {
1256 		errdef->log.logsize = errstate->log.logsize = 0;
1257 	}
1258 	errdef->log.logbase = errstate->log.logbase =
1259 	    (caddr_t)GETSTRUCT(struct acc_log_elem, errdef->log.logsize);
1260 
1261 	if (errdef->log.logbase == 0)
1262 		return (EAGAIN);
1263 
1264 	errdef->rnumber = hdl->rnumber;
1265 	errdef->offset = hdl->offset;
1266 	errdef->len = hdl->len;
1267 
1268 	msg(4, "creating errdef: %d %s %d %d 0x%llx 0x%llx 0x%x 0x%x 0x%x"
1269 	    " 0x%x 0x%x 0x%llx\n",
1270 	    errdef->namesize, (char *)errdef->name,
1271 	    errdef->instance, errdef->rnumber,
1272 	    errdef->offset, errdef->len,
1273 	    errdef->access_type,
1274 	    errdef->access_count, errdef->fail_count,
1275 	    errdef->acc_chk, errdef->optype, errdef->operand);
1276 	if (ioctl(fd, BOFI_ADD_DEF, errdef) == -1) {
1277 		perror("th_define - adding errdef failed");
1278 		return (errno);
1279 	}
1280 	errdef->optype = edp->optype; /* driver clears it if fcnt is zero */
1281 	errstate->errdef_handle = errdef->errdef_handle;
1282 	return (0);
1283 }
1284 
1285 static void
1286 collect_state(int fd, int cmd,
1287 	struct bofi_errstate *errstate,
1288 	struct bofi_errdef *errdef,
1289 	char *devpath)
1290 {
1291 	int rval;
1292 	size_t ls = errstate->log.logsize;
1293 
1294 	msg(2, "collect_state: pre: edp->access_type 0x%x (logsize %d)\n",
1295 		errdef->access_type, errdef->log.logsize);
1296 
1297 	do {
1298 		errstate->log.logsize = 0; /* only copy the driver log once */
1299 
1300 		msg(10, "collecting state (lsize %d) ...\n",
1301 		    errstate->log.logsize);
1302 		errno = 0;
1303 
1304 		if (ioctl(fd, cmd, errstate) == -1 && errno != EINTR) {
1305 			perror("th_define (collect) -"
1306 			    " waiting for error report failed");
1307 			break;
1308 		}
1309 
1310 		(void) fprintf(outfile, "Logged %d out of %d accesses"
1311 		    " (%s %d %d 0x%x %d).\n",
1312 		    errstate->log.entries, ls,
1313 		    (char *)errdef->name, errdef->instance, errdef->rnumber,
1314 		    errdef->access_type, errstate->log.wrapcnt);
1315 
1316 		(void) msg(1, "\t(ac %d fc %d lf 0x%x wc %d).\n",
1317 		    errstate->access_count, errstate->fail_count,
1318 		    errstate->log.flags, errstate->log.wrapcnt);
1319 
1320 		rval = errno;
1321 		if ((errstate->log.flags & BOFI_LOG_WRAP) &&
1322 		    errstate->access_count > 0)
1323 			continue;
1324 		if (errstate->access_count <= 1 &&
1325 		    errstate->fail_count == 0 &&
1326 		    errstate->acc_chk == 0) {
1327 			msg(3, "collecting state complete entries %d\n",
1328 			    errstate->log.entries);
1329 			break;
1330 		}
1331 
1332 		msg(5, "still collecting state: %d, %d, %d\n",
1333 		    errstate->access_count, errstate->fail_count,
1334 		    errstate->acc_chk);
1335 		(void) msg(2, "Log: errno %d size %d entries %d "
1336 		    "(off 0x%llx len 0x%llx) ac %d\n", errno,
1337 		    errstate->log.logsize, errstate->log.entries,
1338 		    errdef->offset, errdef->len, errstate->access_count);
1339 
1340 	} while (rval == 0 && errstate->log.entries < ls);
1341 
1342 	/* now grab the log itself */
1343 	errstate->log.logsize = ls;
1344 	if (errstate->log.entries != 0) {
1345 		if (ioctl(fd, BOFI_CHK_STATE, errstate) == -1) {
1346 			msg(0,
1347 			    "%s: errorwhile retrieving %d log entries: %s\n",
1348 			    Progname, errstate->log.entries, strerror(errno));
1349 		} else {
1350 			msg(2, "collect_state: post: edp->access_type 0x%x"
1351 			    " (log entries %d %d) (%llu - %llu)\n",
1352 			    errdef->access_type,
1353 			    errstate->log.entries, errstate->access_count,
1354 			    errstate->log.start_time, errstate->log.stop_time);
1355 
1356 			log2errdefs(fd, errdef, &(errstate->log), devpath);
1357 		}
1358 	}
1359 }
1360 
1361 static void
1362 print_err_reports(FILE *fp, struct bofi_errstate *esp,
1363 	char *fname, char *cmt, int id)
1364 {
1365 	if (fname != 0 && *fname != 0)
1366 		(void) fprintf(fp, "%sErrdef file %s definition %d:",
1367 		    cmt, fname, id);
1368 	else
1369 		(void) fprintf(fp, "%s", cmt);
1370 
1371 	if (esp->access_count != 0) {
1372 		(void) fprintf(fp, " (access count %d).\n", esp->access_count);
1373 	} else {
1374 		(void) fprintf(fp, "\n%s\tremaining fail count %d acc_chk %d\n",
1375 		    cmt, esp->fail_count, esp->acc_chk);
1376 		(void) fprintf(fp, "%s\tfail time 0x%llx error reported time"
1377 		    " 0x%llx errors reported %d\n", cmt,
1378 		    esp->fail_time, esp->msg_time,
1379 		    esp->errmsg_count);
1380 		if (esp->msg_time)
1381 			(void) fprintf(fp, "%s\tmessage \"%s\" severity 0x%x\n",
1382 			    cmt, esp->buffer, (uint_t)esp->severity);
1383 	}
1384 }
1385 
1386 static void
1387 thr_collect(void *arg, char *devpath)
1388 {
1389 	int fd;
1390 	struct collector_def *hi = (struct collector_def *)arg;
1391 
1392 	msg(4, "thr_collect: collecting %s inst %d rn %d at = 0x%x.\n",
1393 	    hi->ed.name, hi->ed.instance,
1394 	    hi->ed.rnumber, hi->ed.access_type);
1395 
1396 	if ((fd = open(BOFI_DEV, O_RDWR)) == -1) {
1397 		if (errno == EAGAIN)
1398 			msg(0, "Too many instances of bofi currently open\n");
1399 		else
1400 			msg(0, "Error while opening bofi driver: %s",
1401 			    strerror(errno));
1402 	} else {
1403 		/*
1404 		 * Activate the logging errdefs - then collect the results.
1405 		 */
1406 		(void) manage_instance(fd, hi->ed.name,
1407 		    hi->ed.instance, BOFI_START);
1408 		collect_state(fd, BOFI_CHK_STATE_W, &hi->es, &hi->ed, devpath);
1409 	}
1410 
1411 	/*
1412 	 * there is no more work to do on this access handle so clean up / exit.
1413 	 */
1414 	msg(3, "thr_collect: closing and broadcasting.\n");
1415 	exit(0);
1416 }
1417 
1418 /*
1419  * Given an access handle known to the bofi driver see if the user has
1420  * specified access criteria that match that handle. Note: this matching
1421  * algorithm should be kept consistent with the drivers alogorithm.
1422  */
1423 static int
1424 match_hinfo(struct handle_info *hp, int instance, uint_t access_type,
1425 	int rnumber, offset_t offset, offset_t len)
1426 {
1427 
1428 	msg(9, "matching (%d %d) 0x%x %d offset (%llx, %llx) len (%llx %llx)\n",
1429 	    hp->instance, instance, access_type, rnumber,
1430 	    hp->offset, offset, hp->len, len);
1431 
1432 	if (instance != -1 && hp->instance != instance)
1433 		return (0);
1434 	if ((access_type & BOFI_DMA_RW) &&
1435 	    (hp->access_type & BOFI_DMA_RW) &&
1436 	    (rnumber == -1 || hp->rnumber == rnumber) &&
1437 	    ((uintptr_t)(hp->addr_cookie + offset + len) & ~LLSZMASK) >
1438 	    ((uintptr_t)((hp->addr_cookie + offset) + LLSZMASK) & ~LLSZMASK))
1439 		return (1);
1440 	else if ((access_type & BOFI_INTR) &&
1441 	    (hp->access_type & BOFI_INTR))
1442 		return (1);
1443 	else if ((access_type & BOFI_PIO_RW) &&
1444 	    (hp->access_type & BOFI_PIO_RW) &&
1445 	    (rnumber == -1 || hp->rnumber == rnumber) &&
1446 	    (len == 0 || hp->offset < offset + len) &&
1447 	    (hp->len == 0 || hp->offset + hp->len > offset))
1448 		return (1);
1449 	else
1450 		return (0);
1451 }
1452 
1453 /*
1454  * Obtain all the handles created by the driver specified by the name parameter
1455  * that match the remaining arguments. The output parameter nhdls indicates how
1456  * many of the structures pointed to by the output parameter hip match the
1457  * specification.
1458  *
1459  * It is the responsibility of the caller to free *hip when *nhdls != 0.
1460  */
1461 static int
1462 get_hinfo(int fd, char *name, struct handle_info **hip, size_t *nhdls,
1463     int instance, int atype, int rset, offset_t offset, offset_t len,
1464     int new_semantics)
1465 {
1466 	struct bofi_get_hdl_info hdli;
1467 	int command;
1468 
1469 	command = BOFI_GET_HANDLE_INFO;
1470 	hdli.namesize = strlen(name);
1471 	(void) strncpy(hdli.name, name, MAXNAMELEN);
1472 	/*
1473 	 * Initially ask for the number of access handles (not the structures)
1474 	 * in order to allocate memory
1475 	 */
1476 	hdli.hdli = 0;
1477 	*hip = 0;
1478 	hdli.count = 0;
1479 
1480 	/*
1481 	 * Ask the bofi driver for all handles created by the driver under test.
1482 	 */
1483 	if (ioctl(fd, command, &hdli) == -1) {
1484 		*nhdls = 0;
1485 		msg(0, "driver failed to return handles: %s\n",
1486 		    strerror(errno));
1487 		return (errno);
1488 	} else if ((*nhdls = hdli.count) == 0) {
1489 		msg(1, "get_hinfo: no registered handles\n");
1490 		return (0);	/* no handles */
1491 	} else if ((*hip = GETSTRUCT(struct handle_info, *nhdls)) == 0) {
1492 		return (EAGAIN);
1493 	} else {
1494 		struct handle_info *hp, **chosen;
1495 		int i;
1496 
1497 		/* Ask for *nhdls handles */
1498 		hdli.hdli = (caddr_t)*hip;
1499 		if (ioctl(fd, command, &hdli) == -1) {
1500 			int err = errno;
1501 
1502 			msg(0, "BOFI_GET_HANDLE_INFO ioctl returned error %d\n",
1503 			    err);
1504 			free(*hip);
1505 			return (err);
1506 		}
1507 
1508 		if (hdli.count < *nhdls)
1509 			*nhdls = hdli.count; /* some handles have gone away */
1510 
1511 		msg(4, "qsorting %d handles\n", *nhdls);
1512 		if (*nhdls > 1)
1513 			/* sort them naturally (NB ordering is not mandatory) */
1514 			qsort((void *)*hip, *nhdls, sizeof (**hip), hdl_cmp);
1515 
1516 		if ((chosen = malloc(sizeof (hp) * *nhdls)) != NULL) {
1517 			struct handle_info **ip;
1518 			/* the selected handles */
1519 			struct handle_info *prev = 0;
1520 			int scnt = 0;
1521 
1522 			for (i = 0, hp = *hip, ip = chosen; i < *nhdls;
1523 			    i++, hp++) {
1524 				/*
1525 				 * Remark: unbound handles never match
1526 				 * (access_type == 0)
1527 				 */
1528 				if (match_hinfo(hp, instance, atype, rset,
1529 				    offset&0x7fffffff, len&0x7fffffff)) {
1530 					msg(3, "match: 0x%x 0x%llx 0x%llx"
1531 					    " 0x%llx (0x%llx)\n",
1532 					    hp->access_type, hp->addr_cookie,
1533 					    hp->offset, hp->len,
1534 					    (hp->len & 0x7fffffff));
1535 					if (prev &&
1536 					    (prev->access_type & BOFI_DMA_RW) &&
1537 					    (hp->access_type & BOFI_DMA_RW) &&
1538 					    hp->instance == prev->instance &&
1539 					    hp->len == prev->len &&
1540 					    hp->addr_cookie ==
1541 					    prev->addr_cookie)
1542 						continue;
1543 
1544 					if ((hp->access_type & BOFI_DMA_RW) &&
1545 					    (atype & BOFI_DMA_RW) !=
1546 					    hp->access_type)
1547 						if (new_semantics)
1548 							continue;
1549 
1550 					if (prev)
1551 						msg(3, "match_hinfo: match:"
1552 						    " 0x%llx (%d %d) (%d %d)"
1553 						    " (0x%x 0x%x) (0x%llx,"
1554 						    " 0x%llx)\n",
1555 						    hp->addr_cookie,
1556 						    prev->instance,
1557 						    hp->instance, prev->rnumber,
1558 						    hp->rnumber,
1559 						    prev->access_type,
1560 						    hp->access_type, prev->len,
1561 						    hp->len);
1562 
1563 					/* it matches so remember it */
1564 					prev = *ip++ = hp;
1565 					scnt += 1;
1566 				}
1567 			}
1568 
1569 			if (*nhdls != scnt) {
1570 				/*
1571 				 * Reuse the alloc'ed memory to return
1572 				 * only those handles the user has asked for.
1573 				 * But first prune the handles to get rid of
1574 				 * overlapping ranges (they are ordered by
1575 				 * offset and length).
1576 				 */
1577 				*nhdls = scnt;
1578 				for (i = 0, hp = *hip, ip = chosen; i < scnt;
1579 				    i++, ip++, hp++)
1580 					if (hp != *ip)
1581 						(void) memcpy(hp, *ip,
1582 						    sizeof (*hp));
1583 			}
1584 			free(chosen);
1585 		}
1586 
1587 		for (i = 0, hp = *hip; i < *nhdls; i++, hp++) {
1588 			msg(4, "\t%d 0x%x %d 0x%llx 0x%llx 0x%llx\n",
1589 			    hp->instance, hp->access_type, hp->rnumber,
1590 			    hp->len, hp->offset, hp->addr_cookie);
1591 		}
1592 	}
1593 	if (*nhdls == 0 && *hip)
1594 		free(*hip);
1595 
1596 	msg(4, "get_info: %s got %d handles\n", name, *nhdls);
1597 	return (0);
1598 }
1599 
1600 static void
1601 init_sigs()
1602 {
1603 	struct sigaction sa;
1604 	int *ip, sigs[] = {SIGINT, SIGTERM, 0};
1605 
1606 	sa.sa_handler = kill_sighandler;
1607 	(void) sigemptyset(&sa.sa_mask);
1608 	for (ip = sigs; *ip; ip++)
1609 		(void) sigaddset(&sa.sa_mask, *ip);
1610 	sa.sa_flags = 0;
1611 	for (ip = sigs; *ip; ip++)
1612 		(void) sigaction(*ip, &sa, NULL);
1613 }
1614 
/*
 * Raise the soft limit of 'resource' to its hard limit.  'rname' is the
 * resource's name as it should appear in diagnostics.  Failures are reported
 * through msg() and otherwise ignored.
 */
static void
raise_rlimit(int resource, const char *rname)
{
	struct rlimit rl;

	if (getrlimit(resource, &rl) < 0) {
		msg(0, "failed to obtain %s: %s\n", rname, strerror(errno));
		return;
	}

	/* rlim_t need not be unsigned long, so convert for %lu */
	msg(12, "%s\t %lu (%lu)\n", rname,
	    (unsigned long)rl.rlim_cur, (unsigned long)rl.rlim_max);
	rl.rlim_cur = rl.rlim_max;
	if (setrlimit(resource, &rl) < 0)
		msg(0, "failed to set %s: %s\n", rname, strerror(errno));
}

/*
 * Raise the open file, data segment and file size soft limits to their
 * hard limits.
 */
static void
up_resources()
{
	/* Potentially hungry on resources so up them all to their maximums */
	raise_rlimit(RLIMIT_NOFILE, "RLIMIT_NOFILE");
	raise_rlimit(RLIMIT_DATA, "RLIMIT_DATA");
	raise_rlimit(RLIMIT_FSIZE, "RLIMIT_FSIZE");
}
1650 
1651 static FILE *
1652 create_test_file(char *drvname)
1653 {
1654 	char dirname[_POSIX_PATH_MAX];
1655 	char testname[_POSIX_PATH_MAX];
1656 	FILE *fp = 0;
1657 	time_t utc = time(NULL);
1658 
1659 	if (snprintf(dirname, sizeof (dirname), "%s.test.%lu",
1660 	    drvname, utc) == -1 ||
1661 	    snprintf(testname, sizeof (testname), "%s.test.%lu",
1662 	    drvname, utc) == -1)
1663 		return (0);
1664 
1665 	if (mkdir(dirname, S_IRWXU|S_IRGRP|S_IROTH)) {
1666 		msg(0, "Error creating %s: %s\n", dirname, strerror(errno));
1667 		return (0);
1668 	}
1669 	if (chdir(dirname)) {
1670 		(void) rmdir(dirname);
1671 		return (0);
1672 	}
1673 	if ((fp = fopen(testname, "w")) == 0)
1674 		return (0);	/* leave created directory intact */
1675 
1676 	return (fp);
1677 }
1678 
/*
 * Argument block passed by getpath() to walk_callback() for a device tree
 * walk: the driver name and instance to look for, and where to put the path
 * of the node that is found.
 */
struct walk_arg {
	char *path;		/* receives the devfs path; NULL if unavailable */
	int instance;		/* instance number to match */
	char name[MAXPATHLEN];	/* driver name to match */
	int pathlen;		/* byte limit for copies into path */
};
1685 
1686 static int
1687 walk_callback(di_node_t node, void *arg)
1688 {
1689 	struct walk_arg *warg = (struct walk_arg *)arg;
1690 	char *driver_name;
1691 	char *path;
1692 
1693 	driver_name = di_driver_name(node);
1694 	if (driver_name != NULL) {
1695 		if (strcmp(driver_name, warg->name) == NULL &&
1696 		    di_instance(node) == warg->instance) {
1697 			path = di_devfs_path(node);
1698 			if (path == NULL)
1699 				warg->path = NULL;
1700 			else
1701 				(void) strncpy(warg->path, path, warg->pathlen);
1702 			return (DI_WALK_TERMINATE);
1703 		}
1704 	}
1705 	return (DI_WALK_CONTINUE);
1706 }
1707 
1708 static int
1709 getpath(char *path, int instance, char *name, int pathlen)
1710 {
1711 	di_node_t node;
1712 	struct walk_arg warg;
1713 
1714 	warg.instance = instance;
1715 	(void) strncpy(warg.name, name, MAXPATHLEN);
1716 	warg.path = path;
1717 	warg.pathlen = pathlen;
1718 	if ((node = di_init("/", DINFOSUBTREE)) == DI_NODE_NIL)
1719 		return (-1);
1720 	if (di_walk_node(node, DI_WALK_CLDFIRST, &warg, walk_callback) == -1) {
1721 		di_fini(node);
1722 		return (-1);
1723 	}
1724 	if (warg.path == NULL) {
1725 		di_fini(node);
1726 		return (-1);
1727 	}
1728 	di_fini(node);
1729 	return (0);
1730 }
1731 
1732 /*
1733  * Record logsize h/w accesses of type 'edp->access_type' made by instance
1734  * 'edp->instance' of driver 'edp->name' to the register set (or dma handle)
1735  * 'edp->rnumber' that lie within the range 'edp->offset' to
1736  * 'edp->offset' + 'edp->len'.
1737  * Access criteria may be mixed and matched:
1738  * -	access types may be combined (PIO read/write, DMA read write or intrs);
1739  * -	if 'edp->instance' is -1 all instances are checked for the criteria;
1740  * -	if 'edp->rnumber' is -1 all register sets and dma handles are matched;
1741  * -	'offset' and 'len' indicate that only PIO and DMA accesses within the
1742  *	range 'edp->offset' to 'edp->len' will be logged. Putting 'edp->offset'
1743  *      to zero and 'edp->len' to -1ull gives maximal coverage.
1744  *
1745  * 'collecttime' is the number of seconds used to log accesses
1746  *		(default is infinity).
1747  */
1748 static void
1749 test_driver(struct bofi_errdef *edp,
1750 	unsigned long long collecttime)
1751 {
1752 	pid_t pid;
1753 	int statloc;
1754 	struct collector_def *cdefs, *cdp;
1755 	struct handle_info *hdls, *hdl;
1756 	int i, fd;
1757 	size_t cnt;
1758 	size_t nchildren;
1759 	unsigned long long timechunk;
1760 	FILE *sfp;	/* generated control test file */
1761 	char buf[MAXPATHLEN];
1762 	char devpath[MAXPATHLEN];
1763 	char *devpathp = "NULL";
1764 	int drv_inst;
1765 	int got_it = 0;
1766 
1767 	char *name = (char *)edp->name;
1768 	uint_t logsize = edp->access_count + edp->fail_count - 1;
1769 	int inst = edp->instance;
1770 	uint_t atype = edp->access_type;
1771 	int rset = edp->rnumber;
1772 	offset_t offset = edp->offset;
1773 	offset_t len = edp->len;
1774 
1775 	msg(4, "test_driver: %s %d inst %d 0x%x rset %d %llx %llx\n",
1776 	    name, logsize, inst, atype, rset, offset, len);
1777 
1778 	drv_inst = inst;
1779 	if (getpath(devpath, inst, name, MAXPATHLEN) != -1) {
1780 		devpathp = devpath;
1781 		got_it = 1;
1782 	}
1783 	if (logsize == -1U)
1784 		logsize = 0;
1785 	fd = open(BOFI_DEV, O_RDWR);
1786 	if (fd == -1) {
1787 		perror("get_hdl_info - bad open of bofi driver");
1788 		return;
1789 	}
1790 	if (got_it) {
1791 		(void) snprintf(buf, sizeof (buf),
1792 		    "th_manage /devices%s offline", devpathp);
1793 		(void) system(buf);
1794 		(void) snprintf(buf, sizeof (buf),
1795 		    "th_manage /devices%s online", devpathp);
1796 		(void) system(buf);
1797 		(void) snprintf(buf, sizeof (buf),
1798 		    "th_manage /devices%s getstate >/dev/null", devpathp);
1799 		(void) system(buf);
1800 	}
1801 	if (get_hinfo(fd, name, &hdls, &cnt,
1802 	    inst, atype, rset, offset, len, 1) != 0) {
1803 		msg(0, "driver_test: bad get_info for %d hdls\n", cnt);
1804 		return;
1805 	} else if (logsize == 0 || collecttime == 0 || cnt == 0) {
1806 		if (cnt == 0)
1807 			msg(1, "No matching handles.\n");
1808 		return;
1809 	}
1810 	if ((cdefs = GETSTRUCT(struct collector_def, cnt)) == 0) {
1811 		msg(0, "driver_test: can't get memory for %d cdefs\n", cnt);
1812 		return;
1813 	}
1814 	up_resources();
1815 	if (got_it) {
1816 		if (scriptargs > 0) {
1817 			(void) snprintf(buf, sizeof (buf),
1818 			    "DRIVER_PATH=/devices%s DRIVER_INSTANCE=%d"
1819 			    " DRIVER_UNCONFIGURE=0 DRIVER_CONFIGURE=1",
1820 			    devpathp, drv_inst);
1821 			for (i = 0; i < scriptargs; i++) {
1822 				(void) strcat(buf, " ");
1823 				(void) strcat(buf, fixup_script[i]);
1824 			}
1825 			(void) strcat(buf, " &");
1826 		} else {
1827 			(void) snprintf(buf, sizeof (buf),
1828 			    "while : ; do th_manage /devices%s online;"
1829 			    " th_manage /devices%s getstate >/dev/null;"
1830 			    " th_manage /devices%s offline;done &"
1831 			    " echo $! >/tmp/bofi.pid",
1832 			    devpathp, devpathp, devpathp);
1833 		}
1834 		(void) system(buf);
1835 		(void) snprintf(buf, sizeof (buf), "sleep %d",
1836 		    edef_sleep ? edef_sleep : DEFAULT_EDEF_SLEEP);
1837 		(void) system(buf);
1838 	}
1839 
1840 	(void) fprintf(outfile,
1841 	    "Logging accesses to instances ");
1842 	for (i = 0, inst = -1, hdl = hdls; i < cnt;
1843 	    i++, hdl++) {
1844 		if (inst != hdl->instance) {
1845 			inst = hdl->instance;
1846 			(void) fprintf(outfile, "%d ", inst);
1847 		}
1848 	}
1849 	(void) fprintf(outfile, " (%d logs of size 0x%x).\n\t"
1850 	    "(Use th_manage ... clear_errdefs to terminate"
1851 	    " logging)\n", cnt, logsize);
1852 
1853 	sfp = create_test_file(name);
1854 	/*
1855 	 * Install a logging errdef for each matching handle,
1856 	 * and then create a child to collect the log.
1857 	 * The child is responsible for activating the log.
1858 	 */
1859 	for (i = 0, cdp = cdefs, hdl = hdls, nchildren = 0;
1860 	    i < cnt; i++, cdp++, hdl++) {
1861 		if (add_edef(fd, &cdp->ed, &cdp->es, hdl, edp) != 0) {
1862 			cdp->lp = 0;
1863 			cdp->pid = 0;
1864 		} else {
1865 			cdp->lp = (void *)cdp->ed.log.logbase;
1866 			msg(1, "test_driver: thr_create:"
1867 			    " lsize 0x%x 0x%x at 0x%x\n",
1868 			    cdp->es.log.logsize,
1869 			    cdp->ed.log.logsize,
1870 			    cdp->ed.access_type);
1871 			if ((pid = fork()) == -1) {
1872 				msg(0, "fork failed for handle"
1873 				    " %d: %s\n", i, strerror(errno));
1874 				cdp->pid = 0;	/* ignore */
1875 			} else if (pid == 0) {
1876 				thr_collect(cdp, devpathp);
1877 			} else {
1878 				cdp->pid = pid;
1879 				nchildren += 1;
1880 			}
1881 		}
1882 	}
1883 
1884 	if (nchildren != 0) {
1885 		if (sfp) {
1886 			(void) fprintf(sfp, "#!/bin/ksh -p\n\n");
1887 			(void) fprintf(sfp,
1888 			    "\n# Test control script generated using:\n#");
1889 			for (i = 0; i < pargc; i++)
1890 				(void) fprintf(sfp, " %s", pargv[i]);
1891 			(void) fprintf(sfp, "\n\n");
1892 			(void) fprintf(sfp, "\nrun_tests()\n{\n");
1893 			for (i = 0, cdp = cdefs; i < cnt; i++, cdp++)
1894 				if (cdp->pid) {
1895 					(void) fprintf(sfp,
1896 					    "\tif [ -x ./%s.%d ]\n\tthen\n",
1897 					    name, (int)cdp->pid);
1898 					(void) fprintf(sfp,
1899 					    "\t\techo \"Starting test"
1900 					    " %d (id %d)\"\n",
1901 					    i, (int)cdp->pid);
1902 					(void) fprintf(sfp, "\t\t./%s.%d\n",
1903 					    name, (int)cdp->pid);
1904 					(void) fprintf(sfp, "\t\techo \""
1905 					    "Test %d (id %d) complete\"\n",
1906 					    i, (int)cdp->pid);
1907 					(void) fprintf(sfp, "\tfi\n");
1908 				}
1909 			(void) fprintf(sfp, "}\n\nrun_tests\n");
1910 			if (fchmod(fileno(sfp), S_IRWXU|S_IRGRP|S_IROTH))
1911 				msg(0, "fchmod on control script failed: %s\n",
1912 				    strerror(errno));
1913 			if (fclose(sfp) != 0)
1914 				msg(0, "Error closing control script: %s\n",
1915 				    strerror(errno));
1916 		}
1917 
1918 		set_handler(SIGALRM);	/* handle it */
1919 		/*
1920 		 * The user may want to terminate logging before the log fills
1921 		 * so use a timer to signal the logging children to handle this
1922 		 * case.
1923 		 */
1924 		timechunk = collecttime / MAXALRMCALL;
1925 		collecttime = collecttime - timechunk * MAXALRMCALL;
1926 
1927 		msg(2, "logging for (0x%llx 0x%llx)\n", timechunk, collecttime);
1928 
1929 		(void) alarm(collecttime); /* odd bit of collect time */
1930 
1931 		/* wait for the log to fill or deadline satisfied */
1932 		for (;;) {
1933 			pid = wait(&statloc);
1934 			for (i = 0, nchildren = 0, cdp = cdefs;
1935 			    i < cnt; i++, cdp++)
1936 				if (cdp->pid == pid)
1937 					cdp->pid = 0;
1938 			for (i = 0, nchildren = 0, cdp = cdefs;
1939 			    i < cnt; i++, cdp++)
1940 				if (cdp->pid)
1941 					nchildren++;
1942 			if (nchildren == 0)
1943 				break;
1944 			if (killed)
1945 				break;
1946 			if (alarmed) {
1947 				if (timechunk-- > 0) {
1948 					/*
1949 					 * prepare for the next timeslice by
1950 					 * rearming the clock
1951 					 */
1952 					if (alarm(MAXALRMCALL) == 0)
1953 						alarmed = 0;
1954 					else {
1955 						/*
1956 						 * must have been a user abort
1957 						 * (via SIGALRM)
1958 						 */
1959 						(void) alarm(0);
1960 						break;
1961 					}
1962 				} else
1963 					break;
1964 			}
1965 		}
1966 
1967 		(void) fprintf(outfile, "Logging complete.\n");
1968 	}
1969 	if (got_it) {
1970 		if (scriptargs > 0) {
1971 			(void) snprintf(buf, sizeof (buf),
1972 			    "DRIVER_PATH=/devices%s DRIVER_INSTANCE=%d"
1973 			    " DRIVER_UNCONFIGURE=1 DRIVER_CONFIGURE=0",
1974 			    devpathp, drv_inst);
1975 			for (i = 0; i < scriptargs; i++) {
1976 				(void) strcat(buf, " ");
1977 				(void) strcat(buf, fixup_script[i]);
1978 			}
1979 			(void) system(buf);
1980 		} else {
1981 			(void) system("kill `cat /tmp/bofi.pid`");
1982 		}
1983 	}
1984 	msg(2, "test_driver: terminating\n");
1985 }
1986 
1987 static int
1988 getnameinst(char *orig_path, int *instance, char *name, int namelen)
1989 {
1990 	di_node_t node;
1991 	char *binding_name;
1992 
1993 	if ((node = di_init(&orig_path[8], DINFOSUBTREE|DINFOMINOR)) ==
1994 	    DI_NODE_NIL)
1995 		return (-1);
1996 	if ((binding_name = di_driver_name(node)) == NULL)
1997 		return (-1);
1998 	*instance = di_instance(node);
1999 	(void) strncpy(name, binding_name, namelen);
2000 	di_fini(node);
2001 	return (0);
2002 }
2003 
/*
 * Usage text printed after "usage: <progname> " by main().  The first form
 * describes error injection, the second describes access logging (-a LOG)
 * and the third requests help.  -C takes a comment string and -e consumes
 * the script name plus every argument that follows it.
 */
static char syntax[] =
	"          [ -n name [ -i instance ] | -P path ]\n"
	"          [ -a acc_types ] [ -r rnumber ]\n"
	"          [ -l offset [ length ] ] [ -c count [ failcount ] ]\n"
	"          [ -o operator [ operand ] ] [ -f acc_chk  ]\n"
	"          [ -w max_wait_period [ report_interval ] ]\n"
	"     or\n"
	"          [ -n name [ -i instance ] | -P path ]\n"
	"          -a  LOG  [  acc_types ]  [ -r rnumber]\n"
	"          [ -l offset [ length ] ] [ -c count [ failcount ] ]\n"
	"          [ -s collect_time ] [ -p policy ] [ -x flags ]\n"
	"          [ -C comment_text ] [ -e fixup_script [ args ] ]\n"
	"     or\n"
	"          -h";
2018 
2019 int
2020 main(int argc, char *argv[])
2021 {
2022 	extern char *optarg;
2023 	extern int optind;
2024 
2025 	char	c;		/* for parsing getopts */
2026 	int	nopts = 0;	/* for backward compatibility */
2027 	int	err = 0;
2028 
2029 	/* use a maximal set of defaults for logging or injecting */
2030 	struct bofi_errdef errdef = {
2031 		0,		/* length of driver name */
2032 		{0},		/* driver name */
2033 		-1,		/* monitor all instances */
2034 		-1,		/* monitor all register sets and DMA handles */
2035 		(offset_t)0,	/* monitor from start of reg. set or DMA hd */
2036 		myLLMAX,	/* monitor whole reg set or DMA hdl(no LLMAX) */
2037 		0,		/* qualify all */
2038 		DFLTLOGSZ,	/* default no. of accesses before corrupting */
2039 		0u,		/* default no. of accesses to corrupt */
2040 		0u,		/* no check access corruption */
2041 		BOFI_NOP,	/* no corruption operator by default */
2042 		myULLMAX,	/* default operand */
2043 		{0, 0, BOFI_LOG_TIMESTAMP, /* timestamp by default */
2044 		0, 0, 0, 0},	/* no logging by default */
2045 		0};
2046 
2047 
2048 	/* specify the default no of seconds for which to monitor */
2049 	unsigned long long	collecttime = DFLTLOGTIME;
2050 
2051 	char	*str;	/* temporary variable */
2052 	long	tmpl;	/* another one */
2053 	int		i;
2054 	uint_t	tmpui;
2055 
2056 	char buf[MAXPATHLEN];
2057 
2058 	Progname = (char *)strrchr(*argv, '/');
2059 	Progname = (Progname == NULL) ? *argv : Progname + 1;
2060 
2061 	errfile = stderr;
2062 	outfile = stdout;
2063 	policy = 0;
2064 	lsize_is_default = 1;
2065 	pargv = argv;
2066 	pargc = argc;
2067 
2068 	while ((c = getopt(argc, argv, "a:c:C:dD:e:f:h:i:l:n:o:p:P:r:s:tw:x"))
2069 	    != EOF) {
2070 		nopts++;
2071 		switch (c) {
2072 		case 'a':
2073 			msg(2, "option a: optarg %s optind %d argc %d\n",
2074 			    optarg, optind, argc);
2075 			if ((err = str_to_bm(optarg, atypes,
2076 			    &errdef.access_type)) == 0)
2077 				while (optind < argc && *argv[optind] != '-') {
2078 					if ((err = str_to_bm(argv[optind++],
2079 					    atypes, &errdef.access_type)))
2080 						break;
2081 				}
2082 			break;
2083 		case 'c':
2084 			lsize_is_default = 0;
2085 			/* zero is valid */
2086 			errdef.access_count = strtoul(optarg, &str, 0);
2087 			if (str == optarg)
2088 				err = EINVAL;
2089 			else if (optind < argc && (argv[optind][0] != '-' ||
2090 			    (strlen(argv[optind]) > 1 &&
2091 			    isdigit(argv[optind][1]))))
2092 				errdef.fail_count =
2093 				    strtoull(argv[optind++], 0, 0);
2094 			break;
2095 		case 'C':
2096 			user_comment = optarg;
2097 			if (optind < argc && argv[optind][0] != '-')
2098 				err = EINVAL;
2099 			break;
2100 		case 'D':
2101 			dbglvl = strtoul(optarg, &str, 0);
2102 			break;
2103 		case 'e':
2104 			fixup_script = 0;
2105 			scriptargs = 0;
2106 			fixup_script = &argv[optind - 1];
2107 			scriptargs += 1;
2108 			while (optind < argc) {
2109 				optind += 1;
2110 				scriptargs += 1;
2111 			}
2112 			break;
2113 		case 'f':
2114 			tmpl = strtol(optarg, &str, 0);
2115 
2116 			if (str != optarg)
2117 				errdef.acc_chk = tmpl;
2118 			else if (strcmp(optarg, "PIO") == NULL)
2119 				errdef.acc_chk = 1;
2120 			else if (strcmp(optarg, "DMA") == NULL)
2121 				errdef.acc_chk = 2;
2122 			else if (strcmp(optarg, "U4FT_ACC_NO_PIO") == NULL)
2123 				errdef.acc_chk = 1;
2124 			else if (strcmp(optarg, "U4FT_ACC_NO_DMA") == NULL)
2125 				errdef.acc_chk = 2;
2126 			else
2127 				err = EINVAL;
2128 			break;
2129 		case 'i':
2130 			if ((errdef.instance = strtol(optarg, &str, 0)) < 0)
2131 				errdef.instance = -1;
2132 			else if (str == optarg)
2133 				err = EINVAL;
2134 			break;
2135 		case 'l':
2136 			errdef.offset = strtoull(optarg, &str, 0);
2137 			if (str == optarg)
2138 				err = EINVAL;
2139 			else if (optind < argc &&
2140 			    (argv[optind][0] != '-' ||
2141 			    (strlen(argv[optind]) > 1 &&
2142 			    isdigit(argv[optind][1])))) {
2143 				/* -1 indicates the rest of register set */
2144 				errdef.len = strtoull(argv[optind++], 0, 0);
2145 			}
2146 			break;
2147 		case 'n':
2148 			(void) strncpy(errdef.name, optarg, MAXNAMELEN);
2149 			if ((errdef.namesize = strlen(errdef.name)) == 0)
2150 				err = EINVAL;
2151 			break;
2152 		case 'o':
2153 			for (i = 0; optypes[i].str != 0; i++)
2154 				if (strcmp(optarg, optypes[i].str) == 0) {
2155 					errdef.optype = optypes[i].code;
2156 					break;
2157 				}
2158 			if (optypes[i].str == 0)
2159 				err = EINVAL;
2160 			else if (optind < argc &&
2161 			    (argv[optind][0] != '-' ||
2162 			    (strlen(argv[optind]) > 1 &&
2163 			    isdigit(argv[optind][1]))))
2164 				errdef.operand =
2165 				    strtoull(argv[optind++], 0, 0);
2166 			break;
2167 		case 'p':
2168 			tmpui = 0x0u;
2169 			if ((err = str_to_bm(optarg, ptypes, &tmpui)) == 0) {
2170 				while (optind < argc && *argv[optind] != '-')
2171 					if ((err = str_to_bm(argv[optind++],
2172 					    ptypes, &tmpui)))
2173 						break;
2174 				policy = (uint16_t)tmpui;
2175 			}
2176 			if (err == 0 && (policy & BYTEPOLICY))
2177 				errdef.log.flags |= BOFI_LOG_REPIO;
2178 			break;
2179 		case 'P':
2180 			if (getnameinst(optarg, &errdef.instance, buf,
2181 			    MAXPATHLEN) == -1)
2182 				err = EINVAL;
2183 			else
2184 				(void) strncpy(errdef.name, buf, MAXNAMELEN);
2185 			break;
2186 		case 'r':
2187 			if ((errdef.rnumber = strtol(optarg, &str, 0)) < 0)
2188 				errdef.rnumber = -1;
2189 			if (str == optarg) err = EINVAL;
2190 			break;
2191 		case 's':
2192 			collecttime = strtoull(optarg, &str, 0);
2193 			if (str == optarg)
2194 				err = EINVAL;	/* zero is valid */
2195 			break;
2196 		case 'w':
2197 			do_status = 1;
2198 			max_edef_wait = strtoul(optarg, &str, 0);
2199 			/* zero is valid */
2200 			if (str == optarg)
2201 				err = EINVAL;
2202 			else if (optind < argc &&
2203 			    (argv[optind][0] != '-' ||
2204 			    (strlen(argv[optind]) > 1 &&
2205 			    isdigit(argv[optind][1]))))
2206 				edef_sleep = strtoull(argv[optind++], 0, 0);
2207 
2208 			break;
2209 		case 'x':
2210 			if ((optind < argc && *argv[optind] == '-') ||
2211 			    optind == argc)
2212 				errdef.log.flags |= BOFI_LOG_WRAP;
2213 			else {
2214 				if (strchr(argv[optind], 'w') != 0)
2215 					errdef.log.flags |= BOFI_LOG_WRAP;
2216 				if (strchr(argv[optind], 'r') != 0)
2217 					errdef.log.flags |= BOFI_LOG_REPIO;
2218 				if (strchr(argv[optind], 't') != 0)
2219 					errdef.log.flags |= BOFI_LOG_TIMESTAMP;
2220 				if (strstr(argv[optind], "~t") != 0)
2221 					errdef.log.flags &= ~BOFI_LOG_TIMESTAMP;
2222 				optind++;
2223 			}
2224 			break;
2225 		case 'h':
2226 			(void) fprintf(errfile, "usage: %s %s\n",
2227 			    Progname, syntax);
2228 			exit(0);
2229 			break;
2230 		case '?':	/* also picks up missing parameters */
2231 		default:
2232 			(void) fprintf(errfile, "usage: %s %s\n",
2233 			    Progname, syntax);
2234 			exit(2);
2235 		}
2236 
2237 		if (err) {
2238 			(void) fprintf(errfile, "usage: %s %s\n",
2239 			    Progname, syntax);
2240 			exit(2);
2241 		}
2242 		if (c == 'e')
2243 			break;	/* the -e option must be the final option */
2244 	}
2245 
2246 
2247 	if (errdef.name[0] == 0) {
2248 		msg(0, "%s - invalid name parameter\n", Progname);
2249 		exit(1);
2250 	}
2251 	errdef.namesize = strlen(errdef.name);
2252 
2253 	if (policy == 0) {
2254 		policy |= UNBIASEDPOLICY;
2255 		policy |= OPERATORSPOLICY;
2256 	}
2257 
2258 	if (errdef.optype == BOFI_NOP)
2259 		errdef.optype = BOFI_XOR;
2260 	if (errdef.access_type == BOFI_LOG) { /* qualify all accesses */
2261 		errdef.access_type =
2262 		    (BOFI_LOG|BOFI_DMA_RW|BOFI_PIO_RW|BOFI_INTR);
2263 		atype_is_default = 1;
2264 	} else if (errdef.access_type == 0) { /* qualify all accesses */
2265 		errdef.access_type =
2266 		    (BOFI_DMA_RW|BOFI_PIO_RW|BOFI_INTR);
2267 		atype_is_default = 1;
2268 	} else
2269 		atype_is_default = 0;
2270 
2271 	init_sigs();
2272 	if ((errdef.access_type & BOFI_LOG) == 0) {
2273 		int fd, i, instance;
2274 		size_t cnt;
2275 		struct handle_info *hdls, *hp;
2276 
2277 		if ((fd = open(BOFI_DEV, O_RDWR)) == -1) {
2278 			msg(0, "%s: error opening bofi driver: %s\n",
2279 			    Progname, strerror(errno));
2280 			exit(1);
2281 		}
2282 		if ((err = get_hinfo(fd, errdef.name, &hdls, &cnt,
2283 		    errdef.instance, errdef.access_type, errdef.rnumber,
2284 		    errdef.offset, errdef.len, 0)) != 0) {
2285 			msg(0, "%s: Bad lookup on bofi driver.\n", Progname);
2286 			(void) close(fd);
2287 			exit(1);
2288 		} else if (cnt == 0) {
2289 			msg(0,
2290 			    "%s: No handles match request access criteria.\n",
2291 			    Progname);
2292 			(void) close(fd);
2293 			exit(1);
2294 		}
2295 		if (errdef.instance == -1)
2296 			instance = -1;
2297 		else {
2298 			instance = hdls->instance;
2299 			for (i = 0, hp = hdls; i < cnt; i++, hp++) {
2300 				if (instance != hp->instance) {
2301 					instance = -1;
2302 					break;
2303 				}
2304 			}
2305 		}
2306 		if (instance == -1) {
2307 			msg(0, "Multiple instances match access criteria"
2308 			    " (only allowed when logging):\n");
2309 			msg(0, "\tinst\taccess\trnumber\toffset\tlength\n");
2310 			for (i = 0, hp = hdls; i < cnt; i++, hp++)
2311 				msg(0, "\t%d\t0x%x\t%d\t0x%llx\t0x%llx\n",
2312 				    hp->instance, hp->access_type,
2313 				    hp->rnumber, hp->offset, hp->len);
2314 		} else {
2315 			struct bofi_errstate es;
2316 			int timeleft = max_edef_wait;
2317 
2318 			if (ioctl(fd, BOFI_ADD_DEF, &errdef) == -1) {
2319 				perror("th_define - adding errdef failed");
2320 			} else {
2321 				es.errdef_handle = errdef.errdef_handle;
2322 				msg(4, "waiting for edef:"
2323 				    " %d %s %d %d 0x%llx 0x%llx 0x%x 0x%x"
2324 				    " 0x%x 0x%x 0x%x 0x%llx\n",
2325 				    errdef.namesize, errdef.name,
2326 				    errdef.instance, errdef.rnumber,
2327 				    errdef.offset, errdef.len,
2328 				    errdef.access_type, errdef.access_count,
2329 				    errdef.fail_count, errdef.acc_chk,
2330 				    errdef.optype, errdef.operand);
2331 
2332 				set_handler(SIGALRM);	/* handle it */
2333 
2334 				do {
2335 					if (do_status)
2336 						(void) alarm(edef_sleep);
2337 					if (ioctl(fd, BOFI_CHK_STATE_W,
2338 					    &es) == -1) {
2339 						if (errno != EINTR) {
2340 							perror("bad"
2341 							    " BOFI_CHK_STATE");
2342 							break;
2343 						} else if (!do_status) {
2344 							break;
2345 						}
2346 					}
2347 					if (do_status)
2348 						(void) fprintf(outfile,
2349 						    "%llu:%llu:%u:%u:%u:"
2350 						    "%u:%d:\"%s\"\n",
2351 						    es.fail_time, es.msg_time,
2352 						    es.access_count,
2353 						    es.fail_count,
2354 						    es.acc_chk, es.errmsg_count,
2355 						    (uint_t)es.severity,
2356 						    (es.msg_time) ?
2357 						    es.buffer : "");
2358 					if (es.acc_chk == 0 &&
2359 					    es.fail_count == 0 && !do_status)
2360 						print_err_reports(outfile,
2361 						    &es, "", "", -1);
2362 					else if (alarmed) {
2363 						alarmed = 0;
2364 						if ((timeleft -= edef_sleep) <=
2365 						    0) {
2366 							if (!do_status)
2367 							    print_err_reports(
2368 								outfile,
2369 								&es, "",
2370 								"", -1);
2371 							break;
2372 						}
2373 					} else if (!do_status)
2374 						print_err_reports(outfile,
2375 						    &es, "", "", -1);
2376 				} while (es.acc_chk != 0 || es.fail_count != 0);
2377 
2378 				msg(2, "done: acc_chk 0x%x fcnt %d\n",
2379 				    es.acc_chk, es.fail_count);
2380 			}
2381 
2382 			(void) close(fd);
2383 		}
2384 		free(hdls);
2385 		return (0);
2386 	}
2387 	test_driver(&errdef, collecttime);
2388 	return (0);
2389 }
2390