1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #pragma ident "%Z%%M% %I% %E% SMI"
27
28 #include <sys/time_impl.h>
29 #include <sys/wait.h>
30 #include <stdio.h>
31 #include <stdio_ext.h>
32 #include <stdlib.h>
33 #include <stdarg.h>
34 #include <ctype.h>
35 #include <time.h>
36 #include <fcntl.h>
37 #include <sys/stat.h>
38 #include <sys/resource.h>
39 #include <limits.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <errno.h>
43 #include <signal.h>
44 #include <libdevinfo.h>
45 #define _KERNEL
46 #include <sys/dditypes.h>
47 #include <sys/sunddi.h>
48 #include <sys/bofi.h>
49
/* Path of the bofi driver's control device node. */
#define	BOFI_DEV "/devices/pseudo/bofi@0:bofi,ctl"

/*
 * Allocate room for num objects of type s, aligned to the size of a pointer.
 * Expands to an expression of type (s *); the macro itself does not check
 * the value returned by memalign().
 */
#define	GETSTRUCT(s, num) \
	((s *) memalign(sizeof (void*), (num) * sizeof (s)))

/* controls max no of concurrent edefs */
#define	MAXEDEFS (0x64)
/* default size of an access log */
#define	DFLTLOGSZ (0x4000)
/* default size of a log */
#define	DFLT_NONPIO_LOGSZ (0x400)
/* alarm does not permit big values */
#define	MAXALRMCALL (0x1000ull)
/* min time to wait for edef status */
#define	MIN_REPORT_TIME (5)
/* useful when reducing a log */
#define	DISTRIB_CUTOFF (3)
/* largest positive value of a signed 64-bit quantity */
#define	myLLMAX (0x7fffffffffffffffll)
/* unsigned 64-bit value with every bit set */
#define	myULLMAX (0xffffffffffffffffull)
63
/*
 * Default interval to wait between kicking off the workload and injecting
 * the fault.
 */
#define	DEFAULT_EDEF_SLEEP 3

/*
 * When generating dma corruptions it is best to corrupt each double word
 * individually for control areas.  For data areas, however, this can be
 * excessive and would generate so many cases that the run would never
 * finish.  So set a cut-off value at which we switch from corrupting each
 * double word separately to corrupting the lot in one go.  0x100 bytes seems
 * a good value on the drivers we have seen so far.
 */
#define	DMA_INDIVIDUAL_CORRUPT_CUTOFF 0x100
77
78 struct collector_def {
79 struct bofi_errdef ed; /* definition of the log criteria */
80 struct bofi_errstate es; /* the current status of the log */
81 struct acc_log_elem *lp; /* array of logged accesses */
82 pid_t pid;
83 };
84
/*
 * The log -> errdef conversion policy currently in force: a bitmask made up
 * of the *POLICY bits defined below (the keyword for each bit is listed in
 * the ptypes[] table).
 */
static uint16_t policy;

/* the four transfer-size bits (0x1, 0x2, 0x4 and 0x8) */
#define	BYTEPOLICY (0xf)
/* the "multibyte" bit */
#define	MULTIPOLICY (0x10)
/* every bit that makes the policy sensitive to the transfer size */
#define	SIZEPOLICY (BYTEPOLICY|MULTIPOLICY)
#define	UNBIASEDPOLICY 0x20
#define	UNCOMMONPOLICY 0x40
#define	COMMONPOLICY 0x80
#define	MEDIANPOLICY 0x100
#define	MAXIMALPOLICY 0x200
#define	OPERATORSPOLICY 0x400
/* union of every policy bit defined above */
#define	VALIDPOLICY (0x7ff)
97
98 typedef
99 struct coding {
100 char *str;
101 uint_t code;
102 } coding_t;
103
104 static coding_t ptypes[] = {
105 {"onebyte", 0x1}, {"twobyte", 0x2},
106 {"fourbyte", 0x4}, {"eightbyte", 0x8},
107 {"multibyte", 0x10}, {"unbiased", 0x20}, {"uncommon", 0x40},
108 {"common", 0x80}, {"median", 0x100}, {"maximal", 0x200},
109 {"operators", 0x400}, {0, 0}
110 };
111 static coding_t atypes[] = {
112 {"pio_r", BOFI_PIO_R}, {"pio_w", BOFI_PIO_W},
113 {"dma_r", BOFI_DMA_R}, {"dma_w", BOFI_DMA_W},
114 {"pio", BOFI_PIO_RW}, {"dma", BOFI_DMA_RW},
115 {"log", BOFI_LOG}, {"intr", BOFI_INTR},
116 {"PIO_R", BOFI_PIO_R}, {"PIO_W", BOFI_PIO_W},
117 {"DMA_R", BOFI_DMA_R}, {"DMA_W", BOFI_DMA_W},
118 {"PIO", BOFI_PIO_RW}, {"DMA", BOFI_DMA_RW},
119 {"LOG", BOFI_LOG}, {"INTR", BOFI_INTR}, {0, 0}
120 };
121 static coding_t optypes[] = {
122 {"EQ", BOFI_EQUAL}, {"AND", BOFI_AND}, {"OR", BOFI_OR},
123 {"XOR", BOFI_XOR}, {"NO", BOFI_NO_TRANSFER},
124 {"DELAY", BOFI_DELAY_INTR}, {"LOSE", BOFI_LOSE_INTR},
125 {"EXTRA", BOFI_EXTRA_INTR}, {0, 0}
126 };
127 static coding_t doptypes[] = {
128 {"EQ", BOFI_EQUAL}, {"AND", BOFI_AND}, {"OR", BOFI_OR},
129 {"XOR", BOFI_XOR}, {0, 0}
130 };
131 static coding_t ioptypes[] = {
132 {"DELAY", BOFI_DELAY_INTR}, {"LOSE", BOFI_LOSE_INTR},
133 {"EXTRA", BOFI_EXTRA_INTR}, {0, 0}
134 };
135
136 static const unsigned long long DFLTLOGTIME = -1ull; /* log forever */
137
138 /*
139 * This global controls the generation of errdefs for PIO_W. The default should
140 * be to only perform an access check errdef but not to corrupt writes - this
141 * may trash non-FT platforms.
142 */
143 static uint_t atype_is_default; /* do not corrupt PIO_W by default */
144 static uint_t lsize_is_default; /* set when the user has not given a size */
145
146 static uint64_t random_operand = 0xdeadbeafdeadbeafull;
147 #define NPIO_DEFAULTS (3) /* number of default corruption values */
148 static longlong_t pio_default_values[NPIO_DEFAULTS] = {
149 0x0ull, /* corresponds to a line going high/low */
150 0x32f1f03232f1f032ull, /* the value returned when the fake ta is set */
151 (longlong_t)(~0) /* corresponds to a line going high/low */
152 };
153
154 static uint_t dbglvl = 0; /* debug this program */
155 static int alarmed = 0;
156 static int killed = 0;
157
/*
 * name of a script to call before offlining a driver being tested
 */
static char **fixup_script = NULL;
static int scriptargs = 0;

/* NOTE(review): presumably a saved copy of the program's arguments - confirm */
static char **pargv;
static int pargc;

/* upper bound applied to errdef wait times; 0 means no limit was given */
static int max_edef_wait = 0;
/* interval written after "-w"; 0 selects DEFAULT_EDEF_SLEEP */
static int edef_sleep = 0;
/* report edef status in parsable format */
static int do_status = 0;
static char *user_comment = NULL;

static char *Progname;
/* stream used by msg() for diagnostics */
static FILE *errfile;
/* fallback stream for generated scripts when no file can be created */
static FILE *outfile;
174
175 /*
176 * The th_define utility provides an interface to the bus_ops fault injection
177 * bofi device driver for defining error injection specifications (referred to
178 * as errdefs). An errdef corresponds to a specification of how to corrupt a
179 * device driver's accesses to its hardware. The command line arguments
180 * determine the precise nature of the fault to be injected. If the supplied
181 * arguments define a consistent errdef, the th_define process will store the
182 * errdef with the bofi driver and suspend itself until the criteria given by
183 * the errdef become satisfied (in practice, this will occur when the access
184 * counts go to zero).
185 *
186 * When the resulting errdef is activated using the th_manage(1M) user command
187 * utility, the bofi driver will act upon the errdef by matching the number of
188 * hardware accesses - specified in count, that are of the type specified in
189 * acc_types, made by instance number instance - of the driver whose name is
 * name, (or by the driver instance specified by path) to the register set
191 * (or DMA handle) specified by rnumber, that lie within the range offset to
192 * offset + length from the beginning of the register set or DMA handle. It then
193 * applies operator and operand to the next failcount matching accesses.
194 *
195 * If acc_types includes LOG, th_define runs in automatic test script generation
196 * mode, and a set of test scripts (written in the Korn shell) is created and
197 * placed in a sub-directory of the current directory with the name
198 * driver.test.<id>. A separate, executable script is generated for each access
199 * handle that matches the logging criteria. The log of accesses is placed at
200 * the top of each script as a record of the session. If the current directory
201 * is not writable, file output is written to standard output. The base name of
202 * each test file is the driver name, and the extension is a number that
203 * discriminates between different access handles. A control script (with the
204 * same name as the created test directory) is generated that will run all the
205 * test scripts sequentially.
206 *
207 * Executing the scripts will install, and then activate, the resulting error
208 * definitions. Error definitions are activated sequentially and the driver
209 * instance under test is taken offline and brought back online before each test
210 * (refer to the -e option for more information). By default, logging will apply
211 * to all PIO accesses, interrupts and DMA accesses to and from areas mapped
212 * for both reading and writing, but it can be constrained by specifying
213 * additional acc_types, rnumber, offset and length. Logging will continue for
214 * count matching accesses, with an optional time limit of collect_time seconds.
215 *
216 * Either the -n or -P option must be provided. The other options are optional.
217 * If an option (other than the -a option) is specified multiple times, only
218 * the final value for the option is used. If an option is not specified, its
219 * associated value is set to an appropriate default, which will provide
220 * maximal error coverage as described below.
221 */
222
223 /*PRINTFLIKE2*/
224 static void
msg(uint_t lvl,char * msg,...)225 msg(uint_t lvl, char *msg, ...)
226 {
227 #define BUFSZ 128
228
229 if (lvl <= dbglvl) {
230 int count;
231 va_list args;
232 char buf[BUFSZ];
233 int pos = 0;
234
235 va_start(args, msg);
236 count = vsnprintf(buf, BUFSZ, msg, args);
237 va_end(args);
238 if (count > 0) {
239 count += pos;
240 if (count >= sizeof (buf))
241 count = BUFSZ - 1;
242 buf[count] = '\0';
243 (void) fprintf(errfile, "%s", buf);
244 }
245 }
246 }
247
248 static void
kill_sighandler(int sig)249 kill_sighandler(int sig)
250 {
251 switch (sig) {
252 case SIGALRM:
253 alarmed = 1;
254 break;
255 default:
256 killed = 1;
257 break;
258 }
259 }
260
261 static void
set_handler(int sig)262 set_handler(int sig)
263 {
264 struct sigaction sa;
265
266 (void) sigfillset(&(sa.sa_mask));
267 sa.sa_flags = 0;
268 sa.sa_handler = kill_sighandler;
269 if (sigaction(sig, &sa, NULL) != 0)
270 /* install handler */
271 msg(0, "bad sigaction: %s\n", strerror(errno));
272 }
273
274 /*
275 * Compare two driver access handles
276 */
277 static int
hdl_cmp(const void * p1,const void * p2)278 hdl_cmp(const void *p1, const void *p2)
279 {
280 struct handle_info *e1 = (struct handle_info *)p1;
281 struct handle_info *e2 = (struct handle_info *)p2;
282
283 if (e1->instance < e2->instance)
284 return (-1);
285 else if (e1->instance > e2->instance)
286 return (1);
287 else if (e1->access_type < e2->access_type)
288 return (-1);
289 else if (e1->access_type > e2->access_type)
290 return (1);
291 else if (e1->rnumber < e2->rnumber)
292 return (-1);
293 else if (e1->rnumber > e2->rnumber)
294 return (1);
295 else if (e1->len < e2->len)
296 return (-1);
297 else if (e1->len > e2->len)
298 return (1);
299 else if (e1->offset < e2->offset)
300 return (-1);
301 else if (e1->offset > e2->offset)
302 return (1);
303 else if (e1->addr_cookie < e2->addr_cookie)
304 return (-1);
305 else if (e1->addr_cookie > e2->addr_cookie)
306 return (1);
307 else
308 return (0);
309 }
310
311 /*
312 * Compare two hardware accesses.
313 */
314 static int
elem_cmp(const void * p1,const void * p2)315 elem_cmp(const void *p1, const void *p2)
316 {
317 struct acc_log_elem *e1 = (struct acc_log_elem *)p1;
318 struct acc_log_elem *e2 = (struct acc_log_elem *)p2;
319
320 if (e1->access_type < e2->access_type)
321 return (-1);
322 else if (e1->access_type > e2->access_type)
323 return (1);
324 else if (e1->offset < e2->offset)
325 return (-1);
326 else if (e1->offset > e2->offset)
327 return (1);
328 else if (e1->size < e2->size)
329 return (-1);
330 else if (e1->size > e2->size)
331 return (1);
332 else
333 return (0);
334 }
335
336 /*
337 * Another way of comparing two hardware accesses.
338 */
339 static int
log_cmp(const void * p1,const void * p2)340 log_cmp(const void *p1, const void *p2)
341 {
342 struct acc_log_elem *e1 = (struct acc_log_elem *)p1;
343 struct acc_log_elem *e2 = (struct acc_log_elem *)p2;
344
345 int rval = elem_cmp(p1, p2);
346
347 if (rval == 0)
348 if (e1->repcount < e2->repcount)
349 return (-1);
350 else if (e1->repcount > e2->repcount)
351 return (1);
352 else
353 return (0);
354 else
355 return (rval);
356 }
357
358 /*
359 * And a final way of sorting a log (by access type followed by repcount).
360 */
361 static int
log_cmp2(const void * p1,const void * p2)362 log_cmp2(const void *p1, const void *p2)
363 {
364 struct acc_log_elem *e1 = (struct acc_log_elem *)p1;
365 struct acc_log_elem *e2 = (struct acc_log_elem *)p2;
366
367 if (e1->access_type < e2->access_type)
368 return (-1);
369 else if (e1->access_type > e2->access_type)
370 return (1);
371 else if (e1->repcount < e2->repcount)
372 return (-1);
373 else if (e1->repcount > e2->repcount)
374 return (1);
375 else
376 return (0);
377 }
378
379 static void
dump_log(uint_t lvl,FILE * fp,struct acc_log_elem * items,size_t nitems,uint_t logflags)380 dump_log(uint_t lvl, FILE *fp, struct acc_log_elem *items,
381 size_t nitems, uint_t logflags)
382 {
383 if (lvl <= dbglvl) {
384 int i;
385 uint_t offset, allthesame = 1;
386
387 if (logflags & BOFI_LOG_TIMESTAMP &&
388 getenv("DUMP_FULL_LOG") != 0)
389 allthesame = 0;
390 else
391 for (i = 1; i < nitems; i++)
392 if (elem_cmp(items+i, items) != 0)
393 allthesame = 0;
394 if (fp != 0)
395 (void) fprintf(fp,
396 "# Logged Accesses:\n# %-4s\t%-12s\t%-4s\t%-18s"
397 " (%-1s)\t%-10s\n\n", "type",
398 (items->access_type & BOFI_DMA_RW) ?
399 "address" : "offset",
400 "size", "value", "repcnt", "time");
401
402 for (i = 0; i < nitems; i++, items++) {
403 offset = items->offset;
404 if (fp != 0) {
405 (void) fprintf(fp,
406 "# 0x%-2x\t0x%-10x\t%-4d\t0x%-16llx"
407 " (0x%-1x)\t%-8llu\n",
408 items->access_type, offset, items->size,
409 items->value, items->repcount,
410 (logflags & BOFI_LOG_TIMESTAMP) ?
411 items->access_time : 0ull);
412
413 if (allthesame) {
414 (void) fprintf(fp,
415 "# Access duplicated %d times\n",
416 nitems);
417 break;
418 }
419 } else
420 msg(lvl, "# 0x%x 0x%x %d 0x%llx(0x%x) %llu\n",
421 items->access_type, offset, items->size,
422 items->value, items->repcount,
423 (logflags & BOFI_LOG_TIMESTAMP) ?
424 items->access_time : 0ull);
425 }
426 }
427 }
428
429 static int
str_to_bm(char * optarg,coding_t * c,uint_t * bm)430 str_to_bm(char *optarg, coding_t *c, uint_t *bm)
431 {
432 char *str;
433 char *s = "\t\n ";
434 int err = EINVAL;
435
436 msg(2, "str_to_bm: optarg %s\n", optarg);
437 if (optarg != NULL && (str = strtok(optarg, s))) {
438 msg(2, "str_to_bm: str %s\n", str);
439 do {
440 for (; c->str != 0; c++)
441 if (strcmp(str, c->str) == 0) {
442 *bm |= c->code;
443 msg(2, "str_to_bm: %s matches\n",
444 c->str);
445 err = 0;
446 break;
447 }
448 } while ((str = strtok(NULL, s)));
449 } else
450 return (EINVAL);
451 msg(2, "str_to_bm: done 0x%x\n", *bm);
452 return (err);
453 }
454
455
456 /*
457 * Generic routine for commands that apply to a particular instance of
458 * a driver under test (e.g. activate all errdefs defined on an instance).
459 */
460 static int
manage_instance(int fd,char * namep,int instance,int cmd)461 manage_instance(int fd, char *namep, int instance, int cmd)
462 {
463 struct bofi_errctl errctl;
464
465 errctl.namesize = strlen(namep);
466 (void) strncpy(errctl.name, namep, MAXNAMELEN);
467 errctl.instance = instance;
468
469 msg(8, "manage_instance: %s %d\n", namep, instance);
470 if (ioctl(fd, cmd, &errctl) == -1) {
471 msg(0, "bofi ioctl %d failed: %s\n", cmd, strerror(errno));
472 return (-1);
473 }
474 return (0);
475 }
476
477
478 static int
define_one_error(FILE * fp,struct bofi_errdef * edp,struct acc_log_elem * item,ulong_t nttime,ulong_t interval,char * type,int fon,size_t fcnt,uint_t acc_chk,char * opname,uint64_t operand)479 define_one_error(
480 FILE *fp,
481 struct bofi_errdef *edp,
482 struct acc_log_elem *item,
483 ulong_t nttime,
484 ulong_t interval,
485 char *type,
486 int fon, /* corrupt after this many accesses */
487 size_t fcnt, /* and then fail it fcnt times */
488 uint_t acc_chk,
489 char *opname,
490 uint64_t operand)
491 {
492 (void) fprintf(fp,
493 "-n %s -i %d -r %d -l 0x%llx 0x%x -a %s -c %d %d -f %d"
494 " -o %s 0x%llx",
495 (char *)edp->name,
496 edp->instance,
497 edp->rnumber,
498 edp->offset + item->offset, /* offset into the regset */
499 item->size, /* corrupt addrs from offset to offset+size */
500 type,
501 fon, /* corrupt after this many accesses */
502 fcnt, /* and then fail it fcnt times */
503 acc_chk,
504 opname,
505 operand);
506
507 (void) fprintf(fp, " -w %lu %lu\n", nttime, interval);
508 return (0);
509 }
510
511 static void
define_op_err(FILE * fp,int * ecnt,struct bofi_errdef * edp,struct acc_log_elem * item,ulong_t nttime,ulong_t interval,char * type,int fon,size_t fcnt)512 define_op_err(FILE *fp, int *ecnt, struct bofi_errdef *edp,
513 struct acc_log_elem *item, ulong_t nttime, ulong_t interval, char *type,
514 int fon, size_t fcnt)
515 {
516 coding_t *ct;
517 char *opname;
518 uint_t op;
519 uint64_t operand;
520 int k, save_size;
521 uint64_t save_offset;
522
523 if (item->access_type & BOFI_INTR)
524 ct = &ioptypes[0];
525 else
526 ct = &doptypes[0];
527
528 /*
529 * errdefs for dma accesses are too numerous so assume that dma writes
530 * (DDI_DMA_SYNC_FORDEV) create less exposure to potential errors than
531 * do dma reads (DDI_DMA_SYNC_FORCPU).
532 *
533 * also by default do not corrupt PIO_W - it may hang a non-FT platform.
534 */
535 if (item->access_type != BOFI_DMA_W &&
536 ((item->access_type & BOFI_PIO_W) == 0 || !atype_is_default)) {
537 /*
538 * user has asked for PIO_W
539 */
540 for (; ct->str != 0; ct++) {
541 op = ct->code;
542 opname = ct->str;
543 switch (op) {
544 case BOFI_EQUAL:
545 operand = random_operand; /* a random value */
546 random_operand = lrand48() | ((uint64_t)
547 (lrand48()) << 32);
548 break;
549 case BOFI_AND:
550 operand = 0xaddedabadb00bull;
551 break;
552 case BOFI_OR:
553 operand = 0x1;
554 break;
555 case BOFI_XOR:
556 default:
557 operand = myULLMAX;
558 break;
559 case BOFI_DELAY_INTR: /* delay for 1 msec */
560 operand = 1000000;
561 break;
562 case BOFI_LOSE_INTR: /* op not applicable */
563 operand = 0;
564 break;
565 case BOFI_EXTRA_INTR: /* extra intrs */
566 operand = 0xfff;
567 break;
568 }
569 *ecnt = *ecnt + 1;
570
571 if ((item->access_type == BOFI_DMA_W ||
572 item->access_type == BOFI_DMA_R) &&
573 item->size > sizeof (uint64_t) && item->size <
574 DMA_INDIVIDUAL_CORRUPT_CUTOFF) {
575 save_size = item->size;
576 save_offset = item->offset;
577 for (k = (item->size +
578 sizeof (uint64_t) - 1) &
579 ~(sizeof (uint64_t) - 1);
580 k > 0; k -= sizeof (uint64_t)) {
581 item->size = sizeof (uint64_t);
582 (void) define_one_error(fp, edp,
583 item, nttime, interval, type, fon,
584 fcnt, edp->acc_chk, opname,
585 operand);
586 item->offset += sizeof (uint64_t);
587 }
588 item->size = save_size;
589 item->offset = save_offset;
590 } else {
591 (void) define_one_error(fp, edp, item,
592 nttime, interval, type, fon, fcnt,
593 edp->acc_chk, opname, operand);
594 }
595
596 if (op == BOFI_EQUAL) {
597 uint_t cnt;
598 for (cnt = 0; cnt < NPIO_DEFAULTS;
599 cnt++, *ecnt = *ecnt + 1) {
600 if ((item->access_type == BOFI_DMA_W ||
601 item->access_type == BOFI_DMA_R) &&
602 item->size > sizeof (uint64_t) &&
603 item->size <
604 DMA_INDIVIDUAL_CORRUPT_CUTOFF) {
605 save_size = item->size;
606 save_offset = item->offset;
607 for (k = (item->size +
608 sizeof (uint64_t) - 1) &
609 ~(sizeof (uint64_t) - 1);
610 k > 0;
611 k -= sizeof (uint64_t)) {
612 item->size =
613 sizeof (uint64_t);
614 (void) define_one_error(
615 fp, edp, item,
616 nttime, interval,
617 type, fon, fcnt,
618 edp->acc_chk,
619 opname,
620 pio_default_values
621 [cnt]);
622 item->offset +=
623 sizeof (uint64_t);
624 }
625 item->size = save_size;
626 item->offset = save_offset;
627 } else {
628 (void) define_one_error(fp,
629 edp, item, nttime, interval,
630 type, fon, fcnt,
631 edp->acc_chk, opname,
632 pio_default_values[cnt]);
633 }
634 }
635 }
636 }
637 }
638
639 if ((item->access_type & BOFI_PIO_W) && !atype_is_default) {
640 /*
641 * user has asked for PIO_W
642 */
643 (void) define_one_error(fp, edp, item, nttime, interval,
644 type, fon, fcnt, edp->acc_chk, "NO", 0);
645 *ecnt = *ecnt + 1;
646 }
647
648 /*
649 * and finally an access check errdef
650 */
651 if (item->access_type & BOFI_PIO_RW)
652 (void) define_one_error(fp, edp, item, nttime, interval,
653 type, fon, fcnt, 1, "OR", 0);
654
655 if (item->access_type & BOFI_DMA_RW)
656 (void) define_one_error(fp, edp, item, nttime, interval,
657 type, fon, fcnt, 2, "OR", 0);
658
659 }
660
661 /*
662 * Convert a collection of log entries into error definitions.
663 */
664 /* ARGSUSED */
665 static int
define_nerrs(int fd,FILE * fp,int * ecnt,struct bofi_errdef * edp,struct acc_log_elem * items,size_t nitems,uint_t naccess,uint_t minac,uint_t maxac,ulong_t logtime,ulong_t logsize)666 define_nerrs(int fd, FILE *fp, int *ecnt, struct bofi_errdef *edp,
667 struct acc_log_elem *items,
668 size_t nitems,
669 uint_t naccess,
670 uint_t minac,
671 uint_t maxac,
672 ulong_t logtime,
673 ulong_t logsize)
674 {
675 char *type;
676 uint_t at;
677 int i;
678 struct acc_log_elem *item;
679 char *opname;
680 uint_t op;
681 uint64_t operand;
682 int cycleiops, cycledops;
683 int intrs = 0;
684 ulong_t ttime, nttime, interval;
685
686 op = edp->optype;
687 operand = edp->operand;
688 msg(3, "define_nerrs: nitems %d (ac %d at 0x%x): (%d %d)"
689 " (op 0x%x 0x%llx)\n\n", nitems, naccess, items->access_type,
690 minac, maxac, op, operand);
691
692 /*
693 * all items are guaranteed have values in the two element set {0, at}
694 * where at is a valid access type (so find the value of at)
695 */
696 for (i = 0, item = items, at = 0; i < nitems; i++, item++)
697 if (item->access_type != 0) {
698 at = item->access_type;
699 break;
700 }
701 if (at == 0)
702 return (-1);
703
704 /*
705 * find the string form of the access type
706 */
707 for (i = 0, type = 0; atypes[i].str != 0; i++) {
708 if (atypes[i].code == at) {
709 type = atypes[i].str;
710 break;
711 }
712 }
713 if (type == 0) {
714 msg(0, "Unknown access type returned from bofi\n\t");
715 dump_log(0, 0, item, 1, BOFI_LOG_TIMESTAMP);
716 msg(1, "0x%x 0x%x 0x%x 0x%x\n", BOFI_LOG, BOFI_INTR,
717 BOFI_DMA_RW, BOFI_PIO_RW);
718 return (-1);
719 }
720
721 msg(1, "define_n: at = 0x%d (%s)\n", at, type == 0 ? "null" : type);
722 /*
723 * find the string form of the operator
724 */
725 for (i = 0, opname = 0; optypes[i].str != 0; i++) {
726 if (op == optypes[i].code) {
727 opname = optypes[i].str;
728 break;
729 }
730 }
731
732 /*
733 * if not found or inconsistent default to XOR
734 */
735 if (opname == 0 ||
736 (op == BOFI_NO_TRANSFER &&
737 (at & (BOFI_DMA_RW|BOFI_PIO_R))) ||
738 (op >= BOFI_DELAY_INTR && (at & BOFI_INTR) == 0)) {
739 opname = optypes[3].str; /* "XOR" */
740 operand = myULLMAX;
741 op = optypes[3].code;
742 }
743
744 /*
745 * if operator and access type are inconsistent choose a sensible
746 * default
747 */
748 cycleiops = 0;
749 if (at & BOFI_INTR)
750 if (op < BOFI_DELAY_INTR)
751 cycleiops = 1;
752 else if (op == BOFI_LOSE_INTR)
753 operand = 0;
754
755 cycledops = 0;
756 if (nitems == 1 && (at & BOFI_DMA_RW))
757 cycledops = 1;
758 /*
759 * for each access in the list define one or more error definitions
760 */
761 for (i = 0, item = items; i < nitems; i++, item++) {
762 size_t acnt, fcnt;
763 int j, fon;
764
765 if (item->access_type == 0)
766 continue;
767
768 /*
769 * base number of errors to inject on 3% of number of
770 * similar accesses seen during LOG phase
771 */
772 acnt = item->repcount / 10 + 1; /* 10% */
773 fcnt = (acnt >= 3) ? acnt / 3 : 1; /* 3% */
774
775 /*
776 * wait for twice the time it took during LOG phase
777 */
778 if ((ttime = (item->access_time * 2)) < MIN_REPORT_TIME)
779 ttime = MIN_REPORT_TIME;
780 else if (max_edef_wait != 0 && ttime > max_edef_wait)
781 ttime = max_edef_wait;
782 /*
783 * if edef_sleep set (-w) the use that, otherwise use default
784 */
785 interval = edef_sleep ? edef_sleep : DEFAULT_EDEF_SLEEP;
786
787 msg(10,
788 "define_n: item %d limit %d step %d (intr %d) tt(%lu)\n",
789 i, item->repcount, acnt, intrs, ttime);
790
791 for (j = 0, fon = 1, nttime = ttime; j < item->repcount;
792 j += acnt) {
793 if (policy & OPERATORSPOLICY) {
794 define_op_err(fp, ecnt, edp, item,
795 nttime, interval, type, fon, fcnt);
796 } else {
797 if (cycleiops) {
798 op = ioptypes[intrs].code;
799 opname = ioptypes[intrs++].str;
800 switch (op) {
801 case BOFI_DELAY_INTR:
802 /* delay for 1 sec */
803 operand = 1000000;
804 break;
805 case BOFI_LOSE_INTR:
806 /* op not applicable */
807 operand = 0;
808 break;
809 case BOFI_EXTRA_INTR:
810 default:
811 /* generate 2 extra intrs */
812 operand = 0xfff;
813 break;
814 }
815 intrs %= 3;
816 } else if (cycledops) {
817 op = doptypes[intrs].code;
818 opname = doptypes[intrs++].str;
819 switch (op) {
820 case BOFI_EQUAL:
821 random_operand = lrand48() |
822 ((uint64_t)
823 (lrand48()) << 32);
824 break; /* a random value */
825 case BOFI_AND:
826 operand = 0xaddedabadb00bull;
827 break;
828 case BOFI_OR:
829 operand = 0xd1ab011c0af1a5c0ull;
830 break;
831 case BOFI_XOR:
832 default:
833 operand = myULLMAX;
834 break;
835 }
836 intrs %= 4;
837 }
838 (void) define_one_error(fp, edp, item,
839 nttime, interval, type, fon,
840 fcnt, edp->acc_chk, opname, operand);
841 *ecnt = *ecnt + 1;
842 if (op == BOFI_EQUAL) {
843 uint_t cnt;
844 for (cnt = 0; cnt < NPIO_DEFAULTS;
845 cnt++, *ecnt = *ecnt + 1)
846 (void) define_one_error(fp,
847 edp, item, nttime,
848 interval, type, fon, fcnt,
849 edp->acc_chk, opname,
850 pio_default_values[cnt]);
851 }
852 }
853
854 /*
855 * all non maximal policies should only generate
856 * a single error definition set per access.
857 */
858 if (!(policy & MAXIMALPOLICY))
859 break;
860
861 nttime = (logtime - item->access_time) *
862 (j + acnt + fcnt - 1) / logsize;
863 if (nttime < MIN_REPORT_TIME)
864 nttime = MIN_REPORT_TIME;
865 else if (nttime > max_edef_wait)
866 nttime = max_edef_wait;
867
868 msg(11, "define_nerrs: %lu %d %d %d %llu\n", nttime,
869 max_edef_wait, fon, fcnt, item->access_time);
870
871 if (item->access_type != BOFI_INTR)
872 fon += j;
873 }
874 }
875
876 return (0);
877 }
878
879 static int
reduce_log(uint16_t pol,struct acc_log * log,struct acc_log_elem ** llp,size_t * cntp)880 reduce_log(uint16_t pol, struct acc_log *log, /* input args */
881 struct acc_log_elem **llp, size_t *cntp) /* output args */
882 {
883 ulong_t logtime;
884 struct acc_log_elem *items, *item, *elem;
885 int cnt, nitems, acnt;
886 int i, j, k, lb, ub, mina, maxa, cutoff[2], mean;
887
888 if (llp == 0 || cntp == 0) /* subroutine interface violated */
889 return (-1);
890
891 if (*llp == 0) {
892 items = (void *)log->logbase;
893 nitems = log->entries;
894 } else {
895 items = *llp; /* outputs double up as inputs */
896 nitems = *cntp;
897 }
898 /* has the utc time wrapped over ULMAX - unlikely so fix it at 10 */
899 logtime = (log->stop_time >= log->start_time) ?
900 log->stop_time - log->start_time : 10ul;
901
902 msg(1, "reduce %d: logtime %lu\n", nitems, logtime);
903 /*
904 * Sort the log by access type - do not remove duplicates yet (but do
905 * remove access that do not match the requested log -> errdef policy
906 * (defined by union pu pol). Set the repcount field of each entry to a
907 * unique value (in the control statement of the for loop) - this
908 * ensures that the qsort (following the for loop) will not remove any
909 * entries.
910 */
911 for (i = 0, cnt = 0, elem = items; i < nitems;
912 elem->repcount = i, i++, elem++) {
913 /*
914 * If interested in the I/O transfer size and this access
915 * does not match the requested size then ignore the access
916 */
917 if ((pol & SIZEPOLICY) &&
918 (!(pol & MULTIPOLICY) || elem->repcount == 1) &&
919 /* req for DMA / ddi_rep */
920 (pol & elem->size) == 0)
921 elem->access_type = 0;
922 /* these will end up sorted at the head */
923 else {
924 cnt += 1;
925 elem->size *= elem->repcount;
926 if (log->flags & BOFI_LOG_TIMESTAMP)
927 /* real access time */
928 elem->access_time -= log->start_time;
929 else
930 /* linear fit */
931 elem->access_time = logtime * (i + 1) / nitems;
932 }
933 }
934
935 qsort((void *)items, nitems, sizeof (*items), log_cmp);
936
937 msg(5, "qsorted log raw (nitems %d cnt %d:\n", nitems, cnt);
938 dump_log(14, 0, items, nitems, log->flags);
939
940 if (cnt != nitems) { /* some items should be ignored */
941 items += (nitems - cnt); /* ignore these ones */
942 if ((nitems = cnt) == 0) {
943 *cntp = 0;
944 *llp = 0;
945 return (0);
946 /* the chosen policy has ignored everything */
947 }
948
949 }
950 /*
951 * Now remove duplicate entries based on access type, address and size.
952 * Reuse the repcount field to store the no. of duplicate accesses.
953 * Store the average access time in the single remaining
954 * representative of the duplicate set.
955 */
956
957 for (i = 1, cnt = 1, elem = items, elem->repcount = 1, item = elem + 1;
958 i < nitems; i++, item++) {
959 if (elem_cmp(elem, item) == 0) {
960 elem->access_time += item->access_time;
961 elem->repcount++;
962 } else { /* not a duplicate */
963 elem->access_time = logtime / elem->repcount;
964 elem++;
965 *elem = *item;
966 cnt++;
967 elem->repcount = 1;
968 }
969 }
970 elem->access_time = logtime / elem->repcount;
971
972 /*
973 * The log is sorted by access type - now resort to order by frequency
974 * of accesses (ie for a given access type uncommon access will come
975 * first.
976 */
977
978 qsort((void *)items, cnt, sizeof (*items), log_cmp2);
979 msg(4, "qsorted log2: cnt is %d\n", cnt);
980 dump_log(4, 0, items, cnt, log->flags);
981
982 for (i = 0; i < cnt; i = j) {
983
984 /*
985 * Pick out the set [i, j) consisting of elements with the same
986 * access type
987 */
988 for (j = i + 1, acnt = items[i].repcount; j < cnt &&
989 items[j].access_type == items[i].access_type; j++)
990 acnt += items[j].repcount;
991
992 if (j - i == 1) /* never ignore solo accesses of a given type */
993 continue;
994 /*
995 * Now determine what constitutes uncommon and common accesses:
996 */
997 mina = items[i].repcount;
998 maxa = items[j-1].repcount;
999 mean = acnt / (j - i); /* mean value */
1000
1001 if (pol & (UNCOMMONPOLICY|MEDIANPOLICY)) {
1002 cutoff[0] = (mean - mina) / DISTRIB_CUTOFF + mina;
1003
1004 for (ub = i; ub < j; ub++)
1005 if (items[ub].repcount > cutoff[0])
1006 break;
1007 lb = j - 1;
1008 } else {
1009 lb = i;
1010 ub = j-1;
1011 }
1012
1013 if (pol & (COMMONPOLICY|MEDIANPOLICY)) {
1014 cutoff[1] = maxa - (maxa - mean) / DISTRIB_CUTOFF;
1015 for (lb = j - 1; lb >= i; lb--)
1016 if (items[lb].repcount < cutoff[1])
1017 break;
1018 if (!(pol & (UNCOMMONPOLICY|MEDIANPOLICY)))
1019 ub = i;
1020 }
1021
1022 msg(3, "reduce_log: p 0x%x at %d:0x%x %d:0x%x acnt mina maxa"
1023 " (%d %d %d)"
1024 " mean %d cutoffs(%d %d) bnds(%d, %d)\n",
1025 pol, i, items[i].access_type, j, items[j].access_type,
1026 acnt, mina, maxa, mean, cutoff[0], cutoff[1], lb, ub);
1027
1028 if (ub <= lb)
1029 if (!(pol & MEDIANPOLICY))
1030 /* delete all the mid accesses */
1031 for (k = ub; k <= lb; k++)
1032 items[k].access_type = 0;
1033 else {
1034 if (!(pol & UNCOMMONPOLICY))
1035 /* delete uncommon accesses */
1036 for (k = i; k < ub; k++)
1037 items[k].access_type = 0;
1038 if (!(pol & COMMONPOLICY))
1039 /* delete common accesses */
1040 for (k = lb+1; k < j; k++)
1041 items[k].access_type = 0;
1042 }
1043 }
1044 msg(4, "reduce_log: returning %d items\n", cnt);
1045 dump_log(5, 0, items, cnt, log->flags);
1046 *cntp = cnt;
1047 *llp = items;
1048 return (0);
1049 }
1050
1051 static void
log2errdefs(int fd,struct bofi_errdef * edp,struct acc_log * log,char * devpath)1052 log2errdefs(int fd, struct bofi_errdef *edp, struct acc_log *log,
1053 char *devpath)
1054 {
1055 struct acc_log_elem *items;
1056 size_t nitems;
1057 int i, j;
1058 uint_t acc_cnt;
1059 char fname[_POSIX_PATH_MAX];
1060 FILE *fp = 0;
1061 time_t utc = time(NULL);
1062 int ecnt = 0;
1063 int err;
1064 ulong_t logtime;
1065 char *buffer;
1066 struct stat statbuf;
1067
1068 items = (void *)log->logbase;
1069 nitems = log->entries;
1070 logtime = (log->stop_time >= log->start_time) ?
1071 log->stop_time - log->start_time : 10ul;
1072
1073 if (nitems == 0)
1074 return;
1075
1076 /* ensure that generated errdefs complete in bounded time */
1077 if (max_edef_wait == 0)
1078 max_edef_wait =
1079 logtime > MIN_REPORT_TIME ? logtime : MIN_REPORT_TIME * 2;
1080
1081 msg(4, "log2errdefs(0x%p, 0x%p, %d, 0x%x):\n",
1082 (void *) edp, (void *) items, nitems, policy);
1083
1084 (void) snprintf(fname, sizeof (fname), "%s.%d", (char *)edp->name,
1085 (int)getpid());
1086 if ((fp = fopen(fname, "w")) == 0)
1087 fp = outfile;
1088
1089 (void) fprintf(fp, "#!/bin/ksh -p\n\n");
1090 (void) fprintf(fp, "# %-24s%s\n", "Script creation time:", ctime(&utc));
1091 (void) fprintf(fp, "# %-24s%llu\n",
1092 "Activation time:", log->start_time);
1093 (void) fprintf(fp, "# %-24s%llu\n",
1094 "Deactivation time:", log->stop_time);
1095 (void) fprintf(fp, "# %-24s%d\n", "Log size:", nitems);
1096 (void) fprintf(fp, "# %-24s", "Errdef policy:");
1097 for (i = 0; ptypes[i].str != 0; i++)
1098 if (policy & ptypes[i].code)
1099 (void) fprintf(fp, "%s ", ptypes[i].str);
1100 (void) fprintf(fp, "\n");
1101 (void) fprintf(fp, "# %-24s%s\n", "Driver:", (char *)edp->name);
1102 (void) fprintf(fp, "# %-24s%d\n", "Instance:", edp->instance);
1103 if (edp->access_type & BOFI_PIO_RW) {
1104 (void) fprintf(fp, "# %-24s%d\n",
1105 "Register set:", edp->rnumber);
1106 (void) fprintf(fp, "# %-24s0x%llx\n", "Offset:", edp->offset);
1107 (void) fprintf(fp, "# %-24s0x%llx\n", "Length:", edp->len);
1108 } else if (edp->access_type & BOFI_DMA_RW) {
1109 (void) fprintf(fp, "# %-24s%d\n", "DMA handle:", edp->rnumber);
1110 (void) fprintf(fp, "# %-24s0x%llx\n", "Offset:", edp->offset);
1111 (void) fprintf(fp, "# %-24s0x%llx\n", "Length:", edp->len);
1112 } else if ((edp->access_type & BOFI_INTR) == 0) {
1113 (void) fprintf(fp, "# %-24s%d\n",
1114 "Unknown Handle Type:", edp->rnumber);
1115 }
1116
1117 (void) fprintf(fp, "# %-24s0x%x ( ", "Access type:",
1118 (edp->access_type & ~BOFI_LOG));
1119 if (edp->access_type & BOFI_PIO_R)
1120 (void) fprintf(fp, "%s ", "pio_r");
1121 if (edp->access_type & BOFI_PIO_W)
1122 (void) fprintf(fp, "%s ", "pio_w");
1123 if (edp->access_type & BOFI_DMA_W)
1124 (void) fprintf(fp, "%s ", "dma_w");
1125 if (edp->access_type & BOFI_DMA_R)
1126 (void) fprintf(fp, "%s ", "dma_r");
1127 if (edp->access_type & BOFI_INTR)
1128 (void) fprintf(fp, "%s ", "intr");
1129 (void) fprintf(fp, ")\n\n");
1130 if (user_comment)
1131 (void) fprintf(fp, "# %-24s%s\n\n",
1132 "Test Comment:", user_comment);
1133
1134 dump_log(0, fp, items, nitems, log->flags);
1135
1136 items = 0;
1137 if ((err = reduce_log(policy, log, &items, &nitems)) < 0 ||
1138 nitems == 0) {
1139 msg(4, "log2errdefs: reduce_log err %d nitems %d\n",
1140 err, nitems);
1141 return;
1142 }
1143 (void) fprintf(fp, "\nerror() { echo \""
1144 "${0##*/}: $@\""
1145 " >&2; exit 2; }\n");
1146 (void) fprintf(fp,
1147 "trap ' ' 16\t# ignore - it is trapped by abort monitor_edef\n");
1148
1149 (void) fprintf(fp, "\nfixup_script()\n{\n");
1150 if (scriptargs > 0) {
1151 (void) fprintf(fp, "\tif [[ $1 -eq 1 ]]\n\tthen\n");
1152 (void) fprintf(fp, "\t\t# Call a user defined workload\n");
1153 (void) fprintf(fp, "\t\t# while injecting errors\n\t\t");
1154 for (i = 0; i < scriptargs; i++)
1155 (void) fprintf(fp, "%s ", fixup_script[i]);
1156 (void) fprintf(fp, "\n\tfi\n");
1157 (void) fprintf(fp, "\treturn 0\n");
1158 } else {
1159 (void) fprintf(fp, "\tif [[ $1 -eq 0 ]]\n\tthen\n");
1160 (void) fprintf(fp,
1161 "\t\t# terminate any outstanding workload\n");
1162 (void) fprintf(fp, "\t\tif [ $script_pid -gt 0 ]; then\n");
1163 (void) fprintf(fp, "\t\t\tkill $script_pid\n");
1164 (void) fprintf(fp, "\t\t\tscript_pid=0\n");
1165 (void) fprintf(fp, "\t\tfi\n");
1166 (void) fprintf(fp, "\tfi\n");
1167 (void) fprintf(fp, "\treturn -1\n");
1168 }
1169 (void) fprintf(fp, "}\n\n");
1170 (void) fprintf(fp, "devpath=/devices%s\n\n", devpath);
1171 (void) fprintf(fp, "#\n");
1172 (void) fprintf(fp, "# following text extracted from th_script\n");
1173 (void) fprintf(fp, "#\n");
1174 if (stat("/usr/lib/th_script", &statbuf) == -1) {
1175 msg(0, "log2errdefs: stat of /usr/lib/th_script failed\n");
1176 return;
1177 }
1178 fd = open("/usr/lib/th_script", O_RDONLY);
1179 if (fd == -1) {
1180 msg(0, "log2errdefs: open of /usr/lib/th_script failed\n");
1181 return;
1182 }
1183 buffer = malloc(statbuf.st_size);
1184 if (!buffer) {
1185 msg(0, "log2errdefs: malloc for /usr/lib/th_script failed\n");
1186 return;
1187 }
1188 if (read(fd, buffer, statbuf.st_size) != statbuf.st_size) {
1189 msg(0, "log2errdefs: read of /usr/lib/th_script failed\n");
1190 return;
1191 }
1192 (void) fwrite(buffer, statbuf.st_size, 1, fp);
1193 (void) close(fd);
1194 (void) fprintf(fp, "#\n");
1195 (void) fprintf(fp, "# end of extracted text\n");
1196 (void) fprintf(fp, "#\n");
1197 (void) fprintf(fp, "run_subtest %s %d <<ERRDEFS\n",
1198 (char *)edp->name, edp->instance);
1199
1200 for (i = 0; i < nitems; i = j) {
1201
1202 acc_cnt = items[i].repcount;
1203 for (j = i + 1;
1204 j < nitems && items[j].access_type == items[i].access_type;
1205 j++)
1206 acc_cnt += items[j].repcount;
1207 msg(1, "l2e: nitems %d i %d j %d at 0x%x\n",
1208 nitems, i, j, items[i].access_type);
1209 if (items[i].access_type != 0)
1210 (void) define_nerrs(fd, fp, &ecnt, edp, items+i, j-i,
1211 acc_cnt, items[i].repcount, items[j-1].repcount,
1212 logtime, log->entries);
1213 }
1214
1215 (void) fprintf(fp, "ERRDEFS\n");
1216 (void) fprintf(fp, "exit 0\n");
1217
1218 if (fp != stdout && fp != stderr) {
1219 if (fchmod(fileno(fp), S_IRWXU|S_IRGRP|S_IROTH))
1220 msg(0, "fchmod failed: %s\n", strerror(errno));
1221 if (fclose(fp) != 0)
1222 msg(0, "close of %s failed: %s\n", fname,
1223 strerror(errno));
1224 }
1225 msg(10, "log2errdefs: done\n");
1226 }
1227
/*
 * sizeof (longlong_t) - 1.
 * NOTE(review): presumably a mask for the low-order (alignment) bits of a
 * longlong_t-sized quantity - no use of it is visible nearby, confirm.
 */
#define LLSZMASK (sizeof (longlong_t) -1)
1229
1230 static int
add_edef(int fd,struct bofi_errdef * errdef,struct bofi_errstate * errstate,struct handle_info * hdl,struct bofi_errdef * edp)1231 add_edef(int fd,
1232 struct bofi_errdef *errdef, /* returned access criteria */
1233 struct bofi_errstate *errstate,
1234 struct handle_info *hdl, /* handle to match against request */
1235 struct bofi_errdef *edp) /* requested access criteria */
1236 {
1237 *errdef = *edp;
1238 errdef->instance = hdl->instance;
1239
1240
1241 if (hdl->access_type == 0)
1242 return (EINVAL);
1243
1244 errdef->access_type =
1245 errdef->access_type & (hdl->access_type|BOFI_LOG);
1246
1247 /* use a big log for PIO and a small one otherwise */
1248 if (lsize_is_default &&
1249 (errdef->access_type & BOFI_PIO_RW) == 0) {
1250 errdef->access_count = DFLT_NONPIO_LOGSZ;
1251 errdef->fail_count = 0;
1252 }
1253 errdef->log.logsize = errstate->log.logsize =
1254 errdef->access_count + errdef->fail_count - 1;
1255 if (errdef->log.logsize == -1U) {
1256 errdef->log.logsize = errstate->log.logsize = 0;
1257 }
1258 errdef->log.logbase = errstate->log.logbase =
1259 (caddr_t)GETSTRUCT(struct acc_log_elem, errdef->log.logsize);
1260
1261 if (errdef->log.logbase == 0)
1262 return (EAGAIN);
1263
1264 errdef->rnumber = hdl->rnumber;
1265 errdef->offset = hdl->offset;
1266 errdef->len = hdl->len;
1267
1268 msg(4, "creating errdef: %d %s %d %d 0x%llx 0x%llx 0x%x 0x%x 0x%x"
1269 " 0x%x 0x%x 0x%llx\n",
1270 errdef->namesize, (char *)errdef->name,
1271 errdef->instance, errdef->rnumber,
1272 errdef->offset, errdef->len,
1273 errdef->access_type,
1274 errdef->access_count, errdef->fail_count,
1275 errdef->acc_chk, errdef->optype, errdef->operand);
1276 if (ioctl(fd, BOFI_ADD_DEF, errdef) == -1) {
1277 perror("th_define - adding errdef failed");
1278 return (errno);
1279 }
1280 errdef->optype = edp->optype; /* driver clears it if fcnt is zero */
1281 errstate->errdef_handle = errdef->errdef_handle;
1282 return (0);
1283 }
1284
1285 static void
collect_state(int fd,int cmd,struct bofi_errstate * errstate,struct bofi_errdef * errdef,char * devpath)1286 collect_state(int fd, int cmd,
1287 struct bofi_errstate *errstate,
1288 struct bofi_errdef *errdef,
1289 char *devpath)
1290 {
1291 int rval;
1292 size_t ls = errstate->log.logsize;
1293
1294 msg(2, "collect_state: pre: edp->access_type 0x%x (logsize %d)\n",
1295 errdef->access_type, errdef->log.logsize);
1296
1297 do {
1298 errstate->log.logsize = 0; /* only copy the driver log once */
1299
1300 msg(10, "collecting state (lsize %d) ...\n",
1301 errstate->log.logsize);
1302 errno = 0;
1303
1304 if (ioctl(fd, cmd, errstate) == -1 && errno != EINTR) {
1305 perror("th_define (collect) -"
1306 " waiting for error report failed");
1307 break;
1308 }
1309
1310 (void) fprintf(outfile, "Logged %d out of %d accesses"
1311 " (%s %d %d 0x%x %d).\n",
1312 errstate->log.entries, ls,
1313 (char *)errdef->name, errdef->instance, errdef->rnumber,
1314 errdef->access_type, errstate->log.wrapcnt);
1315
1316 (void) msg(1, "\t(ac %d fc %d lf 0x%x wc %d).\n",
1317 errstate->access_count, errstate->fail_count,
1318 errstate->log.flags, errstate->log.wrapcnt);
1319
1320 rval = errno;
1321 if ((errstate->log.flags & BOFI_LOG_WRAP) &&
1322 errstate->access_count > 0)
1323 continue;
1324 if (errstate->access_count <= 1 &&
1325 errstate->fail_count == 0 &&
1326 errstate->acc_chk == 0) {
1327 msg(3, "collecting state complete entries %d\n",
1328 errstate->log.entries);
1329 break;
1330 }
1331
1332 msg(5, "still collecting state: %d, %d, %d\n",
1333 errstate->access_count, errstate->fail_count,
1334 errstate->acc_chk);
1335 (void) msg(2, "Log: errno %d size %d entries %d "
1336 "(off 0x%llx len 0x%llx) ac %d\n", errno,
1337 errstate->log.logsize, errstate->log.entries,
1338 errdef->offset, errdef->len, errstate->access_count);
1339
1340 } while (rval == 0 && errstate->log.entries < ls);
1341
1342 /* now grab the log itself */
1343 errstate->log.logsize = ls;
1344 if (errstate->log.entries != 0) {
1345 if (ioctl(fd, BOFI_CHK_STATE, errstate) == -1) {
1346 msg(0,
1347 "%s: errorwhile retrieving %d log entries: %s\n",
1348 Progname, errstate->log.entries, strerror(errno));
1349 } else {
1350 msg(2, "collect_state: post: edp->access_type 0x%x"
1351 " (log entries %d %d) (%llu - %llu)\n",
1352 errdef->access_type,
1353 errstate->log.entries, errstate->access_count,
1354 errstate->log.start_time, errstate->log.stop_time);
1355
1356 log2errdefs(fd, errdef, &(errstate->log), devpath);
1357 }
1358 }
1359 }
1360
1361 static void
print_err_reports(FILE * fp,struct bofi_errstate * esp,char * fname,char * cmt,int id)1362 print_err_reports(FILE *fp, struct bofi_errstate *esp,
1363 char *fname, char *cmt, int id)
1364 {
1365 if (fname != 0 && *fname != 0)
1366 (void) fprintf(fp, "%sErrdef file %s definition %d:",
1367 cmt, fname, id);
1368 else
1369 (void) fprintf(fp, "%s", cmt);
1370
1371 if (esp->access_count != 0) {
1372 (void) fprintf(fp, " (access count %d).\n", esp->access_count);
1373 } else {
1374 (void) fprintf(fp, "\n%s\tremaining fail count %d acc_chk %d\n",
1375 cmt, esp->fail_count, esp->acc_chk);
1376 (void) fprintf(fp, "%s\tfail time 0x%llx error reported time"
1377 " 0x%llx errors reported %d\n", cmt,
1378 esp->fail_time, esp->msg_time,
1379 esp->errmsg_count);
1380 if (esp->msg_time)
1381 (void) fprintf(fp, "%s\tmessage \"%s\" severity 0x%x\n",
1382 cmt, esp->buffer, (uint_t)esp->severity);
1383 }
1384 }
1385
1386 static void
thr_collect(void * arg,char * devpath)1387 thr_collect(void *arg, char *devpath)
1388 {
1389 int fd;
1390 struct collector_def *hi = (struct collector_def *)arg;
1391
1392 msg(4, "thr_collect: collecting %s inst %d rn %d at = 0x%x.\n",
1393 hi->ed.name, hi->ed.instance,
1394 hi->ed.rnumber, hi->ed.access_type);
1395
1396 if ((fd = open(BOFI_DEV, O_RDWR)) == -1) {
1397 if (errno == EAGAIN)
1398 msg(0, "Too many instances of bofi currently open\n");
1399 else
1400 msg(0, "Error while opening bofi driver: %s",
1401 strerror(errno));
1402 } else {
1403 /*
1404 * Activate the logging errdefs - then collect the results.
1405 */
1406 (void) manage_instance(fd, hi->ed.name,
1407 hi->ed.instance, BOFI_START);
1408 collect_state(fd, BOFI_CHK_STATE_W, &hi->es, &hi->ed, devpath);
1409 }
1410
1411 /*
1412 * there is no more work to do on this access handle so clean up / exit.
1413 */
1414 msg(3, "thr_collect: closing and broadcasting.\n");
1415 exit(0);
1416 }
1417
1418 /*
1419 * Given an access handle known to the bofi driver see if the user has
1420 * specified access criteria that match that handle. Note: this matching
1421 * algorithm should be kept consistent with the drivers alogorithm.
1422 */
1423 static int
match_hinfo(struct handle_info * hp,int instance,uint_t access_type,int rnumber,offset_t offset,offset_t len)1424 match_hinfo(struct handle_info *hp, int instance, uint_t access_type,
1425 int rnumber, offset_t offset, offset_t len)
1426 {
1427
1428 msg(9, "matching (%d %d) 0x%x %d offset (%llx, %llx) len (%llx %llx)\n",
1429 hp->instance, instance, access_type, rnumber,
1430 hp->offset, offset, hp->len, len);
1431
1432 if (instance != -1 && hp->instance != instance)
1433 return (0);
1434 if ((access_type & BOFI_DMA_RW) &&
1435 (hp->access_type & BOFI_DMA_RW) &&
1436 (rnumber == -1 || hp->rnumber == rnumber))
1437 return (1);
1438 else if ((access_type & BOFI_INTR) &&
1439 (hp->access_type & BOFI_INTR))
1440 return (1);
1441 else if ((access_type & BOFI_PIO_RW) &&
1442 (hp->access_type & BOFI_PIO_RW) &&
1443 (rnumber == -1 || hp->rnumber == rnumber) &&
1444 (len == 0 || hp->offset < offset + len) &&
1445 (hp->len == 0 || hp->offset + hp->len > offset))
1446 return (1);
1447 else
1448 return (0);
1449 }
1450
1451 /*
1452 * Obtain all the handles created by the driver specified by the name parameter
1453 * that match the remaining arguments. The output parameter nhdls indicates how
1454 * many of the structures pointed to by the output parameter hip match the
1455 * specification.
1456 *
1457 * It is the responsibility of the caller to free *hip when *nhdls != 0.
1458 */
1459 static int
get_hinfo(int fd,char * name,struct handle_info ** hip,size_t * nhdls,int instance,int atype,int rset,offset_t offset,offset_t len,int new_semantics)1460 get_hinfo(int fd, char *name, struct handle_info **hip, size_t *nhdls,
1461 int instance, int atype, int rset, offset_t offset, offset_t len,
1462 int new_semantics)
1463 {
1464 struct bofi_get_hdl_info hdli;
1465 int command;
1466
1467 command = BOFI_GET_HANDLE_INFO;
1468 hdli.namesize = strlen(name);
1469 (void) strncpy(hdli.name, name, MAXNAMELEN);
1470 /*
1471 * Initially ask for the number of access handles (not the structures)
1472 * in order to allocate memory
1473 */
1474 hdli.hdli = 0;
1475 *hip = 0;
1476 hdli.count = 0;
1477
1478 /*
1479 * Ask the bofi driver for all handles created by the driver under test.
1480 */
1481 if (ioctl(fd, command, &hdli) == -1) {
1482 *nhdls = 0;
1483 msg(0, "driver failed to return handles: %s\n",
1484 strerror(errno));
1485 return (errno);
1486 } else if ((*nhdls = hdli.count) == 0) {
1487 msg(1, "get_hinfo: no registered handles\n");
1488 return (0); /* no handles */
1489 } else if ((*hip = GETSTRUCT(struct handle_info, *nhdls)) == 0) {
1490 return (EAGAIN);
1491 } else {
1492 struct handle_info *hp, **chosen;
1493 int i;
1494
1495 /* Ask for *nhdls handles */
1496 hdli.hdli = (caddr_t)*hip;
1497 if (ioctl(fd, command, &hdli) == -1) {
1498 int err = errno;
1499
1500 msg(0, "BOFI_GET_HANDLE_INFO ioctl returned error %d\n",
1501 err);
1502 free(*hip);
1503 return (err);
1504 }
1505
1506 if (hdli.count < *nhdls)
1507 *nhdls = hdli.count; /* some handles have gone away */
1508
1509 msg(4, "qsorting %d handles\n", *nhdls);
1510 if (*nhdls > 1)
1511 /* sort them naturally (NB ordering is not mandatory) */
1512 qsort((void *)*hip, *nhdls, sizeof (**hip), hdl_cmp);
1513
1514 if ((chosen = malloc(sizeof (hp) * *nhdls)) != NULL) {
1515 struct handle_info **ip;
1516 /* the selected handles */
1517 struct handle_info *prev = 0;
1518 int scnt = 0;
1519
1520 for (i = 0, hp = *hip, ip = chosen; i < *nhdls;
1521 i++, hp++) {
1522 /*
1523 * Remark: unbound handles never match
1524 * (access_type == 0)
1525 */
1526 if (match_hinfo(hp, instance, atype, rset,
1527 offset&0x7fffffff, len&0x7fffffff)) {
1528 msg(3, "match: 0x%x 0x%llx 0x%llx"
1529 " 0x%llx (0x%llx)\n",
1530 hp->access_type, hp->addr_cookie,
1531 hp->offset, hp->len,
1532 (hp->len & 0x7fffffff));
1533 if (prev &&
1534 (prev->access_type & BOFI_DMA_RW) &&
1535 (hp->access_type & BOFI_DMA_RW) &&
1536 hp->instance == prev->instance &&
1537 hp->len == prev->len &&
1538 hp->addr_cookie ==
1539 prev->addr_cookie)
1540 continue;
1541
1542 if ((hp->access_type & BOFI_DMA_RW) &&
1543 (atype & BOFI_DMA_RW) !=
1544 hp->access_type)
1545 if (new_semantics)
1546 continue;
1547
1548 if (prev)
1549 msg(3, "match_hinfo: match:"
1550 " 0x%llx (%d %d) (%d %d)"
1551 " (0x%x 0x%x) (0x%llx,"
1552 " 0x%llx)\n",
1553 hp->addr_cookie,
1554 prev->instance,
1555 hp->instance, prev->rnumber,
1556 hp->rnumber,
1557 prev->access_type,
1558 hp->access_type, prev->len,
1559 hp->len);
1560
1561 /* it matches so remember it */
1562 prev = *ip++ = hp;
1563 scnt += 1;
1564 }
1565 }
1566
1567 if (*nhdls != scnt) {
1568 /*
1569 * Reuse the alloc'ed memory to return
1570 * only those handles the user has asked for.
1571 * But first prune the handles to get rid of
1572 * overlapping ranges (they are ordered by
1573 * offset and length).
1574 */
1575 *nhdls = scnt;
1576 for (i = 0, hp = *hip, ip = chosen; i < scnt;
1577 i++, ip++, hp++)
1578 if (hp != *ip)
1579 (void) memcpy(hp, *ip,
1580 sizeof (*hp));
1581 }
1582 free(chosen);
1583 }
1584
1585 for (i = 0, hp = *hip; i < *nhdls; i++, hp++) {
1586 msg(4, "\t%d 0x%x %d 0x%llx 0x%llx 0x%llx\n",
1587 hp->instance, hp->access_type, hp->rnumber,
1588 hp->len, hp->offset, hp->addr_cookie);
1589 }
1590 }
1591 if (*nhdls == 0 && *hip)
1592 free(*hip);
1593
1594 msg(4, "get_info: %s got %d handles\n", name, *nhdls);
1595 return (0);
1596 }
1597
1598 static void
init_sigs()1599 init_sigs()
1600 {
1601 struct sigaction sa;
1602 int *ip, sigs[] = {SIGINT, SIGTERM, 0};
1603
1604 sa.sa_handler = kill_sighandler;
1605 (void) sigemptyset(&sa.sa_mask);
1606 for (ip = sigs; *ip; ip++)
1607 (void) sigaddset(&sa.sa_mask, *ip);
1608 sa.sa_flags = 0;
1609 for (ip = sigs; *ip; ip++)
1610 (void) sigaction(*ip, &sa, NULL);
1611 }
1612
/*
 * Raise the soft limits for open files, data segment size and file size to
 * their hard limits, since this program can be hungry on all three.
 * Failures are reported through msg() and are otherwise ignored. Extended
 * FILE stdio is enabled whenever the open-file limit could be read.
 */
static void
up_resources()
{
	struct rlimit lim;

	if (getrlimit(RLIMIT_NOFILE, &lim) >= 0) {
		msg(12, "RLIMIT_NOFILE\t %lu (%lu)\n",
		    lim.rlim_cur, lim.rlim_max);
		lim.rlim_cur = lim.rlim_max;
		if (setrlimit(RLIMIT_NOFILE, &lim) < 0)
			msg(0, "failed to set RLIMIT_NOFILE: %s\n",
			    strerror(errno));
		(void) enable_extended_FILE_stdio(-1, -1);
	} else {
		msg(0, "failed to obtain RLIMIT_NOFILE: %s\n", strerror(errno));
	}

	if (getrlimit(RLIMIT_DATA, &lim) >= 0) {
		msg(12, "RLIMIT_DATA\t %lu (%lu)\n", lim.rlim_cur, lim.rlim_max);
		lim.rlim_cur = lim.rlim_max;
		if (setrlimit(RLIMIT_DATA, &lim) < 0)
			msg(0, "failed to set RLIMIT_DATA: %s\n",
			    strerror(errno));
	} else {
		msg(0, "failed to obtain RLIMIT_DATA: %s\n", strerror(errno));
	}

	if (getrlimit(RLIMIT_FSIZE, &lim) >= 0) {
		msg(12, "RLIMIT_FSIZE\t %lu (%lu)\n", lim.rlim_cur, lim.rlim_max);
		lim.rlim_cur = lim.rlim_max;
		if (setrlimit(RLIMIT_FSIZE, &lim) < 0)
			msg(0, "failed to set RLIMIT_FSIZE: %s\n",
			    strerror(errno));
	} else {
		msg(0, "failed to obtain RLIMIT_FSIZE: %s\n", strerror(errno));
	}
}
1649
1650 static FILE *
create_test_file(char * drvname)1651 create_test_file(char *drvname)
1652 {
1653 char dirname[_POSIX_PATH_MAX];
1654 char testname[_POSIX_PATH_MAX];
1655 FILE *fp = 0;
1656 time_t utc = time(NULL);
1657
1658 if (snprintf(dirname, sizeof (dirname), "%s.test.%lu",
1659 drvname, utc) == -1 ||
1660 snprintf(testname, sizeof (testname), "%s.test.%lu",
1661 drvname, utc) == -1)
1662 return (0);
1663
1664 if (mkdir(dirname, S_IRWXU|S_IRGRP|S_IROTH)) {
1665 msg(0, "Error creating %s: %s\n", dirname, strerror(errno));
1666 return (0);
1667 }
1668 if (chdir(dirname)) {
1669 (void) rmdir(dirname);
1670 return (0);
1671 }
1672 if ((fp = fopen(testname, "w")) == 0)
1673 return (0); /* leave created directory intact */
1674
1675 return (fp);
1676 }
1677
/*
 * Argument block passed by getpath() to walk_callback() through
 * di_walk_node(): it names the driver instance being searched for and the
 * buffer that receives its devfs path.
 */
struct walk_arg {
	char *path;		/* out: buffer receiving the devfs path; */
				/* reset to NULL if the path lookup fails */
	int instance;		/* in: instance number to look for */
	char name[MAXPATHLEN];	/* in: driver name to look for */
	int pathlen;		/* in: size limit for the copy into path */
};
1684
1685 static int
walk_callback(di_node_t node,void * arg)1686 walk_callback(di_node_t node, void *arg)
1687 {
1688 struct walk_arg *warg = (struct walk_arg *)arg;
1689 char *driver_name;
1690 char *path;
1691
1692 driver_name = di_driver_name(node);
1693 if (driver_name != NULL) {
1694 if (strcmp(driver_name, warg->name) == NULL &&
1695 di_instance(node) == warg->instance) {
1696 path = di_devfs_path(node);
1697 if (path == NULL)
1698 warg->path = NULL;
1699 else
1700 (void) strncpy(warg->path, path, warg->pathlen);
1701 return (DI_WALK_TERMINATE);
1702 }
1703 }
1704 return (DI_WALK_CONTINUE);
1705 }
1706
1707 static int
getpath(char * path,int instance,char * name,int pathlen)1708 getpath(char *path, int instance, char *name, int pathlen)
1709 {
1710 di_node_t node;
1711 struct walk_arg warg;
1712
1713 warg.instance = instance;
1714 (void) strncpy(warg.name, name, MAXPATHLEN);
1715 warg.path = path;
1716 warg.pathlen = pathlen;
1717 if ((node = di_init("/", DINFOSUBTREE)) == DI_NODE_NIL)
1718 return (-1);
1719 if (di_walk_node(node, DI_WALK_CLDFIRST, &warg, walk_callback) == -1) {
1720 di_fini(node);
1721 return (-1);
1722 }
1723 if (warg.path == NULL) {
1724 di_fini(node);
1725 return (-1);
1726 }
1727 di_fini(node);
1728 return (0);
1729 }
1730
1731 /*
1732 * Record logsize h/w accesses of type 'edp->access_type' made by instance
1733 * 'edp->instance' of driver 'edp->name' to the register set (or dma handle)
1734 * 'edp->rnumber' that lie within the range 'edp->offset' to
1735 * 'edp->offset' + 'edp->len'.
1736 * Access criteria may be mixed and matched:
1737 * - access types may be combined (PIO read/write, DMA read write or intrs);
1738 * - if 'edp->instance' is -1 all instances are checked for the criteria;
1739 * - if 'edp->rnumber' is -1 all register sets and dma handles are matched;
1740 * - 'offset' and 'len' indicate that only PIO and DMA accesses within the
1741 * range 'edp->offset' to 'edp->len' will be logged. Putting 'edp->offset'
1742 * to zero and 'edp->len' to -1ull gives maximal coverage.
1743 *
1744 * 'collecttime' is the number of seconds used to log accesses
1745 * (default is infinity).
1746 */
1747 static void
test_driver(struct bofi_errdef * edp,unsigned long long collecttime)1748 test_driver(struct bofi_errdef *edp,
1749 unsigned long long collecttime)
1750 {
1751 pid_t pid;
1752 int statloc;
1753 struct collector_def *cdefs, *cdp;
1754 struct handle_info *hdls, *hdl;
1755 int i, fd;
1756 size_t cnt;
1757 size_t nchildren;
1758 unsigned long long timechunk;
1759 FILE *sfp; /* generated control test file */
1760 char buf[MAXPATHLEN];
1761 char devpath[MAXPATHLEN];
1762 char *devpathp = "NULL";
1763 int drv_inst;
1764 int got_it = 0;
1765
1766 char *name = (char *)edp->name;
1767 uint_t logsize = edp->access_count + edp->fail_count - 1;
1768 int inst = edp->instance;
1769 uint_t atype = edp->access_type;
1770 int rset = edp->rnumber;
1771 offset_t offset = edp->offset;
1772 offset_t len = edp->len;
1773
1774 msg(4, "test_driver: %s %d inst %d 0x%x rset %d %llx %llx\n",
1775 name, logsize, inst, atype, rset, offset, len);
1776
1777 drv_inst = inst;
1778 if (getpath(devpath, inst, name, MAXPATHLEN) != -1) {
1779 devpathp = devpath;
1780 got_it = 1;
1781 }
1782 if (logsize == -1U)
1783 logsize = 0;
1784 fd = open(BOFI_DEV, O_RDWR);
1785 if (fd == -1) {
1786 perror("get_hdl_info - bad open of bofi driver");
1787 return;
1788 }
1789 if (got_it) {
1790 (void) snprintf(buf, sizeof (buf),
1791 "th_manage /devices%s offline", devpathp);
1792 (void) system(buf);
1793 (void) snprintf(buf, sizeof (buf),
1794 "th_manage /devices%s online", devpathp);
1795 (void) system(buf);
1796 (void) snprintf(buf, sizeof (buf),
1797 "th_manage /devices%s getstate >/dev/null", devpathp);
1798 (void) system(buf);
1799 }
1800 if (get_hinfo(fd, name, &hdls, &cnt,
1801 inst, atype, rset, offset, len, 1) != 0) {
1802 msg(0, "driver_test: bad get_info for %d hdls\n", cnt);
1803 return;
1804 } else if (logsize == 0 || collecttime == 0 || cnt == 0) {
1805 if (cnt == 0)
1806 msg(1, "No matching handles.\n");
1807 return;
1808 }
1809 if ((cdefs = GETSTRUCT(struct collector_def, cnt)) == 0) {
1810 msg(0, "driver_test: can't get memory for %d cdefs\n", cnt);
1811 return;
1812 }
1813 up_resources();
1814 if (got_it) {
1815 if (scriptargs > 0) {
1816 (void) snprintf(buf, sizeof (buf),
1817 "DRIVER_PATH=/devices%s DRIVER_INSTANCE=%d"
1818 " DRIVER_UNCONFIGURE=0 DRIVER_CONFIGURE=1",
1819 devpathp, drv_inst);
1820 for (i = 0; i < scriptargs; i++) {
1821 (void) strcat(buf, " ");
1822 (void) strcat(buf, fixup_script[i]);
1823 }
1824 (void) strcat(buf, " &");
1825 } else {
1826 (void) snprintf(buf, sizeof (buf),
1827 "while : ; do th_manage /devices%s online;"
1828 " th_manage /devices%s getstate >/dev/null;"
1829 " th_manage /devices%s offline;done &"
1830 " echo $! >/tmp/bofi.pid",
1831 devpathp, devpathp, devpathp);
1832 }
1833 (void) system(buf);
1834 (void) snprintf(buf, sizeof (buf), "sleep %d",
1835 edef_sleep ? edef_sleep : DEFAULT_EDEF_SLEEP);
1836 (void) system(buf);
1837 }
1838
1839 (void) fprintf(outfile,
1840 "Logging accesses to instances ");
1841 for (i = 0, inst = -1, hdl = hdls; i < cnt;
1842 i++, hdl++) {
1843 if (inst != hdl->instance) {
1844 inst = hdl->instance;
1845 (void) fprintf(outfile, "%d ", inst);
1846 }
1847 }
1848 (void) fprintf(outfile, " (%d logs of size 0x%x).\n\t"
1849 "(Use th_manage ... clear_errdefs to terminate"
1850 " logging)\n", cnt, logsize);
1851
1852 sfp = create_test_file(name);
1853 /*
1854 * Install a logging errdef for each matching handle,
1855 * and then create a child to collect the log.
1856 * The child is responsible for activating the log.
1857 */
1858 for (i = 0, cdp = cdefs, hdl = hdls, nchildren = 0;
1859 i < cnt; i++, cdp++, hdl++) {
1860 if (add_edef(fd, &cdp->ed, &cdp->es, hdl, edp) != 0) {
1861 cdp->lp = 0;
1862 cdp->pid = 0;
1863 } else {
1864 cdp->lp = (void *)cdp->ed.log.logbase;
1865 msg(1, "test_driver: thr_create:"
1866 " lsize 0x%x 0x%x at 0x%x\n",
1867 cdp->es.log.logsize,
1868 cdp->ed.log.logsize,
1869 cdp->ed.access_type);
1870 if ((pid = fork()) == -1) {
1871 msg(0, "fork failed for handle"
1872 " %d: %s\n", i, strerror(errno));
1873 cdp->pid = 0; /* ignore */
1874 } else if (pid == 0) {
1875 thr_collect(cdp, devpathp);
1876 } else {
1877 cdp->pid = pid;
1878 nchildren += 1;
1879 }
1880 }
1881 }
1882
1883 if (nchildren != 0) {
1884 if (sfp) {
1885 (void) fprintf(sfp, "#!/bin/ksh -p\n\n");
1886 (void) fprintf(sfp,
1887 "\n# Test control script generated using:\n#");
1888 for (i = 0; i < pargc; i++)
1889 (void) fprintf(sfp, " %s", pargv[i]);
1890 (void) fprintf(sfp, "\n\n");
1891 (void) fprintf(sfp, "\nrun_tests()\n{\n");
1892 for (i = 0, cdp = cdefs; i < cnt; i++, cdp++)
1893 if (cdp->pid) {
1894 (void) fprintf(sfp,
1895 "\tif [ -x ./%s.%d ]\n\tthen\n",
1896 name, (int)cdp->pid);
1897 (void) fprintf(sfp,
1898 "\t\techo \"Starting test"
1899 " %d (id %d)\"\n",
1900 i, (int)cdp->pid);
1901 (void) fprintf(sfp, "\t\t./%s.%d\n",
1902 name, (int)cdp->pid);
1903 (void) fprintf(sfp, "\t\techo \""
1904 "Test %d (id %d) complete\"\n",
1905 i, (int)cdp->pid);
1906 (void) fprintf(sfp, "\tfi\n");
1907 }
1908 (void) fprintf(sfp, "}\n\nrun_tests\n");
1909 if (fchmod(fileno(sfp), S_IRWXU|S_IRGRP|S_IROTH))
1910 msg(0, "fchmod on control script failed: %s\n",
1911 strerror(errno));
1912 if (fclose(sfp) != 0)
1913 msg(0, "Error closing control script: %s\n",
1914 strerror(errno));
1915 }
1916
1917 set_handler(SIGALRM); /* handle it */
1918 /*
1919 * The user may want to terminate logging before the log fills
1920 * so use a timer to signal the logging children to handle this
1921 * case.
1922 */
1923 timechunk = collecttime / MAXALRMCALL;
1924 collecttime = collecttime - timechunk * MAXALRMCALL;
1925
1926 msg(2, "logging for (0x%llx 0x%llx)\n", timechunk, collecttime);
1927
1928 (void) alarm(collecttime); /* odd bit of collect time */
1929
1930 /* wait for the log to fill or deadline satisfied */
1931 for (;;) {
1932 pid = wait(&statloc);
1933 for (i = 0, nchildren = 0, cdp = cdefs;
1934 i < cnt; i++, cdp++)
1935 if (cdp->pid == pid)
1936 cdp->pid = 0;
1937 for (i = 0, nchildren = 0, cdp = cdefs;
1938 i < cnt; i++, cdp++)
1939 if (cdp->pid)
1940 nchildren++;
1941 if (nchildren == 0)
1942 break;
1943 if (killed)
1944 break;
1945 if (alarmed) {
1946 if (timechunk-- > 0) {
1947 /*
1948 * prepare for the next timeslice by
1949 * rearming the clock
1950 */
1951 if (alarm(MAXALRMCALL) == 0)
1952 alarmed = 0;
1953 else {
1954 /*
1955 * must have been a user abort
1956 * (via SIGALRM)
1957 */
1958 (void) alarm(0);
1959 break;
1960 }
1961 } else
1962 break;
1963 }
1964 }
1965
1966 (void) fprintf(outfile, "Logging complete.\n");
1967 }
1968 if (got_it) {
1969 if (scriptargs > 0) {
1970 (void) snprintf(buf, sizeof (buf),
1971 "DRIVER_PATH=/devices%s DRIVER_INSTANCE=%d"
1972 " DRIVER_UNCONFIGURE=1 DRIVER_CONFIGURE=0",
1973 devpathp, drv_inst);
1974 for (i = 0; i < scriptargs; i++) {
1975 (void) strcat(buf, " ");
1976 (void) strcat(buf, fixup_script[i]);
1977 }
1978 (void) system(buf);
1979 } else {
1980 (void) system("kill `cat /tmp/bofi.pid`");
1981 }
1982 }
1983 msg(2, "test_driver: terminating\n");
1984 }
1985
1986 static int
getnameinst(char * orig_path,int * instance,char * name,int namelen)1987 getnameinst(char *orig_path, int *instance, char *name, int namelen)
1988 {
1989 di_node_t node;
1990 char *binding_name;
1991
1992 if ((node = di_init(&orig_path[8], DINFOSUBTREE|DINFOMINOR)) ==
1993 DI_NODE_NIL)
1994 return (-1);
1995 if ((binding_name = di_driver_name(node)) == NULL)
1996 return (-1);
1997 *instance = di_instance(node);
1998 (void) strncpy(name, binding_name, namelen);
1999 di_fini(node);
2000 return (0);
2001 }
2002
/*
 * Command line synopsis text. It lists three forms of invocation: defining
 * an errdef, logging accesses (-a LOG ...), and -h.
 *
 * NOTE(review): the text shows "-C" without an argument, but main() passes
 * "C:" to getopt() and stores optarg as the user comment, so -C does take
 * one argument - confirm and bring the text into line.
 */
static char syntax[] =
	" [ -n name [ -i instance ] | -P path ]\n"
	" [ -a acc_types ] [ -r rnumber ]\n"
	" [ -l offset [ length ] ] [ -c count [ failcount ] ]\n"
	" [ -o operator [ operand ] ] [ -f acc_chk ]\n"
	" [ -w max_wait_period [ report_interval ] ]\n"
	" or\n"
	" [ -n name [ -i instance ] | -P path ]\n"
	" -a LOG [ acc_types ] [ -r rnumber]\n"
	" [ -l offset [ length ] ] [ -c count [ failcount ] ]\n"
	" [ -s collect_time ] [ -p policy ] [ -x flags ]\n"
	" [ -C ] [-e fixup_script ]\n"
	" or\n"
	" -h";
2017
2018 int
main(int argc,char * argv[])2019 main(int argc, char *argv[])
2020 {
2021 extern char *optarg;
2022 extern int optind;
2023
2024 char c; /* for parsing getopts */
2025 int nopts = 0; /* for backward compatibility */
2026 int err = 0;
2027
2028 /* use a maximal set of defaults for logging or injecting */
2029 struct bofi_errdef errdef = {
2030 0, /* length of driver name */
2031 {0}, /* driver name */
2032 -1, /* monitor all instances */
2033 -1, /* monitor all register sets and DMA handles */
2034 (offset_t)0, /* monitor from start of reg. set or DMA hd */
2035 myLLMAX, /* monitor whole reg set or DMA hdl(no LLMAX) */
2036 0, /* qualify all */
2037 DFLTLOGSZ, /* default no. of accesses before corrupting */
2038 0u, /* default no. of accesses to corrupt */
2039 0u, /* no check access corruption */
2040 BOFI_NOP, /* no corruption operator by default */
2041 myULLMAX, /* default operand */
2042 {0, 0, BOFI_LOG_TIMESTAMP, /* timestamp by default */
2043 0, 0, 0, 0}, /* no logging by default */
2044 0};
2045
2046
2047 /* specify the default no of seconds for which to monitor */
2048 unsigned long long collecttime = DFLTLOGTIME;
2049
2050 char *str; /* temporary variable */
2051 long tmpl; /* another one */
2052 int i;
2053 uint_t tmpui;
2054
2055 char buf[MAXPATHLEN];
2056
2057 Progname = (char *)strrchr(*argv, '/');
2058 Progname = (Progname == NULL) ? *argv : Progname + 1;
2059
2060 errfile = stderr;
2061 outfile = stdout;
2062 policy = 0;
2063 lsize_is_default = 1;
2064 pargv = argv;
2065 pargc = argc;
2066
2067 while ((c = getopt(argc, argv, "a:c:C:dD:e:f:h:i:l:n:o:p:P:r:s:tw:x"))
2068 != EOF) {
2069 nopts++;
2070 switch (c) {
2071 case 'a':
2072 msg(2, "option a: optarg %s optind %d argc %d\n",
2073 optarg, optind, argc);
2074 if ((err = str_to_bm(optarg, atypes,
2075 &errdef.access_type)) == 0)
2076 while (optind < argc && *argv[optind] != '-') {
2077 if ((err = str_to_bm(argv[optind++],
2078 atypes, &errdef.access_type)))
2079 break;
2080 }
2081 break;
2082 case 'c':
2083 lsize_is_default = 0;
2084 /* zero is valid */
2085 errdef.access_count = strtoul(optarg, &str, 0);
2086 if (str == optarg)
2087 err = EINVAL;
2088 else if (optind < argc && (argv[optind][0] != '-' ||
2089 (strlen(argv[optind]) > 1 &&
2090 isdigit(argv[optind][1]))))
2091 errdef.fail_count =
2092 strtoull(argv[optind++], 0, 0);
2093 break;
2094 case 'C':
2095 user_comment = optarg;
2096 if (optind < argc && argv[optind][0] != '-')
2097 err = EINVAL;
2098 break;
2099 case 'D':
2100 dbglvl = strtoul(optarg, &str, 0);
2101 break;
2102 case 'e':
2103 fixup_script = 0;
2104 scriptargs = 0;
2105 fixup_script = &argv[optind - 1];
2106 scriptargs += 1;
2107 while (optind < argc) {
2108 optind += 1;
2109 scriptargs += 1;
2110 }
2111 break;
2112 case 'f':
2113 tmpl = strtol(optarg, &str, 0);
2114
2115 if (str != optarg)
2116 errdef.acc_chk = tmpl;
2117 else if (strcmp(optarg, "PIO") == NULL)
2118 errdef.acc_chk = 1;
2119 else if (strcmp(optarg, "DMA") == NULL)
2120 errdef.acc_chk = 2;
2121 else if (strcmp(optarg, "U4FT_ACC_NO_PIO") == NULL)
2122 errdef.acc_chk = 1;
2123 else if (strcmp(optarg, "U4FT_ACC_NO_DMA") == NULL)
2124 errdef.acc_chk = 2;
2125 else
2126 err = EINVAL;
2127 break;
2128 case 'i':
2129 if ((errdef.instance = strtol(optarg, &str, 0)) < 0)
2130 errdef.instance = -1;
2131 else if (str == optarg)
2132 err = EINVAL;
2133 break;
2134 case 'l':
2135 errdef.offset = strtoull(optarg, &str, 0);
2136 if (str == optarg)
2137 err = EINVAL;
2138 else if (optind < argc &&
2139 (argv[optind][0] != '-' ||
2140 (strlen(argv[optind]) > 1 &&
2141 isdigit(argv[optind][1])))) {
2142 /* -1 indicates the rest of register set */
2143 errdef.len = strtoull(argv[optind++], 0, 0);
2144 }
2145 break;
2146 case 'n':
2147 (void) strncpy(errdef.name, optarg, MAXNAMELEN);
2148 if ((errdef.namesize = strlen(errdef.name)) == 0)
2149 err = EINVAL;
2150 break;
2151 case 'o':
2152 for (i = 0; optypes[i].str != 0; i++)
2153 if (strcmp(optarg, optypes[i].str) == 0) {
2154 errdef.optype = optypes[i].code;
2155 break;
2156 }
2157 if (optypes[i].str == 0)
2158 err = EINVAL;
2159 else if (optind < argc &&
2160 (argv[optind][0] != '-' ||
2161 (strlen(argv[optind]) > 1 &&
2162 isdigit(argv[optind][1]))))
2163 errdef.operand =
2164 strtoull(argv[optind++], 0, 0);
2165 break;
2166 case 'p':
2167 tmpui = 0x0u;
2168 if ((err = str_to_bm(optarg, ptypes, &tmpui)) == 0) {
2169 while (optind < argc && *argv[optind] != '-')
2170 if ((err = str_to_bm(argv[optind++],
2171 ptypes, &tmpui)))
2172 break;
2173 policy = (uint16_t)tmpui;
2174 }
2175 if (err == 0 && (policy & BYTEPOLICY))
2176 errdef.log.flags |= BOFI_LOG_REPIO;
2177 break;
2178 case 'P':
2179 if (getnameinst(optarg, &errdef.instance, buf,
2180 MAXPATHLEN) == -1)
2181 err = EINVAL;
2182 else
2183 (void) strncpy(errdef.name, buf, MAXNAMELEN);
2184 break;
2185 case 'r':
2186 if ((errdef.rnumber = strtol(optarg, &str, 0)) < 0)
2187 errdef.rnumber = -1;
2188 if (str == optarg) err = EINVAL;
2189 break;
2190 case 's':
2191 collecttime = strtoull(optarg, &str, 0);
2192 if (str == optarg)
2193 err = EINVAL; /* zero is valid */
2194 break;
2195 case 'w':
2196 do_status = 1;
2197 max_edef_wait = strtoul(optarg, &str, 0);
2198 /* zero is valid */
2199 if (str == optarg)
2200 err = EINVAL;
2201 else if (optind < argc &&
2202 (argv[optind][0] != '-' ||
2203 (strlen(argv[optind]) > 1 &&
2204 isdigit(argv[optind][1]))))
2205 edef_sleep = strtoull(argv[optind++], 0, 0);
2206
2207 break;
2208 case 'x':
2209 if ((optind < argc && *argv[optind] == '-') ||
2210 optind == argc)
2211 errdef.log.flags |= BOFI_LOG_WRAP;
2212 else {
2213 if (strchr(argv[optind], 'w') != 0)
2214 errdef.log.flags |= BOFI_LOG_WRAP;
2215 if (strchr(argv[optind], 'r') != 0)
2216 errdef.log.flags |= BOFI_LOG_REPIO;
2217 if (strchr(argv[optind], 't') != 0)
2218 errdef.log.flags |= BOFI_LOG_TIMESTAMP;
2219 if (strstr(argv[optind], "~t") != 0)
2220 errdef.log.flags &= ~BOFI_LOG_TIMESTAMP;
2221 optind++;
2222 }
2223 break;
2224 case 'h':
2225 (void) fprintf(errfile, "usage: %s %s\n",
2226 Progname, syntax);
2227 exit(0);
2228 break;
2229 case '?': /* also picks up missing parameters */
2230 default:
2231 (void) fprintf(errfile, "usage: %s %s\n",
2232 Progname, syntax);
2233 exit(2);
2234 }
2235
2236 if (err) {
2237 (void) fprintf(errfile, "usage: %s %s\n",
2238 Progname, syntax);
2239 exit(2);
2240 }
2241 if (c == 'e')
2242 break; /* the -e option must be the final option */
2243 }
2244
2245
2246 if (errdef.name[0] == 0) {
2247 msg(0, "%s - invalid name parameter\n", Progname);
2248 exit(1);
2249 }
2250 errdef.namesize = strlen(errdef.name);
2251
2252 if (policy == 0) {
2253 policy |= UNBIASEDPOLICY;
2254 policy |= OPERATORSPOLICY;
2255 }
2256
2257 if (errdef.optype == BOFI_NOP)
2258 errdef.optype = BOFI_XOR;
2259 if (errdef.access_type == BOFI_LOG) { /* qualify all accesses */
2260 errdef.access_type =
2261 (BOFI_LOG|BOFI_DMA_RW|BOFI_PIO_RW|BOFI_INTR);
2262 atype_is_default = 1;
2263 } else if (errdef.access_type == 0) { /* qualify all accesses */
2264 errdef.access_type =
2265 (BOFI_DMA_RW|BOFI_PIO_RW|BOFI_INTR);
2266 atype_is_default = 1;
2267 } else
2268 atype_is_default = 0;
2269
2270 init_sigs();
2271 if ((errdef.access_type & BOFI_LOG) == 0) {
2272 int fd, i, instance;
2273 size_t cnt;
2274 struct handle_info *hdls, *hp;
2275
2276 if ((fd = open(BOFI_DEV, O_RDWR)) == -1) {
2277 msg(0, "%s: error opening bofi driver: %s\n",
2278 Progname, strerror(errno));
2279 exit(1);
2280 }
2281 if ((err = get_hinfo(fd, errdef.name, &hdls, &cnt,
2282 errdef.instance, errdef.access_type, errdef.rnumber,
2283 errdef.offset, errdef.len, 0)) != 0) {
2284 msg(0, "%s: Bad lookup on bofi driver.\n", Progname);
2285 (void) close(fd);
2286 exit(1);
2287 } else if (cnt == 0) {
2288 msg(0,
2289 "%s: No handles match request access criteria.\n",
2290 Progname);
2291 (void) close(fd);
2292 exit(1);
2293 }
2294 if (errdef.instance == -1)
2295 instance = -1;
2296 else {
2297 instance = hdls->instance;
2298 for (i = 0, hp = hdls; i < cnt; i++, hp++) {
2299 if (instance != hp->instance) {
2300 instance = -1;
2301 break;
2302 }
2303 }
2304 }
2305 if (instance == -1) {
2306 msg(0, "Multiple instances match access criteria"
2307 " (only allowed when logging):\n");
2308 msg(0, "\tinst\taccess\trnumber\toffset\tlength\n");
2309 for (i = 0, hp = hdls; i < cnt; i++, hp++)
2310 msg(0, "\t%d\t0x%x\t%d\t0x%llx\t0x%llx\n",
2311 hp->instance, hp->access_type,
2312 hp->rnumber, hp->offset, hp->len);
2313 } else {
2314 struct bofi_errstate es;
2315 int timeleft = max_edef_wait;
2316
2317 if (ioctl(fd, BOFI_ADD_DEF, &errdef) == -1) {
2318 perror("th_define - adding errdef failed");
2319 } else {
2320 es.errdef_handle = errdef.errdef_handle;
2321 msg(4, "waiting for edef:"
2322 " %d %s %d %d 0x%llx 0x%llx 0x%x 0x%x"
2323 " 0x%x 0x%x 0x%x 0x%llx\n",
2324 errdef.namesize, errdef.name,
2325 errdef.instance, errdef.rnumber,
2326 errdef.offset, errdef.len,
2327 errdef.access_type, errdef.access_count,
2328 errdef.fail_count, errdef.acc_chk,
2329 errdef.optype, errdef.operand);
2330
2331 set_handler(SIGALRM); /* handle it */
2332
2333 do {
2334 if (do_status)
2335 (void) alarm(edef_sleep);
2336 if (ioctl(fd, BOFI_CHK_STATE_W,
2337 &es) == -1) {
2338 if (errno != EINTR) {
2339 perror("bad"
2340 " BOFI_CHK_STATE");
2341 break;
2342 } else if (!do_status) {
2343 break;
2344 }
2345 }
2346 if (do_status)
2347 (void) fprintf(outfile,
2348 "%llu:%llu:%u:%u:%u:"
2349 "%u:%d:\"%s\"\n",
2350 es.fail_time, es.msg_time,
2351 es.access_count,
2352 es.fail_count,
2353 es.acc_chk, es.errmsg_count,
2354 (uint_t)es.severity,
2355 (es.msg_time) ?
2356 es.buffer : "");
2357 if (es.acc_chk == 0 &&
2358 es.fail_count == 0 && !do_status)
2359 print_err_reports(outfile,
2360 &es, "", "", -1);
2361 else if (alarmed) {
2362 alarmed = 0;
2363 if ((timeleft -= edef_sleep) <=
2364 0) {
2365 if (do_status)
2366 break;
2367 print_err_reports(
2368 outfile, &es, "",
2369 "", -1);
2370 break;
2371 }
2372 } else if (!do_status)
2373 print_err_reports(outfile,
2374 &es, "", "", -1);
2375 } while (es.acc_chk != 0 || es.fail_count != 0);
2376
2377 msg(2, "done: acc_chk 0x%x fcnt %d\n",
2378 es.acc_chk, es.fail_count);
2379 }
2380
2381 (void) close(fd);
2382 }
2383 free(hdls);
2384 return (0);
2385 }
2386 test_driver(&errdef, collecttime);
2387 return (0);
2388 }
2389