1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <sys/ksynch.h>
28 #include <sys/cmn_err.h>
29 #include <sys/errno.h>
30 #include <sys/kmem.h>
31 #include <sys/ddi.h>
32 #include <sys/nsc_thread.h>
33
34 #include "sd_bcache.h"
35 #include "sd_trace.h"
36 #include "sd_misc.h"
37
38 #ifndef _SD_NOTRACE
39
40 #ifndef SM_SDTRSEMA
41 #define SM_SDTRSEMA 1
42 #define SM_SDTRLCK 1
43 #endif
44
45 int _sd_trace_mask = 0;
46
47 /*
48 * _sdbd_trace_t _sd_trace_table[-1, 0 .. sdbc_max_devs - 1]
49 * allocate memory, shift pointer up by one.
50 */
51 static _sdbc_trace_t *_sd_trace_table;
52
53 static kcondvar_t _sd_adump_cv;
54 static int _sd_trace_configed;
55 static kmutex_t _sd_adump_lk;
56
57 static int _alert_cd = SDT_ANY_CD;
58 static int _last_cd = SDT_ANY_CD;
59 #define XMEM(x, y) (void)(x = y, y = (SDT_ANY_CD), x)
60
61 /*
62 * Forward declare all statics that are used before defined to enforce
63 * parameter checking.
64 * Some (if not all) of these could be removed if the code were reordered
65 */
66
67 static int _sd_set_adump(int cd, int flag, _sdtr_table_t *table);
68
69 /*
70 * _sdbc_tr_unload - cache is being unloaded. Release any memory/lock/sv's
71 * created by _sdbc_tr_unload and null the stale pointers.
72 *
73 */
74 void
_sdbc_tr_unload(void)75 _sdbc_tr_unload(void)
76 {
77 if (_sd_trace_table)
78 nsc_kmem_free((_sd_trace_table - 1),
79 sizeof (_sdbc_trace_t) * (sdbc_max_devs + 1));
80 cv_destroy(&_sd_adump_cv);
81 mutex_destroy(&_sd_adump_lk);
82
83 _sd_trace_table = NULL;
84 }
85
86 /*
87 * _sdbc_tr_load - cache is being loaded. Allocate the memory/lock/sv's
88 * which need to be present regardless of state of cache configuration.
89 *
90 */
91 int
_sdbc_tr_load(void)92 _sdbc_tr_load(void)
93 {
94 _sdbc_trace_t *m;
95
96 cv_init(&_sd_adump_cv, NULL, CV_DRIVER, NULL);
97 mutex_init(&_sd_adump_lk, NULL, MUTEX_DRIVER, NULL);
98
99 /*
100 * this maybe ought to wait to see if traces are configured, but it
101 * is only 4k
102 */
103
104 m = (_sdbc_trace_t *)nsc_kmem_zalloc(
105 sizeof (_sdbc_trace_t) * (sdbc_max_devs + 1),
106 KM_NOSLEEP, sdbc_stats_mem);
107
108 if (m == NULL) {
109 cmn_err(CE_WARN,
110 "sdbc(_sdbc_tr_load) cannot allocate trace table");
111 return (-1);
112 }
113 _sd_trace_table = m + 1;
114
115 return (0);
116
117 }
118
119 /*
120 * _sdbc_tr_configure - configure a trace area for the descriptor "cd".
121 * Unlike other ..._configure routines this routine is called multiple
122 * times since there will be an unknown number of open descriptors. At
123 * cache config time if tracing is enabled only the slot for SDT_INV_CD
124 * is created.
125 *
126 * Allocate the SD cache trace area (per device)
127 */
128
129 int
_sdbc_tr_configure(int cd)130 _sdbc_tr_configure(int cd)
131 {
132 int size;
133 _sdtr_table_t *t;
134 kmutex_t *lk;
135
136 if (!_sd_cache_config.trace_size)
137 return (0);
138
139 if (cd == SDT_INV_CD)
140 _sd_trace_configed = 1;
141
142 if (_sd_trace_table[cd].tbl)
143 return (0);
144
145 size = sizeof (_sdtr_table_t) +
146 _sd_cache_config.trace_size * sizeof (_sdtr_t);
147
148 if ((t = (_sdtr_table_t *)nsc_kmem_zalloc(size,
149 KM_NOSLEEP, sdbc_stats_mem)) == NULL) {
150 cmn_err(CE_WARN, "sdbc(_sdbc_tr_configure) failed to "
151 "allocate %d bytes for trace, cd=%d", size, cd);
152 return (-1);
153 }
154
155 lk = nsc_kmem_zalloc(sizeof (kmutex_t), KM_NOSLEEP, sdbc_local_mem);
156 if (!lk) {
157 nsc_kmem_free(t, size);
158 cmn_err(CE_WARN, "sdbc(_sdbc_tr_configure) cannot "
159 "alloc trace lock for cd %d", cd);
160 return (-1);
161 }
162 mutex_init(lk, NULL, MUTEX_DRIVER, NULL);
163
164 _sd_trace_table[cd].t_lock = lk;
165 t->tt_cd = cd;
166 t->tt_max = _sd_cache_config.trace_size;
167 t->tt_mask = _sd_cache_config.trace_mask;
168 t->tt_lbolt = (char)_sd_cache_config.trace_lbolt;
169 t->tt_good = (char)_sd_cache_config.trace_good;
170 _sd_trace_mask |= t->tt_mask;
171 _sd_trace_table[cd].tbl = t;
172 return (0);
173 }
174
175
176 /*
177 * _sdbc_tr_deconfigure
178 * free all trace memory (regions) when deconfiguring cache
179 */
180 void
_sdbc_tr_deconfigure(void)181 _sdbc_tr_deconfigure(void)
182 {
183 int i, size;
184 _sdbc_trace_t *tt;
185
186 if (!_sd_cache_config.trace_size || !_sd_trace_configed)
187 return;
188
189 mutex_enter(&_sd_adump_lk);
190 _sd_trace_configed = 0;
191 cv_broadcast(&_sd_adump_cv);
192 mutex_exit(&_sd_adump_lk);
193
194 for (i = -1, tt = &_sd_trace_table[-1]; i < sdbc_max_devs; i++, tt++) {
195 if (tt->tbl == NULL) continue;
196 size = tt->tbl->tt_max * sizeof (_sdtr_t) +
197 sizeof (_sdtr_table_t);
198 if (tt->t_lock) {
199 mutex_destroy(tt->t_lock);
200 nsc_kmem_free(tt->t_lock, sizeof (kmutex_t));
201 }
202 nsc_kmem_free(tt->tbl, size);
203 tt->t_lock = NULL;
204 tt->tbl = NULL;
205 }
206 _alert_cd = SDT_ANY_CD;
207 _last_cd = SDT_ANY_CD;
208 }
209
210 static int first_alert = 0;
211 /*
212 * SDALERT(f,cd,len,fba,flg,ret) \
213 * _sd_alert(f,cd,len,fba,flg,ret)
214 * Build a ALERT trace entry and place it into the trace table.
215 */
216 void
_sd_alert(int f,int cd,int len,nsc_off_t fba,int flg,int ret)217 _sd_alert(int f, int cd, int len, nsc_off_t fba, int flg, int ret)
218 {
219 int tin;
220 _sdtr_t *tp;
221 _sdtr_table_t *t;
222 kmutex_t *lk;
223
224 if (!first_alert) {
225 first_alert++;
226 cmn_err(CE_WARN,
227 "sdbc(_sd_alert) cd=%x f=%x len=%x fba=%" NSC_SZFMT
228 " flg=%x ret=%x", cd, f, len, fba, flg, ret);
229
230 }
231
232 /* Watch out for negative error codes or simply bogus cd's */
233
234 if (cd < -1 || cd >= sdbc_max_devs) {
235 /*
236 * no device trace buffer -- use SDT_INV_CD table?
237 */
238 if ((t = _sd_trace_table[-1].tbl) == NULL)
239 return;
240 lk = _sd_trace_table[-1].t_lock;
241 } else {
242 lk = _sd_trace_table[cd].t_lock;
243 if ((t = _sd_trace_table[cd].tbl) == NULL) {
244 /*
245 * no device trace buffer -- use SDT_INV_CD table?
246 */
247 if ((t = _sd_trace_table[-1].tbl) == NULL)
248 return;
249 lk = _sd_trace_table[-1].t_lock;
250 }
251 }
252
253 if (!(t->tt_mask & ST_ALERT))
254 return; /* check per-device mask */
255
256 if (t->tt_good) mutex_enter(lk);
257 t->tt_alert++; /* alert on this device */
258 t->tt_cnt++; /* overwritten entries if (tt_cnt >= tt_max) */
259
260 tin = t->tt_in++;
261 if (tin >= t->tt_max) tin = t->tt_in = 0;
262 tp = &t->tt_buf[tin];
263 tp->t_time = 0; /* not filled in yet */
264 if (t->tt_good) mutex_exit(lk);
265
266 tp->t_func = (ushort_t)f | ST_ALERT;
267 tp->t_len = (ushort_t)len;
268 tp->t_fba = fba;
269 tp->t_flg = flg;
270 tp->t_ret = ret;
271 /*
272 * On LP64 systems we will only capture the low 32 bits of the
273 * time this really should be good enough for our purposes.
274 *
275 */
276 if (t->tt_lbolt)
277 tp->t_time = (int)nsc_lbolt();
278 else
279 tp->t_time = (int)nsc_usec();
280
281 /* wakeup trace daemon, with hint */
282 _alert_cd = cd;
283
284 if (_sd_trace_configed)
285 cv_signal(&_sd_adump_cv);
286 }
287
288
289 /*
290 * SDTRACE(f,cd,len,fba,flg,ret) \
291 * if (_sd_trace_mask & (f)) _sd_trace(f,cd,len,fba,flg,ret)
292 * Build a trace entry and place it into the trace table.
293 */
294 void
_sd_trace(int f,int cd,int len,nsc_off_t fba,int flg,int ret)295 _sd_trace(int f, int cd, int len, nsc_off_t fba, int flg, int ret)
296 {
297 int tin;
298 _sdtr_t *tp;
299 _sdtr_table_t *t;
300 kmutex_t *lk;
301
302 /* Watch out for negative error codes or simply bogus cd's */
303
304 if (cd < -1 || cd >= sdbc_max_devs) {
305 /*
306 * no device trace buffer -- use SDT_INV_CD table?
307 */
308 if ((t = _sd_trace_table[-1].tbl) == NULL)
309 return;
310 lk = _sd_trace_table[-1].t_lock;
311 } else {
312 lk = _sd_trace_table[cd].t_lock;
313 if ((t = _sd_trace_table[cd].tbl) == NULL)
314 return;
315 }
316
317 if (!(t->tt_mask & f))
318 return; /* check per-device mask */
319
320 /*
321 * Don't overwrite if alert signaled (count lost instead)
322 * Locking only if 'trace_good' parameter set.
323 */
324 if (t->tt_good) mutex_enter(lk);
325 if (t->tt_alert && (t->tt_cnt >= t->tt_max)) {
326 t->tt_lost++; /* lost during alert */
327 if (t->tt_good) mutex_exit(lk);
328 return;
329 }
330 t->tt_cnt++; /* overwritten entries if (tt_cnt >= tt_max) */
331
332 tin = t->tt_in++;
333 if (tin >= t->tt_max) tin = t->tt_in = 0;
334 tp = &t->tt_buf[tin];
335 tp->t_time = 0; /* not filled in yet */
336 if (t->tt_good) mutex_exit(lk);
337
338 tp->t_func = (ushort_t)f;
339 tp->t_len = (ushort_t)len;
340 tp->t_fba = fba;
341 tp->t_flg = flg;
342 tp->t_ret = ret;
343 /*
344 * On LP64 systems we will only capture the low 32 bits of the
345 * time this really should be good enough for our purposes.
346 *
347 */
348 if (t->tt_lbolt)
349 tp->t_time = (int)nsc_lbolt();
350 else
351 tp->t_time = (int)nsc_usec();
352 }
353
354 /*
355 * _sd_scan_alert -- search for device with trace alert
356 */
357 static int
_sd_scan_alert(void)358 _sd_scan_alert(void)
359 {
360 int cd;
361
362 XMEM(cd, _alert_cd);
363 if ((cd != SDT_ANY_CD) && _sd_trace_table[cd].tbl->tt_alert)
364 return (cd);
365 for (cd = _last_cd + 1; cd < sdbc_max_devs; cd++)
366 if (_sd_trace_table[cd].tbl &&
367 _sd_trace_table[cd].tbl->tt_alert)
368 return (_last_cd = cd);
369 for (cd = SDT_INV_CD; cd <= _last_cd; cd++)
370 if (_sd_trace_table[cd].tbl &&
371 _sd_trace_table[cd].tbl->tt_alert)
372 return (_last_cd = cd);
373 return (SDT_ANY_CD);
374 }
375
376 /*
377 * _sd_scan_entries -- search for next device with trace entries
378 */
379 static int
_sd_scan_entries(void)380 _sd_scan_entries(void)
381 {
382 int cd;
383
384 for (cd = _last_cd + 1; cd < sdbc_max_devs; cd++)
385 if (_sd_trace_table[cd].tbl && _sd_trace_table[cd].tbl->tt_cnt)
386 return (_last_cd = cd);
387 for (cd = SDT_INV_CD; cd <= _last_cd; cd++)
388 if (_sd_trace_table[cd].tbl && _sd_trace_table[cd].tbl->tt_cnt)
389 return (_last_cd = cd);
390 return (SDT_ANY_CD);
391 }
392
393
394 /*
395 * _sd_adump
396 * copy information about new trace records to trace daemon,
397 * or modify trace parameters.
398 *
399 * Some tracing parameters can be modified
400 * [Either per-device if cd specified, or the defaults if cd = SDT_ANY_CD]
401 * SD_LOGSIZE: table.tt_max (size for future opens)
402 * SD_SET_LBOLT: table.tt_lbolt
403 * SD_SET_MASK: table.tt_mask
404 * SD_SET_GOOD: table.tt_good
405 *
406 * if (cd >= 0) dump specific device records;
407 * if (cd == SDT_INV_CD) dump records which don't apply to any one device.
408 * if (cd == SDT_ANY_CD), then choose a device:
409 * 1) most recent alert, block if (flag & SD_ALERT_WAIT)
410 * 2) "next" device with unprocessed records.
411 */
412 int
_sd_adump(void * args,int * rvp)413 _sd_adump(void *args, int *rvp)
414 {
415 struct a {
416 long cd;
417 _sdtr_table_t *table;
418 _sdtr_t *buf;
419 long size;
420 long flag;
421 } *uap = (struct a *)args;
422 _sdtr_t *ubuf;
423 _sdtr_table_t tt, *t;
424 kmutex_t *lk;
425 int cd, count, lost, new_cnt;
426
427 if (uap->flag & (SD_SET_SIZE|SD_SET_MASK|SD_SET_LBOLT|SD_SET_GOOD)) {
428 return (_sd_set_adump(uap->cd, uap->flag, uap->table));
429 }
430 if (! _sd_trace_configed) {
431 return (EINVAL); /* not initialized yet */
432 }
433 if (uap->cd >= SDT_INV_CD) {
434 /* specific device: check if configured. dump current state. */
435 if ((uap->cd > (long)sdbc_max_devs) ||
436 !(t = _sd_trace_table[uap->cd].tbl)) {
437 return (ENOSPC); /* no space configured */
438 }
439 lk = _sd_trace_table[uap->cd].t_lock;
440 cd = uap->cd;
441 } else {
442 /*
443 * SDT_ANY_CD:
444 * SD_ALERT_WAIT - wait for alert
445 */
446 scan:
447 if ((cd = _sd_scan_alert()) != SDT_ANY_CD)
448 goto dump;
449 if ((uap->flag & SD_ALERT_WAIT)) {
450 mutex_enter(&_sd_adump_lk);
451 if (!_sd_trace_configed) {
452 mutex_exit(&_sd_adump_lk);
453 return (EINVAL);
454 }
455
456 if (!cv_wait_sig(&_sd_adump_cv, &_sd_adump_lk)) {
457 mutex_exit(&_sd_adump_lk);
458 return (EINTR);
459 }
460 mutex_exit(&_sd_adump_lk);
461
462 if (!_sd_trace_configed || !_sd_cache_initialized) {
463 return (EIDRM);
464 }
465 goto scan;
466 }
467 /* any device with entries */
468 if ((cd = _sd_scan_entries()) == SDT_INV_CD)
469 return (0); /* no new entries */
470
471 dump:
472 lk = _sd_trace_table[cd].t_lock;
473 if ((t = _sd_trace_table[cd].tbl) == NULL) {
474 if (uap->flag & SD_ALERT_WAIT) {
475 t = _sd_trace_table[-1].tbl;
476 lk = _sd_trace_table[-1].t_lock;
477 } else {
478 return (ENOSPC); /* no space configured */
479 }
480 }
481 }
482
483 /*
484 * take a snapshot of the table state
485 */
486 if (t->tt_good)
487 mutex_enter(lk);
488 tt = *t;
489 if (t->tt_good)
490 mutex_exit(lk);
491
492 /*
493 * copy trace log entries to daemon
494 *
495 * size: entries in user-level 'buf'
496 * count: how many entries to copy [force count <= size]
497 * tt_max: size of kernel buffer
498 * tt_cnt: written entries [lossage if tt_cnt > tt_max]
499 * cnt: for wrap-around calculations
500 */
501 if ((count = tt.tt_cnt) > tt.tt_max) { /* lost from beginning */
502 tt.tt_out = tt.tt_in;
503 count = tt.tt_max;
504 lost = tt.tt_cnt - tt.tt_max;
505 } else
506 lost = 0;
507 if (count <= 0)
508 return (0);
509 if ((long)count > uap->size)
510 count = uap->size;
511 ubuf = uap->buf;
512 if ((tt.tt_out + count) > tt.tt_max) {
513 int cnt = tt.tt_max - tt.tt_out;
514 if (cnt > count)
515 cnt = count;
516 if (copyout(&(t->tt_buf[tt.tt_out]), ubuf,
517 cnt * sizeof (_sdtr_t))) {
518 return (EFAULT);
519 }
520 ubuf += cnt;
521 cnt = count - cnt;
522 if (copyout(&(t->tt_buf[0]), ubuf, cnt * sizeof (_sdtr_t))) {
523 return (EFAULT);
524 }
525 tt.tt_out = cnt;
526 } else {
527 if (copyout(&(t->tt_buf[tt.tt_out]), ubuf,
528 count * sizeof (_sdtr_t))) {
529 return (EFAULT);
530 }
531 tt.tt_out += count;
532 if (tt.tt_out == tt.tt_max)
533 tt.tt_out = 0;
534 }
535
536 /*
537 * tt_alert uses fuzzy counting.
538 * if multiple alerts signaled, leave it at 1.
539 */
540 if (t->tt_alert)
541 t->tt_alert = (t->tt_alert > 1) ? 1 : 0;
542
543 /*
544 * tt_cntout is tt_cnt after dump
545 * update tt_cnt for copied entries
546 */
547 if (t->tt_good)
548 mutex_enter(lk);
549 tt.tt_cntout = t->tt_cnt;
550 t->tt_out = tt.tt_out;
551 new_cnt = t->tt_cnt;
552 if ((new_cnt -= count+lost) < 0)
553 new_cnt = 0;
554 t->tt_cnt = new_cnt; /* race with new traces if not "tt_good" */
555 if (t->tt_good)
556 mutex_exit(lk);
557
558 if (copyout(&tt, uap->table, sizeof (tt) - sizeof (_sdtr_t))) {
559 return (EFAULT);
560 }
561 *rvp = count;
562
563 first_alert = 0;
564 return (0);
565 }
566
567
568 /* set size, mask, lbolt, or good(locks) */
569 static int
_sd_set_adump(int cd,int flag,_sdtr_table_t * table)570 _sd_set_adump(int cd, int flag, _sdtr_table_t *table)
571 {
572 _sdtr_table_t tt, *t;
573
574 if (copyin(table, &tt, sizeof (tt) - sizeof (_sdtr_t))) {
575 return (EFAULT);
576 }
577 if (cd == SDT_ANY_CD) { /* modify config parameter */
578 if (flag & SD_SET_SIZE)
579 _sd_cache_config.trace_size = tt.tt_max;
580 if (flag & SD_SET_MASK) {
581 _sd_cache_config.trace_mask = tt.tt_mask;
582 /* explicitly set global mask, not bitwise or */
583 _sd_trace_mask = tt.tt_mask;
584 }
585 if (flag & SD_SET_LBOLT)
586 _sd_cache_config.trace_lbolt = tt.tt_lbolt;
587 if (flag & SD_SET_GOOD)
588 _sd_cache_config.trace_good = tt.tt_good;
589 return (0);
590 }
591 if (flag & SD_SET_SIZE)
592 _sd_cache_config.trace_size = tt.tt_max;
593 /* modify particular device parameters */
594 if (!_sd_trace_table[cd].tbl)
595 (void) _sdbc_tr_configure(cd);
596 if ((t = _sd_trace_table[cd].tbl) == NULL)
597 return (0);
598 if (flag & SD_SET_MASK) {
599 t->tt_mask = tt.tt_mask;
600 _sd_trace_mask |= tt.tt_mask; /* or-ed with global mask */
601 }
602 if (flag & SD_SET_LBOLT)
603 t->tt_lbolt = tt.tt_lbolt;
604 if (flag & SD_SET_GOOD)
605 t->tt_good = tt.tt_good;
606 if (copyout(t, table, sizeof (*t) - sizeof (_sdtr_t))) {
607 return (EFAULT);
608 }
609 return (0);
610 }
611
612 #else /* ! _SD_NOTRACE */
613
/*
 * Tracing is compiled out (_SD_NOTRACE): provide do-nothing versions of
 * the entry points, with the same signatures as the traced build so that
 * callers compile unchanged.
 */

/* ARGSUSED */
int
_sd_adump(void *args, int *rvp)
{
	return (ENOSYS);
}

int
_sdbc_tr_load(void)
{
	return (0);
}

/* ARGSUSED */
int
_sdbc_tr_configure(int cd)
{
	return (0);
}

void
_sdbc_tr_deconfigure(void)
{
}

void
_sdbc_tr_unload(void)
{
}
619
620 #endif /* ! _SD_NOTRACE */
621