xref: /titanic_41/usr/src/uts/common/avs/ns/sdbc/sd_trace.c (revision fcf3ce441efd61da9bb2884968af01cb7c1452cc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/ksynch.h>
28 #include <sys/cmn_err.h>
29 #include <sys/errno.h>
30 #include <sys/kmem.h>
31 #include <sys/ddi.h>
32 #include <sys/nsc_thread.h>
33 
34 #include "sd_bcache.h"
35 #include "sd_trace.h"
36 #include "sd_misc.h"
37 
38 #ifndef _SD_NOTRACE
39 
40 #ifndef SM_SDTRSEMA
41 #define	SM_SDTRSEMA 1
42 #define	SM_SDTRLCK  1
43 #endif
44 
/*
 * Union of the trace masks currently in effect. Per-table masks are OR-ed
 * in when a table is configured or its mask is changed; setting the default
 * mask (cd == SDT_ANY_CD) overwrites it outright.
 */
int _sd_trace_mask = 0;

/*
 * _sdbc_trace_t _sd_trace_table[-1, 0 .. sdbc_max_devs - 1]
 *	allocate memory, shift pointer up by one.
 *
 * The pointer stored here is one element past the start of the allocation
 * so that index -1 is a valid slot; remember to step back by one element
 * before freeing.
 */
static _sdbc_trace_t	*_sd_trace_table;

/* signalled by _sd_alert, broadcast on deconfigure, waited on in _sd_adump */
static kcondvar_t	_sd_adump_cv;
/* set once the SDT_INV_CD slot has been configured, cleared on deconfigure */
static int _sd_trace_configed;
/* protects the wait on _sd_adump_cv and the clearing of _sd_trace_configed */
static kmutex_t  _sd_adump_lk;

/* hint: descriptor noted by the most recent _sd_alert(), or SDT_ANY_CD */
static int		_alert_cd = SDT_ANY_CD;
/* descriptor returned by the last table scan; scans resume just after it */
static int		_last_cd = SDT_ANY_CD;
/*
 * XMEM(x, y): copy y into x, then reset y to SDT_ANY_CD.
 * Implemented with plain assignments, so it is not an atomic exchange.
 */
#define	XMEM(x, y)	(void)(x = y, y = (SDT_ANY_CD), x)

/*
 * Forward declare all statics that are used before defined to enforce
 * parameter checking.
 * Some (if not all) of these could be removed if the code were reordered
 */

static int _sd_set_adump(int cd, int flag, _sdtr_table_t *table);
68 
69 /*
70  * _sdbc_tr_unload - cache is being unloaded. Release any memory/lock/sv's
71  * created by _sdbc_tr_unload and null the stale pointers.
72  *
73  */
74 void
_sdbc_tr_unload(void)75 _sdbc_tr_unload(void)
76 {
77 	if (_sd_trace_table)
78 		nsc_kmem_free((_sd_trace_table - 1),
79 		    sizeof (_sdbc_trace_t) * (sdbc_max_devs + 1));
80 	cv_destroy(&_sd_adump_cv);
81 	mutex_destroy(&_sd_adump_lk);
82 
83 	_sd_trace_table = NULL;
84 }
85 
86 /*
87  * _sdbc_tr_load - cache is being loaded. Allocate the memory/lock/sv's
88  * which need to be present regardless of state of cache configuration.
89  *
90  */
91 int
_sdbc_tr_load(void)92 _sdbc_tr_load(void)
93 {
94 	_sdbc_trace_t *m;
95 
96 	cv_init(&_sd_adump_cv, NULL, CV_DRIVER, NULL);
97 	mutex_init(&_sd_adump_lk, NULL, MUTEX_DRIVER, NULL);
98 
99 	/*
100 	 * this maybe ought to wait to see if traces are configured, but it
101 	 * is only 4k
102 	 */
103 
104 	m = (_sdbc_trace_t *)nsc_kmem_zalloc(
105 	    sizeof (_sdbc_trace_t) * (sdbc_max_devs + 1),
106 	    KM_NOSLEEP, sdbc_stats_mem);
107 
108 	if (m == NULL) {
109 		cmn_err(CE_WARN,
110 		    "sdbc(_sdbc_tr_load) cannot allocate trace table");
111 		return (-1);
112 	}
113 	_sd_trace_table = m + 1;
114 
115 	return (0);
116 
117 }
118 
119 /*
120  * _sdbc_tr_configure - configure a trace area for the descriptor "cd".
121  * Unlike other ..._configure routines this routine is called multiple
122  * times since there will be an unknown number of open descriptors. At
123  * cache config time if tracing is enabled only the slot for SDT_INV_CD
124  * is created.
125  *
126  * Allocate the SD cache trace area (per device)
127  */
128 
129 int
_sdbc_tr_configure(int cd)130 _sdbc_tr_configure(int cd)
131 {
132 	int size;
133 	_sdtr_table_t *t;
134 	kmutex_t *lk;
135 
136 	if (!_sd_cache_config.trace_size)
137 		return (0);
138 
139 	if (cd == SDT_INV_CD)
140 		_sd_trace_configed = 1;
141 
142 	if (_sd_trace_table[cd].tbl)
143 		return (0);
144 
145 	size = sizeof (_sdtr_table_t) +
146 	    _sd_cache_config.trace_size * sizeof (_sdtr_t);
147 
148 	if ((t = (_sdtr_table_t *)nsc_kmem_zalloc(size,
149 	    KM_NOSLEEP, sdbc_stats_mem)) == NULL) {
150 		cmn_err(CE_WARN, "sdbc(_sdbc_tr_configure) failed to "
151 		    "allocate %d bytes for trace, cd=%d", size, cd);
152 		return (-1);
153 	}
154 
155 	lk = nsc_kmem_zalloc(sizeof (kmutex_t), KM_NOSLEEP, sdbc_local_mem);
156 	if (!lk) {
157 		nsc_kmem_free(t, size);
158 		cmn_err(CE_WARN, "sdbc(_sdbc_tr_configure) cannot "
159 		    "alloc trace lock for cd %d", cd);
160 		return (-1);
161 	}
162 	mutex_init(lk, NULL, MUTEX_DRIVER, NULL);
163 
164 	_sd_trace_table[cd].t_lock = lk;
165 	t->tt_cd   = cd;
166 	t->tt_max  = _sd_cache_config.trace_size;
167 	t->tt_mask = _sd_cache_config.trace_mask;
168 	t->tt_lbolt = (char)_sd_cache_config.trace_lbolt;
169 	t->tt_good = (char)_sd_cache_config.trace_good;
170 	_sd_trace_mask |= t->tt_mask;
171 	_sd_trace_table[cd].tbl = t;
172 	return (0);
173 }
174 
175 
176 /*
177  * _sdbc_tr_deconfigure
178  *	free all trace memory (regions) when deconfiguring cache
179  */
180 void
_sdbc_tr_deconfigure(void)181 _sdbc_tr_deconfigure(void)
182 {
183 	int i, size;
184 	_sdbc_trace_t *tt;
185 
186 	if (!_sd_cache_config.trace_size || !_sd_trace_configed)
187 		return;
188 
189 	mutex_enter(&_sd_adump_lk);
190 	_sd_trace_configed = 0;
191 	cv_broadcast(&_sd_adump_cv);
192 	mutex_exit(&_sd_adump_lk);
193 
194 	for (i = -1, tt = &_sd_trace_table[-1]; i < sdbc_max_devs; i++, tt++) {
195 		if (tt->tbl == NULL) continue;
196 		size = tt->tbl->tt_max * sizeof (_sdtr_t) +
197 		    sizeof (_sdtr_table_t);
198 		if (tt->t_lock) {
199 			mutex_destroy(tt->t_lock);
200 			nsc_kmem_free(tt->t_lock, sizeof (kmutex_t));
201 		}
202 		nsc_kmem_free(tt->tbl, size);
203 		tt->t_lock = NULL;
204 		tt->tbl = NULL;
205 	}
206 	_alert_cd = SDT_ANY_CD;
207 	_last_cd = SDT_ANY_CD;
208 }
209 
210 static int first_alert = 0;
211 /*
212  * SDALERT(f,cd,len,fba,flg,ret) \
213  *	_sd_alert(f,cd,len,fba,flg,ret)
214  *  Build a ALERT trace entry and place it into the trace table.
215  */
216 void
_sd_alert(int f,int cd,int len,nsc_off_t fba,int flg,int ret)217 _sd_alert(int f, int cd, int len, nsc_off_t fba, int flg, int ret)
218 {
219 	int tin;
220 	_sdtr_t *tp;
221 	_sdtr_table_t *t;
222 	kmutex_t *lk;
223 
224 	if (!first_alert) {
225 		first_alert++;
226 		cmn_err(CE_WARN,
227 		    "sdbc(_sd_alert) cd=%x f=%x len=%x fba=%" NSC_SZFMT
228 		    " flg=%x ret=%x", cd, f, len, fba, flg, ret);
229 
230 	}
231 
232 	/* Watch out for negative error codes or simply bogus cd's */
233 
234 	if (cd < -1 || cd >= sdbc_max_devs) {
235 		/*
236 		 * no device trace buffer -- use SDT_INV_CD table?
237 		 */
238 		if ((t = _sd_trace_table[-1].tbl) == NULL)
239 			return;
240 		lk = _sd_trace_table[-1].t_lock;
241 	} else {
242 		lk = _sd_trace_table[cd].t_lock;
243 		if ((t = _sd_trace_table[cd].tbl) == NULL) {
244 			/*
245 			 * no device trace buffer -- use SDT_INV_CD table?
246 			 */
247 			if ((t = _sd_trace_table[-1].tbl) == NULL)
248 				return;
249 			lk = _sd_trace_table[-1].t_lock;
250 		}
251 	}
252 
253 	if (!(t->tt_mask & ST_ALERT))
254 		return;	/* check per-device mask */
255 
256 	if (t->tt_good) mutex_enter(lk);
257 	t->tt_alert++;	/* alert on this device */
258 	t->tt_cnt++;	/* overwritten entries if (tt_cnt >= tt_max) */
259 
260 	tin = t->tt_in++;
261 	if (tin >= t->tt_max) tin = t->tt_in = 0;
262 	tp = &t->tt_buf[tin];
263 	tp->t_time = 0;		/* not filled in yet */
264 	if (t->tt_good) mutex_exit(lk);
265 
266 	tp->t_func = (ushort_t)f | ST_ALERT;
267 	tp->t_len = (ushort_t)len;
268 	tp->t_fba = fba;
269 	tp->t_flg = flg;
270 	tp->t_ret = ret;
271 	/*
272 	 * On LP64 systems we will only capture the low 32 bits of the
273 	 * time this really should be good enough for our purposes.
274 	 *
275 	 */
276 	if (t->tt_lbolt)
277 		tp->t_time = (int)nsc_lbolt();
278 	else
279 		tp->t_time = (int)nsc_usec();
280 
281 	/* wakeup trace daemon, with hint */
282 	_alert_cd = cd;
283 
284 	if (_sd_trace_configed)
285 		cv_signal(&_sd_adump_cv);
286 }
287 
288 
289 /*
290  * SDTRACE(f,cd,len,fba,flg,ret) \
291  *	if (_sd_trace_mask & (f)) _sd_trace(f,cd,len,fba,flg,ret)
292  *  Build a trace entry and place it into the trace table.
293  */
294 void
_sd_trace(int f,int cd,int len,nsc_off_t fba,int flg,int ret)295 _sd_trace(int f, int cd, int len, nsc_off_t fba, int flg, int ret)
296 {
297 	int tin;
298 	_sdtr_t *tp;
299 	_sdtr_table_t *t;
300 	kmutex_t *lk;
301 
302 	/* Watch out for negative error codes or simply bogus cd's */
303 
304 	if (cd < -1 || cd >= sdbc_max_devs) {
305 		/*
306 		 * no device trace buffer -- use SDT_INV_CD table?
307 		 */
308 		if ((t = _sd_trace_table[-1].tbl) == NULL)
309 			return;
310 		lk = _sd_trace_table[-1].t_lock;
311 	} else {
312 		lk = _sd_trace_table[cd].t_lock;
313 		if ((t = _sd_trace_table[cd].tbl) == NULL)
314 			return;
315 	}
316 
317 	if (!(t->tt_mask & f))
318 		return;	/* check per-device mask */
319 
320 	/*
321 	 * Don't overwrite if alert signaled (count lost instead)
322 	 * Locking only if 'trace_good' parameter set.
323 	 */
324 	if (t->tt_good) mutex_enter(lk);
325 	if (t->tt_alert && (t->tt_cnt >= t->tt_max)) {
326 		t->tt_lost++; /* lost during alert */
327 		if (t->tt_good) mutex_exit(lk);
328 		return;
329 	}
330 	t->tt_cnt++;	/* overwritten entries if (tt_cnt >= tt_max) */
331 
332 	tin = t->tt_in++;
333 	if (tin >= t->tt_max) tin = t->tt_in = 0;
334 	tp = &t->tt_buf[tin];
335 	tp->t_time = 0;		/* not filled in yet */
336 	if (t->tt_good) mutex_exit(lk);
337 
338 	tp->t_func = (ushort_t)f;
339 	tp->t_len = (ushort_t)len;
340 	tp->t_fba = fba;
341 	tp->t_flg = flg;
342 	tp->t_ret = ret;
343 	/*
344 	 * On LP64 systems we will only capture the low 32 bits of the
345 	 * time this really should be good enough for our purposes.
346 	 *
347 	 */
348 	if (t->tt_lbolt)
349 		tp->t_time = (int)nsc_lbolt();
350 	else
351 		tp->t_time = (int)nsc_usec();
352 }
353 
354 /*
355  * _sd_scan_alert -- search for device with trace alert
356  */
357 static int
_sd_scan_alert(void)358 _sd_scan_alert(void)
359 {
360 	int cd;
361 
362 	XMEM(cd, _alert_cd);
363 	if ((cd != SDT_ANY_CD) && _sd_trace_table[cd].tbl->tt_alert)
364 		return (cd);
365 	for (cd = _last_cd + 1; cd < sdbc_max_devs; cd++)
366 		if (_sd_trace_table[cd].tbl &&
367 		    _sd_trace_table[cd].tbl->tt_alert)
368 			return (_last_cd = cd);
369 	for (cd = SDT_INV_CD; cd <= _last_cd; cd++)
370 		if (_sd_trace_table[cd].tbl &&
371 		    _sd_trace_table[cd].tbl->tt_alert)
372 			return (_last_cd = cd);
373 	return (SDT_ANY_CD);
374 }
375 
376 /*
377  * _sd_scan_entries -- search for next device with trace entries
378  */
379 static int
_sd_scan_entries(void)380 _sd_scan_entries(void)
381 {
382 	int cd;
383 
384 	for (cd = _last_cd + 1; cd < sdbc_max_devs; cd++)
385 		if (_sd_trace_table[cd].tbl && _sd_trace_table[cd].tbl->tt_cnt)
386 			return (_last_cd = cd);
387 	for (cd = SDT_INV_CD; cd <= _last_cd; cd++)
388 		if (_sd_trace_table[cd].tbl && _sd_trace_table[cd].tbl->tt_cnt)
389 			return (_last_cd = cd);
390 	return (SDT_ANY_CD);
391 }
392 
393 
394 /*
395  * _sd_adump
396  *	copy information about new trace records to trace daemon,
397  *	or modify trace parameters.
398  *
399  * Some tracing parameters can be modified
400  * [Either per-device if cd specified, or the defaults if cd = SDT_ANY_CD]
401  *  SD_LOGSIZE:   table.tt_max (size for future opens)
402  *  SD_SET_LBOLT: table.tt_lbolt
403  *  SD_SET_MASK:  table.tt_mask
404  *  SD_SET_GOOD:  table.tt_good
405  *
406  * if (cd >= 0) dump specific device records;
407  * if (cd == SDT_INV_CD) dump records which don't apply to any one device.
408  * if (cd == SDT_ANY_CD), then choose a device:
409  *	1) most recent alert, block if (flag & SD_ALERT_WAIT)
410  *	2) "next" device with unprocessed records.
411  */
412 int
_sd_adump(void * args,int * rvp)413 _sd_adump(void *args, int *rvp)
414 {
415 	struct a {
416 		long cd;
417 		_sdtr_table_t *table;
418 		_sdtr_t *buf;
419 		long size;
420 		long flag;
421 	} *uap = (struct a *)args;
422 	_sdtr_t *ubuf;
423 	_sdtr_table_t tt, *t;
424 	kmutex_t *lk;
425 	int cd, count, lost, new_cnt;
426 
427 	if (uap->flag & (SD_SET_SIZE|SD_SET_MASK|SD_SET_LBOLT|SD_SET_GOOD)) {
428 		return (_sd_set_adump(uap->cd, uap->flag, uap->table));
429 	}
430 	if (! _sd_trace_configed) {
431 		return (EINVAL); /* not initialized yet */
432 	}
433 	if (uap->cd >= SDT_INV_CD) {
434 		/* specific device: check if configured. dump current state. */
435 		if ((uap->cd > (long)sdbc_max_devs) ||
436 		    !(t = _sd_trace_table[uap->cd].tbl)) {
437 			return (ENOSPC); /* no space configured */
438 		}
439 		lk = _sd_trace_table[uap->cd].t_lock;
440 		cd = uap->cd;
441 	} else {
442 		/*
443 		 * SDT_ANY_CD:
444 		 * SD_ALERT_WAIT - wait for alert
445 		 */
446 	scan:
447 		if ((cd = _sd_scan_alert()) != SDT_ANY_CD)
448 			goto dump;
449 		if ((uap->flag & SD_ALERT_WAIT)) {
450 			mutex_enter(&_sd_adump_lk);
451 			if (!_sd_trace_configed) {
452 				mutex_exit(&_sd_adump_lk);
453 				return (EINVAL);
454 			}
455 
456 			if (!cv_wait_sig(&_sd_adump_cv, &_sd_adump_lk)) {
457 				mutex_exit(&_sd_adump_lk);
458 				return (EINTR);
459 			}
460 			mutex_exit(&_sd_adump_lk);
461 
462 			if (!_sd_trace_configed || !_sd_cache_initialized) {
463 				return (EIDRM);
464 			}
465 			goto scan;
466 		}
467 		/* any device with entries */
468 		if ((cd = _sd_scan_entries()) == SDT_INV_CD)
469 			return (0);		/* no new entries */
470 
471 	dump:
472 		lk = _sd_trace_table[cd].t_lock;
473 		if ((t = _sd_trace_table[cd].tbl) == NULL) {
474 			if (uap->flag & SD_ALERT_WAIT) {
475 				t = _sd_trace_table[-1].tbl;
476 				lk = _sd_trace_table[-1].t_lock;
477 			} else {
478 				return (ENOSPC); /* no space configured */
479 			}
480 		}
481 	}
482 
483 	/*
484 	 * take a snapshot of the table state
485 	 */
486 	if (t->tt_good)
487 		mutex_enter(lk);
488 	tt = *t;
489 	if (t->tt_good)
490 		mutex_exit(lk);
491 
492 	/*
493 	 * copy trace log entries to daemon
494 	 *
495 	 * size:   entries in user-level 'buf'
496 	 * count:  how many entries to copy [force count <= size]
497 	 * tt_max: size of kernel buffer
498 	 * tt_cnt: written entries [lossage if tt_cnt > tt_max]
499 	 * cnt:    for wrap-around calculations
500 	 */
501 	if ((count = tt.tt_cnt) > tt.tt_max) { /* lost from beginning */
502 		tt.tt_out = tt.tt_in;
503 		count = tt.tt_max;
504 		lost = tt.tt_cnt - tt.tt_max;
505 	} else
506 		lost = 0;
507 	if (count <= 0)
508 		return (0);
509 	if ((long)count > uap->size)
510 		count = uap->size;
511 	ubuf = uap->buf;
512 	if ((tt.tt_out + count) > tt.tt_max) {
513 		int cnt = tt.tt_max - tt.tt_out;
514 		if (cnt > count)
515 			cnt = count;
516 		if (copyout(&(t->tt_buf[tt.tt_out]), ubuf,
517 		    cnt * sizeof (_sdtr_t))) {
518 			return (EFAULT);
519 		}
520 		ubuf += cnt;
521 		cnt = count - cnt;
522 		if (copyout(&(t->tt_buf[0]), ubuf, cnt * sizeof (_sdtr_t))) {
523 			return (EFAULT);
524 		}
525 		tt.tt_out = cnt;
526 	} else {
527 		if (copyout(&(t->tt_buf[tt.tt_out]), ubuf,
528 		    count * sizeof (_sdtr_t))) {
529 			return (EFAULT);
530 		}
531 		tt.tt_out += count;
532 		if (tt.tt_out == tt.tt_max)
533 			tt.tt_out = 0;
534 	}
535 
536 	/*
537 	 * tt_alert uses fuzzy counting.
538 	 * if multiple alerts signaled, leave it at 1.
539 	 */
540 	if (t->tt_alert)
541 		t->tt_alert = (t->tt_alert > 1) ? 1 : 0;
542 
543 	/*
544 	 * tt_cntout is tt_cnt after dump
545 	 * update tt_cnt for copied entries
546 	 */
547 	if (t->tt_good)
548 		mutex_enter(lk);
549 	tt.tt_cntout = t->tt_cnt;
550 	t->tt_out = tt.tt_out;
551 	new_cnt = t->tt_cnt;
552 	if ((new_cnt -= count+lost) < 0)
553 		new_cnt = 0;
554 	t->tt_cnt = new_cnt;	/* race with new traces if not "tt_good" */
555 	if (t->tt_good)
556 		mutex_exit(lk);
557 
558 	if (copyout(&tt, uap->table, sizeof (tt) - sizeof (_sdtr_t))) {
559 		return (EFAULT);
560 	}
561 	*rvp = count;
562 
563 	first_alert = 0;
564 	return (0);
565 }
566 
567 
568 /* set size, mask, lbolt, or good(locks) */
569 static int
_sd_set_adump(int cd,int flag,_sdtr_table_t * table)570 _sd_set_adump(int cd, int flag, _sdtr_table_t *table)
571 {
572 	_sdtr_table_t tt, *t;
573 
574 	if (copyin(table, &tt, sizeof (tt) - sizeof (_sdtr_t))) {
575 		return (EFAULT);
576 	}
577 	if (cd == SDT_ANY_CD) {		/* modify config parameter */
578 		if (flag & SD_SET_SIZE)
579 			_sd_cache_config.trace_size = tt.tt_max;
580 		if (flag & SD_SET_MASK) {
581 			_sd_cache_config.trace_mask = tt.tt_mask;
582 			/* explicitly set global mask, not bitwise or */
583 			_sd_trace_mask = tt.tt_mask;
584 		}
585 		if (flag & SD_SET_LBOLT)
586 			_sd_cache_config.trace_lbolt = tt.tt_lbolt;
587 		if (flag & SD_SET_GOOD)
588 			_sd_cache_config.trace_good = tt.tt_good;
589 		return (0);
590 	}
591 	if (flag & SD_SET_SIZE)
592 		_sd_cache_config.trace_size = tt.tt_max;
593 	/* modify particular device parameters */
594 	if (!_sd_trace_table[cd].tbl)
595 		(void) _sdbc_tr_configure(cd);
596 	if ((t = _sd_trace_table[cd].tbl) == NULL)
597 		return (0);
598 	if (flag & SD_SET_MASK) {
599 		t->tt_mask = tt.tt_mask;
600 		_sd_trace_mask |= tt.tt_mask; /* or-ed with global mask */
601 	}
602 	if (flag & SD_SET_LBOLT)
603 		t->tt_lbolt = tt.tt_lbolt;
604 	if (flag & SD_SET_GOOD)
605 		t->tt_good = tt.tt_good;
606 	if (copyout(t, table, sizeof (*t) - sizeof (_sdtr_t))) {
607 		return (EFAULT);
608 	}
609 	return (0);
610 }
611 
612 #else /* ! _SD_NOTRACE */
613 
/*
 * Tracing compiled out (_SD_NOTRACE): provide no-op entry points with the
 * same signatures as the real implementations so callers build unchanged.
 */

/* trace dump/control request: not supported in this configuration */
/*ARGSUSED*/
int
_sd_adump(void *args, int *rvp)
{
	return (ENOSYS);
}

/* nothing to set up at load time */
int
_sdbc_tr_load(void)
{
	return (0);
}

/* no per-descriptor trace area to create */
/*ARGSUSED*/
int
_sdbc_tr_configure(int cd)
{
	return (0);
}

/* nothing to free on deconfigure */
void
_sdbc_tr_deconfigure(void)
{
}

/* nothing to release on unload */
void
_sdbc_tr_unload(void)
{
}
619 
620 #endif /* ! _SD_NOTRACE */
621