xref: /titanic_50/usr/src/cmd/latencytop/common/dwrapper.c (revision efd31e1d839d4665462b5c267a1c654548082663)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008-2009, Intel Corporation.
23  * All Rights Reserved.
24  */
25 
26 #include <unistd.h>
27 #include <stdio.h>
28 #include <dtrace.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include <memory.h>
32 #include <limits.h>
33 
34 #include "latencytop.h"
35 
36 static dtrace_hdl_t *g_dtp = NULL;	/* dtrace handle */
37 static pid_t pid_self = -1;		/* PID of our own process */
38 
39 /*
40  * Ignore sched if sched is not tracked.
41  * Also ignore ourselves (i.e., latencytop).
42  */
43 #define	SHOULD_IGNORE(pid)		\
44 	((!g_config.lt_cfg_trace_sched && 0 == (pid)) || pid_self == (pid))
45 
/*
 * Get an integer value from dtrace record.
 *
 * `a' points at the raw bytes of the value and `b' is its width in bytes.
 * Widths of 1, 2, 4 and 8 bytes are read as unsigned integers in host
 * representation and zero-extended to 64 bits. Any other width, or a NULL
 * pointer, yields 0.
 *
 * The bytes are copied out with memcpy() instead of being read through a
 * cast pointer, so the value does not need to be naturally aligned.
 */
static uint64_t
rec_get_value(void *a, size_t b)
{
	uint64_t v64;
	uint32_t v32;
	uint16_t v16;
	uint8_t v8;

	if (a == NULL) {
		return (0);
	}

	switch (b) {
	case sizeof (uint64_t):
		(void) memcpy(&v64, a, sizeof (v64));
		return (v64);
	case sizeof (uint32_t):
		(void) memcpy(&v32, a, sizeof (v32));
		return (v32);
	case sizeof (uint16_t):
		(void) memcpy(&v16, a, sizeof (v16));
		return (v16);
	case sizeof (uint8_t):
		(void) memcpy(&v8, a, sizeof (v8));
		return (v8);
	default:
		/* Unsupported width: report 0 rather than guessing */
		return (0);
	}
}
73 
74 /*
75  * Callback to process aggregation lt_call_* (related to on/off cpu
76  * activities) in the snapshot.
77  */
78 static int
79 aggwalk_call(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
80 {
81 	dtrace_aggdesc_t *aggdesc = data->dtada_desc;
82 	dtrace_syminfo_t dts;
83 	GElf_Sym sym;
84 	caddr_t addr;
85 	pid_t pid;
86 	id_t tid;
87 	unsigned int stack_depth;
88 	unsigned int pc_size;
89 	uint64_t pc;
90 	uint64_t agg_value;
91 	char *ptr = NULL;
92 	char *buffer = NULL;
93 	int ptrsize;
94 	unsigned int buffersize;
95 	char *tag = NULL;
96 	unsigned int priority;
97 	enum { REC_PID = 1, REC_TID, REC_STACK, REC_TAG, REC_PRIO, REC_AGG,
98 	    NREC };
99 
100 	/* Check action type */
101 	if ((aggdesc->dtagd_nrecs < NREC) ||
102 	    (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) ||
103 	    (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) ||
104 	    (aggdesc->dtagd_rec[REC_TAG].dtrd_action != DTRACEACT_DIFEXPR) ||
105 	    (aggdesc->dtagd_rec[REC_PRIO].dtrd_action != DTRACEACT_DIFEXPR) ||
106 	    (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action)) ||
107 	    (aggdesc->dtagd_rec[REC_STACK].dtrd_action != DTRACEACT_STACK)) {
108 
109 		return (-1);
110 	}
111 
112 	pid = rec_get_value(
113 	    data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset,
114 	    aggdesc->dtagd_rec[REC_PID].dtrd_size);
115 
116 	if (SHOULD_IGNORE(pid)) {
117 		return (0);
118 	}
119 
120 	tid = rec_get_value(
121 	    data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset,
122 	    aggdesc->dtagd_rec[REC_TID].dtrd_size);
123 
124 	/* Parse stack array from dtagd_rec */
125 	stack_depth = aggdesc->dtagd_rec[REC_STACK].dtrd_arg;
126 	pc_size = aggdesc->dtagd_rec[REC_STACK].dtrd_size / stack_depth;
127 	addr = data->dtada_data + aggdesc->dtagd_rec[REC_STACK].dtrd_offset;
128 	buffersize = (stack_depth * (2 * PATH_MAX + 2) + 1) * sizeof (char);
129 	buffer = (char *)lt_malloc(buffersize);
130 	ptr = buffer;
131 	ptrsize = buffersize;
132 
133 	/* Print the stack */
134 	while (stack_depth > 0) {
135 		pc = rec_get_value(addr, pc_size);
136 
137 		if (pc == 0) {
138 			break;
139 		}
140 
141 		addr += pc_size;
142 
143 		if (dtrace_lookup_by_addr(g_dtp, pc, &sym, &dts) == 0) {
144 			int len;
145 			len = snprintf(ptr, ptrsize,
146 			    "%s`%s ", dts.dts_object, dts.dts_name);
147 			ptrsize -= len;
148 
149 			if (ptrsize <= 0) {
150 				/*
151 				 * snprintf returns "desired" length, so
152 				 * reaching here means our buffer is full.
153 				 * Move ptr to the last byte of the buffer and
154 				 * break.
155 				 */
156 				ptr = &buffer[buffersize-1];
157 				break;
158 			} else {
159 				ptr += len;
160 			}
161 		}
162 	}
163 
164 	if (ptr != buffer) {
165 		/*
166 		 * We have printed something, so it is safe to remove
167 		 * the last ' '.
168 		 */
169 		*(ptr-1) = '\0';
170 	}
171 
172 	tag = (char *)data->dtada_data +
173 	    aggdesc->dtagd_rec[REC_TAG].dtrd_offset;
174 
175 	priority = rec_get_value(
176 	    data->dtada_data + aggdesc->dtagd_rec[REC_PRIO].dtrd_offset,
177 	    aggdesc->dtagd_rec[REC_PRIO].dtrd_size);
178 
179 	agg_value = rec_get_value(
180 	    data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset,
181 	    aggdesc->dtagd_rec[REC_AGG].dtrd_size);
182 
183 	lt_stat_update(pid, tid, buffer, tag, priority, stat_type, agg_value);
184 
185 	if (buffer != NULL)  {
186 		free(buffer);
187 	}
188 
189 	return (0);
190 }
191 
192 /*
193  * Callback to process aggregation lt_named_* (related to lock spinning etc.),
194  * in the snapshot.
195  */
196 static int
197 aggwalk_named(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
198 {
199 	dtrace_aggdesc_t *aggdesc = data->dtada_desc;
200 	pid_t pid;
201 	id_t tid;
202 	uint64_t agg_value;
203 	int cause_id;
204 	char *type = NULL;
205 	enum { REC_PID = 1, REC_TID, REC_TYPE, REC_AGG, NREC };
206 
207 	/* Check action type */
208 	if ((aggdesc->dtagd_nrecs < NREC) ||
209 	    (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) ||
210 	    (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) ||
211 	    (aggdesc->dtagd_rec[REC_TYPE].dtrd_action != DTRACEACT_DIFEXPR) ||
212 	    (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action))) {
213 
214 		return (-1);
215 	}
216 
217 	pid = rec_get_value(
218 	    data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset,
219 	    aggdesc->dtagd_rec[REC_PID].dtrd_size);
220 
221 	if (SHOULD_IGNORE(pid)) {
222 		return (0);
223 	}
224 
225 	tid = rec_get_value(
226 	    data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset,
227 	    aggdesc->dtagd_rec[REC_TID].dtrd_size);
228 
229 	type = (char *)data->dtada_data
230 	    + aggdesc->dtagd_rec[REC_TYPE].dtrd_offset;
231 	cause_id = lt_table_cause_from_name(type, 1, CAUSE_FLAG_SPECIAL);
232 
233 	agg_value = rec_get_value(
234 	    data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset,
235 	    aggdesc->dtagd_rec[REC_AGG].dtrd_size);
236 
237 	lt_stat_update_cause(pid, tid, cause_id, stat_type, agg_value);
238 
239 	return (0);
240 
241 }
242 
243 /*
244  * Callback to process aggregation lt_sync_* (related to synchronization
245  * objects), in the snapshot.
246  */
247 static int
248 aggwalk_sync(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
249 {
250 	dtrace_aggdesc_t *aggdesc = data->dtada_desc;
251 	pid_t pid;
252 	id_t tid;
253 	uint64_t agg_value;
254 	int stype;
255 	unsigned long long wchan;
256 	enum { REC_PID = 1, REC_TID, REC_STYPE, REC_WCHAN, REC_AGG, NREC };
257 
258 	/* Check action type */
259 	if ((aggdesc->dtagd_nrecs < NREC) ||
260 	    (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) ||
261 	    (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) ||
262 	    (aggdesc->dtagd_rec[REC_STYPE].dtrd_action != DTRACEACT_DIFEXPR) ||
263 	    (aggdesc->dtagd_rec[REC_WCHAN].dtrd_action != DTRACEACT_DIFEXPR) ||
264 	    (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action))) {
265 
266 		return (-1);
267 	}
268 
269 	pid = rec_get_value(
270 	    data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset,
271 	    aggdesc->dtagd_rec[REC_PID].dtrd_size);
272 
273 	if (SHOULD_IGNORE(pid)) {
274 		return (0);
275 	}
276 
277 	tid = rec_get_value(
278 	    data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset,
279 	    aggdesc->dtagd_rec[REC_TID].dtrd_size);
280 
281 	stype = rec_get_value(
282 	    data->dtada_data + aggdesc->dtagd_rec[REC_STYPE].dtrd_offset,
283 	    aggdesc->dtagd_rec[REC_STYPE].dtrd_size);
284 
285 	wchan = rec_get_value(
286 	    data->dtada_data + aggdesc->dtagd_rec[REC_WCHAN].dtrd_offset,
287 	    aggdesc->dtagd_rec[REC_WCHAN].dtrd_size);
288 
289 	agg_value = rec_get_value(
290 	    data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset,
291 	    aggdesc->dtagd_rec[REC_AGG].dtrd_size);
292 
293 	lt_stat_update_sobj(pid, tid, stype, wchan, stat_type, agg_value);
294 
295 	return (0);
296 }
297 
298 /*
299  * Callback to process various aggregations in the snapshot. Called by
300  * different aggwalk_* functions.
301  */
302 /* ARGSUSED */
303 static int
304 aggwalk(const dtrace_aggdata_t *data, void *arg)
305 {
306 	char *tmp;
307 	char buffer[32];
308 	lt_stat_type_t stat_type;
309 	int (*func)(const dtrace_aggdata_t *, lt_stat_type_t);
310 
311 	(void) strncpy(buffer, data->dtada_desc->dtagd_name, sizeof (buffer));
312 	buffer[sizeof (buffer) - 1] = '\0';
313 	tmp = strtok(buffer, "_");
314 
315 	if (tmp == NULL || strcmp(tmp, "lt") != 0) {
316 		goto done;
317 	}
318 
319 	tmp = strtok(NULL, "_");
320 
321 	if (tmp == NULL) {
322 		goto done;
323 	} else if (strcmp(tmp, "call") == 0) {
324 		func = aggwalk_call;
325 	} else if (strcmp(tmp, "named") == 0) {
326 		func = aggwalk_named;
327 	} else if (strcmp(tmp, "sync") == 0) {
328 		func = aggwalk_sync;
329 	} else {
330 		goto done;
331 	}
332 
333 	tmp = strtok(NULL, "_");
334 
335 	if (tmp == NULL) {
336 		goto done;
337 	} else if (strcmp(tmp, "count") == 0) {
338 		stat_type = LT_STAT_COUNT;
339 	} else if (strcmp(tmp, "sum") == 0) {
340 		stat_type = LT_STAT_SUM;
341 	} else if (strcmp(tmp, "max") == 0) {
342 		stat_type = LT_STAT_MAX;
343 	} else {
344 		goto done;
345 	}
346 
347 	(void) func(data, stat_type);
348 
349 done:
350 	/* We have our data, so remove it from DTrace now */
351 	return (DTRACE_AGGWALK_REMOVE);
352 }
353 
354 /*
355  * Callback to handle event caused by DTrace dropping data.
356  */
357 /*ARGSUSED*/
358 static int
359 drop_handler(const dtrace_dropdata_t *data, void *user)
360 {
361 	lt_display_error("Drop: %s\n", data->dtdda_msg);
362 	lt_drop_detected = B_TRUE;
363 
364 	/* Pretend nothing happened, so just continue */
365 	return (DTRACE_HANDLE_OK);
366 }
367 
#ifndef EMBED_CONFIGS
/*
 * Copy the content from a "real" file into a temp file.
 *
 * `src' is the path of the file to read and `dst' is an already open stream
 * that receives its content at the current position. `dst' is neither
 * rewound nor closed here.
 *
 * Returns 0 when the whole file was copied, and -1 if `src' cannot be
 * opened, if reading it fails, or if writing to `dst' fails. The source
 * stream is closed on every path.
 */
static int
copy_tmp_file(const char *src, FILE *dst)
{
	FILE *tmp = NULL;
	char buffer[256];
	size_t bytes;
	int ret = 0;

	if ((tmp = fopen(src, "r")) == NULL) {
		return (-1);
	}

	while ((bytes = fread(buffer, 1, sizeof (buffer), tmp)) > 0) {
		if (fwrite(buffer, 1, bytes, dst) != bytes) {
			ret = -1;
			break;
		}
	}

	/* fread() returns 0 for both EOF and error; tell them apart */
	if (ferror(tmp)) {
		ret = -1;
	}

	(void) fclose(tmp);

	return (ret);
}
#endif
394 
395 /*
396  * DTrace initialization. D script starts running when this function returns.
397  */
398 int
399 lt_dtrace_init(void)
400 {
401 	dtrace_prog_t *prog;
402 	dtrace_proginfo_t info;
403 	int err;
404 	FILE *fp_script = NULL;
405 	char tmp[64];
406 
407 	pid_self = getpid();
408 
409 	if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
410 		lt_display_error("Cannot open dtrace library: %s\n",
411 		    dtrace_errmsg(NULL, err));
412 		return (-1);
413 	}
414 
415 	if (dtrace_handle_drop(g_dtp, &drop_handler, NULL) == -1) {
416 		lt_display_error("Cannot install DTrace handle: %s\n",
417 		    dtrace_errmsg(NULL, err));
418 		return (-1);
419 	}
420 
421 	if (g_config.lt_cfg_enable_filter) {
422 		if ((err = dtrace_setopt(g_dtp, "define",
423 		    "ENABLE_FILTER")) != 0) {
424 			lt_display_error(
425 			    "Failed to set option ENABLE_FILTER.\n");
426 			return (err);
427 		}
428 	}
429 
430 	if (g_config.lt_cfg_trace_syncobj) {
431 		if ((err = dtrace_setopt(g_dtp, "define",
432 		    "ENABLE_SYNCOBJ")) != 0) {
433 			lt_display_error(
434 			    "Failed to set option ENABLE_SYNCOBJ.\n");
435 			return (err);
436 		}
437 	}
438 
439 	if (g_config.lt_cfg_trace_sched) {
440 		if ((err = dtrace_setopt(g_dtp, "define",
441 		    "ENABLE_SCHED")) != 0) {
442 			lt_display_error(
443 			    "Failed to set option ENABLE_SCHED.\n");
444 			return (err);
445 		}
446 	}
447 
448 	if (g_config.lt_cfg_trace_pid != 0) {
449 		(void) snprintf(tmp, sizeof (tmp), "TRACE_PID=%u",
450 		    g_config.lt_cfg_trace_pid);
451 		if ((err = dtrace_setopt(g_dtp, "define", tmp)) != 0) {
452 			lt_display_error(
453 			    "Failed to set option TRACE_PID.\n");
454 			return (err);
455 		}
456 	}
457 
458 	if (g_config.lt_cfg_trace_pgid != 0) {
459 		(void) snprintf(tmp, sizeof (tmp), "TRACE_PGID=%u",
460 		    g_config.lt_cfg_trace_pgid);
461 		if ((err = dtrace_setopt(g_dtp, "define", tmp)) != 0) {
462 			lt_display_error(
463 			    "Failed to set option TRACE_PGID.\n");
464 			return (err);
465 		}
466 	}
467 
468 	if (g_config.lt_cfg_low_overhead_mode) {
469 		if ((err = dtrace_setopt(g_dtp, "define",
470 		    "ENABLE_LOW_OVERHEAD")) != 0) {
471 			lt_display_error(
472 			    "Failed to set option ENABLE_LOW_OVERHEAD.\n");
473 			return (err);
474 		}
475 	}
476 
477 	/* Create a temp file; libdtrace needs it for cpp(1) */
478 	if ((fp_script = tmpfile()) == NULL) {
479 		lt_display_error("Cannot create tmp file\n");
480 		return (-1);
481 	}
482 
483 	/* Copy the main D script into the temp file */
484 #ifdef EMBED_CONFIGS
485 	if (fwrite(&latencytop_d_start,
486 	    (size_t)(&latencytop_d_end - &latencytop_d_start), 1, fp_script)
487 	    != 1) {
488 		lt_display_error("Could not copy D script, fwrite() failed\n");
489 		(void) fclose(fp_script);
490 		return (-1);
491 	}
492 #else
493 	if (copy_tmp_file(DEFAULT_D_SCRIPT_NAME, fp_script) != 0) {
494 		lt_display_error("Cannot open script file %s\n",
495 		    DEFAULT_D_SCRIPT_NAME);
496 		(void) fclose(fp_script);
497 		return (-1);
498 	}
499 #endif	/* EMBED_CONFIGS */
500 
501 	if (lt_table_append_trans(fp_script) != 0) {
502 		(void) fclose(fp_script);
503 		return (-1);
504 	}
505 
506 	(void) fseek(fp_script, 0, SEEK_SET);
507 
508 	if ((prog = dtrace_program_fcompile(g_dtp, fp_script,
509 	    DTRACE_C_CPP, 0, NULL)) == NULL) {
510 		lt_display_error("Failed to compile D script.\n");
511 		(void) fclose(fp_script);
512 		return (dtrace_errno(g_dtp));
513 	}
514 
515 	(void) fclose(fp_script);
516 
517 	/* Execute the D script */
518 	if (dtrace_program_exec(g_dtp, prog, &info) == -1) {
519 		lt_display_error("Failed to enable probes.\n");
520 		return (dtrace_errno(g_dtp));
521 	}
522 
523 	if (dtrace_go(g_dtp) != 0) {
524 		lt_display_error("Failed to run D script.\n");
525 		return (dtrace_errno(g_dtp));
526 	}
527 
528 	return (0);
529 }
530 
531 /*
532  * Worker function to move aggregate data to user space. Called periodically
533  * to prevent the kernel from running out of memory.
534  */
535 int
536 lt_dtrace_work(int force)
537 {
538 	static uint64_t last_snap = 0;
539 	uint64_t now = lt_millisecond();
540 
541 	if (!force && now - last_snap < g_config.lt_cfg_snap_interval) {
542 		return (last_snap + g_config.lt_cfg_snap_interval - now);
543 	}
544 
545 	if (dtrace_status(g_dtp) == -1) {
546 		lt_display_error("Failed when getting status: %s\n",
547 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
548 		return (-1);
549 	}
550 
551 	if (dtrace_aggregate_snap(g_dtp) != 0) {
552 		lt_display_error("Failed to snap aggregate: %s\n",
553 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
554 		return (-1);
555 	}
556 
557 	last_snap = now;
558 	return (0);
559 }
560 
561 /*
562  * Walk through dtrace aggregator and collect data for latencytop to display.
563  * Called immediately before UI update.
564  */
565 int
566 lt_dtrace_collect(void)
567 {
568 	if (lt_dtrace_work(1) != 0) {
569 		return (-1);
570 	}
571 
572 	if (dtrace_aggregate_walk(g_dtp, aggwalk, NULL) != 0) {
573 		lt_display_error("Failed to sort aggregate: %s\n",
574 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
575 		return (-1);
576 	}
577 
578 	/*
579 	 * Probably we don't need to clear again, because we have removed
580 	 * everything. Paranoid ?
581 	 */
582 	dtrace_aggregate_clear(g_dtp);
583 
584 	return (0);
585 }
586 
587 /*
588  * dtrace clean up.
589  */
590 int
591 lt_dtrace_deinit(void)
592 {
593 	int ret = 0;
594 
595 	if (dtrace_stop(g_dtp) != 0) {
596 		lt_display_error("dtrace_stop failed: %s\n",
597 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
598 		ret = -1;
599 	}
600 
601 	dtrace_close(g_dtp);
602 
603 	return (ret);
604 }
605