xref: /titanic_50/usr/src/cmd/latencytop/common/dwrapper.c (revision 48bc00d6814e04ff3edb32cafe7d1bc580baff68)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008-2009, Intel Corporation.
23  * All Rights Reserved.
24  */
25 
26 #include <unistd.h>
27 #include <stdio.h>
28 #include <dtrace.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include <memory.h>
32 #include <limits.h>
33 
34 #include "latencytop.h"
35 
36 static dtrace_hdl_t *g_dtp = NULL;	/* dtrace handle */
37 static pid_t pid_self = -1;		/* PID of our own process */
38 
39 /*
40  * Ignore sched if sched is not tracked.
41  * Also ignore ourselves (i.e., latencytop).
42  */
43 #define	SHOULD_IGNORE(pid)		\
44 	((!g_config.lt_cfg_trace_sched && 0 == (pid)) || pid_self == (pid))
45 
/*
 * Get an integer value from dtrace record.
 *
 * Reads an unsigned integer that is `b' bytes wide from address `a' and
 * returns it widened to 64 bits.  Only widths of 1, 2, 4 and 8 bytes are
 * understood; any other width yields 0.
 *
 * The bytes are copied out with memcpy() instead of being read through a
 * cast pointer, so the access stays well defined even if the record is not
 * naturally aligned for its width.
 */
static uint64_t
rec_get_value(const void *a, size_t b)
{
	uint64_t v64;
	uint32_t v32;
	uint16_t v16;
	uint8_t v8;

	switch (b) {
	case sizeof (uint64_t):
		(void) memcpy(&v64, a, sizeof (v64));
		return (v64);
	case sizeof (uint32_t):
		(void) memcpy(&v32, a, sizeof (v32));
		return (v32);
	case sizeof (uint16_t):
		(void) memcpy(&v16, a, sizeof (v16));
		return (v16);
	case sizeof (uint8_t):
		(void) memcpy(&v8, a, sizeof (v8));
		return (v8);
	default:
		/* Unknown width: nothing sensible to decode */
		return (0);
	}
}
73 
74 /*
75  * Callback to process aggregation lt_call_* (related to on/off cpu
76  * activities) in the snapshot.
77  */
78 static int
79 aggwalk_call(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
80 {
81 	dtrace_aggdesc_t *aggdesc = data->dtada_desc;
82 	dtrace_syminfo_t dts;
83 	GElf_Sym sym;
84 	caddr_t addr;
85 	pid_t pid;
86 	id_t tid;
87 	unsigned int stack_depth;
88 	unsigned int pc_size;
89 	uint64_t pc;
90 	uint64_t agg_value;
91 	char *ptr = NULL;
92 	char *buffer = NULL;
93 	int ptrsize;
94 	unsigned int buffersize;
95 	char *tag = NULL;
96 	unsigned int priority;
97 	enum { REC_PID = 1, REC_TID, REC_STACK, REC_TAG, REC_PRIO, REC_AGG,
98 	    NREC };
99 
100 	/* Check action type */
101 	if ((aggdesc->dtagd_nrecs < NREC) ||
102 	    (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) ||
103 	    (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) ||
104 	    (aggdesc->dtagd_rec[REC_TAG].dtrd_action != DTRACEACT_DIFEXPR) ||
105 	    (aggdesc->dtagd_rec[REC_PRIO].dtrd_action != DTRACEACT_DIFEXPR) ||
106 	    (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action)) ||
107 	    (aggdesc->dtagd_rec[REC_STACK].dtrd_action != DTRACEACT_STACK)) {
108 
109 		return (-1);
110 	}
111 
112 	pid = rec_get_value(
113 	    data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset,
114 	    aggdesc->dtagd_rec[REC_PID].dtrd_size);
115 
116 	if (SHOULD_IGNORE(pid)) {
117 		return (0);
118 	}
119 
120 	tid = rec_get_value(
121 	    data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset,
122 	    aggdesc->dtagd_rec[REC_TID].dtrd_size);
123 
124 	/* Parse stack array from dtagd_rec */
125 	stack_depth = aggdesc->dtagd_rec[REC_STACK].dtrd_arg;
126 	pc_size = aggdesc->dtagd_rec[REC_STACK].dtrd_size / stack_depth;
127 	addr = data->dtada_data + aggdesc->dtagd_rec[REC_STACK].dtrd_offset;
128 	buffersize = (stack_depth * (2 * PATH_MAX + 2) + 1) * sizeof (char);
129 	buffer = (char *)lt_malloc(buffersize);
130 	ptr = buffer;
131 	ptrsize = buffersize;
132 
133 	/* Print the stack */
134 	while (stack_depth > 0) {
135 		pc = rec_get_value(addr, pc_size);
136 
137 		if (pc == 0) {
138 			break;
139 		}
140 
141 		addr += pc_size;
142 
143 		if (dtrace_lookup_by_addr(g_dtp, pc, &sym, &dts) == 0) {
144 			int len;
145 			len = snprintf(ptr, ptrsize,
146 			    "%s`%s ", dts.dts_object, dts.dts_name);
147 			ptrsize -= len;
148 
149 			if (ptrsize <= 0) {
150 				/*
151 				 * snprintf returns "desired" length, so
152 				 * reaching here means our buffer is full.
153 				 * Move ptr to the last byte of the buffer and
154 				 * break.
155 				 */
156 				ptr = &buffer[buffersize-1];
157 				break;
158 			} else {
159 				ptr += len;
160 			}
161 		}
162 	}
163 
164 	if (ptr != buffer) {
165 		/*
166 		 * We have printed something, so it is safe to remove
167 		 * the last ' '.
168 		 */
169 		*(ptr-1) = '\0';
170 	}
171 
172 	tag = (char *)data->dtada_data +
173 	    aggdesc->dtagd_rec[REC_TAG].dtrd_offset;
174 
175 	priority = rec_get_value(
176 	    data->dtada_data + aggdesc->dtagd_rec[REC_PRIO].dtrd_offset,
177 	    aggdesc->dtagd_rec[REC_PRIO].dtrd_size);
178 
179 	agg_value = rec_get_value(
180 	    data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset,
181 	    aggdesc->dtagd_rec[REC_AGG].dtrd_size);
182 
183 	lt_stat_update(pid, tid, buffer, tag, priority, stat_type, agg_value);
184 
185 	if (buffer != NULL)  {
186 		free(buffer);
187 	}
188 
189 	return (0);
190 }
191 
192 /*
193  * Callback to process aggregation lt_named_* (related to lock spinning etc.),
194  * in the snapshot.
195  */
196 static int
197 aggwalk_named(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
198 {
199 	dtrace_aggdesc_t *aggdesc = data->dtada_desc;
200 	pid_t pid;
201 	id_t tid;
202 	uint64_t agg_value;
203 	int cause_id;
204 	char *type = NULL;
205 	enum { REC_PID = 1, REC_TID, REC_TYPE, REC_AGG, NREC };
206 
207 	/* Check action type */
208 	if ((aggdesc->dtagd_nrecs < NREC) ||
209 	    (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) ||
210 	    (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) ||
211 	    (aggdesc->dtagd_rec[REC_TYPE].dtrd_action != DTRACEACT_DIFEXPR) ||
212 	    (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action))) {
213 
214 		return (-1);
215 	}
216 
217 	pid = rec_get_value(
218 	    data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset,
219 	    aggdesc->dtagd_rec[REC_PID].dtrd_size);
220 
221 	if (SHOULD_IGNORE(pid)) {
222 		return (0);
223 	}
224 
225 	tid = rec_get_value(
226 	    data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset,
227 	    aggdesc->dtagd_rec[REC_TID].dtrd_size);
228 
229 	type = (char *)data->dtada_data
230 	    + aggdesc->dtagd_rec[REC_TYPE].dtrd_offset;
231 	cause_id = lt_table_cause_from_name(type, 1, CAUSE_FLAG_SPECIAL);
232 
233 	agg_value = rec_get_value(
234 	    data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset,
235 	    aggdesc->dtagd_rec[REC_AGG].dtrd_size);
236 
237 	lt_stat_update_cause(pid, tid, cause_id, stat_type, agg_value);
238 
239 	return (0);
240 
241 }
242 
243 /*
244  * Callback to process aggregation lt_sync_* (related to synchronization
245  * objects), in the snapshot.
246  */
247 static int
248 aggwalk_sync(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
249 {
250 	dtrace_aggdesc_t *aggdesc = data->dtada_desc;
251 	pid_t pid;
252 	id_t tid;
253 	uint64_t agg_value;
254 	int stype;
255 	unsigned long long wchan;
256 	enum { REC_PID = 1, REC_TID, REC_STYPE, REC_WCHAN, REC_AGG, NREC };
257 
258 	/* Check action type */
259 	if ((aggdesc->dtagd_nrecs < NREC) ||
260 	    (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) ||
261 	    (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) ||
262 	    (aggdesc->dtagd_rec[REC_STYPE].dtrd_action != DTRACEACT_DIFEXPR) ||
263 	    (aggdesc->dtagd_rec[REC_WCHAN].dtrd_action != DTRACEACT_DIFEXPR) ||
264 	    (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action))) {
265 
266 		return (-1);
267 	}
268 
269 	pid = rec_get_value(
270 	    data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset,
271 	    aggdesc->dtagd_rec[REC_PID].dtrd_size);
272 
273 	if (SHOULD_IGNORE(pid)) {
274 		return (0);
275 	}
276 
277 	tid = rec_get_value(
278 	    data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset,
279 	    aggdesc->dtagd_rec[REC_TID].dtrd_size);
280 
281 	stype = rec_get_value(
282 	    data->dtada_data + aggdesc->dtagd_rec[REC_STYPE].dtrd_offset,
283 	    aggdesc->dtagd_rec[REC_STYPE].dtrd_size);
284 
285 	wchan = rec_get_value(
286 	    data->dtada_data + aggdesc->dtagd_rec[REC_WCHAN].dtrd_offset,
287 	    aggdesc->dtagd_rec[REC_WCHAN].dtrd_size);
288 
289 	agg_value = rec_get_value(
290 	    data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset,
291 	    aggdesc->dtagd_rec[REC_AGG].dtrd_size);
292 
293 	lt_stat_update_sobj(pid, tid, stype, wchan, stat_type, agg_value);
294 
295 	return (0);
296 }
297 
298 /*
299  * Callback to process various aggregations in the snapshot. Called by
300  * different aggwalk_* functions.
301  */
302 /* ARGSUSED */
303 static int
304 aggwalk(const dtrace_aggdata_t *data, void *arg)
305 {
306 	char *tmp;
307 	char buffer[32];
308 	lt_stat_type_t stat_type;
309 	int (*func)(const dtrace_aggdata_t *, lt_stat_type_t);
310 
311 	(void) strncpy(buffer, data->dtada_desc->dtagd_name, sizeof (buffer));
312 	buffer[sizeof (buffer) - 1] = '\0';
313 	tmp = strtok(buffer, "_");
314 
315 	if (tmp == NULL || strcmp(tmp, "lt") != 0) {
316 		goto done;
317 	}
318 
319 	tmp = strtok(NULL, "_");
320 
321 	if (tmp == NULL) {
322 		goto done;
323 	} else if (strcmp(tmp, "call") == 0) {
324 		func = aggwalk_call;
325 	} else if (strcmp(tmp, "named") == 0) {
326 		func = aggwalk_named;
327 	} else if (strcmp(tmp, "sync") == 0) {
328 		func = aggwalk_sync;
329 	} else {
330 		goto done;
331 	}
332 
333 	tmp = strtok(NULL, "_");
334 
335 	if (tmp == NULL) {
336 		goto done;
337 	} else if (strcmp(tmp, "count") == 0) {
338 		stat_type = LT_STAT_COUNT;
339 	} else if (strcmp(tmp, "sum") == 0) {
340 		stat_type = LT_STAT_SUM;
341 	} else if (strcmp(tmp, "max") == 0) {
342 		stat_type = LT_STAT_MAX;
343 	} else {
344 		goto done;
345 	}
346 
347 	(void) func(data, stat_type);
348 
349 done:
350 	/* We have our data, so remove it from DTrace now */
351 	return (DTRACE_AGGWALK_REMOVE);
352 }
353 
354 /*
355  * Callback to handle event caused by DTrace dropping data.
356  */
357 /*ARGSUSED*/
358 static int
359 drop_handler(const dtrace_dropdata_t *data, void *user)
360 {
361 	lt_display_error("Drop: %s\n", data->dtdda_msg);
362 
363 	/* Pretend nothing happened, so just continue */
364 	return (DTRACE_HANDLE_OK);
365 }
366 
#ifndef EMBED_CONFIGS
/*
 * Copy the content from a "real" file into a temp file.
 *
 * `src' is the path of the file to read; `dst' is an already opened stream
 * that receives the bytes at its current position.  `dst' is neither
 * rewound nor closed here.
 *
 * Returns 0 on success and -1 if `src' cannot be opened, if reading it
 * fails, or if not all bytes could be written to `dst'.  The source stream
 * is closed on every path.
 */
static int
copy_tmp_file(const char *src, FILE *dst)
{
	FILE *tmp = NULL;
	char buffer[256];
	size_t nread;
	int ret = 0;

	if ((tmp = fopen(src, "r")) == NULL) {
		return (-1);
	}

	while ((nread = fread(buffer, 1, sizeof (buffer), tmp)) > 0) {
		if (fwrite(buffer, 1, nread, dst) != nread) {
			ret = -1;
			break;
		}
	}

	/* fread() returns 0 for both EOF and error; tell them apart */
	if (ferror(tmp)) {
		ret = -1;
	}

	(void) fclose(tmp);

	return (ret);
}
#endif
393 
394 /*
395  * DTrace initialization. D script starts running when this function returns.
396  */
397 int
398 lt_dtrace_init(void)
399 {
400 	dtrace_prog_t *prog;
401 	dtrace_proginfo_t info;
402 	int err;
403 	FILE *fp_script = NULL;
404 
405 	pid_self = getpid();
406 
407 	if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
408 		lt_display_error("Cannot open dtrace library: %s\n",
409 		    dtrace_errmsg(NULL, err));
410 		return (-1);
411 	}
412 
413 	if (dtrace_handle_drop(g_dtp, &drop_handler, NULL) == -1) {
414 		lt_display_error("Cannot install DTrace handle: %s\n",
415 		    dtrace_errmsg(NULL, err));
416 		return (-1);
417 	}
418 
419 	if (g_config.lt_cfg_enable_filter) {
420 		if ((err = dtrace_setopt(g_dtp, "define",
421 		    "ENABLE_FILTER")) != 0) {
422 			lt_display_error(
423 			    "Failed to set option ENABLE_FILTER.\n");
424 			return (err);
425 		}
426 	}
427 
428 	if (g_config.lt_cfg_trace_syncobj) {
429 		if ((err = dtrace_setopt(g_dtp, "define",
430 		    "ENABLE_SYNCOBJ")) != 0) {
431 			lt_display_error(
432 			    "Failed to set option ENABLE_SYNCOBJ.\n");
433 			return (err);
434 		}
435 	}
436 
437 	if (g_config.lt_cfg_trace_sched) {
438 		if ((err = dtrace_setopt(g_dtp, "define",
439 		    "ENABLE_SCHED")) != 0) {
440 			lt_display_error(
441 			    "Failed to set option ENABLE_SYNCOBJ.\n");
442 			return (err);
443 		}
444 	}
445 
446 	if (g_config.lt_cfg_low_overhead_mode) {
447 		if ((err = dtrace_setopt(g_dtp, "define",
448 		    "ENABLE_LOW_OVERHEAD")) != 0) {
449 			lt_display_error(
450 			    "Failed to set option ENABLE_SYNCOBJ.\n");
451 			return (err);
452 		}
453 	}
454 
455 	/* Create a temp file; libdtrace needs it for cpp(1) */
456 	if ((fp_script = tmpfile()) == NULL) {
457 		lt_display_error("Cannot create tmp file\n");
458 		return (-1);
459 	}
460 
461 	/* Copy the main D script into the temp file */
462 #ifdef EMBED_CONFIGS
463 	if (fwrite(&latencytop_d_start,
464 	    (size_t)(&latencytop_d_end - &latencytop_d_start), 1, fp_script)
465 	    != 1) {
466 		lt_display_error("Could not copy D script, fwrite() failed\n");
467 		(void) fclose(fp_script);
468 		return (-1);
469 	}
470 #else
471 	if (copy_tmp_file(DEFAULT_D_SCRIPT_NAME, fp_script) != 0) {
472 		lt_display_error("Cannot open script file %s\n",
473 		    DEFAULT_D_SCRIPT_NAME);
474 		(void) fclose(fp_script);
475 		return (-1);
476 	}
477 #endif	/* EMBED_CONFIGS */
478 
479 	if (lt_table_append_trans(fp_script) != 0) {
480 		(void) fclose(fp_script);
481 		return (-1);
482 	}
483 
484 	(void) fseek(fp_script, 0, SEEK_SET);
485 
486 	if ((prog = dtrace_program_fcompile(g_dtp, fp_script,
487 	    DTRACE_C_CPP, 0, NULL)) == NULL) {
488 		lt_display_error("Failed to compile D script.\n");
489 		(void) fclose(fp_script);
490 		return (dtrace_errno(g_dtp));
491 	}
492 
493 	(void) fclose(fp_script);
494 
495 	/* Execute the D script */
496 	if (dtrace_program_exec(g_dtp, prog, &info) == -1) {
497 		lt_display_error("Failed to enable probes.\n");
498 		return (dtrace_errno(g_dtp));
499 	}
500 
501 	if (dtrace_go(g_dtp) != 0) {
502 		lt_display_error("Failed to run D script.\n");
503 		return (dtrace_errno(g_dtp));
504 	}
505 
506 	return (0);
507 }
508 
509 /*
510  * Worker function to move aggregate data to user space. Called periodically
511  * to prevent the kernel from running out of memory.
512  */
513 int
514 lt_dtrace_work(int force)
515 {
516 	static uint64_t last_snap = 0;
517 	uint64_t now = lt_millisecond();
518 
519 	if (!force && now - last_snap < g_config.lt_cfg_snap_interval) {
520 		return (last_snap + g_config.lt_cfg_snap_interval - now);
521 	}
522 
523 	if (dtrace_status(g_dtp) == -1) {
524 		lt_display_error("Failed when getting status: %s\n",
525 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
526 		return (-1);
527 	}
528 
529 	if (dtrace_aggregate_snap(g_dtp) != 0) {
530 		lt_display_error("Failed to snap aggregate: %s\n",
531 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
532 		return (-1);
533 	}
534 
535 	last_snap = now;
536 	return (0);
537 }
538 
539 /*
540  * Walk through dtrace aggregator and collect data for latencytop to display.
541  * Called immediately before UI update.
542  */
543 int
544 lt_dtrace_collect(void)
545 {
546 	if (lt_dtrace_work(1) != 0) {
547 		return (-1);
548 	}
549 
550 	if (dtrace_aggregate_walk(g_dtp, aggwalk, NULL) != 0) {
551 		lt_display_error("Failed to sort aggregate: %s\n",
552 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
553 		return (-1);
554 	}
555 
556 	/*
557 	 * Probably we don't need to clear again, because we have removed
558 	 * everything. Paranoid ?
559 	 */
560 	dtrace_aggregate_clear(g_dtp);
561 
562 	return (0);
563 }
564 
565 /*
566  * dtrace clean up.
567  */
568 void
569 lt_dtrace_deinit(void)
570 {
571 	(void) dtrace_stop(g_dtp);
572 	dtrace_close(g_dtp);
573 }
574