xref: /titanic_50/usr/src/cmd/latencytop/common/dwrapper.c (revision 540db9a98e48e044a5fb290242f3ebb8cc3afc36)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008-2009, Intel Corporation.
23  * All Rights Reserved.
24  */
25 
26 #include <unistd.h>
27 #include <stdio.h>
28 #include <dtrace.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include <memory.h>
32 #include <limits.h>
33 
34 #include "latencytop.h"
35 
36 static dtrace_hdl_t *g_dtp = NULL;	/* dtrace handle */
37 static pid_t pid_self = -1;		/* PID of our own process */
38 
39 /*
40  * Ignore sched if sched is not tracked.
41  * Also ignore ourselves (i.e., latencytop).
42  */
43 #define	SHOULD_IGNORE(pid)		\
44 	((!g_config.lt_cfg_trace_sched && 0 == (pid)) || pid_self == (pid))
45 
/*
 * Read an unsigned integer that is 'b' bytes wide from the dtrace record
 * at 'a' and widen it to 64 bits.
 *
 * Only widths of 1, 2, 4 and 8 bytes are understood; any other width
 * yields 0. The bytes are copied out with memcpy() rather than read through
 * a casted pointer, so the access stays well defined even when 'a' is not
 * suitably aligned for the requested width.
 */
static uint64_t
rec_get_value(void *a, size_t b)
{
	uint64_t v64;
	uint32_t v32;
	uint16_t v16;
	uint8_t v8;

	switch (b) {
	case sizeof (uint64_t):
		(void) memcpy(&v64, a, sizeof (v64));
		return (v64);
	case sizeof (uint32_t):
		(void) memcpy(&v32, a, sizeof (v32));
		return (v32);
	case sizeof (uint16_t):
		(void) memcpy(&v16, a, sizeof (v16));
		return (v16);
	case sizeof (uint8_t):
		(void) memcpy(&v8, a, sizeof (v8));
		return (v8);
	default:
		/* Unknown width: nothing sensible to return. */
		return (0);
	}
}
73 
74 /*
75  * Callback to process aggregation lt_call_* (related to on/off cpu
76  * activities) in the snapshot.
77  */
78 static int
79 aggwalk_call(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
80 {
81 	dtrace_aggdesc_t *aggdesc = data->dtada_desc;
82 	dtrace_syminfo_t dts;
83 	GElf_Sym sym;
84 	caddr_t addr;
85 	pid_t pid;
86 	id_t tid;
87 	unsigned int stack_depth;
88 	unsigned int pc_size;
89 	uint64_t pc;
90 	uint64_t agg_value;
91 	char *ptr = NULL;
92 	char *buffer = NULL;
93 	int ptrsize;
94 	unsigned int buffersize;
95 	char *tag = NULL;
96 	unsigned int priority;
97 	enum { REC_PID = 1, REC_TID, REC_STACK, REC_TAG, REC_PRIO, REC_AGG,
98 	    NREC };
99 
100 	/* Check action type */
101 	if ((aggdesc->dtagd_nrecs < NREC) ||
102 	    (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) ||
103 	    (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) ||
104 	    (aggdesc->dtagd_rec[REC_TAG].dtrd_action != DTRACEACT_DIFEXPR) ||
105 	    (aggdesc->dtagd_rec[REC_PRIO].dtrd_action != DTRACEACT_DIFEXPR) ||
106 	    (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action)) ||
107 	    (aggdesc->dtagd_rec[REC_STACK].dtrd_action != DTRACEACT_STACK)) {
108 
109 		return (-1);
110 	}
111 
112 	pid = rec_get_value(
113 	    data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset,
114 	    aggdesc->dtagd_rec[REC_PID].dtrd_size);
115 
116 	if (SHOULD_IGNORE(pid)) {
117 		return (0);
118 	}
119 
120 	tid = rec_get_value(
121 	    data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset,
122 	    aggdesc->dtagd_rec[REC_TID].dtrd_size);
123 
124 	/* Parse stack array from dtagd_rec */
125 	stack_depth = aggdesc->dtagd_rec[REC_STACK].dtrd_arg;
126 	pc_size = aggdesc->dtagd_rec[REC_STACK].dtrd_size / stack_depth;
127 	addr = data->dtada_data + aggdesc->dtagd_rec[REC_STACK].dtrd_offset;
128 	buffersize = (stack_depth * (2 * PATH_MAX + 2) + 1) * sizeof (char);
129 	buffer = (char *)lt_malloc(buffersize);
130 	ptr = buffer;
131 	ptrsize = buffersize;
132 
133 	/* Print the stack */
134 	while (stack_depth > 0) {
135 		pc = rec_get_value(addr, pc_size);
136 
137 		if (pc == 0) {
138 			break;
139 		}
140 
141 		addr += pc_size;
142 
143 		if (dtrace_lookup_by_addr(g_dtp, pc, &sym, &dts) == 0) {
144 			int len;
145 			len = snprintf(ptr, ptrsize,
146 			    "%s`%s ", dts.dts_object, dts.dts_name);
147 			ptrsize -= len;
148 
149 			if (ptrsize <= 0) {
150 				/*
151 				 * snprintf returns "desired" length, so
152 				 * reaching here means our buffer is full.
153 				 * Move ptr to the last byte of the buffer and
154 				 * break.
155 				 */
156 				ptr = &buffer[buffersize-1];
157 				break;
158 			} else {
159 				ptr += len;
160 			}
161 		}
162 	}
163 
164 	if (ptr != buffer) {
165 		/*
166 		 * We have printed something, so it is safe to remove
167 		 * the last ' '.
168 		 */
169 		*(ptr-1) = '\0';
170 	}
171 
172 	tag = (char *)data->dtada_data +
173 	    aggdesc->dtagd_rec[REC_TAG].dtrd_offset;
174 
175 	priority = rec_get_value(
176 	    data->dtada_data + aggdesc->dtagd_rec[REC_PRIO].dtrd_offset,
177 	    aggdesc->dtagd_rec[REC_PRIO].dtrd_size);
178 
179 	agg_value = rec_get_value(
180 	    data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset,
181 	    aggdesc->dtagd_rec[REC_AGG].dtrd_size);
182 
183 	lt_stat_update(pid, tid, buffer, tag, priority, stat_type, agg_value);
184 
185 	if (buffer != NULL)  {
186 		free(buffer);
187 	}
188 
189 	return (0);
190 }
191 
192 /*
193  * Callback to process aggregation lt_named_* (related to lock spinning etc.),
194  * in the snapshot.
195  */
196 static int
197 aggwalk_named(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
198 {
199 	dtrace_aggdesc_t *aggdesc = data->dtada_desc;
200 	pid_t pid;
201 	id_t tid;
202 	uint64_t agg_value;
203 	int cause_id;
204 	char *type = NULL;
205 	enum { REC_PID = 1, REC_TID, REC_TYPE, REC_AGG, NREC };
206 
207 	/* Check action type */
208 	if ((aggdesc->dtagd_nrecs < NREC) ||
209 	    (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) ||
210 	    (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) ||
211 	    (aggdesc->dtagd_rec[REC_TYPE].dtrd_action != DTRACEACT_DIFEXPR) ||
212 	    (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action))) {
213 
214 		return (-1);
215 	}
216 
217 	pid = rec_get_value(
218 	    data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset,
219 	    aggdesc->dtagd_rec[REC_PID].dtrd_size);
220 
221 	if (SHOULD_IGNORE(pid)) {
222 		return (0);
223 	}
224 
225 	tid = rec_get_value(
226 	    data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset,
227 	    aggdesc->dtagd_rec[REC_TID].dtrd_size);
228 
229 	type = (char *)data->dtada_data
230 	    + aggdesc->dtagd_rec[REC_TYPE].dtrd_offset;
231 	cause_id = lt_table_cause_from_name(type, 1, CAUSE_FLAG_SPECIAL);
232 
233 	agg_value = rec_get_value(
234 	    data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset,
235 	    aggdesc->dtagd_rec[REC_AGG].dtrd_size);
236 
237 	lt_stat_update_cause(pid, tid, cause_id, stat_type, agg_value);
238 
239 	return (0);
240 
241 }
242 
243 /*
244  * Callback to process aggregation lt_sync_* (related to synchronization
245  * objects), in the snapshot.
246  */
247 static int
248 aggwalk_sync(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
249 {
250 	dtrace_aggdesc_t *aggdesc = data->dtada_desc;
251 	pid_t pid;
252 	id_t tid;
253 	uint64_t agg_value;
254 	int stype;
255 	unsigned long long wchan;
256 	enum { REC_PID = 1, REC_TID, REC_STYPE, REC_WCHAN, REC_AGG, NREC };
257 
258 	/* Check action type */
259 	if ((aggdesc->dtagd_nrecs < NREC) ||
260 	    (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) ||
261 	    (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) ||
262 	    (aggdesc->dtagd_rec[REC_STYPE].dtrd_action != DTRACEACT_DIFEXPR) ||
263 	    (aggdesc->dtagd_rec[REC_WCHAN].dtrd_action != DTRACEACT_DIFEXPR) ||
264 	    (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action))) {
265 
266 		return (-1);
267 	}
268 
269 	pid = rec_get_value(
270 	    data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset,
271 	    aggdesc->dtagd_rec[REC_PID].dtrd_size);
272 
273 	if (SHOULD_IGNORE(pid)) {
274 		return (0);
275 	}
276 
277 	tid = rec_get_value(
278 	    data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset,
279 	    aggdesc->dtagd_rec[REC_TID].dtrd_size);
280 
281 	stype = rec_get_value(
282 	    data->dtada_data + aggdesc->dtagd_rec[REC_STYPE].dtrd_offset,
283 	    aggdesc->dtagd_rec[REC_STYPE].dtrd_size);
284 
285 	wchan = rec_get_value(
286 	    data->dtada_data + aggdesc->dtagd_rec[REC_WCHAN].dtrd_offset,
287 	    aggdesc->dtagd_rec[REC_WCHAN].dtrd_size);
288 
289 	agg_value = rec_get_value(
290 	    data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset,
291 	    aggdesc->dtagd_rec[REC_AGG].dtrd_size);
292 
293 	lt_stat_update_sobj(pid, tid, stype, wchan, stat_type, agg_value);
294 
295 	return (0);
296 }
297 
298 /*
299  * Callback to process various aggregations in the snapshot. Called by
300  * different aggwalk_* functions.
301  */
302 /* ARGSUSED */
303 static int
304 aggwalk(const dtrace_aggdata_t *data, void *arg)
305 {
306 	char *tmp;
307 	char buffer[32];
308 	lt_stat_type_t stat_type;
309 	int (*func)(const dtrace_aggdata_t *, lt_stat_type_t);
310 
311 	(void) strncpy(buffer, data->dtada_desc->dtagd_name, sizeof (buffer));
312 	buffer[sizeof (buffer) - 1] = '\0';
313 	tmp = strtok(buffer, "_");
314 
315 	if (tmp == NULL || strcmp(tmp, "lt") != 0) {
316 		goto done;
317 	}
318 
319 	tmp = strtok(NULL, "_");
320 
321 	if (tmp == NULL) {
322 		goto done;
323 	} else if (strcmp(tmp, "call") == 0) {
324 		func = aggwalk_call;
325 	} else if (strcmp(tmp, "named") == 0) {
326 		func = aggwalk_named;
327 	} else if (strcmp(tmp, "sync") == 0) {
328 		func = aggwalk_sync;
329 	} else {
330 		goto done;
331 	}
332 
333 	tmp = strtok(NULL, "_");
334 
335 	if (tmp == NULL) {
336 		goto done;
337 	} else if (strcmp(tmp, "count") == 0) {
338 		stat_type = LT_STAT_COUNT;
339 	} else if (strcmp(tmp, "sum") == 0) {
340 		stat_type = LT_STAT_SUM;
341 	} else if (strcmp(tmp, "max") == 0) {
342 		stat_type = LT_STAT_MAX;
343 	} else {
344 		goto done;
345 	}
346 
347 	(void) func(data, stat_type);
348 
349 done:
350 	/* We have our data, so remove it from DTrace now */
351 	return (DTRACE_AGGWALK_REMOVE);
352 }
353 
354 /*
355  * Callback to handle event caused by DTrace dropping data.
356  */
357 /*ARGSUSED*/
358 static int
359 drop_handler(const dtrace_dropdata_t *data, void *user)
360 {
361 	lt_display_error("Drop: %s\n", data->dtdda_msg);
362 	lt_drop_detected = B_TRUE;
363 
364 	/* Pretend nothing happened, so just continue */
365 	return (DTRACE_HANDLE_OK);
366 }
367 
#ifndef EMBED_CONFIGS
/*
 * Copy the content of the file named 'src' into the already open stream
 * 'dst', starting at dst's current position.
 *
 * Returns 0 on success and -1 when 'src' cannot be opened, when reading it
 * fails, or when writing to 'dst' fails. The stream opened for 'src' is
 * closed on every path; 'dst' is left open for the caller.
 */
static int
copy_tmp_file(const char *src, FILE *dst)
{
	FILE *tmp;
	char buffer[256];
	size_t bytes;
	int ret = 0;

	if ((tmp = fopen(src, "r")) == NULL) {
		return (-1);
	}

	while ((bytes = fread(buffer, 1, sizeof (buffer), tmp)) > 0) {
		if (fwrite(buffer, 1, bytes, dst) != bytes) {
			ret = -1;
			break;
		}
	}

	/* fread() returning 0 can mean an I/O error, not only end of file. */
	if (ferror(tmp)) {
		ret = -1;
	}

	(void) fclose(tmp);

	return (ret);
}
#endif
394 
395 /*
396  * DTrace initialization. D script starts running when this function returns.
397  */
398 int
399 lt_dtrace_init(void)
400 {
401 	dtrace_prog_t *prog;
402 	dtrace_proginfo_t info;
403 	int err;
404 	FILE *fp_script = NULL;
405 
406 	pid_self = getpid();
407 
408 	if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
409 		lt_display_error("Cannot open dtrace library: %s\n",
410 		    dtrace_errmsg(NULL, err));
411 		return (-1);
412 	}
413 
414 	if (dtrace_handle_drop(g_dtp, &drop_handler, NULL) == -1) {
415 		lt_display_error("Cannot install DTrace handle: %s\n",
416 		    dtrace_errmsg(NULL, err));
417 		return (-1);
418 	}
419 
420 	if (g_config.lt_cfg_enable_filter) {
421 		if ((err = dtrace_setopt(g_dtp, "define",
422 		    "ENABLE_FILTER")) != 0) {
423 			lt_display_error(
424 			    "Failed to set option ENABLE_FILTER.\n");
425 			return (err);
426 		}
427 	}
428 
429 	if (g_config.lt_cfg_trace_syncobj) {
430 		if ((err = dtrace_setopt(g_dtp, "define",
431 		    "ENABLE_SYNCOBJ")) != 0) {
432 			lt_display_error(
433 			    "Failed to set option ENABLE_SYNCOBJ.\n");
434 			return (err);
435 		}
436 	}
437 
438 	if (g_config.lt_cfg_trace_sched) {
439 		if ((err = dtrace_setopt(g_dtp, "define",
440 		    "ENABLE_SCHED")) != 0) {
441 			lt_display_error(
442 			    "Failed to set option ENABLE_SYNCOBJ.\n");
443 			return (err);
444 		}
445 	}
446 
447 	if (g_config.lt_cfg_low_overhead_mode) {
448 		if ((err = dtrace_setopt(g_dtp, "define",
449 		    "ENABLE_LOW_OVERHEAD")) != 0) {
450 			lt_display_error(
451 			    "Failed to set option ENABLE_SYNCOBJ.\n");
452 			return (err);
453 		}
454 	}
455 
456 	/* Create a temp file; libdtrace needs it for cpp(1) */
457 	if ((fp_script = tmpfile()) == NULL) {
458 		lt_display_error("Cannot create tmp file\n");
459 		return (-1);
460 	}
461 
462 	/* Copy the main D script into the temp file */
463 #ifdef EMBED_CONFIGS
464 	if (fwrite(&latencytop_d_start,
465 	    (size_t)(&latencytop_d_end - &latencytop_d_start), 1, fp_script)
466 	    != 1) {
467 		lt_display_error("Could not copy D script, fwrite() failed\n");
468 		(void) fclose(fp_script);
469 		return (-1);
470 	}
471 #else
472 	if (copy_tmp_file(DEFAULT_D_SCRIPT_NAME, fp_script) != 0) {
473 		lt_display_error("Cannot open script file %s\n",
474 		    DEFAULT_D_SCRIPT_NAME);
475 		(void) fclose(fp_script);
476 		return (-1);
477 	}
478 #endif	/* EMBED_CONFIGS */
479 
480 	if (lt_table_append_trans(fp_script) != 0) {
481 		(void) fclose(fp_script);
482 		return (-1);
483 	}
484 
485 	(void) fseek(fp_script, 0, SEEK_SET);
486 
487 	if ((prog = dtrace_program_fcompile(g_dtp, fp_script,
488 	    DTRACE_C_CPP, 0, NULL)) == NULL) {
489 		lt_display_error("Failed to compile D script.\n");
490 		(void) fclose(fp_script);
491 		return (dtrace_errno(g_dtp));
492 	}
493 
494 	(void) fclose(fp_script);
495 
496 	/* Execute the D script */
497 	if (dtrace_program_exec(g_dtp, prog, &info) == -1) {
498 		lt_display_error("Failed to enable probes.\n");
499 		return (dtrace_errno(g_dtp));
500 	}
501 
502 	if (dtrace_go(g_dtp) != 0) {
503 		lt_display_error("Failed to run D script.\n");
504 		return (dtrace_errno(g_dtp));
505 	}
506 
507 	return (0);
508 }
509 
510 /*
511  * Worker function to move aggregate data to user space. Called periodically
512  * to prevent the kernel from running out of memory.
513  */
514 int
515 lt_dtrace_work(int force)
516 {
517 	static uint64_t last_snap = 0;
518 	uint64_t now = lt_millisecond();
519 
520 	if (!force && now - last_snap < g_config.lt_cfg_snap_interval) {
521 		return (last_snap + g_config.lt_cfg_snap_interval - now);
522 	}
523 
524 	if (dtrace_status(g_dtp) == -1) {
525 		lt_display_error("Failed when getting status: %s\n",
526 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
527 		return (-1);
528 	}
529 
530 	if (dtrace_aggregate_snap(g_dtp) != 0) {
531 		lt_display_error("Failed to snap aggregate: %s\n",
532 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
533 		return (-1);
534 	}
535 
536 	last_snap = now;
537 	return (0);
538 }
539 
540 /*
541  * Walk through dtrace aggregator and collect data for latencytop to display.
542  * Called immediately before UI update.
543  */
544 int
545 lt_dtrace_collect(void)
546 {
547 	if (lt_dtrace_work(1) != 0) {
548 		return (-1);
549 	}
550 
551 	if (dtrace_aggregate_walk(g_dtp, aggwalk, NULL) != 0) {
552 		lt_display_error("Failed to sort aggregate: %s\n",
553 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
554 		return (-1);
555 	}
556 
557 	/*
558 	 * Probably we don't need to clear again, because we have removed
559 	 * everything. Paranoid ?
560 	 */
561 	dtrace_aggregate_clear(g_dtp);
562 
563 	return (0);
564 }
565 
566 /*
567  * dtrace clean up.
568  */
569 int
570 lt_dtrace_deinit(void)
571 {
572 	int ret = 0;
573 
574 	if (dtrace_stop(g_dtp) != 0) {
575 		lt_display_error("dtrace_stop failed: %s\n",
576 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
577 		ret = -1;
578 	}
579 
580 	dtrace_close(g_dtp);
581 
582 	return (ret);
583 }
584