xref: /titanic_44/usr/src/cmd/latencytop/common/dwrapper.c (revision 05dd151b48459b1282a9ba09a1012307693cf4b9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008-2009, Intel Corporation.
23  * All Rights Reserved.
24  */
25 
26 #include <unistd.h>
27 #include <stdio.h>
28 #include <dtrace.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include <memory.h>
32 #include <limits.h>
33 
34 #include "latencytop.h"
35 
36 static dtrace_hdl_t *g_dtp = NULL;	/* The dtrace handle */
37 static pid_t pid_self = -1;		/* PID of our own process */
38 
39 /*
40  * Checks if the process is latencytop itself or sched (if we are not tracing
41  * sched), we should ignore them.
42  */
43 #define	SHOULD_IGNORE(pid)		\
44 	((!g_config.trace_sched && 0 == (pid)) || pid_self == (pid))
45 
/*
 * Get an integer value from a dtrace record.
 *
 * `a' points at the first byte of the value and `b' is its width in bytes.
 * Widths of 1, 2, 4 and 8 bytes are read as unsigned integers in host byte
 * order and widened to 64 bits; any other width yields 0.
 *
 * The bytes are copied out with memcpy() instead of being read through a
 * cast pointer, so the result does not depend on `a' being aligned for the
 * requested width.
 */
static uint64_t
rec_get_value(const void *a, size_t b)
{
	uint64_t v64;
	uint32_t v32;
	uint16_t v16;
	uint8_t v8;

	switch (b) {
	case sizeof (uint64_t):
		(void) memcpy(&v64, a, sizeof (v64));
		return (v64);
	case sizeof (uint32_t):
		(void) memcpy(&v32, a, sizeof (v32));
		return (v32);
	case sizeof (uint16_t):
		(void) memcpy(&v16, a, sizeof (v16));
		return (v16);
	case sizeof (uint8_t):
		(void) memcpy(&v8, a, sizeof (v8));
		return (v8);
	default:
		/* Unsupported width: report zero, as callers expect. */
		return (0);
	}
}
73 
74 /*
75  * Callback to process each aggregation in the snapshot.
76  * This one processes lt_call_*, which contains on/off cpu activites.
77  */
78 static int
79 aggwalk_call(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
80 {
81 	const int REC_PID = 1;
82 	const int REC_TID = 2;
83 	const int REC_STACK = 3;
84 	const int REC_AGG = 4;
85 	const int NREC = 5;
86 
87 	dtrace_aggdesc_t *aggdesc = data->dtada_desc;
88 	dtrace_syminfo_t dts;
89 	GElf_Sym sym;
90 	caddr_t addr;
91 	pid_t pid;
92 	id_t tid;
93 	unsigned int stack_depth;
94 	unsigned int pc_size;
95 	uint64_t pc;
96 	uint64_t agg_value;
97 	char *ptr = NULL;
98 	char *buffer = NULL;
99 	int ptrsize;
100 	unsigned int buffersize;
101 
102 	if (aggdesc->dtagd_nrecs < NREC) {
103 		/* Not enough records */
104 		goto err;
105 	}
106 
107 	if (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) {
108 		/* Record is not PID, this is an error. */
109 		goto err;
110 	}
111 	pid = rec_get_value(
112 	    data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset,
113 	    aggdesc->dtagd_rec[REC_PID].dtrd_size);
114 	if (SHOULD_IGNORE(pid)) {
115 		goto done;
116 	}
117 
118 	if (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) {
119 		/* Record is not TID, this is an error. */
120 		goto err;
121 	}
122 	tid = rec_get_value(
123 	    data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset,
124 	    aggdesc->dtagd_rec[REC_TID].dtrd_size);
125 
126 	if (aggdesc->dtagd_rec[REC_STACK].dtrd_action != DTRACEACT_STACK) {
127 		/* Record is not stack(), this is an error. */
128 		goto err;
129 	}
130 
131 	/* Parse stack array from dtagd_rec */
132 	stack_depth = aggdesc->dtagd_rec[REC_STACK].dtrd_arg;
133 	pc_size = aggdesc->dtagd_rec[REC_STACK].dtrd_size / stack_depth;
134 	addr = data->dtada_data + aggdesc->dtagd_rec[REC_STACK].dtrd_offset;
135 	buffersize = (stack_depth * (2 * PATH_MAX + 2) + 1) * sizeof (char);
136 	buffer = (char *)lt_malloc(buffersize);
137 	ptr = buffer;
138 	ptrsize = buffersize;
139 
140 	/* Print the stack */
141 	while (stack_depth > 0) {
142 		pc = rec_get_value(addr, pc_size);
143 		if (pc == 0) {
144 			break;
145 		}
146 		addr += pc_size;
147 		if (dtrace_lookup_by_addr(g_dtp, pc, &sym, &dts) == 0) {
148 			int len;
149 			len = snprintf(ptr, ptrsize,
150 			    "%s`%s ", dts.dts_object, dts.dts_name);
151 			ptrsize -= len;
152 			if (ptrsize <= 0) {
153 				/*
154 				 * Snprintf returns "desired" length, so
155 				 * reaching here means our buffer is full.
156 				 * Move ptr to last byte in the buffer and
157 				 * break early.
158 				 */
159 				ptr = &buffer[buffersize-1];
160 				break;
161 			} else	{
162 				ptr += len;
163 			}
164 		}
165 	}
166 
167 	if (ptr != buffer) {
168 		/*
169 		 * We have printed something,
170 		 * so it is safe to remove last ' '.
171 		 */
172 		*(ptr-1) = 0;
173 	}
174 
175 	/* Parsing aggregation data */
176 	if (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action)) {
177 		/* Record is not aggregation, this is an error. */
178 		goto err;
179 	}
180 	agg_value = rec_get_value(
181 	    data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset,
182 	    aggdesc->dtagd_rec[REC_AGG].dtrd_size);
183 
184 	lt_stat_update(pid, tid, buffer, stat_type, agg_value);
185 
186 done:
187 	if (buffer != NULL) {
188 		free(buffer);
189 	}
190 	return (0);
191 
192 err:
193 	if (buffer != NULL) {
194 		free(buffer);
195 	}
196 	return (-1);
197 }
198 
199 /*
200  * Callback to process each aggregation in the snapshot.
201  * This one processes lt_named_*, which contains data such as lock spinning.
202  */
203 static int
204 aggwalk_named(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
205 {
206 	const int REC_PID = 1;
207 	const int REC_TID = 2;
208 	const int REC_TYPE = 3;
209 	const int REC_AGG = 4;
210 	const int NREC = 5;
211 
212 	dtrace_aggdesc_t *aggdesc = data->dtada_desc;
213 	pid_t pid;
214 	id_t tid;
215 	uint64_t agg_value;
216 	int cause_id;
217 	char *type = NULL;
218 
219 	if (aggdesc->dtagd_nrecs < NREC) {
220 		/* Not enough records */
221 		return (-1);
222 	}
223 
224 	if (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) {
225 		/* Record is not PID, this is an error. */
226 		return (-1);
227 	}
228 	pid = rec_get_value(
229 	    data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset,
230 	    aggdesc->dtagd_rec[REC_PID].dtrd_size);
231 	if (SHOULD_IGNORE(pid)) {
232 		return (0);
233 	}
234 	if (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) {
235 		/* Record is not TID, this is an error. */
236 		return (-1);
237 	}
238 	tid = rec_get_value(
239 	    data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset,
240 	    aggdesc->dtagd_rec[REC_TID].dtrd_size);
241 
242 	if (aggdesc->dtagd_rec[REC_TYPE].dtrd_action != DTRACEACT_DIFEXPR) {
243 		/* Record is not type, this is an error. */
244 		return (-1);
245 	}
246 	type = (char *)data->dtada_data
247 	    + aggdesc->dtagd_rec[REC_TYPE].dtrd_offset;
248 	cause_id = lt_table_lookup_named_cause(type, 1);
249 
250 	/* Parsing aggregation data */
251 	if (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action)) {
252 		/* Record is not aggregation, this is an error. */
253 		return (-1);
254 	}
255 	agg_value = rec_get_value(
256 	    data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset,
257 	    aggdesc->dtagd_rec[REC_AGG].dtrd_size);
258 
259 	lt_stat_update_cause(pid, tid, cause_id, stat_type, agg_value);
260 
261 	return (0);
262 
263 }
264 
265 /*
266  * Callback to process each aggregation in the snapshot.
267  * This one processes lt_sync_*, which traces synchronization objects.
268  */
269 static int
270 aggwalk_sync(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
271 {
272 	const int REC_PID = 1;
273 	const int REC_TID = 2;
274 	const int REC_STYPE = 3;
275 	const int REC_WCHAN = 4;
276 	const int REC_AGG = 5;
277 	const int NREC = 6;
278 
279 	dtrace_aggdesc_t *aggdesc = data->dtada_desc;
280 	pid_t pid;
281 	id_t tid;
282 	uint64_t agg_value;
283 	int stype;
284 	unsigned long long wchan;
285 
286 	if (aggdesc->dtagd_nrecs < NREC) {
287 		/* Not enough records */
288 		return (-1);
289 	}
290 
291 	if (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) {
292 		/* Record is not PID, this is an error. */
293 		return (-1);
294 	}
295 	pid = rec_get_value(
296 	    data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset,
297 	    aggdesc->dtagd_rec[REC_PID].dtrd_size);
298 	if (SHOULD_IGNORE(pid)) {
299 		return (0);
300 	}
301 
302 	if (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) {
303 		/* Record is not TID, this is an error. */
304 		return (-1);
305 	}
306 	tid = rec_get_value(
307 	    data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset,
308 	    aggdesc->dtagd_rec[REC_TID].dtrd_size);
309 
310 	if (aggdesc->dtagd_rec[REC_STYPE].dtrd_action != DTRACEACT_DIFEXPR) {
311 		/* Record is not stype, this is an error. */
312 		return (-1);
313 	}
314 	stype = rec_get_value(
315 	    data->dtada_data + aggdesc->dtagd_rec[REC_STYPE].dtrd_offset,
316 	    aggdesc->dtagd_rec[REC_STYPE].dtrd_size);
317 
318 	if (aggdesc->dtagd_rec[REC_WCHAN].dtrd_action != DTRACEACT_DIFEXPR) {
319 		/* Record is not wchan, this is an error. */
320 		return (-1);
321 	}
322 	wchan = rec_get_value(
323 	    data->dtada_data + aggdesc->dtagd_rec[REC_WCHAN].dtrd_offset,
324 	    aggdesc->dtagd_rec[REC_WCHAN].dtrd_size);
325 
326 	/* Parsing aggregation data */
327 	if (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action)) {
328 		/* Record is not aggregation, this is an error. */
329 		return (-1);
330 	}
331 	agg_value = rec_get_value(
332 	    data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset,
333 	    aggdesc->dtagd_rec[REC_AGG].dtrd_size);
334 
335 	lt_stat_update_sobj(pid, tid, stype, wchan, stat_type, agg_value);
336 
337 	return (0);
338 }
339 
340 /*
341  * Callback to process each aggregation in the snapshot.
342  * This one dispatches to different aggwalk_*().
343  */
344 /* ARGSUSED */
345 static int
346 aggwalk(const dtrace_aggdata_t *data, void *arg)
347 {
348 	char *tmp;
349 	char buffer[32];
350 	lt_stat_type_t stat_type = LT_STAT_COUNT;
351 	int (*func)(const dtrace_aggdata_t *, lt_stat_type_t);
352 
353 	(void) strncpy(buffer, data->dtada_desc->dtagd_name, sizeof (buffer));
354 	buffer[sizeof (buffer) - 1] = 0;
355 
356 	tmp = strtok(buffer, "_");
357 	if (strcmp(tmp, "lt") != 0) {
358 		goto done;
359 	}
360 
361 	tmp = strtok(NULL, "_");
362 	if (strcmp(tmp, "call") == 0) {
363 		func = aggwalk_call;
364 	} else if (strcmp(tmp, "named") == 0) {
365 		func = aggwalk_named;
366 	} else if (strcmp(tmp, "sync") == 0) {
367 		func = aggwalk_sync;
368 	} else {
369 		goto done;
370 	}
371 
372 	tmp = strtok(NULL, "_");
373 	if (strcmp(tmp, "count") == 0) {
374 		stat_type = LT_STAT_COUNT;
375 	} else if (strcmp(tmp, "sum") == 0) {
376 		stat_type = LT_STAT_SUM;
377 	} else if (strcmp(tmp, "max") == 0) {
378 		stat_type = LT_STAT_MAX;
379 	} else {
380 		goto done;
381 	}
382 
383 	(void) func(data, stat_type);
384 
385 done:
386 	/* We have our data, remove it from DTrace. */
387 	return (DTRACE_AGGWALK_REMOVE);
388 }
389 
390 /*
391  * Callback to handle DTrace drop data events.
392  */
393 /*ARGSUSED*/
394 static int
395 drop_handler(const dtrace_dropdata_t *data, void *user)
396 {
397 	lt_display_error("Drop: %s\n", data->dtdda_msg);
398 	/*
399 	 * Pretend nothing happened. So our program can continue.
400 	 */
401 	return (DTRACE_HANDLE_OK);
402 }
403 
404 /*
405  * DTrace initialization. The D script is running when this function returns.
406  */
407 int
408 lt_dtrace_init(void)
409 {
410 	dtrace_prog_t *prog;
411 	dtrace_proginfo_t info;
412 	int err;
413 	FILE *fp_script = NULL;
414 
415 	pid_self = getpid();
416 	/* Open dtrace, set up handler */
417 	g_dtp = dtrace_open(DTRACE_VERSION, 0, &err);
418 	if (g_dtp == NULL) {
419 		lt_display_error("Cannot open dtrace library: %s\n",
420 		    dtrace_errmsg(NULL, err));
421 		return (-1);
422 	}
423 
424 	if (dtrace_handle_drop(g_dtp, &drop_handler, NULL) == -1) {
425 		lt_display_error("Cannot install DTrace handle: %s\n",
426 		    dtrace_errmsg(NULL, err));
427 		return (-1);
428 	}
429 
430 	/* Load D script, set up macro and compile */
431 #ifdef EMBED_CONFIGS
432 	/* Create a temp file because libdtrace use cpp(1) on files only. */
433 	fp_script = tmpfile();
434 	if (fp_script == NULL) {
435 		lt_display_error("Cannot create tmp file\n");
436 		return (-1);
437 	}
438 	(void) fwrite(latencytop_d, latencytop_d_len, 1, fp_script);
439 	(void) fseek(fp_script, 0, SEEK_SET);
440 #else
441 	fp_script = fopen(DEFAULT_D_SCRIPT_NAME, "r");
442 	if (fp_script == NULL) {
443 		lt_display_error("Cannot open script file %s\n",
444 		    DEFAULT_D_SCRIPT_NAME);
445 		return (-1);
446 	}
447 #endif	/* EMBED_CONFIGS */
448 
449 	if (g_config.enable_filter) {
450 		(void) dtrace_setopt(g_dtp, "define", "ENABLE_FILTER");
451 	}
452 	if (g_config.trace_syncobj) {
453 		(void) dtrace_setopt(g_dtp, "define", "ENABLE_SYNCOBJ");
454 	}
455 	if (g_config.trace_sched) {
456 		(void) dtrace_setopt(g_dtp, "define", "ENABLE_SCHED");
457 	}
458 	if (g_config.low_overhead_mode) {
459 		(void) dtrace_setopt(g_dtp, "define", "ENABLE_LOW_OVERHEAD");
460 	}
461 
462 	prog = dtrace_program_fcompile(g_dtp, fp_script,
463 	    DTRACE_C_CPP, 0, NULL);
464 	(void) fclose(fp_script);
465 	if (prog == NULL) {
466 		lt_display_error("Failed to compile D script.\n");
467 		return (dtrace_errno(g_dtp));
468 	}
469 
470 	/* Execute the D script */
471 	if (dtrace_program_exec(g_dtp, prog, &info) == -1) {
472 		lt_display_error("Failed to enable probes.\n");
473 		return (dtrace_errno(g_dtp));
474 	}
475 	if (dtrace_go(g_dtp) != 0) {
476 		lt_display_error("Failed to run D script.\n");
477 		return (dtrace_errno(g_dtp));
478 	}
479 	return (0);
480 }
481 
482 /*
483  * Worker function to move aggregator data to user space.
484  * Needs to be called periodically to prevent running out of kernel memory.
485  */
486 int
487 lt_dtrace_work(int force)
488 {
489 	static uint64_t last_snap = 0;
490 	uint64_t now = lt_millisecond();
491 
492 	if (!force && now - last_snap < g_config.snap_interval) {
493 		return (last_snap + g_config.snap_interval - now);
494 	}
495 
496 	if (dtrace_status(g_dtp) == -1) {
497 		lt_display_error("Failed when getting status: %s\n",
498 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
499 		return (-1);
500 	}
501 
502 	if (dtrace_aggregate_snap(g_dtp) != 0) {
503 		lt_display_error("Failed to snap aggregate: %s\n",
504 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
505 		return (-1);
506 	}
507 
508 	last_snap = now;
509 	return (0);
510 }
511 
512 /*
513  * Walk through aggregator and collect data to LatencyTOP.
514  * Different from lt_dtrace_work, this one moves data from libdtrace
515  * to latencytop.
516  * This needs to be called immediately before update UI.
517  */
518 int
519 lt_dtrace_collect(void)
520 {
521 	if (lt_dtrace_work(1) != 0) {
522 		return (-1);
523 	}
524 
525 	if (dtrace_aggregate_walk(g_dtp, aggwalk, NULL) != 0) {
526 		lt_display_error("Failed to sort aggregate: %s\n",
527 		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
528 		return (-1);
529 	}
530 
531 	/*
532 	 * Probably no need to clear again, because we removed everything.
533 	 * Paranoid.
534 	 */
535 	dtrace_aggregate_clear(g_dtp);
536 
537 	return (0);
538 }
539 
540 /*
541  * Clean up and close DTrace.
542  */
543 void
544 lt_dtrace_deinit(void)
545 {
546 	(void) dtrace_stop(g_dtp);
547 	dtrace_close(g_dtp);
548 }
549