1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2008-2009, Intel Corporation.
23 * All Rights Reserved.
24 */
25
26 #include <unistd.h>
27 #include <stdio.h>
28 #include <dtrace.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include <memory.h>
32 #include <limits.h>
33
34 #include "latencytop.h"
35
36 static dtrace_hdl_t *g_dtp = NULL; /* dtrace handle */
37 static pid_t pid_self = -1; /* PID of our own process */
38
39 /*
40 * Ignore sched if sched is not tracked.
41 * Also ignore ourselves (i.e., latencytop).
42 */
43 #define SHOULD_IGNORE(pid) \
44 ((!g_config.lt_cfg_trace_sched && 0 == (pid)) || pid_self == (pid))
45
/*
 * Read an unsigned integer that is 'b' bytes wide from address 'a' and
 * return it widened to 64 bits.
 *
 * Widths of 1, 2, 4 and 8 bytes are understood; any other width yields 0.
 * The bytes are copied out with memcpy() rather than read through a casted
 * pointer, so 'a' does not need to be aligned for the requested width.
 */
static uint64_t
rec_get_value(void *a, size_t b)
{
	uint64_t ret = 0;

	switch (b) {
	case sizeof (uint64_t): {
		uint64_t v;

		(void) memcpy(&v, a, sizeof (v));
		ret = v;
		break;
	}
	case sizeof (uint32_t): {
		uint32_t v;

		(void) memcpy(&v, a, sizeof (v));
		ret = v;
		break;
	}
	case sizeof (uint16_t): {
		uint16_t v;

		(void) memcpy(&v, a, sizeof (v));
		ret = v;
		break;
	}
	case sizeof (uint8_t):
		/* Byte access is always aligned; no copy needed. */
		ret = *((const uint8_t *)a);
		break;
	default:
		/* Unsupported width: report 0, as callers expect. */
		break;
	}

	return (ret);
}
73
74 /*
75 * Callback to process aggregation lt_call_* (related to on/off cpu
76 * activities) in the snapshot.
77 */
78 static int
aggwalk_call(const dtrace_aggdata_t * data,lt_stat_type_t stat_type)79 aggwalk_call(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
80 {
81 dtrace_aggdesc_t *aggdesc = data->dtada_desc;
82 dtrace_syminfo_t dts;
83 GElf_Sym sym;
84 caddr_t addr;
85 pid_t pid;
86 id_t tid;
87 unsigned int stack_depth;
88 unsigned int pc_size;
89 uint64_t pc;
90 uint64_t agg_value;
91 char *ptr = NULL;
92 char *buffer = NULL;
93 int ptrsize;
94 unsigned int buffersize;
95 char *tag = NULL;
96 unsigned int priority;
97 enum { REC_PID = 1, REC_TID, REC_STACK, REC_TAG, REC_PRIO, REC_AGG,
98 NREC };
99
100 /* Check action type */
101 if ((aggdesc->dtagd_nrecs < NREC) ||
102 (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) ||
103 (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) ||
104 (aggdesc->dtagd_rec[REC_TAG].dtrd_action != DTRACEACT_DIFEXPR) ||
105 (aggdesc->dtagd_rec[REC_PRIO].dtrd_action != DTRACEACT_DIFEXPR) ||
106 (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action)) ||
107 (aggdesc->dtagd_rec[REC_STACK].dtrd_action != DTRACEACT_STACK)) {
108
109 return (-1);
110 }
111
112 pid = rec_get_value(
113 data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset,
114 aggdesc->dtagd_rec[REC_PID].dtrd_size);
115
116 if (SHOULD_IGNORE(pid)) {
117 return (0);
118 }
119
120 tid = rec_get_value(
121 data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset,
122 aggdesc->dtagd_rec[REC_TID].dtrd_size);
123
124 /* Parse stack array from dtagd_rec */
125 stack_depth = aggdesc->dtagd_rec[REC_STACK].dtrd_arg;
126 pc_size = aggdesc->dtagd_rec[REC_STACK].dtrd_size / stack_depth;
127 addr = data->dtada_data + aggdesc->dtagd_rec[REC_STACK].dtrd_offset;
128 buffersize = (stack_depth * (2 * PATH_MAX + 2) + 1) * sizeof (char);
129 buffer = (char *)lt_malloc(buffersize);
130 ptr = buffer;
131 ptrsize = buffersize;
132
133 /* Print the stack */
134 while (stack_depth > 0) {
135 pc = rec_get_value(addr, pc_size);
136
137 if (pc == 0) {
138 break;
139 }
140
141 addr += pc_size;
142
143 if (dtrace_lookup_by_addr(g_dtp, pc, &sym, &dts) == 0) {
144 int len;
145 len = snprintf(ptr, ptrsize,
146 "%s`%s ", dts.dts_object, dts.dts_name);
147 ptrsize -= len;
148
149 if (ptrsize <= 0) {
150 /*
151 * snprintf returns "desired" length, so
152 * reaching here means our buffer is full.
153 * Move ptr to the last byte of the buffer and
154 * break.
155 */
156 ptr = &buffer[buffersize-1];
157 break;
158 } else {
159 ptr += len;
160 }
161 }
162 }
163
164 if (ptr != buffer) {
165 /*
166 * We have printed something, so it is safe to remove
167 * the last ' '.
168 */
169 *(ptr-1) = '\0';
170 }
171
172 tag = (char *)data->dtada_data +
173 aggdesc->dtagd_rec[REC_TAG].dtrd_offset;
174
175 priority = rec_get_value(
176 data->dtada_data + aggdesc->dtagd_rec[REC_PRIO].dtrd_offset,
177 aggdesc->dtagd_rec[REC_PRIO].dtrd_size);
178
179 agg_value = rec_get_value(
180 data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset,
181 aggdesc->dtagd_rec[REC_AGG].dtrd_size);
182
183 lt_stat_update(pid, tid, buffer, tag, priority, stat_type, agg_value);
184
185 if (buffer != NULL) {
186 free(buffer);
187 }
188
189 return (0);
190 }
191
192 /*
193 * Callback to process aggregation lt_named_* (related to lock spinning etc.),
194 * in the snapshot.
195 */
196 static int
aggwalk_named(const dtrace_aggdata_t * data,lt_stat_type_t stat_type)197 aggwalk_named(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
198 {
199 dtrace_aggdesc_t *aggdesc = data->dtada_desc;
200 pid_t pid;
201 id_t tid;
202 uint64_t agg_value;
203 int cause_id;
204 char *type = NULL;
205 enum { REC_PID = 1, REC_TID, REC_TYPE, REC_AGG, NREC };
206
207 /* Check action type */
208 if ((aggdesc->dtagd_nrecs < NREC) ||
209 (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) ||
210 (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) ||
211 (aggdesc->dtagd_rec[REC_TYPE].dtrd_action != DTRACEACT_DIFEXPR) ||
212 (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action))) {
213
214 return (-1);
215 }
216
217 pid = rec_get_value(
218 data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset,
219 aggdesc->dtagd_rec[REC_PID].dtrd_size);
220
221 if (SHOULD_IGNORE(pid)) {
222 return (0);
223 }
224
225 tid = rec_get_value(
226 data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset,
227 aggdesc->dtagd_rec[REC_TID].dtrd_size);
228
229 type = (char *)data->dtada_data
230 + aggdesc->dtagd_rec[REC_TYPE].dtrd_offset;
231 cause_id = lt_table_cause_from_name(type, 1, CAUSE_FLAG_SPECIAL);
232
233 agg_value = rec_get_value(
234 data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset,
235 aggdesc->dtagd_rec[REC_AGG].dtrd_size);
236
237 lt_stat_update_cause(pid, tid, cause_id, stat_type, agg_value);
238
239 return (0);
240
241 }
242
243 /*
244 * Callback to process aggregation lt_sync_* (related to synchronization
245 * objects), in the snapshot.
246 */
247 static int
aggwalk_sync(const dtrace_aggdata_t * data,lt_stat_type_t stat_type)248 aggwalk_sync(const dtrace_aggdata_t *data, lt_stat_type_t stat_type)
249 {
250 dtrace_aggdesc_t *aggdesc = data->dtada_desc;
251 pid_t pid;
252 id_t tid;
253 uint64_t agg_value;
254 int stype;
255 unsigned long long wchan;
256 enum { REC_PID = 1, REC_TID, REC_STYPE, REC_WCHAN, REC_AGG, NREC };
257
258 /* Check action type */
259 if ((aggdesc->dtagd_nrecs < NREC) ||
260 (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) ||
261 (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) ||
262 (aggdesc->dtagd_rec[REC_STYPE].dtrd_action != DTRACEACT_DIFEXPR) ||
263 (aggdesc->dtagd_rec[REC_WCHAN].dtrd_action != DTRACEACT_DIFEXPR) ||
264 (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action))) {
265
266 return (-1);
267 }
268
269 pid = rec_get_value(
270 data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset,
271 aggdesc->dtagd_rec[REC_PID].dtrd_size);
272
273 if (SHOULD_IGNORE(pid)) {
274 return (0);
275 }
276
277 tid = rec_get_value(
278 data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset,
279 aggdesc->dtagd_rec[REC_TID].dtrd_size);
280
281 stype = rec_get_value(
282 data->dtada_data + aggdesc->dtagd_rec[REC_STYPE].dtrd_offset,
283 aggdesc->dtagd_rec[REC_STYPE].dtrd_size);
284
285 wchan = rec_get_value(
286 data->dtada_data + aggdesc->dtagd_rec[REC_WCHAN].dtrd_offset,
287 aggdesc->dtagd_rec[REC_WCHAN].dtrd_size);
288
289 agg_value = rec_get_value(
290 data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset,
291 aggdesc->dtagd_rec[REC_AGG].dtrd_size);
292
293 lt_stat_update_sobj(pid, tid, stype, wchan, stat_type, agg_value);
294
295 return (0);
296 }
297
298 /*
299 * Callback to process various aggregations in the snapshot. Called by
300 * different aggwalk_* functions.
301 */
302 /* ARGSUSED */
303 static int
aggwalk(const dtrace_aggdata_t * data,void * arg)304 aggwalk(const dtrace_aggdata_t *data, void *arg)
305 {
306 char *tmp;
307 char buffer[32];
308 lt_stat_type_t stat_type;
309 int (*func)(const dtrace_aggdata_t *, lt_stat_type_t);
310
311 (void) strncpy(buffer, data->dtada_desc->dtagd_name, sizeof (buffer));
312 buffer[sizeof (buffer) - 1] = '\0';
313 tmp = strtok(buffer, "_");
314
315 if (tmp == NULL || strcmp(tmp, "lt") != 0) {
316 goto done;
317 }
318
319 tmp = strtok(NULL, "_");
320
321 if (tmp == NULL) {
322 goto done;
323 } else if (strcmp(tmp, "call") == 0) {
324 func = aggwalk_call;
325 } else if (strcmp(tmp, "named") == 0) {
326 func = aggwalk_named;
327 } else if (strcmp(tmp, "sync") == 0) {
328 func = aggwalk_sync;
329 } else {
330 goto done;
331 }
332
333 tmp = strtok(NULL, "_");
334
335 if (tmp == NULL) {
336 goto done;
337 } else if (strcmp(tmp, "count") == 0) {
338 stat_type = LT_STAT_COUNT;
339 } else if (strcmp(tmp, "sum") == 0) {
340 stat_type = LT_STAT_SUM;
341 } else if (strcmp(tmp, "max") == 0) {
342 stat_type = LT_STAT_MAX;
343 } else {
344 goto done;
345 }
346
347 (void) func(data, stat_type);
348
349 done:
350 /* We have our data, so remove it from DTrace now */
351 return (DTRACE_AGGWALK_REMOVE);
352 }
353
354 /*
355 * Callback to handle event caused by DTrace dropping data.
356 */
357 /*ARGSUSED*/
358 static int
drop_handler(const dtrace_dropdata_t * data,void * user)359 drop_handler(const dtrace_dropdata_t *data, void *user)
360 {
361 lt_display_error("Drop: %s\n", data->dtdda_msg);
362 lt_drop_detected = B_TRUE;
363
364 /* Pretend nothing happened, so just continue */
365 return (DTRACE_HANDLE_OK);
366 }
367
368 #ifndef EMBED_CONFIGS
/*
 * Copy the content of the file named 'src' into the already-open stream
 * 'dst'.
 *
 * Returns 0 when the whole file has been copied, and -1 when 'src' cannot
 * be opened or a read or write fails. 'dst' is never closed here; the
 * stream opened on 'src' is closed on every path.
 */
static int
copy_tmp_file(const char *src, FILE *dst)
{
	FILE *tmp = NULL;
	char buffer[256];
	size_t bytes;
	int ret = 0;

	if ((tmp = fopen(src, "r")) == NULL) {
		return (-1);
	}

	while ((bytes = fread(buffer, 1, sizeof (buffer), tmp)) > 0) {
		if (fwrite(buffer, 1, bytes, dst) != bytes) {
			ret = -1;
			break;
		}
	}

	/* fread() returns 0 on error as well as at end of file. */
	if (ret == 0 && ferror(tmp)) {
		ret = -1;
	}

	(void) fclose(tmp);

	return (ret);
}
393 #endif
394
395 /*
396 * DTrace initialization. D script starts running when this function returns.
397 */
398 int
lt_dtrace_init(void)399 lt_dtrace_init(void)
400 {
401 dtrace_prog_t *prog;
402 dtrace_proginfo_t info;
403 int err;
404 FILE *fp_script = NULL;
405 char tmp[64];
406
407 pid_self = getpid();
408
409 if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
410 lt_display_error("Cannot open dtrace library: %s\n",
411 dtrace_errmsg(NULL, err));
412 return (-1);
413 }
414
415 if (dtrace_handle_drop(g_dtp, &drop_handler, NULL) == -1) {
416 lt_display_error("Cannot install DTrace handle: %s\n",
417 dtrace_errmsg(NULL, err));
418 return (-1);
419 }
420
421 if (g_config.lt_cfg_enable_filter) {
422 if ((err = dtrace_setopt(g_dtp, "define",
423 "ENABLE_FILTER")) != 0) {
424 lt_display_error(
425 "Failed to set option ENABLE_FILTER.\n");
426 return (err);
427 }
428 }
429
430 if (g_config.lt_cfg_trace_syncobj) {
431 if ((err = dtrace_setopt(g_dtp, "define",
432 "ENABLE_SYNCOBJ")) != 0) {
433 lt_display_error(
434 "Failed to set option ENABLE_SYNCOBJ.\n");
435 return (err);
436 }
437 }
438
439 if (g_config.lt_cfg_trace_sched) {
440 if ((err = dtrace_setopt(g_dtp, "define",
441 "ENABLE_SCHED")) != 0) {
442 lt_display_error(
443 "Failed to set option ENABLE_SCHED.\n");
444 return (err);
445 }
446 }
447
448 if (g_config.lt_cfg_trace_pid != 0) {
449 (void) snprintf(tmp, sizeof (tmp), "TRACE_PID=%u",
450 g_config.lt_cfg_trace_pid);
451 if ((err = dtrace_setopt(g_dtp, "define", tmp)) != 0) {
452 lt_display_error(
453 "Failed to set option TRACE_PID.\n");
454 return (err);
455 }
456 }
457
458 if (g_config.lt_cfg_trace_pgid != 0) {
459 (void) snprintf(tmp, sizeof (tmp), "TRACE_PGID=%u",
460 g_config.lt_cfg_trace_pgid);
461 if ((err = dtrace_setopt(g_dtp, "define", tmp)) != 0) {
462 lt_display_error(
463 "Failed to set option TRACE_PGID.\n");
464 return (err);
465 }
466 }
467
468 if (g_config.lt_cfg_low_overhead_mode) {
469 if ((err = dtrace_setopt(g_dtp, "define",
470 "ENABLE_LOW_OVERHEAD")) != 0) {
471 lt_display_error(
472 "Failed to set option ENABLE_LOW_OVERHEAD.\n");
473 return (err);
474 }
475 }
476
477 /* Create a temp file; libdtrace needs it for cpp(1) */
478 if ((fp_script = tmpfile()) == NULL) {
479 lt_display_error("Cannot create tmp file\n");
480 return (-1);
481 }
482
483 /* Copy the main D script into the temp file */
484 #ifdef EMBED_CONFIGS
485 if (fwrite(&latencytop_d_start,
486 (size_t)(&latencytop_d_end - &latencytop_d_start), 1, fp_script)
487 != 1) {
488 lt_display_error("Could not copy D script, fwrite() failed\n");
489 (void) fclose(fp_script);
490 return (-1);
491 }
492 #else
493 if (copy_tmp_file(DEFAULT_D_SCRIPT_NAME, fp_script) != 0) {
494 lt_display_error("Cannot open script file %s\n",
495 DEFAULT_D_SCRIPT_NAME);
496 (void) fclose(fp_script);
497 return (-1);
498 }
499 #endif /* EMBED_CONFIGS */
500
501 if (lt_table_append_trans(fp_script) != 0) {
502 (void) fclose(fp_script);
503 return (-1);
504 }
505
506 (void) fseek(fp_script, 0, SEEK_SET);
507
508 if ((prog = dtrace_program_fcompile(g_dtp, fp_script,
509 DTRACE_C_CPP, 0, NULL)) == NULL) {
510 lt_display_error("Failed to compile D script.\n");
511 (void) fclose(fp_script);
512 return (dtrace_errno(g_dtp));
513 }
514
515 (void) fclose(fp_script);
516
517 /* Execute the D script */
518 if (dtrace_program_exec(g_dtp, prog, &info) == -1) {
519 lt_display_error("Failed to enable probes.\n");
520 return (dtrace_errno(g_dtp));
521 }
522
523 if (dtrace_go(g_dtp) != 0) {
524 lt_display_error("Failed to run D script.\n");
525 return (dtrace_errno(g_dtp));
526 }
527
528 return (0);
529 }
530
531 /*
532 * Worker function to move aggregate data to user space. Called periodically
533 * to prevent the kernel from running out of memory.
534 */
535 int
lt_dtrace_work(int force)536 lt_dtrace_work(int force)
537 {
538 static uint64_t last_snap = 0;
539 uint64_t now = lt_millisecond();
540
541 if (!force && now - last_snap < g_config.lt_cfg_snap_interval) {
542 return (last_snap + g_config.lt_cfg_snap_interval - now);
543 }
544
545 if (dtrace_status(g_dtp) == -1) {
546 lt_display_error("Failed when getting status: %s\n",
547 dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
548 return (-1);
549 }
550
551 if (dtrace_aggregate_snap(g_dtp) != 0) {
552 lt_display_error("Failed to snap aggregate: %s\n",
553 dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
554 return (-1);
555 }
556
557 last_snap = now;
558 return (0);
559 }
560
561 /*
562 * Walk through dtrace aggregator and collect data for latencytop to display.
563 * Called immediately before UI update.
564 */
565 int
lt_dtrace_collect(void)566 lt_dtrace_collect(void)
567 {
568 if (lt_dtrace_work(1) != 0) {
569 return (-1);
570 }
571
572 if (dtrace_aggregate_walk(g_dtp, aggwalk, NULL) != 0) {
573 lt_display_error("Failed to sort aggregate: %s\n",
574 dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
575 return (-1);
576 }
577
578 /*
579 * Probably we don't need to clear again, because we have removed
580 * everything. Paranoid ?
581 */
582 dtrace_aggregate_clear(g_dtp);
583
584 return (0);
585 }
586
587 /*
588 * dtrace clean up.
589 */
590 int
lt_dtrace_deinit(void)591 lt_dtrace_deinit(void)
592 {
593 int ret = 0;
594
595 if (dtrace_stop(g_dtp) != 0) {
596 lt_display_error("dtrace_stop failed: %s\n",
597 dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
598 ret = -1;
599 }
600
601 dtrace_close(g_dtp);
602
603 return (ret);
604 }
605