1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2008-2009, Intel Corporation. 23 * All Rights Reserved. 24 */ 25 26 #include <unistd.h> 27 #include <stdio.h> 28 #include <dtrace.h> 29 #include <string.h> 30 #include <stdlib.h> 31 #include <memory.h> 32 #include <limits.h> 33 34 #include "latencytop.h" 35 36 static dtrace_hdl_t *g_dtp = NULL; /* dtrace handle */ 37 static pid_t pid_self = -1; /* PID of our own process */ 38 39 /* 40 * Ignore sched if sched is not tracked. 41 * Also ignore ourselves (i.e., latencytop). 42 */ 43 #define SHOULD_IGNORE(pid) \ 44 ((!g_config.lt_cfg_trace_sched && 0 == (pid)) || pid_self == (pid)) 45 46 /* 47 * Get an integer value from dtrace record. 48 */ 49 static uint64_t 50 rec_get_value(void *a, size_t b) 51 { 52 uint64_t ret = 0; 53 54 switch (b) { 55 case sizeof (uint64_t): 56 ret = *((uint64_t *)(a)); 57 break; 58 case sizeof (uint32_t): 59 ret = *((uint32_t *)(a)); 60 break; 61 case sizeof (uint16_t): 62 ret = *((uint16_t *)(a)); 63 break; 64 case sizeof (uint8_t): 65 ret = *((uint8_t *)(a)); 66 break; 67 default: 68 break; 69 } 70 71 return (ret); 72 } 73 74 /* 75 * Callback to process aggregation lt_call_* (related to on/off cpu 76 * activities) in the snapshot. 77 */ 78 static int 79 aggwalk_call(const dtrace_aggdata_t *data, lt_stat_type_t stat_type) 80 { 81 dtrace_aggdesc_t *aggdesc = data->dtada_desc; 82 dtrace_syminfo_t dts; 83 GElf_Sym sym; 84 caddr_t addr; 85 pid_t pid; 86 id_t tid; 87 unsigned int stack_depth; 88 unsigned int pc_size; 89 uint64_t pc; 90 uint64_t agg_value; 91 char *ptr = NULL; 92 char *buffer = NULL; 93 int ptrsize; 94 unsigned int buffersize; 95 char *tag = NULL; 96 unsigned int priority; 97 enum { REC_PID = 1, REC_TID, REC_STACK, REC_TAG, REC_PRIO, REC_AGG, 98 NREC }; 99 100 /* Check action type */ 101 if ((aggdesc->dtagd_nrecs < NREC) || 102 (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) || 103 (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) || 104 (aggdesc->dtagd_rec[REC_TAG].dtrd_action != DTRACEACT_DIFEXPR) || 105 (aggdesc->dtagd_rec[REC_PRIO].dtrd_action != DTRACEACT_DIFEXPR) || 106 (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action)) || 107 (aggdesc->dtagd_rec[REC_STACK].dtrd_action != DTRACEACT_STACK)) { 108 109 return (-1); 110 } 111 112 pid = rec_get_value( 113 data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset, 114 aggdesc->dtagd_rec[REC_PID].dtrd_size); 115 116 if (SHOULD_IGNORE(pid)) { 117 return (0); 118 } 119 120 tid = rec_get_value( 121 data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset, 122 aggdesc->dtagd_rec[REC_TID].dtrd_size); 123 124 /* Parse stack array from dtagd_rec */ 125 stack_depth = aggdesc->dtagd_rec[REC_STACK].dtrd_arg; 126 pc_size = aggdesc->dtagd_rec[REC_STACK].dtrd_size / stack_depth; 127 addr = data->dtada_data + aggdesc->dtagd_rec[REC_STACK].dtrd_offset; 128 buffersize = (stack_depth * (2 * PATH_MAX + 2) + 1) * sizeof (char); 129 buffer = (char *)lt_malloc(buffersize); 130 ptr = buffer; 131 ptrsize = buffersize; 132 133 /* Print the stack */ 134 while (stack_depth > 0) { 135 pc = rec_get_value(addr, pc_size); 136 137 if (pc == 0) { 138 break; 139 } 140 141 addr += pc_size; 142 143 if (dtrace_lookup_by_addr(g_dtp, pc, &sym, &dts) == 0) { 144 int len; 145 len = snprintf(ptr, ptrsize, 146 "%s`%s ", dts.dts_object, dts.dts_name); 147 ptrsize -= len; 148 149 if (ptrsize <= 0) { 150 /* 151 * snprintf returns "desired" length, so 152 * reaching here means our buffer is full. 153 * Move ptr to the last byte of the buffer and 154 * break. 155 */ 156 ptr = &buffer[buffersize-1]; 157 break; 158 } else { 159 ptr += len; 160 } 161 } 162 } 163 164 if (ptr != buffer) { 165 /* 166 * We have printed something, so it is safe to remove 167 * the last ' '. 168 */ 169 *(ptr-1) = '\0'; 170 } 171 172 tag = (char *)data->dtada_data + 173 aggdesc->dtagd_rec[REC_TAG].dtrd_offset; 174 175 priority = rec_get_value( 176 data->dtada_data + aggdesc->dtagd_rec[REC_PRIO].dtrd_offset, 177 aggdesc->dtagd_rec[REC_PRIO].dtrd_size); 178 179 agg_value = rec_get_value( 180 data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset, 181 aggdesc->dtagd_rec[REC_AGG].dtrd_size); 182 183 lt_stat_update(pid, tid, buffer, tag, priority, stat_type, agg_value); 184 185 if (buffer != NULL) { 186 free(buffer); 187 } 188 189 return (0); 190 } 191 192 /* 193 * Callback to process aggregation lt_named_* (related to lock spinning etc.), 194 * in the snapshot. 195 */ 196 static int 197 aggwalk_named(const dtrace_aggdata_t *data, lt_stat_type_t stat_type) 198 { 199 dtrace_aggdesc_t *aggdesc = data->dtada_desc; 200 pid_t pid; 201 id_t tid; 202 uint64_t agg_value; 203 int cause_id; 204 char *type = NULL; 205 enum { REC_PID = 1, REC_TID, REC_TYPE, REC_AGG, NREC }; 206 207 /* Check action type */ 208 if ((aggdesc->dtagd_nrecs < NREC) || 209 (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) || 210 (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) || 211 (aggdesc->dtagd_rec[REC_TYPE].dtrd_action != DTRACEACT_DIFEXPR) || 212 (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action))) { 213 214 return (-1); 215 } 216 217 pid = rec_get_value( 218 data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset, 219 aggdesc->dtagd_rec[REC_PID].dtrd_size); 220 221 if (SHOULD_IGNORE(pid)) { 222 return (0); 223 } 224 225 tid = rec_get_value( 226 data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset, 227 aggdesc->dtagd_rec[REC_TID].dtrd_size); 228 229 type = (char *)data->dtada_data 230 + aggdesc->dtagd_rec[REC_TYPE].dtrd_offset; 231 cause_id = lt_table_cause_from_name(type, 1, CAUSE_FLAG_SPECIAL); 232 233 agg_value = rec_get_value( 234 data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset, 235 aggdesc->dtagd_rec[REC_AGG].dtrd_size); 236 237 lt_stat_update_cause(pid, tid, cause_id, stat_type, agg_value); 238 239 return (0); 240 241 } 242 243 /* 244 * Callback to process aggregation lt_sync_* (related to synchronization 245 * objects), in the snapshot. 246 */ 247 static int 248 aggwalk_sync(const dtrace_aggdata_t *data, lt_stat_type_t stat_type) 249 { 250 dtrace_aggdesc_t *aggdesc = data->dtada_desc; 251 pid_t pid; 252 id_t tid; 253 uint64_t agg_value; 254 int stype; 255 unsigned long long wchan; 256 enum { REC_PID = 1, REC_TID, REC_STYPE, REC_WCHAN, REC_AGG, NREC }; 257 258 /* Check action type */ 259 if ((aggdesc->dtagd_nrecs < NREC) || 260 (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) || 261 (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) || 262 (aggdesc->dtagd_rec[REC_STYPE].dtrd_action != DTRACEACT_DIFEXPR) || 263 (aggdesc->dtagd_rec[REC_WCHAN].dtrd_action != DTRACEACT_DIFEXPR) || 264 (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action))) { 265 266 return (-1); 267 } 268 269 pid = rec_get_value( 270 data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset, 271 aggdesc->dtagd_rec[REC_PID].dtrd_size); 272 273 if (SHOULD_IGNORE(pid)) { 274 return (0); 275 } 276 277 tid = rec_get_value( 278 data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset, 279 aggdesc->dtagd_rec[REC_TID].dtrd_size); 280 281 stype = rec_get_value( 282 data->dtada_data + aggdesc->dtagd_rec[REC_STYPE].dtrd_offset, 283 aggdesc->dtagd_rec[REC_STYPE].dtrd_size); 284 285 wchan = rec_get_value( 286 data->dtada_data + aggdesc->dtagd_rec[REC_WCHAN].dtrd_offset, 287 aggdesc->dtagd_rec[REC_WCHAN].dtrd_size); 288 289 agg_value = rec_get_value( 290 data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset, 291 aggdesc->dtagd_rec[REC_AGG].dtrd_size); 292 293 lt_stat_update_sobj(pid, tid, stype, wchan, stat_type, agg_value); 294 295 return (0); 296 } 297 298 /* 299 * Callback to process various aggregations in the snapshot. Called by 300 * different aggwalk_* functions. 301 */ 302 /* ARGSUSED */ 303 static int 304 aggwalk(const dtrace_aggdata_t *data, void *arg) 305 { 306 char *tmp; 307 char buffer[32]; 308 lt_stat_type_t stat_type; 309 int (*func)(const dtrace_aggdata_t *, lt_stat_type_t); 310 311 (void) strncpy(buffer, data->dtada_desc->dtagd_name, sizeof (buffer)); 312 buffer[sizeof (buffer) - 1] = '\0'; 313 tmp = strtok(buffer, "_"); 314 315 if (tmp == NULL || strcmp(tmp, "lt") != 0) { 316 goto done; 317 } 318 319 tmp = strtok(NULL, "_"); 320 321 if (tmp == NULL) { 322 goto done; 323 } else if (strcmp(tmp, "call") == 0) { 324 func = aggwalk_call; 325 } else if (strcmp(tmp, "named") == 0) { 326 func = aggwalk_named; 327 } else if (strcmp(tmp, "sync") == 0) { 328 func = aggwalk_sync; 329 } else { 330 goto done; 331 } 332 333 tmp = strtok(NULL, "_"); 334 335 if (tmp == NULL) { 336 goto done; 337 } else if (strcmp(tmp, "count") == 0) { 338 stat_type = LT_STAT_COUNT; 339 } else if (strcmp(tmp, "sum") == 0) { 340 stat_type = LT_STAT_SUM; 341 } else if (strcmp(tmp, "max") == 0) { 342 stat_type = LT_STAT_MAX; 343 } else { 344 goto done; 345 } 346 347 (void) func(data, stat_type); 348 349 done: 350 /* We have our data, so remove it from DTrace now */ 351 return (DTRACE_AGGWALK_REMOVE); 352 } 353 354 /* 355 * Callback to handle event caused by DTrace dropping data. 356 */ 357 /*ARGSUSED*/ 358 static int 359 drop_handler(const dtrace_dropdata_t *data, void *user) 360 { 361 lt_display_error("Drop: %s\n", data->dtdda_msg); 362 lt_drop_detected = B_TRUE; 363 364 /* Pretend nothing happened, so just continue */ 365 return (DTRACE_HANDLE_OK); 366 } 367 368 #ifndef EMBED_CONFIGS 369 /* 370 * Copy the content from a "real" file into a temp file. 371 */ 372 static int 373 copy_tmp_file(const char *src, FILE *dst) 374 { 375 FILE *tmp = NULL; 376 char buffer[256]; 377 int bytes; 378 379 if ((tmp = fopen(src, "r")) == NULL) { 380 return (-1); 381 } 382 383 while ((bytes = fread(buffer, 1, sizeof (buffer), tmp)) > 0) { 384 if (fwrite(buffer, bytes, 1, dst) != 1) { 385 return (-1); 386 } 387 } 388 389 (void) fclose(tmp); 390 391 return (0); 392 } 393 #endif 394 395 /* 396 * DTrace initialization. D script starts running when this function returns. 397 */ 398 int 399 lt_dtrace_init(void) 400 { 401 dtrace_prog_t *prog; 402 dtrace_proginfo_t info; 403 int err; 404 FILE *fp_script = NULL; 405 406 pid_self = getpid(); 407 408 if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) { 409 lt_display_error("Cannot open dtrace library: %s\n", 410 dtrace_errmsg(NULL, err)); 411 return (-1); 412 } 413 414 if (dtrace_handle_drop(g_dtp, &drop_handler, NULL) == -1) { 415 lt_display_error("Cannot install DTrace handle: %s\n", 416 dtrace_errmsg(NULL, err)); 417 return (-1); 418 } 419 420 if (g_config.lt_cfg_enable_filter) { 421 if ((err = dtrace_setopt(g_dtp, "define", 422 "ENABLE_FILTER")) != 0) { 423 lt_display_error( 424 "Failed to set option ENABLE_FILTER.\n"); 425 return (err); 426 } 427 } 428 429 if (g_config.lt_cfg_trace_syncobj) { 430 if ((err = dtrace_setopt(g_dtp, "define", 431 "ENABLE_SYNCOBJ")) != 0) { 432 lt_display_error( 433 "Failed to set option ENABLE_SYNCOBJ.\n"); 434 return (err); 435 } 436 } 437 438 if (g_config.lt_cfg_trace_sched) { 439 if ((err = dtrace_setopt(g_dtp, "define", 440 "ENABLE_SCHED")) != 0) { 441 lt_display_error( 442 "Failed to set option ENABLE_SYNCOBJ.\n"); 443 return (err); 444 } 445 } 446 447 if (g_config.lt_cfg_low_overhead_mode) { 448 if ((err = dtrace_setopt(g_dtp, "define", 449 "ENABLE_LOW_OVERHEAD")) != 0) { 450 lt_display_error( 451 "Failed to set option ENABLE_SYNCOBJ.\n"); 452 return (err); 453 } 454 } 455 456 /* Create a temp file; libdtrace needs it for cpp(1) */ 457 if ((fp_script = tmpfile()) == NULL) { 458 lt_display_error("Cannot create tmp file\n"); 459 return (-1); 460 } 461 462 /* Copy the main D script into the temp file */ 463 #ifdef EMBED_CONFIGS 464 if (fwrite(&latencytop_d_start, 465 (size_t)(&latencytop_d_end - &latencytop_d_start), 1, fp_script) 466 != 1) { 467 lt_display_error("Could not copy D script, fwrite() failed\n"); 468 (void) fclose(fp_script); 469 return (-1); 470 } 471 #else 472 if (copy_tmp_file(DEFAULT_D_SCRIPT_NAME, fp_script) != 0) { 473 lt_display_error("Cannot open script file %s\n", 474 DEFAULT_D_SCRIPT_NAME); 475 (void) fclose(fp_script); 476 return (-1); 477 } 478 #endif /* EMBED_CONFIGS */ 479 480 if (lt_table_append_trans(fp_script) != 0) { 481 (void) fclose(fp_script); 482 return (-1); 483 } 484 485 (void) fseek(fp_script, 0, SEEK_SET); 486 487 if ((prog = dtrace_program_fcompile(g_dtp, fp_script, 488 DTRACE_C_CPP, 0, NULL)) == NULL) { 489 lt_display_error("Failed to compile D script.\n"); 490 (void) fclose(fp_script); 491 return (dtrace_errno(g_dtp)); 492 } 493 494 (void) fclose(fp_script); 495 496 /* Execute the D script */ 497 if (dtrace_program_exec(g_dtp, prog, &info) == -1) { 498 lt_display_error("Failed to enable probes.\n"); 499 return (dtrace_errno(g_dtp)); 500 } 501 502 if (dtrace_go(g_dtp) != 0) { 503 lt_display_error("Failed to run D script.\n"); 504 return (dtrace_errno(g_dtp)); 505 } 506 507 return (0); 508 } 509 510 /* 511 * Worker function to move aggregate data to user space. Called periodically 512 * to prevent the kernel from running out of memory. 513 */ 514 int 515 lt_dtrace_work(int force) 516 { 517 static uint64_t last_snap = 0; 518 uint64_t now = lt_millisecond(); 519 520 if (!force && now - last_snap < g_config.lt_cfg_snap_interval) { 521 return (last_snap + g_config.lt_cfg_snap_interval - now); 522 } 523 524 if (dtrace_status(g_dtp) == -1) { 525 lt_display_error("Failed when getting status: %s\n", 526 dtrace_errmsg(g_dtp, dtrace_errno(g_dtp))); 527 return (-1); 528 } 529 530 if (dtrace_aggregate_snap(g_dtp) != 0) { 531 lt_display_error("Failed to snap aggregate: %s\n", 532 dtrace_errmsg(g_dtp, dtrace_errno(g_dtp))); 533 return (-1); 534 } 535 536 last_snap = now; 537 return (0); 538 } 539 540 /* 541 * Walk through dtrace aggregator and collect data for latencytop to display. 542 * Called immediately before UI update. 543 */ 544 int 545 lt_dtrace_collect(void) 546 { 547 if (lt_dtrace_work(1) != 0) { 548 return (-1); 549 } 550 551 if (dtrace_aggregate_walk(g_dtp, aggwalk, NULL) != 0) { 552 lt_display_error("Failed to sort aggregate: %s\n", 553 dtrace_errmsg(g_dtp, dtrace_errno(g_dtp))); 554 return (-1); 555 } 556 557 /* 558 * Probably we don't need to clear again, because we have removed 559 * everything. Paranoid ? 560 */ 561 dtrace_aggregate_clear(g_dtp); 562 563 return (0); 564 } 565 566 /* 567 * dtrace clean up. 568 */ 569 int 570 lt_dtrace_deinit(void) 571 { 572 int ret = 0; 573 574 if (dtrace_stop(g_dtp) != 0) { 575 lt_display_error("dtrace_stop failed: %s\n", 576 dtrace_errmsg(g_dtp, dtrace_errno(g_dtp))); 577 ret = -1; 578 } 579 580 dtrace_close(g_dtp); 581 582 return (ret); 583 } 584