1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2008-2009, Intel Corporation. 23 * All Rights Reserved. 24 */ 25 26 #include <unistd.h> 27 #include <stdio.h> 28 #include <dtrace.h> 29 #include <string.h> 30 #include <stdlib.h> 31 #include <memory.h> 32 #include <limits.h> 33 34 #include "latencytop.h" 35 36 static dtrace_hdl_t *g_dtp = NULL; /* The dtrace handle */ 37 static pid_t pid_self = -1; /* PID of our own process */ 38 39 /* 40 * Checks if the process is latencytop itself or sched (if we are not tracing 41 * sched), we should ignore them. 42 */ 43 #define SHOULD_IGNORE(pid) \ 44 ((!g_config.trace_sched && 0 == (pid)) || pid_self == (pid)) 45 46 /* 47 * Get an integer value from dtrace record. 48 */ 49 static uint64_t 50 rec_get_value(void *a, size_t b) 51 { 52 uint64_t ret = 0; 53 54 switch (b) { 55 case sizeof (uint64_t): 56 ret = *((uint64_t *)(a)); 57 break; 58 case sizeof (uint32_t): 59 ret = *((uint32_t *)(a)); 60 break; 61 case sizeof (uint16_t): 62 ret = *((uint16_t *)(a)); 63 break; 64 case sizeof (uint8_t): 65 ret = *((uint8_t *)(a)); 66 break; 67 default: 68 break; 69 } 70 71 return (ret); 72 } 73 74 /* 75 * Callback to process each aggregation in the snapshot. 76 * This one processes lt_call_*, which contains on/off cpu activites. 77 */ 78 static int 79 aggwalk_call(const dtrace_aggdata_t *data, lt_stat_type_t stat_type) 80 { 81 const int REC_PID = 1; 82 const int REC_TID = 2; 83 const int REC_STACK = 3; 84 const int REC_AGG = 4; 85 const int NREC = 5; 86 87 dtrace_aggdesc_t *aggdesc = data->dtada_desc; 88 dtrace_syminfo_t dts; 89 GElf_Sym sym; 90 caddr_t addr; 91 pid_t pid; 92 id_t tid; 93 unsigned int stack_depth; 94 unsigned int pc_size; 95 uint64_t pc; 96 uint64_t agg_value; 97 char *ptr = NULL; 98 char *buffer = NULL; 99 int ptrsize; 100 unsigned int buffersize; 101 102 if (aggdesc->dtagd_nrecs < NREC) { 103 /* Not enough records */ 104 goto err; 105 } 106 107 if (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) { 108 /* Record is not PID, this is an error. */ 109 goto err; 110 } 111 pid = rec_get_value( 112 data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset, 113 aggdesc->dtagd_rec[REC_PID].dtrd_size); 114 if (SHOULD_IGNORE(pid)) { 115 goto done; 116 } 117 118 if (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) { 119 /* Record is not TID, this is an error. */ 120 goto err; 121 } 122 tid = rec_get_value( 123 data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset, 124 aggdesc->dtagd_rec[REC_TID].dtrd_size); 125 126 if (aggdesc->dtagd_rec[REC_STACK].dtrd_action != DTRACEACT_STACK) { 127 /* Record is not stack(), this is an error. */ 128 goto err; 129 } 130 131 /* Parse stack array from dtagd_rec */ 132 stack_depth = aggdesc->dtagd_rec[REC_STACK].dtrd_arg; 133 pc_size = aggdesc->dtagd_rec[REC_STACK].dtrd_size / stack_depth; 134 addr = data->dtada_data + aggdesc->dtagd_rec[REC_STACK].dtrd_offset; 135 buffersize = (stack_depth * (2 * PATH_MAX + 2) + 1) * sizeof (char); 136 buffer = (char *)lt_malloc(buffersize); 137 ptr = buffer; 138 ptrsize = buffersize; 139 140 /* Print the stack */ 141 while (stack_depth > 0) { 142 pc = rec_get_value(addr, pc_size); 143 if (pc == 0) { 144 break; 145 } 146 addr += pc_size; 147 if (dtrace_lookup_by_addr(g_dtp, pc, &sym, &dts) == 0) { 148 int len; 149 len = snprintf(ptr, ptrsize, 150 "%s`%s ", dts.dts_object, dts.dts_name); 151 ptrsize -= len; 152 if (ptrsize <= 0) { 153 /* 154 * Snprintf returns "desired" length, so 155 * reaching here means our buffer is full. 156 * Move ptr to last byte in the buffer and 157 * break early. 158 */ 159 ptr = &buffer[buffersize-1]; 160 break; 161 } else { 162 ptr += len; 163 } 164 } 165 } 166 167 if (ptr != buffer) { 168 /* 169 * We have printed something, 170 * so it is safe to remove last ' '. 171 */ 172 *(ptr-1) = 0; 173 } 174 175 /* Parsing aggregation data */ 176 if (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action)) { 177 /* Record is not aggregation, this is an error. */ 178 goto err; 179 } 180 agg_value = rec_get_value( 181 data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset, 182 aggdesc->dtagd_rec[REC_AGG].dtrd_size); 183 184 lt_stat_update(pid, tid, buffer, stat_type, agg_value); 185 186 done: 187 if (buffer != NULL) { 188 free(buffer); 189 } 190 return (0); 191 192 err: 193 if (buffer != NULL) { 194 free(buffer); 195 } 196 return (-1); 197 } 198 199 /* 200 * Callback to process each aggregation in the snapshot. 201 * This one processes lt_named_*, which contains data such as lock spinning. 202 */ 203 static int 204 aggwalk_named(const dtrace_aggdata_t *data, lt_stat_type_t stat_type) 205 { 206 const int REC_PID = 1; 207 const int REC_TID = 2; 208 const int REC_TYPE = 3; 209 const int REC_AGG = 4; 210 const int NREC = 5; 211 212 dtrace_aggdesc_t *aggdesc = data->dtada_desc; 213 pid_t pid; 214 id_t tid; 215 uint64_t agg_value; 216 int cause_id; 217 char *type = NULL; 218 219 if (aggdesc->dtagd_nrecs < NREC) { 220 /* Not enough records */ 221 return (-1); 222 } 223 224 if (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) { 225 /* Record is not PID, this is an error. */ 226 return (-1); 227 } 228 pid = rec_get_value( 229 data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset, 230 aggdesc->dtagd_rec[REC_PID].dtrd_size); 231 if (SHOULD_IGNORE(pid)) { 232 return (0); 233 } 234 if (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) { 235 /* Record is not TID, this is an error. */ 236 return (-1); 237 } 238 tid = rec_get_value( 239 data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset, 240 aggdesc->dtagd_rec[REC_TID].dtrd_size); 241 242 if (aggdesc->dtagd_rec[REC_TYPE].dtrd_action != DTRACEACT_DIFEXPR) { 243 /* Record is not type, this is an error. */ 244 return (-1); 245 } 246 type = (char *)data->dtada_data 247 + aggdesc->dtagd_rec[REC_TYPE].dtrd_offset; 248 cause_id = lt_table_lookup_named_cause(type, 1); 249 250 /* Parsing aggregation data */ 251 if (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action)) { 252 /* Record is not aggregation, this is an error. */ 253 return (-1); 254 } 255 agg_value = rec_get_value( 256 data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset, 257 aggdesc->dtagd_rec[REC_AGG].dtrd_size); 258 259 lt_stat_update_cause(pid, tid, cause_id, stat_type, agg_value); 260 261 return (0); 262 263 } 264 265 /* 266 * Callback to process each aggregation in the snapshot. 267 * This one processes lt_sync_*, which traces synchronization objects. 268 */ 269 static int 270 aggwalk_sync(const dtrace_aggdata_t *data, lt_stat_type_t stat_type) 271 { 272 const int REC_PID = 1; 273 const int REC_TID = 2; 274 const int REC_STYPE = 3; 275 const int REC_WCHAN = 4; 276 const int REC_AGG = 5; 277 const int NREC = 6; 278 279 dtrace_aggdesc_t *aggdesc = data->dtada_desc; 280 pid_t pid; 281 id_t tid; 282 uint64_t agg_value; 283 int stype; 284 unsigned long long wchan; 285 286 if (aggdesc->dtagd_nrecs < NREC) { 287 /* Not enough records */ 288 return (-1); 289 } 290 291 if (aggdesc->dtagd_rec[REC_PID].dtrd_action != DTRACEACT_DIFEXPR) { 292 /* Record is not PID, this is an error. */ 293 return (-1); 294 } 295 pid = rec_get_value( 296 data->dtada_data + aggdesc->dtagd_rec[REC_PID].dtrd_offset, 297 aggdesc->dtagd_rec[REC_PID].dtrd_size); 298 if (SHOULD_IGNORE(pid)) { 299 return (0); 300 } 301 302 if (aggdesc->dtagd_rec[REC_TID].dtrd_action != DTRACEACT_DIFEXPR) { 303 /* Record is not TID, this is an error. */ 304 return (-1); 305 } 306 tid = rec_get_value( 307 data->dtada_data + aggdesc->dtagd_rec[REC_TID].dtrd_offset, 308 aggdesc->dtagd_rec[REC_TID].dtrd_size); 309 310 if (aggdesc->dtagd_rec[REC_STYPE].dtrd_action != DTRACEACT_DIFEXPR) { 311 /* Record is not stype, this is an error. */ 312 return (-1); 313 } 314 stype = rec_get_value( 315 data->dtada_data + aggdesc->dtagd_rec[REC_STYPE].dtrd_offset, 316 aggdesc->dtagd_rec[REC_STYPE].dtrd_size); 317 318 if (aggdesc->dtagd_rec[REC_WCHAN].dtrd_action != DTRACEACT_DIFEXPR) { 319 /* Record is not wchan, this is an error. */ 320 return (-1); 321 } 322 wchan = rec_get_value( 323 data->dtada_data + aggdesc->dtagd_rec[REC_WCHAN].dtrd_offset, 324 aggdesc->dtagd_rec[REC_WCHAN].dtrd_size); 325 326 /* Parsing aggregation data */ 327 if (!DTRACEACT_ISAGG(aggdesc->dtagd_rec[REC_AGG].dtrd_action)) { 328 /* Record is not aggregation, this is an error. */ 329 return (-1); 330 } 331 agg_value = rec_get_value( 332 data->dtada_data + aggdesc->dtagd_rec[REC_AGG].dtrd_offset, 333 aggdesc->dtagd_rec[REC_AGG].dtrd_size); 334 335 lt_stat_update_sobj(pid, tid, stype, wchan, stat_type, agg_value); 336 337 return (0); 338 } 339 340 /* 341 * Callback to process each aggregation in the snapshot. 342 * This one dispatches to different aggwalk_*(). 343 */ 344 /* ARGSUSED */ 345 static int 346 aggwalk(const dtrace_aggdata_t *data, void *arg) 347 { 348 char *tmp; 349 char buffer[32]; 350 lt_stat_type_t stat_type = LT_STAT_COUNT; 351 int (*func)(const dtrace_aggdata_t *, lt_stat_type_t); 352 353 (void) strncpy(buffer, data->dtada_desc->dtagd_name, sizeof (buffer)); 354 buffer[sizeof (buffer) - 1] = 0; 355 356 tmp = strtok(buffer, "_"); 357 if (strcmp(tmp, "lt") != 0) { 358 goto done; 359 } 360 361 tmp = strtok(NULL, "_"); 362 if (strcmp(tmp, "call") == 0) { 363 func = aggwalk_call; 364 } else if (strcmp(tmp, "named") == 0) { 365 func = aggwalk_named; 366 } else if (strcmp(tmp, "sync") == 0) { 367 func = aggwalk_sync; 368 } else { 369 goto done; 370 } 371 372 tmp = strtok(NULL, "_"); 373 if (strcmp(tmp, "count") == 0) { 374 stat_type = LT_STAT_COUNT; 375 } else if (strcmp(tmp, "sum") == 0) { 376 stat_type = LT_STAT_SUM; 377 } else if (strcmp(tmp, "max") == 0) { 378 stat_type = LT_STAT_MAX; 379 } else { 380 goto done; 381 } 382 383 (void) func(data, stat_type); 384 385 done: 386 /* We have our data, remove it from DTrace. */ 387 return (DTRACE_AGGWALK_REMOVE); 388 } 389 390 /* 391 * Callback to handle DTrace drop data events. 392 */ 393 /*ARGSUSED*/ 394 static int 395 drop_handler(const dtrace_dropdata_t *data, void *user) 396 { 397 lt_display_error("Drop: %s\n", data->dtdda_msg); 398 /* 399 * Pretend nothing happened. So our program can continue. 400 */ 401 return (DTRACE_HANDLE_OK); 402 } 403 404 /* 405 * DTrace initialization. The D script is running when this function returns. 406 */ 407 int 408 lt_dtrace_init(void) 409 { 410 dtrace_prog_t *prog; 411 dtrace_proginfo_t info; 412 int err; 413 FILE *fp_script = NULL; 414 415 pid_self = getpid(); 416 /* Open dtrace, set up handler */ 417 g_dtp = dtrace_open(DTRACE_VERSION, 0, &err); 418 if (g_dtp == NULL) { 419 lt_display_error("Cannot open dtrace library: %s\n", 420 dtrace_errmsg(NULL, err)); 421 return (-1); 422 } 423 424 if (dtrace_handle_drop(g_dtp, &drop_handler, NULL) == -1) { 425 lt_display_error("Cannot install DTrace handle: %s\n", 426 dtrace_errmsg(NULL, err)); 427 return (-1); 428 } 429 430 /* Load D script, set up macro and compile */ 431 #ifdef EMBED_CONFIGS 432 /* Create a temp file because libdtrace use cpp(1) on files only. */ 433 fp_script = tmpfile(); 434 if (fp_script == NULL) { 435 lt_display_error("Cannot create tmp file\n"); 436 return (-1); 437 } 438 (void) fwrite(latencytop_d, latencytop_d_len, 1, fp_script); 439 (void) fseek(fp_script, 0, SEEK_SET); 440 #else 441 fp_script = fopen(DEFAULT_D_SCRIPT_NAME, "r"); 442 if (fp_script == NULL) { 443 lt_display_error("Cannot open script file %s\n", 444 DEFAULT_D_SCRIPT_NAME); 445 return (-1); 446 } 447 #endif /* EMBED_CONFIGS */ 448 449 if (g_config.enable_filter) { 450 (void) dtrace_setopt(g_dtp, "define", "ENABLE_FILTER"); 451 } 452 if (g_config.trace_syncobj) { 453 (void) dtrace_setopt(g_dtp, "define", "ENABLE_SYNCOBJ"); 454 } 455 if (g_config.trace_sched) { 456 (void) dtrace_setopt(g_dtp, "define", "ENABLE_SCHED"); 457 } 458 if (g_config.low_overhead_mode) { 459 (void) dtrace_setopt(g_dtp, "define", "ENABLE_LOW_OVERHEAD"); 460 } 461 462 prog = dtrace_program_fcompile(g_dtp, fp_script, 463 DTRACE_C_CPP, 0, NULL); 464 (void) fclose(fp_script); 465 if (prog == NULL) { 466 lt_display_error("Failed to compile D script.\n"); 467 return (dtrace_errno(g_dtp)); 468 } 469 470 /* Execute the D script */ 471 if (dtrace_program_exec(g_dtp, prog, &info) == -1) { 472 lt_display_error("Failed to enable probes.\n"); 473 return (dtrace_errno(g_dtp)); 474 } 475 if (dtrace_go(g_dtp) != 0) { 476 lt_display_error("Failed to run D script.\n"); 477 return (dtrace_errno(g_dtp)); 478 } 479 return (0); 480 } 481 482 /* 483 * Worker function to move aggregator data to user space. 484 * Needs to be called periodically to prevent running out of kernel memory. 485 */ 486 int 487 lt_dtrace_work(int force) 488 { 489 static uint64_t last_snap = 0; 490 uint64_t now = lt_millisecond(); 491 492 if (!force && now - last_snap < g_config.snap_interval) { 493 return (last_snap + g_config.snap_interval - now); 494 } 495 496 if (dtrace_status(g_dtp) == -1) { 497 lt_display_error("Failed when getting status: %s\n", 498 dtrace_errmsg(g_dtp, dtrace_errno(g_dtp))); 499 return (-1); 500 } 501 502 if (dtrace_aggregate_snap(g_dtp) != 0) { 503 lt_display_error("Failed to snap aggregate: %s\n", 504 dtrace_errmsg(g_dtp, dtrace_errno(g_dtp))); 505 return (-1); 506 } 507 508 last_snap = now; 509 return (0); 510 } 511 512 /* 513 * Walk through aggregator and collect data to LatencyTOP. 514 * Different from lt_dtrace_work, this one moves data from libdtrace 515 * to latencytop. 516 * This needs to be called immediately before update UI. 517 */ 518 int 519 lt_dtrace_collect(void) 520 { 521 if (lt_dtrace_work(1) != 0) { 522 return (-1); 523 } 524 525 if (dtrace_aggregate_walk(g_dtp, aggwalk, NULL) != 0) { 526 lt_display_error("Failed to sort aggregate: %s\n", 527 dtrace_errmsg(g_dtp, dtrace_errno(g_dtp))); 528 return (-1); 529 } 530 531 /* 532 * Probably no need to clear again, because we removed everything. 533 * Paranoid. 534 */ 535 dtrace_aggregate_clear(g_dtp); 536 537 return (0); 538 } 539 540 /* 541 * Clean up and close DTrace. 542 */ 543 void 544 lt_dtrace_deinit(void) 545 { 546 (void) dtrace_stop(g_dtp); 547 dtrace_close(g_dtp); 548 } 549