1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright (c) 2011, Joyent, Inc. All rights reserved. 29 */ 30 31 #include <stdlib.h> 32 #include <strings.h> 33 #include <errno.h> 34 #include <unistd.h> 35 #include <dt_impl.h> 36 #include <assert.h> 37 #if defined(sun) 38 #include <alloca.h> 39 #else 40 #include <sys/sysctl.h> 41 #include <libproc_compat.h> 42 #endif 43 #include <limits.h> 44 45 #define DTRACE_AHASHSIZE 32779 /* big 'ol prime */ 46 47 /* 48 * Because qsort(3C) does not allow an argument to be passed to a comparison 49 * function, the variables that affect comparison must regrettably be global; 50 * they are protected by a global static lock, dt_qsort_lock. 51 */ 52 static pthread_mutex_t dt_qsort_lock = PTHREAD_MUTEX_INITIALIZER; 53 54 static int dt_revsort; 55 static int dt_keysort; 56 static int dt_keypos; 57 58 #define DT_LESSTHAN (dt_revsort == 0 ? -1 : 1) 59 #define DT_GREATERTHAN (dt_revsort == 0 ? 1 : -1) 60 61 static void 62 dt_aggregate_count(int64_t *existing, int64_t *new, size_t size) 63 { 64 uint_t i; 65 66 for (i = 0; i < size / sizeof (int64_t); i++) 67 existing[i] = existing[i] + new[i]; 68 } 69 70 static int 71 dt_aggregate_countcmp(int64_t *lhs, int64_t *rhs) 72 { 73 int64_t lvar = *lhs; 74 int64_t rvar = *rhs; 75 76 if (lvar < rvar) 77 return (DT_LESSTHAN); 78 79 if (lvar > rvar) 80 return (DT_GREATERTHAN); 81 82 return (0); 83 } 84 85 /*ARGSUSED*/ 86 static void 87 dt_aggregate_min(int64_t *existing, int64_t *new, size_t size) 88 { 89 if (*new < *existing) 90 *existing = *new; 91 } 92 93 /*ARGSUSED*/ 94 static void 95 dt_aggregate_max(int64_t *existing, int64_t *new, size_t size) 96 { 97 if (*new > *existing) 98 *existing = *new; 99 } 100 101 static int 102 dt_aggregate_averagecmp(int64_t *lhs, int64_t *rhs) 103 { 104 int64_t lavg = lhs[0] ? (lhs[1] / lhs[0]) : 0; 105 int64_t ravg = rhs[0] ? (rhs[1] / rhs[0]) : 0; 106 107 if (lavg < ravg) 108 return (DT_LESSTHAN); 109 110 if (lavg > ravg) 111 return (DT_GREATERTHAN); 112 113 return (0); 114 } 115 116 static int 117 dt_aggregate_stddevcmp(int64_t *lhs, int64_t *rhs) 118 { 119 uint64_t lsd = dt_stddev((uint64_t *)lhs, 1); 120 uint64_t rsd = dt_stddev((uint64_t *)rhs, 1); 121 122 if (lsd < rsd) 123 return (DT_LESSTHAN); 124 125 if (lsd > rsd) 126 return (DT_GREATERTHAN); 127 128 return (0); 129 } 130 131 /*ARGSUSED*/ 132 static void 133 dt_aggregate_lquantize(int64_t *existing, int64_t *new, size_t size) 134 { 135 int64_t arg = *existing++; 136 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg); 137 int i; 138 139 for (i = 0; i <= levels + 1; i++) 140 existing[i] = existing[i] + new[i + 1]; 141 } 142 143 static long double 144 dt_aggregate_lquantizedsum(int64_t *lquanta) 145 { 146 int64_t arg = *lquanta++; 147 int32_t base = DTRACE_LQUANTIZE_BASE(arg); 148 uint16_t step = DTRACE_LQUANTIZE_STEP(arg); 149 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg), i; 150 long double total = (long double)lquanta[0] * (long double)(base - 1); 151 152 for (i = 0; i < levels; base += step, i++) 153 total += (long double)lquanta[i + 1] * (long double)base; 154 155 return (total + (long double)lquanta[levels + 1] * 156 (long double)(base + 1)); 157 } 158 159 static int64_t 160 dt_aggregate_lquantizedzero(int64_t *lquanta) 161 { 162 int64_t arg = *lquanta++; 163 int32_t base = DTRACE_LQUANTIZE_BASE(arg); 164 uint16_t step = DTRACE_LQUANTIZE_STEP(arg); 165 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg), i; 166 167 if (base - 1 == 0) 168 return (lquanta[0]); 169 170 for (i = 0; i < levels; base += step, i++) { 171 if (base != 0) 172 continue; 173 174 return (lquanta[i + 1]); 175 } 176 177 if (base + 1 == 0) 178 return (lquanta[levels + 1]); 179 180 return (0); 181 } 182 183 static int 184 dt_aggregate_lquantizedcmp(int64_t *lhs, int64_t *rhs) 185 { 186 long double lsum = dt_aggregate_lquantizedsum(lhs); 187 long double rsum = dt_aggregate_lquantizedsum(rhs); 188 int64_t lzero, rzero; 189 190 if (lsum < rsum) 191 return (DT_LESSTHAN); 192 193 if (lsum > rsum) 194 return (DT_GREATERTHAN); 195 196 /* 197 * If they're both equal, then we will compare based on the weights at 198 * zero. If the weights at zero are equal (or if zero is not within 199 * the range of the linear quantization), then this will be judged a 200 * tie and will be resolved based on the key comparison. 201 */ 202 lzero = dt_aggregate_lquantizedzero(lhs); 203 rzero = dt_aggregate_lquantizedzero(rhs); 204 205 if (lzero < rzero) 206 return (DT_LESSTHAN); 207 208 if (lzero > rzero) 209 return (DT_GREATERTHAN); 210 211 return (0); 212 } 213 214 static void 215 dt_aggregate_llquantize(int64_t *existing, int64_t *new, size_t size) 216 { 217 int i; 218 219 for (i = 1; i < size / sizeof (int64_t); i++) 220 existing[i] = existing[i] + new[i]; 221 } 222 223 static long double 224 dt_aggregate_llquantizedsum(int64_t *llquanta) 225 { 226 int64_t arg = *llquanta++; 227 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg); 228 uint16_t low = DTRACE_LLQUANTIZE_LOW(arg); 229 uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg); 230 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg); 231 int bin = 0, order; 232 int64_t value = 1, next, step; 233 long double total; 234 235 assert(nsteps >= factor); 236 assert(nsteps % factor == 0); 237 238 for (order = 0; order < low; order++) 239 value *= factor; 240 241 total = (long double)llquanta[bin++] * (long double)(value - 1); 242 243 next = value * factor; 244 step = next > nsteps ? next / nsteps : 1; 245 246 while (order <= high) { 247 assert(value < next); 248 total += (long double)llquanta[bin++] * (long double)(value); 249 250 if ((value += step) != next) 251 continue; 252 253 next = value * factor; 254 step = next > nsteps ? next / nsteps : 1; 255 order++; 256 } 257 258 return (total + (long double)llquanta[bin] * (long double)value); 259 } 260 261 static int 262 dt_aggregate_llquantizedcmp(int64_t *lhs, int64_t *rhs) 263 { 264 long double lsum = dt_aggregate_llquantizedsum(lhs); 265 long double rsum = dt_aggregate_llquantizedsum(rhs); 266 int64_t lzero, rzero; 267 268 if (lsum < rsum) 269 return (DT_LESSTHAN); 270 271 if (lsum > rsum) 272 return (DT_GREATERTHAN); 273 274 /* 275 * If they're both equal, then we will compare based on the weights at 276 * zero. If the weights at zero are equal, then this will be judged a 277 * tie and will be resolved based on the key comparison. 278 */ 279 lzero = lhs[1]; 280 rzero = rhs[1]; 281 282 if (lzero < rzero) 283 return (DT_LESSTHAN); 284 285 if (lzero > rzero) 286 return (DT_GREATERTHAN); 287 288 return (0); 289 } 290 291 static int 292 dt_aggregate_quantizedcmp(int64_t *lhs, int64_t *rhs) 293 { 294 int nbuckets = DTRACE_QUANTIZE_NBUCKETS; 295 long double ltotal = 0, rtotal = 0; 296 int64_t lzero, rzero; 297 uint_t i; 298 299 for (i = 0; i < nbuckets; i++) { 300 int64_t bucketval = DTRACE_QUANTIZE_BUCKETVAL(i); 301 302 if (bucketval == 0) { 303 lzero = lhs[i]; 304 rzero = rhs[i]; 305 } 306 307 ltotal += (long double)bucketval * (long double)lhs[i]; 308 rtotal += (long double)bucketval * (long double)rhs[i]; 309 } 310 311 if (ltotal < rtotal) 312 return (DT_LESSTHAN); 313 314 if (ltotal > rtotal) 315 return (DT_GREATERTHAN); 316 317 /* 318 * If they're both equal, then we will compare based on the weights at 319 * zero. If the weights at zero are equal, then this will be judged a 320 * tie and will be resolved based on the key comparison. 321 */ 322 if (lzero < rzero) 323 return (DT_LESSTHAN); 324 325 if (lzero > rzero) 326 return (DT_GREATERTHAN); 327 328 return (0); 329 } 330 331 static void 332 dt_aggregate_usym(dtrace_hdl_t *dtp, uint64_t *data) 333 { 334 uint64_t pid = data[0]; 335 uint64_t *pc = &data[1]; 336 struct ps_prochandle *P; 337 GElf_Sym sym; 338 339 if (dtp->dt_vector != NULL) 340 return; 341 342 if ((P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0)) == NULL) 343 return; 344 345 dt_proc_lock(dtp, P); 346 347 if (Plookup_by_addr(P, *pc, NULL, 0, &sym) == 0) 348 *pc = sym.st_value; 349 350 dt_proc_unlock(dtp, P); 351 dt_proc_release(dtp, P); 352 } 353 354 static void 355 dt_aggregate_umod(dtrace_hdl_t *dtp, uint64_t *data) 356 { 357 uint64_t pid = data[0]; 358 uint64_t *pc = &data[1]; 359 struct ps_prochandle *P; 360 const prmap_t *map; 361 362 if (dtp->dt_vector != NULL) 363 return; 364 365 if ((P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0)) == NULL) 366 return; 367 368 dt_proc_lock(dtp, P); 369 370 if ((map = Paddr_to_map(P, *pc)) != NULL) 371 *pc = map->pr_vaddr; 372 373 dt_proc_unlock(dtp, P); 374 dt_proc_release(dtp, P); 375 } 376 377 static void 378 dt_aggregate_sym(dtrace_hdl_t *dtp, uint64_t *data) 379 { 380 GElf_Sym sym; 381 uint64_t *pc = data; 382 383 if (dtrace_lookup_by_addr(dtp, *pc, &sym, NULL) == 0) 384 *pc = sym.st_value; 385 } 386 387 static void 388 dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *data) 389 { 390 uint64_t *pc = data; 391 dt_module_t *dmp; 392 393 if (dtp->dt_vector != NULL) { 394 /* 395 * We don't have a way of just getting the module for a 396 * vectored open, and it doesn't seem to be worth defining 397 * one. This means that use of mod() won't get true 398 * aggregation in the postmortem case (some modules may 399 * appear more than once in aggregation output). It seems 400 * unlikely that anyone will ever notice or care... 401 */ 402 return; 403 } 404 405 for (dmp = dt_list_next(&dtp->dt_modlist); dmp != NULL; 406 dmp = dt_list_next(dmp)) { 407 if (*pc - dmp->dm_text_va < dmp->dm_text_size) { 408 *pc = dmp->dm_text_va; 409 return; 410 } 411 } 412 } 413 414 static dtrace_aggvarid_t 415 dt_aggregate_aggvarid(dt_ahashent_t *ent) 416 { 417 dtrace_aggdesc_t *agg = ent->dtahe_data.dtada_desc; 418 caddr_t data = ent->dtahe_data.dtada_data; 419 dtrace_recdesc_t *rec = agg->dtagd_rec; 420 421 /* 422 * First, we'll check the variable ID in the aggdesc. If it's valid, 423 * we'll return it. If not, we'll use the compiler-generated ID 424 * present as the first record. 425 */ 426 if (agg->dtagd_varid != DTRACE_AGGVARIDNONE) 427 return (agg->dtagd_varid); 428 429 agg->dtagd_varid = *((dtrace_aggvarid_t *)(uintptr_t)(data + 430 rec->dtrd_offset)); 431 432 return (agg->dtagd_varid); 433 } 434 435 436 static int 437 dt_aggregate_snap_cpu(dtrace_hdl_t *dtp, processorid_t cpu) 438 { 439 dtrace_epid_t id; 440 uint64_t hashval; 441 size_t offs, roffs, size, ndx; 442 int i, j, rval; 443 caddr_t addr, data; 444 dtrace_recdesc_t *rec; 445 dt_aggregate_t *agp = &dtp->dt_aggregate; 446 dtrace_aggdesc_t *agg; 447 dt_ahash_t *hash = &agp->dtat_hash; 448 dt_ahashent_t *h; 449 dtrace_bufdesc_t b = agp->dtat_buf, *buf = &b; 450 dtrace_aggdata_t *aggdata; 451 int flags = agp->dtat_flags; 452 453 buf->dtbd_cpu = cpu; 454 455 #if defined(sun) 456 if (dt_ioctl(dtp, DTRACEIOC_AGGSNAP, buf) == -1) { 457 #else 458 if (dt_ioctl(dtp, DTRACEIOC_AGGSNAP, &buf) == -1) { 459 #endif 460 if (errno == ENOENT) { 461 /* 462 * If that failed with ENOENT, it may be because the 463 * CPU was unconfigured. This is okay; we'll just 464 * do nothing but return success. 465 */ 466 return (0); 467 } 468 469 return (dt_set_errno(dtp, errno)); 470 } 471 472 if (buf->dtbd_drops != 0) { 473 if (dt_handle_cpudrop(dtp, cpu, 474 DTRACEDROP_AGGREGATION, buf->dtbd_drops) == -1) 475 return (-1); 476 } 477 478 if (buf->dtbd_size == 0) 479 return (0); 480 481 if (hash->dtah_hash == NULL) { 482 size_t size; 483 484 hash->dtah_size = DTRACE_AHASHSIZE; 485 size = hash->dtah_size * sizeof (dt_ahashent_t *); 486 487 if ((hash->dtah_hash = malloc(size)) == NULL) 488 return (dt_set_errno(dtp, EDT_NOMEM)); 489 490 bzero(hash->dtah_hash, size); 491 } 492 493 for (offs = 0; offs < buf->dtbd_size; ) { 494 /* 495 * We're guaranteed to have an ID. 496 */ 497 id = *((dtrace_epid_t *)((uintptr_t)buf->dtbd_data + 498 (uintptr_t)offs)); 499 500 if (id == DTRACE_AGGIDNONE) { 501 /* 502 * This is filler to assure proper alignment of the 503 * next record; we simply ignore it. 504 */ 505 offs += sizeof (id); 506 continue; 507 } 508 509 if ((rval = dt_aggid_lookup(dtp, id, &agg)) != 0) 510 return (rval); 511 512 addr = buf->dtbd_data + offs; 513 size = agg->dtagd_size; 514 hashval = 0; 515 516 for (j = 0; j < agg->dtagd_nrecs - 1; j++) { 517 rec = &agg->dtagd_rec[j]; 518 roffs = rec->dtrd_offset; 519 520 switch (rec->dtrd_action) { 521 case DTRACEACT_USYM: 522 dt_aggregate_usym(dtp, 523 /* LINTED - alignment */ 524 (uint64_t *)&addr[roffs]); 525 break; 526 527 case DTRACEACT_UMOD: 528 dt_aggregate_umod(dtp, 529 /* LINTED - alignment */ 530 (uint64_t *)&addr[roffs]); 531 break; 532 533 case DTRACEACT_SYM: 534 /* LINTED - alignment */ 535 dt_aggregate_sym(dtp, (uint64_t *)&addr[roffs]); 536 break; 537 538 case DTRACEACT_MOD: 539 /* LINTED - alignment */ 540 dt_aggregate_mod(dtp, (uint64_t *)&addr[roffs]); 541 break; 542 543 default: 544 break; 545 } 546 547 for (i = 0; i < rec->dtrd_size; i++) 548 hashval += addr[roffs + i]; 549 } 550 551 ndx = hashval % hash->dtah_size; 552 553 for (h = hash->dtah_hash[ndx]; h != NULL; h = h->dtahe_next) { 554 if (h->dtahe_hashval != hashval) 555 continue; 556 557 if (h->dtahe_size != size) 558 continue; 559 560 aggdata = &h->dtahe_data; 561 data = aggdata->dtada_data; 562 563 for (j = 0; j < agg->dtagd_nrecs - 1; j++) { 564 rec = &agg->dtagd_rec[j]; 565 roffs = rec->dtrd_offset; 566 567 for (i = 0; i < rec->dtrd_size; i++) 568 if (addr[roffs + i] != data[roffs + i]) 569 goto hashnext; 570 } 571 572 /* 573 * We found it. Now we need to apply the aggregating 574 * action on the data here. 575 */ 576 rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1]; 577 roffs = rec->dtrd_offset; 578 /* LINTED - alignment */ 579 h->dtahe_aggregate((int64_t *)&data[roffs], 580 /* LINTED - alignment */ 581 (int64_t *)&addr[roffs], rec->dtrd_size); 582 583 /* 584 * If we're keeping per CPU data, apply the aggregating 585 * action there as well. 586 */ 587 if (aggdata->dtada_percpu != NULL) { 588 data = aggdata->dtada_percpu[cpu]; 589 590 /* LINTED - alignment */ 591 h->dtahe_aggregate((int64_t *)data, 592 /* LINTED - alignment */ 593 (int64_t *)&addr[roffs], rec->dtrd_size); 594 } 595 596 goto bufnext; 597 hashnext: 598 continue; 599 } 600 601 /* 602 * If we're here, we couldn't find an entry for this record. 603 */ 604 if ((h = malloc(sizeof (dt_ahashent_t))) == NULL) 605 return (dt_set_errno(dtp, EDT_NOMEM)); 606 bzero(h, sizeof (dt_ahashent_t)); 607 aggdata = &h->dtahe_data; 608 609 if ((aggdata->dtada_data = malloc(size)) == NULL) { 610 free(h); 611 return (dt_set_errno(dtp, EDT_NOMEM)); 612 } 613 614 bcopy(addr, aggdata->dtada_data, size); 615 aggdata->dtada_size = size; 616 aggdata->dtada_desc = agg; 617 aggdata->dtada_handle = dtp; 618 (void) dt_epid_lookup(dtp, agg->dtagd_epid, 619 &aggdata->dtada_edesc, &aggdata->dtada_pdesc); 620 aggdata->dtada_normal = 1; 621 622 h->dtahe_hashval = hashval; 623 h->dtahe_size = size; 624 (void) dt_aggregate_aggvarid(h); 625 626 rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1]; 627 628 if (flags & DTRACE_A_PERCPU) { 629 int max_cpus = agp->dtat_maxcpu; 630 caddr_t *percpu = malloc(max_cpus * sizeof (caddr_t)); 631 632 if (percpu == NULL) { 633 free(aggdata->dtada_data); 634 free(h); 635 return (dt_set_errno(dtp, EDT_NOMEM)); 636 } 637 638 for (j = 0; j < max_cpus; j++) { 639 percpu[j] = malloc(rec->dtrd_size); 640 641 if (percpu[j] == NULL) { 642 while (--j >= 0) 643 free(percpu[j]); 644 645 free(aggdata->dtada_data); 646 free(h); 647 return (dt_set_errno(dtp, EDT_NOMEM)); 648 } 649 650 if (j == cpu) { 651 bcopy(&addr[rec->dtrd_offset], 652 percpu[j], rec->dtrd_size); 653 } else { 654 bzero(percpu[j], rec->dtrd_size); 655 } 656 } 657 658 aggdata->dtada_percpu = percpu; 659 } 660 661 switch (rec->dtrd_action) { 662 case DTRACEAGG_MIN: 663 h->dtahe_aggregate = dt_aggregate_min; 664 break; 665 666 case DTRACEAGG_MAX: 667 h->dtahe_aggregate = dt_aggregate_max; 668 break; 669 670 case DTRACEAGG_LQUANTIZE: 671 h->dtahe_aggregate = dt_aggregate_lquantize; 672 break; 673 674 case DTRACEAGG_LLQUANTIZE: 675 h->dtahe_aggregate = dt_aggregate_llquantize; 676 break; 677 678 case DTRACEAGG_COUNT: 679 case DTRACEAGG_SUM: 680 case DTRACEAGG_AVG: 681 case DTRACEAGG_STDDEV: 682 case DTRACEAGG_QUANTIZE: 683 h->dtahe_aggregate = dt_aggregate_count; 684 break; 685 686 default: 687 return (dt_set_errno(dtp, EDT_BADAGG)); 688 } 689 690 if (hash->dtah_hash[ndx] != NULL) 691 hash->dtah_hash[ndx]->dtahe_prev = h; 692 693 h->dtahe_next = hash->dtah_hash[ndx]; 694 hash->dtah_hash[ndx] = h; 695 696 if (hash->dtah_all != NULL) 697 hash->dtah_all->dtahe_prevall = h; 698 699 h->dtahe_nextall = hash->dtah_all; 700 hash->dtah_all = h; 701 bufnext: 702 offs += agg->dtagd_size; 703 } 704 705 return (0); 706 } 707 708 int 709 dtrace_aggregate_snap(dtrace_hdl_t *dtp) 710 { 711 int i, rval; 712 dt_aggregate_t *agp = &dtp->dt_aggregate; 713 hrtime_t now = gethrtime(); 714 dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_AGGRATE]; 715 716 if (dtp->dt_lastagg != 0) { 717 if (now - dtp->dt_lastagg < interval) 718 return (0); 719 720 dtp->dt_lastagg += interval; 721 } else { 722 dtp->dt_lastagg = now; 723 } 724 725 if (!dtp->dt_active) 726 return (dt_set_errno(dtp, EINVAL)); 727 728 if (agp->dtat_buf.dtbd_size == 0) 729 return (0); 730 731 for (i = 0; i < agp->dtat_ncpus; i++) { 732 if ((rval = dt_aggregate_snap_cpu(dtp, agp->dtat_cpus[i]))) 733 return (rval); 734 } 735 736 return (0); 737 } 738 739 static int 740 dt_aggregate_hashcmp(const void *lhs, const void *rhs) 741 { 742 dt_ahashent_t *lh = *((dt_ahashent_t **)lhs); 743 dt_ahashent_t *rh = *((dt_ahashent_t **)rhs); 744 dtrace_aggdesc_t *lagg = lh->dtahe_data.dtada_desc; 745 dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc; 746 747 if (lagg->dtagd_nrecs < ragg->dtagd_nrecs) 748 return (DT_LESSTHAN); 749 750 if (lagg->dtagd_nrecs > ragg->dtagd_nrecs) 751 return (DT_GREATERTHAN); 752 753 return (0); 754 } 755 756 static int 757 dt_aggregate_varcmp(const void *lhs, const void *rhs) 758 { 759 dt_ahashent_t *lh = *((dt_ahashent_t **)lhs); 760 dt_ahashent_t *rh = *((dt_ahashent_t **)rhs); 761 dtrace_aggvarid_t lid, rid; 762 763 lid = dt_aggregate_aggvarid(lh); 764 rid = dt_aggregate_aggvarid(rh); 765 766 if (lid < rid) 767 return (DT_LESSTHAN); 768 769 if (lid > rid) 770 return (DT_GREATERTHAN); 771 772 return (0); 773 } 774 775 static int 776 dt_aggregate_keycmp(const void *lhs, const void *rhs) 777 { 778 dt_ahashent_t *lh = *((dt_ahashent_t **)lhs); 779 dt_ahashent_t *rh = *((dt_ahashent_t **)rhs); 780 dtrace_aggdesc_t *lagg = lh->dtahe_data.dtada_desc; 781 dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc; 782 dtrace_recdesc_t *lrec, *rrec; 783 char *ldata, *rdata; 784 int rval, i, j, keypos, nrecs; 785 786 if ((rval = dt_aggregate_hashcmp(lhs, rhs)) != 0) 787 return (rval); 788 789 nrecs = lagg->dtagd_nrecs - 1; 790 assert(nrecs == ragg->dtagd_nrecs - 1); 791 792 keypos = dt_keypos + 1 >= nrecs ? 0 : dt_keypos; 793 794 for (i = 1; i < nrecs; i++) { 795 uint64_t lval, rval; 796 int ndx = i + keypos; 797 798 if (ndx >= nrecs) 799 ndx = ndx - nrecs + 1; 800 801 lrec = &lagg->dtagd_rec[ndx]; 802 rrec = &ragg->dtagd_rec[ndx]; 803 804 ldata = lh->dtahe_data.dtada_data + lrec->dtrd_offset; 805 rdata = rh->dtahe_data.dtada_data + rrec->dtrd_offset; 806 807 if (lrec->dtrd_size < rrec->dtrd_size) 808 return (DT_LESSTHAN); 809 810 if (lrec->dtrd_size > rrec->dtrd_size) 811 return (DT_GREATERTHAN); 812 813 switch (lrec->dtrd_size) { 814 case sizeof (uint64_t): 815 /* LINTED - alignment */ 816 lval = *((uint64_t *)ldata); 817 /* LINTED - alignment */ 818 rval = *((uint64_t *)rdata); 819 break; 820 821 case sizeof (uint32_t): 822 /* LINTED - alignment */ 823 lval = *((uint32_t *)ldata); 824 /* LINTED - alignment */ 825 rval = *((uint32_t *)rdata); 826 break; 827 828 case sizeof (uint16_t): 829 /* LINTED - alignment */ 830 lval = *((uint16_t *)ldata); 831 /* LINTED - alignment */ 832 rval = *((uint16_t *)rdata); 833 break; 834 835 case sizeof (uint8_t): 836 lval = *((uint8_t *)ldata); 837 rval = *((uint8_t *)rdata); 838 break; 839 840 default: 841 switch (lrec->dtrd_action) { 842 case DTRACEACT_UMOD: 843 case DTRACEACT_UADDR: 844 case DTRACEACT_USYM: 845 for (j = 0; j < 2; j++) { 846 /* LINTED - alignment */ 847 lval = ((uint64_t *)ldata)[j]; 848 /* LINTED - alignment */ 849 rval = ((uint64_t *)rdata)[j]; 850 851 if (lval < rval) 852 return (DT_LESSTHAN); 853 854 if (lval > rval) 855 return (DT_GREATERTHAN); 856 } 857 858 break; 859 860 default: 861 for (j = 0; j < lrec->dtrd_size; j++) { 862 lval = ((uint8_t *)ldata)[j]; 863 rval = ((uint8_t *)rdata)[j]; 864 865 if (lval < rval) 866 return (DT_LESSTHAN); 867 868 if (lval > rval) 869 return (DT_GREATERTHAN); 870 } 871 } 872 873 continue; 874 } 875 876 if (lval < rval) 877 return (DT_LESSTHAN); 878 879 if (lval > rval) 880 return (DT_GREATERTHAN); 881 } 882 883 return (0); 884 } 885 886 static int 887 dt_aggregate_valcmp(const void *lhs, const void *rhs) 888 { 889 dt_ahashent_t *lh = *((dt_ahashent_t **)lhs); 890 dt_ahashent_t *rh = *((dt_ahashent_t **)rhs); 891 dtrace_aggdesc_t *lagg = lh->dtahe_data.dtada_desc; 892 dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc; 893 caddr_t ldata = lh->dtahe_data.dtada_data; 894 caddr_t rdata = rh->dtahe_data.dtada_data; 895 dtrace_recdesc_t *lrec, *rrec; 896 int64_t *laddr, *raddr; 897 int rval, i; 898 899 if ((rval = dt_aggregate_hashcmp(lhs, rhs)) != 0) 900 return (rval); 901 902 if (lagg->dtagd_nrecs > ragg->dtagd_nrecs) 903 return (DT_GREATERTHAN); 904 905 if (lagg->dtagd_nrecs < ragg->dtagd_nrecs) 906 return (DT_LESSTHAN); 907 908 for (i = 0; i < lagg->dtagd_nrecs; i++) { 909 lrec = &lagg->dtagd_rec[i]; 910 rrec = &ragg->dtagd_rec[i]; 911 912 if (lrec->dtrd_offset < rrec->dtrd_offset) 913 return (DT_LESSTHAN); 914 915 if (lrec->dtrd_offset > rrec->dtrd_offset) 916 return (DT_GREATERTHAN); 917 918 if (lrec->dtrd_action < rrec->dtrd_action) 919 return (DT_LESSTHAN); 920 921 if (lrec->dtrd_action > rrec->dtrd_action) 922 return (DT_GREATERTHAN); 923 } 924 925 laddr = (int64_t *)(uintptr_t)(ldata + lrec->dtrd_offset); 926 raddr = (int64_t *)(uintptr_t)(rdata + rrec->dtrd_offset); 927 928 switch (lrec->dtrd_action) { 929 case DTRACEAGG_AVG: 930 rval = dt_aggregate_averagecmp(laddr, raddr); 931 break; 932 933 case DTRACEAGG_STDDEV: 934 rval = dt_aggregate_stddevcmp(laddr, raddr); 935 break; 936 937 case DTRACEAGG_QUANTIZE: 938 rval = dt_aggregate_quantizedcmp(laddr, raddr); 939 break; 940 941 case DTRACEAGG_LQUANTIZE: 942 rval = dt_aggregate_lquantizedcmp(laddr, raddr); 943 break; 944 945 case DTRACEAGG_LLQUANTIZE: 946 rval = dt_aggregate_llquantizedcmp(laddr, raddr); 947 break; 948 949 case DTRACEAGG_COUNT: 950 case DTRACEAGG_SUM: 951 case DTRACEAGG_MIN: 952 case DTRACEAGG_MAX: 953 rval = dt_aggregate_countcmp(laddr, raddr); 954 break; 955 956 default: 957 assert(0); 958 } 959 960 return (rval); 961 } 962 963 static int 964 dt_aggregate_valkeycmp(const void *lhs, const void *rhs) 965 { 966 int rval; 967 968 if ((rval = dt_aggregate_valcmp(lhs, rhs)) != 0) 969 return (rval); 970 971 /* 972 * If we're here, the values for the two aggregation elements are 973 * equal. We already know that the key layout is the same for the two 974 * elements; we must now compare the keys themselves as a tie-breaker. 975 */ 976 return (dt_aggregate_keycmp(lhs, rhs)); 977 } 978 979 static int 980 dt_aggregate_keyvarcmp(const void *lhs, const void *rhs) 981 { 982 int rval; 983 984 if ((rval = dt_aggregate_keycmp(lhs, rhs)) != 0) 985 return (rval); 986 987 return (dt_aggregate_varcmp(lhs, rhs)); 988 } 989 990 static int 991 dt_aggregate_varkeycmp(const void *lhs, const void *rhs) 992 { 993 int rval; 994 995 if ((rval = dt_aggregate_varcmp(lhs, rhs)) != 0) 996 return (rval); 997 998 return (dt_aggregate_keycmp(lhs, rhs)); 999 } 1000 1001 static int 1002 dt_aggregate_valvarcmp(const void *lhs, const void *rhs) 1003 { 1004 int rval; 1005 1006 if ((rval = dt_aggregate_valkeycmp(lhs, rhs)) != 0) 1007 return (rval); 1008 1009 return (dt_aggregate_varcmp(lhs, rhs)); 1010 } 1011 1012 static int 1013 dt_aggregate_varvalcmp(const void *lhs, const void *rhs) 1014 { 1015 int rval; 1016 1017 if ((rval = dt_aggregate_varcmp(lhs, rhs)) != 0) 1018 return (rval); 1019 1020 return (dt_aggregate_valkeycmp(lhs, rhs)); 1021 } 1022 1023 static int 1024 dt_aggregate_keyvarrevcmp(const void *lhs, const void *rhs) 1025 { 1026 return (dt_aggregate_keyvarcmp(rhs, lhs)); 1027 } 1028 1029 static int 1030 dt_aggregate_varkeyrevcmp(const void *lhs, const void *rhs) 1031 { 1032 return (dt_aggregate_varkeycmp(rhs, lhs)); 1033 } 1034 1035 static int 1036 dt_aggregate_valvarrevcmp(const void *lhs, const void *rhs) 1037 { 1038 return (dt_aggregate_valvarcmp(rhs, lhs)); 1039 } 1040 1041 static int 1042 dt_aggregate_varvalrevcmp(const void *lhs, const void *rhs) 1043 { 1044 return (dt_aggregate_varvalcmp(rhs, lhs)); 1045 } 1046 1047 static int 1048 dt_aggregate_bundlecmp(const void *lhs, const void *rhs) 1049 { 1050 dt_ahashent_t **lh = *((dt_ahashent_t ***)lhs); 1051 dt_ahashent_t **rh = *((dt_ahashent_t ***)rhs); 1052 int i, rval; 1053 1054 if (dt_keysort) { 1055 /* 1056 * If we're sorting on keys, we need to scan until we find the 1057 * last entry -- that's the representative key. (The order of 1058 * the bundle is values followed by key to accommodate the 1059 * default behavior of sorting by value.) If the keys are 1060 * equal, we'll fall into the value comparison loop, below. 1061 */ 1062 for (i = 0; lh[i + 1] != NULL; i++) 1063 continue; 1064 1065 assert(i != 0); 1066 assert(rh[i + 1] == NULL); 1067 1068 if ((rval = dt_aggregate_keycmp(&lh[i], &rh[i])) != 0) 1069 return (rval); 1070 } 1071 1072 for (i = 0; ; i++) { 1073 if (lh[i + 1] == NULL) { 1074 /* 1075 * All of the values are equal; if we're sorting on 1076 * keys, then we're only here because the keys were 1077 * found to be equal and these records are therefore 1078 * equal. If we're not sorting on keys, we'll use the 1079 * key comparison from the representative key as the 1080 * tie-breaker. 1081 */ 1082 if (dt_keysort) 1083 return (0); 1084 1085 assert(i != 0); 1086 assert(rh[i + 1] == NULL); 1087 return (dt_aggregate_keycmp(&lh[i], &rh[i])); 1088 } else { 1089 if ((rval = dt_aggregate_valcmp(&lh[i], &rh[i])) != 0) 1090 return (rval); 1091 } 1092 } 1093 } 1094 1095 int 1096 dt_aggregate_go(dtrace_hdl_t *dtp) 1097 { 1098 dt_aggregate_t *agp = &dtp->dt_aggregate; 1099 dtrace_optval_t size, cpu; 1100 dtrace_bufdesc_t *buf = &agp->dtat_buf; 1101 int rval, i; 1102 1103 assert(agp->dtat_maxcpu == 0); 1104 assert(agp->dtat_ncpu == 0); 1105 assert(agp->dtat_cpus == NULL); 1106 1107 agp->dtat_maxcpu = dt_sysconf(dtp, _SC_CPUID_MAX) + 1; 1108 agp->dtat_ncpu = dt_sysconf(dtp, _SC_NPROCESSORS_MAX); 1109 agp->dtat_cpus = malloc(agp->dtat_ncpu * sizeof (processorid_t)); 1110 1111 if (agp->dtat_cpus == NULL) 1112 return (dt_set_errno(dtp, EDT_NOMEM)); 1113 1114 /* 1115 * Use the aggregation buffer size as reloaded from the kernel. 1116 */ 1117 size = dtp->dt_options[DTRACEOPT_AGGSIZE]; 1118 1119 rval = dtrace_getopt(dtp, "aggsize", &size); 1120 assert(rval == 0); 1121 1122 if (size == 0 || size == DTRACEOPT_UNSET) 1123 return (0); 1124 1125 buf = &agp->dtat_buf; 1126 buf->dtbd_size = size; 1127 1128 if ((buf->dtbd_data = malloc(buf->dtbd_size)) == NULL) 1129 return (dt_set_errno(dtp, EDT_NOMEM)); 1130 1131 /* 1132 * Now query for the CPUs enabled. 1133 */ 1134 rval = dtrace_getopt(dtp, "cpu", &cpu); 1135 assert(rval == 0 && cpu != DTRACEOPT_UNSET); 1136 1137 if (cpu != DTRACE_CPUALL) { 1138 assert(cpu < agp->dtat_ncpu); 1139 agp->dtat_cpus[agp->dtat_ncpus++] = (processorid_t)cpu; 1140 1141 return (0); 1142 } 1143 1144 agp->dtat_ncpus = 0; 1145 for (i = 0; i < agp->dtat_maxcpu; i++) { 1146 if (dt_status(dtp, i) == -1) 1147 continue; 1148 1149 agp->dtat_cpus[agp->dtat_ncpus++] = i; 1150 } 1151 1152 return (0); 1153 } 1154 1155 static int 1156 dt_aggwalk_rval(dtrace_hdl_t *dtp, dt_ahashent_t *h, int rval) 1157 { 1158 dt_aggregate_t *agp = &dtp->dt_aggregate; 1159 dtrace_aggdata_t *data; 1160 dtrace_aggdesc_t *aggdesc; 1161 dtrace_recdesc_t *rec; 1162 int i; 1163 1164 switch (rval) { 1165 case DTRACE_AGGWALK_NEXT: 1166 break; 1167 1168 case DTRACE_AGGWALK_CLEAR: { 1169 uint32_t size, offs = 0; 1170 1171 aggdesc = h->dtahe_data.dtada_desc; 1172 rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1]; 1173 size = rec->dtrd_size; 1174 data = &h->dtahe_data; 1175 1176 if (rec->dtrd_action == DTRACEAGG_LQUANTIZE) { 1177 offs = sizeof (uint64_t); 1178 size -= sizeof (uint64_t); 1179 } 1180 1181 bzero(&data->dtada_data[rec->dtrd_offset] + offs, size); 1182 1183 if (data->dtada_percpu == NULL) 1184 break; 1185 1186 for (i = 0; i < dtp->dt_aggregate.dtat_maxcpu; i++) 1187 bzero(data->dtada_percpu[i] + offs, size); 1188 break; 1189 } 1190 1191 case DTRACE_AGGWALK_ERROR: 1192 /* 1193 * We assume that errno is already set in this case. 1194 */ 1195 return (dt_set_errno(dtp, errno)); 1196 1197 case DTRACE_AGGWALK_ABORT: 1198 return (dt_set_errno(dtp, EDT_DIRABORT)); 1199 1200 case DTRACE_AGGWALK_DENORMALIZE: 1201 h->dtahe_data.dtada_normal = 1; 1202 return (0); 1203 1204 case DTRACE_AGGWALK_NORMALIZE: 1205 if (h->dtahe_data.dtada_normal == 0) { 1206 h->dtahe_data.dtada_normal = 1; 1207 return (dt_set_errno(dtp, EDT_BADRVAL)); 1208 } 1209 1210 return (0); 1211 1212 case DTRACE_AGGWALK_REMOVE: { 1213 dtrace_aggdata_t *aggdata = &h->dtahe_data; 1214 int max_cpus = agp->dtat_maxcpu; 1215 1216 /* 1217 * First, remove this hash entry from its hash chain. 1218 */ 1219 if (h->dtahe_prev != NULL) { 1220 h->dtahe_prev->dtahe_next = h->dtahe_next; 1221 } else { 1222 dt_ahash_t *hash = &agp->dtat_hash; 1223 size_t ndx = h->dtahe_hashval % hash->dtah_size; 1224 1225 assert(hash->dtah_hash[ndx] == h); 1226 hash->dtah_hash[ndx] = h->dtahe_next; 1227 } 1228 1229 if (h->dtahe_next != NULL) 1230 h->dtahe_next->dtahe_prev = h->dtahe_prev; 1231 1232 /* 1233 * Now remove it from the list of all hash entries. 1234 */ 1235 if (h->dtahe_prevall != NULL) { 1236 h->dtahe_prevall->dtahe_nextall = h->dtahe_nextall; 1237 } else { 1238 dt_ahash_t *hash = &agp->dtat_hash; 1239 1240 assert(hash->dtah_all == h); 1241 hash->dtah_all = h->dtahe_nextall; 1242 } 1243 1244 if (h->dtahe_nextall != NULL) 1245 h->dtahe_nextall->dtahe_prevall = h->dtahe_prevall; 1246 1247 /* 1248 * We're unlinked. We can safely destroy the data. 1249 */ 1250 if (aggdata->dtada_percpu != NULL) { 1251 for (i = 0; i < max_cpus; i++) 1252 free(aggdata->dtada_percpu[i]); 1253 free(aggdata->dtada_percpu); 1254 } 1255 1256 free(aggdata->dtada_data); 1257 free(h); 1258 1259 return (0); 1260 } 1261 1262 default: 1263 return (dt_set_errno(dtp, EDT_BADRVAL)); 1264 } 1265 1266 return (0); 1267 } 1268 1269 void 1270 dt_aggregate_qsort(dtrace_hdl_t *dtp, void *base, size_t nel, size_t width, 1271 int (*compar)(const void *, const void *)) 1272 { 1273 int rev = dt_revsort, key = dt_keysort, keypos = dt_keypos; 1274 dtrace_optval_t keyposopt = dtp->dt_options[DTRACEOPT_AGGSORTKEYPOS]; 1275 1276 dt_revsort = (dtp->dt_options[DTRACEOPT_AGGSORTREV] != DTRACEOPT_UNSET); 1277 dt_keysort = (dtp->dt_options[DTRACEOPT_AGGSORTKEY] != DTRACEOPT_UNSET); 1278 1279 if (keyposopt != DTRACEOPT_UNSET && keyposopt <= INT_MAX) { 1280 dt_keypos = (int)keyposopt; 1281 } else { 1282 dt_keypos = 0; 1283 } 1284 1285 if (compar == NULL) { 1286 if (!dt_keysort) { 1287 compar = dt_aggregate_varvalcmp; 1288 } else { 1289 compar = dt_aggregate_varkeycmp; 1290 } 1291 } 1292 1293 qsort(base, nel, width, compar); 1294 1295 dt_revsort = rev; 1296 dt_keysort = key; 1297 dt_keypos = keypos; 1298 } 1299 1300 int 1301 dtrace_aggregate_walk(dtrace_hdl_t *dtp, dtrace_aggregate_f *func, void *arg) 1302 { 1303 dt_ahashent_t *h, *next; 1304 dt_ahash_t *hash = &dtp->dt_aggregate.dtat_hash; 1305 1306 for (h = hash->dtah_all; h != NULL; h = next) { 1307 /* 1308 * dt_aggwalk_rval() can potentially remove the current hash 1309 * entry; we need to load the next hash entry before calling 1310 * into it. 1311 */ 1312 next = h->dtahe_nextall; 1313 1314 if (dt_aggwalk_rval(dtp, h, func(&h->dtahe_data, arg)) == -1) 1315 return (-1); 1316 } 1317 1318 return (0); 1319 } 1320 1321 static int 1322 dt_aggregate_walk_sorted(dtrace_hdl_t *dtp, 1323 dtrace_aggregate_f *func, void *arg, 1324 int (*sfunc)(const void *, const void *)) 1325 { 1326 dt_aggregate_t *agp = &dtp->dt_aggregate; 1327 dt_ahashent_t *h, **sorted; 1328 dt_ahash_t *hash = &agp->dtat_hash; 1329 size_t i, nentries = 0; 1330 1331 for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) 1332 nentries++; 1333 1334 sorted = dt_alloc(dtp, nentries * sizeof (dt_ahashent_t *)); 1335 1336 if (sorted == NULL) 1337 return (-1); 1338 1339 for (h = hash->dtah_all, i = 0; h != NULL; h = h->dtahe_nextall) 1340 sorted[i++] = h; 1341 1342 (void) pthread_mutex_lock(&dt_qsort_lock); 1343 1344 if (sfunc == NULL) { 1345 dt_aggregate_qsort(dtp, sorted, nentries, 1346 sizeof (dt_ahashent_t *), NULL); 1347 } else { 1348 /* 1349 * If we've been explicitly passed a sorting function, 1350 * we'll use that -- ignoring the values of the "aggsortrev", 1351 * "aggsortkey" and "aggsortkeypos" options. 1352 */ 1353 qsort(sorted, nentries, sizeof (dt_ahashent_t *), sfunc); 1354 } 1355 1356 (void) pthread_mutex_unlock(&dt_qsort_lock); 1357 1358 for (i = 0; i < nentries; i++) { 1359 h = sorted[i]; 1360 1361 if (dt_aggwalk_rval(dtp, h, func(&h->dtahe_data, arg)) == -1) { 1362 dt_free(dtp, sorted); 1363 return (-1); 1364 } 1365 } 1366 1367 dt_free(dtp, sorted); 1368 return (0); 1369 } 1370 1371 int 1372 dtrace_aggregate_walk_sorted(dtrace_hdl_t *dtp, 1373 dtrace_aggregate_f *func, void *arg) 1374 { 1375 return (dt_aggregate_walk_sorted(dtp, func, arg, NULL)); 1376 } 1377 1378 int 1379 dtrace_aggregate_walk_keysorted(dtrace_hdl_t *dtp, 1380 dtrace_aggregate_f *func, void *arg) 1381 { 1382 return (dt_aggregate_walk_sorted(dtp, func, 1383 arg, dt_aggregate_varkeycmp)); 1384 } 1385 1386 int 1387 dtrace_aggregate_walk_valsorted(dtrace_hdl_t *dtp, 1388 dtrace_aggregate_f *func, void *arg) 1389 { 1390 return (dt_aggregate_walk_sorted(dtp, func, 1391 arg, dt_aggregate_varvalcmp)); 1392 } 1393 1394 int 1395 dtrace_aggregate_walk_keyvarsorted(dtrace_hdl_t *dtp, 1396 dtrace_aggregate_f *func, void *arg) 1397 { 1398 return (dt_aggregate_walk_sorted(dtp, func, 1399 arg, dt_aggregate_keyvarcmp)); 1400 } 1401 1402 int 1403 dtrace_aggregate_walk_valvarsorted(dtrace_hdl_t *dtp, 1404 dtrace_aggregate_f *func, void *arg) 1405 { 1406 return (dt_aggregate_walk_sorted(dtp, func, 1407 arg, dt_aggregate_valvarcmp)); 1408 } 1409 1410 int 1411 dtrace_aggregate_walk_keyrevsorted(dtrace_hdl_t *dtp, 1412 dtrace_aggregate_f *func, void *arg) 1413 { 1414 return (dt_aggregate_walk_sorted(dtp, func, 1415 arg, dt_aggregate_varkeyrevcmp)); 1416 } 1417 1418 int 1419 dtrace_aggregate_walk_valrevsorted(dtrace_hdl_t *dtp, 1420 dtrace_aggregate_f *func, void *arg) 1421 { 1422 return (dt_aggregate_walk_sorted(dtp, func, 1423 arg, dt_aggregate_varvalrevcmp)); 1424 } 1425 1426 int 1427 dtrace_aggregate_walk_keyvarrevsorted(dtrace_hdl_t *dtp, 1428 dtrace_aggregate_f *func, void *arg) 1429 { 1430 return (dt_aggregate_walk_sorted(dtp, func, 1431 arg, dt_aggregate_keyvarrevcmp)); 1432 } 1433 1434 int 1435 dtrace_aggregate_walk_valvarrevsorted(dtrace_hdl_t *dtp, 1436 dtrace_aggregate_f *func, void *arg) 1437 { 1438 return (dt_aggregate_walk_sorted(dtp, func, 1439 arg, dt_aggregate_valvarrevcmp)); 1440 } 1441 1442 int 1443 dtrace_aggregate_walk_joined(dtrace_hdl_t *dtp, dtrace_aggvarid_t *aggvars, 1444 int naggvars, dtrace_aggregate_walk_joined_f *func, void *arg) 1445 { 1446 dt_aggregate_t *agp = &dtp->dt_aggregate; 1447 dt_ahashent_t *h, **sorted = NULL, ***bundle, **nbundle; 1448 const dtrace_aggdata_t **data; 1449 dt_ahashent_t *zaggdata = NULL; 1450 dt_ahash_t *hash = &agp->dtat_hash; 1451 size_t nentries = 0, nbundles = 0, start, zsize = 0, bundlesize; 1452 dtrace_aggvarid_t max = 0, aggvar; 1453 int rval = -1, *map, *remap = NULL; 1454 int i, j; 1455 dtrace_optval_t sortpos = dtp->dt_options[DTRACEOPT_AGGSORTPOS]; 1456 1457 /* 1458 * If the sorting position is greater than the number of aggregation 1459 * variable IDs, we silently set it to 0. 1460 */ 1461 if (sortpos == DTRACEOPT_UNSET || sortpos >= naggvars) 1462 sortpos = 0; 1463 1464 /* 1465 * First we need to translate the specified aggregation variable IDs 1466 * into a linear map that will allow us to translate an aggregation 1467 * variable ID into its position in the specified aggvars. 1468 */ 1469 for (i = 0; i < naggvars; i++) { 1470 if (aggvars[i] == DTRACE_AGGVARIDNONE || aggvars[i] < 0) 1471 return (dt_set_errno(dtp, EDT_BADAGGVAR)); 1472 1473 if (aggvars[i] > max) 1474 max = aggvars[i]; 1475 } 1476 1477 if ((map = dt_zalloc(dtp, (max + 1) * sizeof (int))) == NULL) 1478 return (-1); 1479 1480 zaggdata = dt_zalloc(dtp, naggvars * sizeof (dt_ahashent_t)); 1481 1482 if (zaggdata == NULL) 1483 goto out; 1484 1485 for (i = 0; i < naggvars; i++) { 1486 int ndx = i + sortpos; 1487 1488 if (ndx >= naggvars) 1489 ndx -= naggvars; 1490 1491 aggvar = aggvars[ndx]; 1492 assert(aggvar <= max); 1493 1494 if (map[aggvar]) { 1495 /* 1496 * We have an aggregation variable that is present 1497 * more than once in the array of aggregation 1498 * variables. While it's unclear why one might want 1499 * to do this, it's legal. To support this construct, 1500 * we will allocate a remap that will indicate the 1501 * position from which this aggregation variable 1502 * should be pulled. (That is, where the remap will 1503 * map from one position to another.) 1504 */ 1505 if (remap == NULL) { 1506 remap = dt_zalloc(dtp, naggvars * sizeof (int)); 1507 1508 if (remap == NULL) 1509 goto out; 1510 } 1511 1512 /* 1513 * Given that the variable is already present, assert 1514 * that following through the mapping and adjusting 1515 * for the sort position yields the same aggregation 1516 * variable ID. 1517 */ 1518 assert(aggvars[(map[aggvar] - 1 + sortpos) % 1519 naggvars] == aggvars[ndx]); 1520 1521 remap[i] = map[aggvar]; 1522 continue; 1523 } 1524 1525 map[aggvar] = i + 1; 1526 } 1527 1528 /* 1529 * We need to take two passes over the data to size our allocation, so 1530 * we'll use the first pass to also fill in the zero-filled data to be 1531 * used to properly format a zero-valued aggregation. 1532 */ 1533 for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) { 1534 dtrace_aggvarid_t id; 1535 int ndx; 1536 1537 if ((id = dt_aggregate_aggvarid(h)) > max || !(ndx = map[id])) 1538 continue; 1539 1540 if (zaggdata[ndx - 1].dtahe_size == 0) { 1541 zaggdata[ndx - 1].dtahe_size = h->dtahe_size; 1542 zaggdata[ndx - 1].dtahe_data = h->dtahe_data; 1543 } 1544 1545 nentries++; 1546 } 1547 1548 if (nentries == 0) { 1549 /* 1550 * We couldn't find any entries; there is nothing else to do. 1551 */ 1552 rval = 0; 1553 goto out; 1554 } 1555 1556 /* 1557 * Before we sort the data, we're going to look for any holes in our 1558 * zero-filled data. This will occur if an aggregation variable that 1559 * we are being asked to print has not yet been assigned the result of 1560 * any aggregating action for _any_ tuple. The issue becomes that we 1561 * would like a zero value to be printed for all columns for this 1562 * aggregation, but without any record description, we don't know the 1563 * aggregating action that corresponds to the aggregation variable. To 1564 * try to find a match, we're simply going to lookup aggregation IDs 1565 * (which are guaranteed to be contiguous and to start from 1), looking 1566 * for the specified aggregation variable ID. If we find a match, 1567 * we'll use that. If we iterate over all aggregation IDs and don't 1568 * find a match, then we must be an anonymous enabling. (Anonymous 1569 * enablings can't currently derive either aggregation variable IDs or 1570 * aggregation variable names given only an aggregation ID.) In this 1571 * obscure case (anonymous enabling, multiple aggregation printa() with 1572 * some aggregations not represented for any tuple), our defined 1573 * behavior is that the zero will be printed in the format of the first 1574 * aggregation variable that contains any non-zero value. 1575 */ 1576 for (i = 0; i < naggvars; i++) { 1577 if (zaggdata[i].dtahe_size == 0) { 1578 dtrace_aggvarid_t aggvar; 1579 1580 aggvar = aggvars[(i - sortpos + naggvars) % naggvars]; 1581 assert(zaggdata[i].dtahe_data.dtada_data == NULL); 1582 1583 for (j = DTRACE_AGGIDNONE + 1; ; j++) { 1584 dtrace_aggdesc_t *agg; 1585 dtrace_aggdata_t *aggdata; 1586 1587 if (dt_aggid_lookup(dtp, j, &agg) != 0) 1588 break; 1589 1590 if (agg->dtagd_varid != aggvar) 1591 continue; 1592 1593 /* 1594 * We have our description -- now we need to 1595 * cons up the zaggdata entry for it. 1596 */ 1597 aggdata = &zaggdata[i].dtahe_data; 1598 aggdata->dtada_size = agg->dtagd_size; 1599 aggdata->dtada_desc = agg; 1600 aggdata->dtada_handle = dtp; 1601 (void) dt_epid_lookup(dtp, agg->dtagd_epid, 1602 &aggdata->dtada_edesc, 1603 &aggdata->dtada_pdesc); 1604 aggdata->dtada_normal = 1; 1605 zaggdata[i].dtahe_hashval = 0; 1606 zaggdata[i].dtahe_size = agg->dtagd_size; 1607 break; 1608 } 1609 1610 if (zaggdata[i].dtahe_size == 0) { 1611 caddr_t data; 1612 1613 /* 1614 * We couldn't find this aggregation, meaning 1615 * that we have never seen it before for any 1616 * tuple _and_ this is an anonymous enabling. 1617 * That is, we're in the obscure case outlined 1618 * above. In this case, our defined behavior 1619 * is to format the data in the format of the 1620 * first non-zero aggregation -- of which, of 1621 * course, we know there to be at least one 1622 * (or nentries would have been zero). 1623 */ 1624 for (j = 0; j < naggvars; j++) { 1625 if (zaggdata[j].dtahe_size != 0) 1626 break; 1627 } 1628 1629 assert(j < naggvars); 1630 zaggdata[i] = zaggdata[j]; 1631 1632 data = zaggdata[i].dtahe_data.dtada_data; 1633 assert(data != NULL); 1634 } 1635 } 1636 } 1637 1638 /* 1639 * Now we need to allocate our zero-filled data for use for 1640 * aggregations that don't have a value corresponding to a given key. 1641 */ 1642 for (i = 0; i < naggvars; i++) { 1643 dtrace_aggdata_t *aggdata = &zaggdata[i].dtahe_data; 1644 dtrace_aggdesc_t *aggdesc = aggdata->dtada_desc; 1645 dtrace_recdesc_t *rec; 1646 uint64_t larg; 1647 caddr_t zdata; 1648 1649 zsize = zaggdata[i].dtahe_size; 1650 assert(zsize != 0); 1651 1652 if ((zdata = dt_zalloc(dtp, zsize)) == NULL) { 1653 /* 1654 * If we failed to allocated some zero-filled data, we 1655 * need to zero out the remaining dtada_data pointers 1656 * to prevent the wrong data from being freed below. 1657 */ 1658 for (j = i; j < naggvars; j++) 1659 zaggdata[j].dtahe_data.dtada_data = NULL; 1660 goto out; 1661 } 1662 1663 aggvar = aggvars[(i - sortpos + naggvars) % naggvars]; 1664 1665 /* 1666 * First, the easy bit. To maintain compatibility with 1667 * consumers that pull the compiler-generated ID out of the 1668 * data, we put that ID at the top of the zero-filled data. 1669 */ 1670 rec = &aggdesc->dtagd_rec[0]; 1671 /* LINTED - alignment */ 1672 *((dtrace_aggvarid_t *)(zdata + rec->dtrd_offset)) = aggvar; 1673 1674 rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1]; 1675 1676 /* 1677 * Now for the more complicated part. If (and only if) this 1678 * is an lquantize() aggregating action, zero-filled data is 1679 * not equivalent to an empty record: we must also get the 1680 * parameters for the lquantize(). 1681 */ 1682 if (rec->dtrd_action == DTRACEAGG_LQUANTIZE) { 1683 if (aggdata->dtada_data != NULL) { 1684 /* 1685 * The easier case here is if we actually have 1686 * some prototype data -- in which case we 1687 * manually dig it out of the aggregation 1688 * record. 1689 */ 1690 /* LINTED - alignment */ 1691 larg = *((uint64_t *)(aggdata->dtada_data + 1692 rec->dtrd_offset)); 1693 } else { 1694 /* 1695 * We don't have any prototype data. As a 1696 * result, we know that we _do_ have the 1697 * compiler-generated information. (If this 1698 * were an anonymous enabling, all of our 1699 * zero-filled data would have prototype data 1700 * -- either directly or indirectly.) So as 1701 * gross as it is, we'll grovel around in the 1702 * compiler-generated information to find the 1703 * lquantize() parameters. 1704 */ 1705 dtrace_stmtdesc_t *sdp; 1706 dt_ident_t *aid; 1707 dt_idsig_t *isp; 1708 1709 sdp = (dtrace_stmtdesc_t *)(uintptr_t) 1710 aggdesc->dtagd_rec[0].dtrd_uarg; 1711 aid = sdp->dtsd_aggdata; 1712 isp = (dt_idsig_t *)aid->di_data; 1713 assert(isp->dis_auxinfo != 0); 1714 larg = isp->dis_auxinfo; 1715 } 1716 1717 /* LINTED - alignment */ 1718 *((uint64_t *)(zdata + rec->dtrd_offset)) = larg; 1719 } 1720 1721 aggdata->dtada_data = zdata; 1722 } 1723 1724 /* 1725 * Now that we've dealt with setting up our zero-filled data, we can 1726 * allocate our sorted array, and take another pass over the data to 1727 * fill it. 1728 */ 1729 sorted = dt_alloc(dtp, nentries * sizeof (dt_ahashent_t *)); 1730 1731 if (sorted == NULL) 1732 goto out; 1733 1734 for (h = hash->dtah_all, i = 0; h != NULL; h = h->dtahe_nextall) { 1735 dtrace_aggvarid_t id; 1736 1737 if ((id = dt_aggregate_aggvarid(h)) > max || !map[id]) 1738 continue; 1739 1740 sorted[i++] = h; 1741 } 1742 1743 assert(i == nentries); 1744 1745 /* 1746 * We've loaded our array; now we need to sort by value to allow us 1747 * to create bundles of like value. We're going to acquire the 1748 * dt_qsort_lock here, and hold it across all of our subsequent 1749 * comparison and sorting. 1750 */ 1751 (void) pthread_mutex_lock(&dt_qsort_lock); 1752 1753 qsort(sorted, nentries, sizeof (dt_ahashent_t *), 1754 dt_aggregate_keyvarcmp); 1755 1756 /* 1757 * Now we need to go through and create bundles. Because the number 1758 * of bundles is bounded by the size of the sorted array, we're going 1759 * to reuse the underlying storage. And note that "bundle" is an 1760 * array of pointers to arrays of pointers to dt_ahashent_t -- making 1761 * its type (regrettably) "dt_ahashent_t ***". (Regrettable because 1762 * '*' -- like '_' and 'X' -- should never appear in triplicate in 1763 * an ideal world.) 1764 */ 1765 bundle = (dt_ahashent_t ***)sorted; 1766 1767 for (i = 1, start = 0; i <= nentries; i++) { 1768 if (i < nentries && 1769 dt_aggregate_keycmp(&sorted[i], &sorted[i - 1]) == 0) 1770 continue; 1771 1772 /* 1773 * We have a bundle boundary. Everything from start to 1774 * (i - 1) belongs in one bundle. 1775 */ 1776 assert(i - start <= naggvars); 1777 bundlesize = (naggvars + 2) * sizeof (dt_ahashent_t *); 1778 1779 if ((nbundle = dt_zalloc(dtp, bundlesize)) == NULL) { 1780 (void) pthread_mutex_unlock(&dt_qsort_lock); 1781 goto out; 1782 } 1783 1784 for (j = start; j < i; j++) { 1785 dtrace_aggvarid_t id = dt_aggregate_aggvarid(sorted[j]); 1786 1787 assert(id <= max); 1788 assert(map[id] != 0); 1789 assert(map[id] - 1 < naggvars); 1790 assert(nbundle[map[id] - 1] == NULL); 1791 nbundle[map[id] - 1] = sorted[j]; 1792 1793 if (nbundle[naggvars] == NULL) 1794 nbundle[naggvars] = sorted[j]; 1795 } 1796 1797 for (j = 0; j < naggvars; j++) { 1798 if (nbundle[j] != NULL) 1799 continue; 1800 1801 /* 1802 * Before we assume that this aggregation variable 1803 * isn't present (and fall back to using the 1804 * zero-filled data allocated earlier), check the 1805 * remap. If we have a remapping, we'll drop it in 1806 * here. Note that we might be remapping an 1807 * aggregation variable that isn't present for this 1808 * key; in this case, the aggregation data that we 1809 * copy will point to the zeroed data. 1810 */ 1811 if (remap != NULL && remap[j]) { 1812 assert(remap[j] - 1 < j); 1813 assert(nbundle[remap[j] - 1] != NULL); 1814 nbundle[j] = nbundle[remap[j] - 1]; 1815 } else { 1816 nbundle[j] = &zaggdata[j]; 1817 } 1818 } 1819 1820 bundle[nbundles++] = nbundle; 1821 start = i; 1822 } 1823 1824 /* 1825 * Now we need to re-sort based on the first value. 1826 */ 1827 dt_aggregate_qsort(dtp, bundle, nbundles, sizeof (dt_ahashent_t **), 1828 dt_aggregate_bundlecmp); 1829 1830 (void) pthread_mutex_unlock(&dt_qsort_lock); 1831 1832 /* 1833 * We're done! Now we just need to go back over the sorted bundles, 1834 * calling the function. 1835 */ 1836 data = alloca((naggvars + 1) * sizeof (dtrace_aggdata_t *)); 1837 1838 for (i = 0; i < nbundles; i++) { 1839 for (j = 0; j < naggvars; j++) 1840 data[j + 1] = NULL; 1841 1842 for (j = 0; j < naggvars; j++) { 1843 int ndx = j - sortpos; 1844 1845 if (ndx < 0) 1846 ndx += naggvars; 1847 1848 assert(bundle[i][ndx] != NULL); 1849 data[j + 1] = &bundle[i][ndx]->dtahe_data; 1850 } 1851 1852 for (j = 0; j < naggvars; j++) 1853 assert(data[j + 1] != NULL); 1854 1855 /* 1856 * The representative key is the last element in the bundle. 1857 * Assert that we have one, and then set it to be the first 1858 * element of data. 1859 */ 1860 assert(bundle[i][j] != NULL); 1861 data[0] = &bundle[i][j]->dtahe_data; 1862 1863 if ((rval = func(data, naggvars + 1, arg)) == -1) 1864 goto out; 1865 } 1866 1867 rval = 0; 1868 out: 1869 for (i = 0; i < nbundles; i++) 1870 dt_free(dtp, bundle[i]); 1871 1872 if (zaggdata != NULL) { 1873 for (i = 0; i < naggvars; i++) 1874 dt_free(dtp, zaggdata[i].dtahe_data.dtada_data); 1875 } 1876 1877 dt_free(dtp, zaggdata); 1878 dt_free(dtp, sorted); 1879 dt_free(dtp, remap); 1880 dt_free(dtp, map); 1881 1882 return (rval); 1883 } 1884 1885 int 1886 dtrace_aggregate_print(dtrace_hdl_t *dtp, FILE *fp, 1887 dtrace_aggregate_walk_f *func) 1888 { 1889 dt_print_aggdata_t pd; 1890 1891 pd.dtpa_dtp = dtp; 1892 pd.dtpa_fp = fp; 1893 pd.dtpa_allunprint = 1; 1894 1895 if (func == NULL) 1896 func = dtrace_aggregate_walk_sorted; 1897 1898 if ((*func)(dtp, dt_print_agg, &pd) == -1) 1899 return (dt_set_errno(dtp, dtp->dt_errno)); 1900 1901 return (0); 1902 } 1903 1904 void 1905 dtrace_aggregate_clear(dtrace_hdl_t *dtp) 1906 { 1907 dt_aggregate_t *agp = &dtp->dt_aggregate; 1908 dt_ahash_t *hash = &agp->dtat_hash; 1909 dt_ahashent_t *h; 1910 dtrace_aggdata_t *data; 1911 dtrace_aggdesc_t *aggdesc; 1912 dtrace_recdesc_t *rec; 1913 int i, max_cpus = agp->dtat_maxcpu; 1914 1915 for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) { 1916 aggdesc = h->dtahe_data.dtada_desc; 1917 rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1]; 1918 data = &h->dtahe_data; 1919 1920 bzero(&data->dtada_data[rec->dtrd_offset], rec->dtrd_size); 1921 1922 if (data->dtada_percpu == NULL) 1923 continue; 1924 1925 for (i = 0; i < max_cpus; i++) 1926 bzero(data->dtada_percpu[i], rec->dtrd_size); 1927 } 1928 } 1929 1930 void 1931 dt_aggregate_destroy(dtrace_hdl_t *dtp) 1932 { 1933 dt_aggregate_t *agp = &dtp->dt_aggregate; 1934 dt_ahash_t *hash = &agp->dtat_hash; 1935 dt_ahashent_t *h, *next; 1936 dtrace_aggdata_t *aggdata; 1937 int i, max_cpus = agp->dtat_maxcpu; 1938 1939 if (hash->dtah_hash == NULL) { 1940 assert(hash->dtah_all == NULL); 1941 } else { 1942 free(hash->dtah_hash); 1943 1944 for (h = hash->dtah_all; h != NULL; h = next) { 1945 next = h->dtahe_nextall; 1946 1947 aggdata = &h->dtahe_data; 1948 1949 if (aggdata->dtada_percpu != NULL) { 1950 for (i = 0; i < max_cpus; i++) 1951 free(aggdata->dtada_percpu[i]); 1952 free(aggdata->dtada_percpu); 1953 } 1954 1955 free(aggdata->dtada_data); 1956 free(h); 1957 } 1958 1959 hash->dtah_hash = NULL; 1960 hash->dtah_all = NULL; 1961 hash->dtah_size = 0; 1962 } 1963 1964 free(agp->dtat_buf.dtbd_data); 1965 free(agp->dtat_cpus); 1966 } 1967