1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <stdlib.h> 30 #include <strings.h> 31 #include <errno.h> 32 #include <unistd.h> 33 #include <dt_impl.h> 34 #include <assert.h> 35 #if defined(sun) 36 #include <alloca.h> 37 #else 38 #include <sys/sysctl.h> 39 #include <libproc_compat.h> 40 #endif 41 #include <limits.h> 42 43 #define DTRACE_AHASHSIZE 32779 /* big 'ol prime */ 44 45 /* 46 * Because qsort(3C) does not allow an argument to be passed to a comparison 47 * function, the variables that affect comparison must regrettably be global; 48 * they are protected by a global static lock, dt_qsort_lock. 49 */ 50 static pthread_mutex_t dt_qsort_lock = PTHREAD_MUTEX_INITIALIZER; 51 52 static int dt_revsort; 53 static int dt_keysort; 54 static int dt_keypos; 55 56 #define DT_LESSTHAN (dt_revsort == 0 ? -1 : 1) 57 #define DT_GREATERTHAN (dt_revsort == 0 ? 1 : -1) 58 59 static void 60 dt_aggregate_count(int64_t *existing, int64_t *new, size_t size) 61 { 62 uint_t i; 63 64 for (i = 0; i < size / sizeof (int64_t); i++) 65 existing[i] = existing[i] + new[i]; 66 } 67 68 static int 69 dt_aggregate_countcmp(int64_t *lhs, int64_t *rhs) 70 { 71 int64_t lvar = *lhs; 72 int64_t rvar = *rhs; 73 74 if (lvar < rvar) 75 return (DT_LESSTHAN); 76 77 if (lvar > rvar) 78 return (DT_GREATERTHAN); 79 80 return (0); 81 } 82 83 /*ARGSUSED*/ 84 static void 85 dt_aggregate_min(int64_t *existing, int64_t *new, size_t size) 86 { 87 if (*new < *existing) 88 *existing = *new; 89 } 90 91 /*ARGSUSED*/ 92 static void 93 dt_aggregate_max(int64_t *existing, int64_t *new, size_t size) 94 { 95 if (*new > *existing) 96 *existing = *new; 97 } 98 99 static int 100 dt_aggregate_averagecmp(int64_t *lhs, int64_t *rhs) 101 { 102 int64_t lavg = lhs[0] ? (lhs[1] / lhs[0]) : 0; 103 int64_t ravg = rhs[0] ? (rhs[1] / rhs[0]) : 0; 104 105 if (lavg < ravg) 106 return (DT_LESSTHAN); 107 108 if (lavg > ravg) 109 return (DT_GREATERTHAN); 110 111 return (0); 112 } 113 114 static int 115 dt_aggregate_stddevcmp(int64_t *lhs, int64_t *rhs) 116 { 117 uint64_t lsd = dt_stddev((uint64_t *)lhs, 1); 118 uint64_t rsd = dt_stddev((uint64_t *)rhs, 1); 119 120 if (lsd < rsd) 121 return (DT_LESSTHAN); 122 123 if (lsd > rsd) 124 return (DT_GREATERTHAN); 125 126 return (0); 127 } 128 129 /*ARGSUSED*/ 130 static void 131 dt_aggregate_lquantize(int64_t *existing, int64_t *new, size_t size) 132 { 133 int64_t arg = *existing++; 134 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg); 135 int i; 136 137 for (i = 0; i <= levels + 1; i++) 138 existing[i] = existing[i] + new[i + 1]; 139 } 140 141 static long double 142 dt_aggregate_lquantizedsum(int64_t *lquanta) 143 { 144 int64_t arg = *lquanta++; 145 int32_t base = DTRACE_LQUANTIZE_BASE(arg); 146 uint16_t step = DTRACE_LQUANTIZE_STEP(arg); 147 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg), i; 148 long double total = (long double)lquanta[0] * (long double)(base - 1); 149 150 for (i = 0; i < levels; base += step, i++) 151 total += (long double)lquanta[i + 1] * (long double)base; 152 153 return (total + (long double)lquanta[levels + 1] * 154 (long double)(base + 1)); 155 } 156 157 static int64_t 158 dt_aggregate_lquantizedzero(int64_t *lquanta) 159 { 160 int64_t arg = *lquanta++; 161 int32_t base = DTRACE_LQUANTIZE_BASE(arg); 162 uint16_t step = DTRACE_LQUANTIZE_STEP(arg); 163 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg), i; 164 165 if (base - 1 == 0) 166 return (lquanta[0]); 167 168 for (i = 0; i < levels; base += step, i++) { 169 if (base != 0) 170 continue; 171 172 return (lquanta[i + 1]); 173 } 174 175 if (base + 1 == 0) 176 return (lquanta[levels + 1]); 177 178 return (0); 179 } 180 181 static int 182 dt_aggregate_lquantizedcmp(int64_t *lhs, int64_t *rhs) 183 { 184 long double lsum = dt_aggregate_lquantizedsum(lhs); 185 long double rsum = dt_aggregate_lquantizedsum(rhs); 186 int64_t lzero, rzero; 187 188 if (lsum < rsum) 189 return (DT_LESSTHAN); 190 191 if (lsum > rsum) 192 return (DT_GREATERTHAN); 193 194 /* 195 * If they're both equal, then we will compare based on the weights at 196 * zero. If the weights at zero are equal (or if zero is not within 197 * the range of the linear quantization), then this will be judged a 198 * tie and will be resolved based on the key comparison. 199 */ 200 lzero = dt_aggregate_lquantizedzero(lhs); 201 rzero = dt_aggregate_lquantizedzero(rhs); 202 203 if (lzero < rzero) 204 return (DT_LESSTHAN); 205 206 if (lzero > rzero) 207 return (DT_GREATERTHAN); 208 209 return (0); 210 } 211 212 static int 213 dt_aggregate_quantizedcmp(int64_t *lhs, int64_t *rhs) 214 { 215 int nbuckets = DTRACE_QUANTIZE_NBUCKETS; 216 long double ltotal = 0, rtotal = 0; 217 int64_t lzero, rzero; 218 uint_t i; 219 220 for (i = 0; i < nbuckets; i++) { 221 int64_t bucketval = DTRACE_QUANTIZE_BUCKETVAL(i); 222 223 if (bucketval == 0) { 224 lzero = lhs[i]; 225 rzero = rhs[i]; 226 } 227 228 ltotal += (long double)bucketval * (long double)lhs[i]; 229 rtotal += (long double)bucketval * (long double)rhs[i]; 230 } 231 232 if (ltotal < rtotal) 233 return (DT_LESSTHAN); 234 235 if (ltotal > rtotal) 236 return (DT_GREATERTHAN); 237 238 /* 239 * If they're both equal, then we will compare based on the weights at 240 * zero. If the weights at zero are equal, then this will be judged a 241 * tie and will be resolved based on the key comparison. 242 */ 243 if (lzero < rzero) 244 return (DT_LESSTHAN); 245 246 if (lzero > rzero) 247 return (DT_GREATERTHAN); 248 249 return (0); 250 } 251 252 static void 253 dt_aggregate_usym(dtrace_hdl_t *dtp, uint64_t *data) 254 { 255 uint64_t pid = data[0]; 256 uint64_t *pc = &data[1]; 257 struct ps_prochandle *P; 258 GElf_Sym sym; 259 260 if (dtp->dt_vector != NULL) 261 return; 262 263 if ((P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0)) == NULL) 264 return; 265 266 dt_proc_lock(dtp, P); 267 268 if (Plookup_by_addr(P, *pc, NULL, 0, &sym) == 0) 269 *pc = sym.st_value; 270 271 dt_proc_unlock(dtp, P); 272 dt_proc_release(dtp, P); 273 } 274 275 static void 276 dt_aggregate_umod(dtrace_hdl_t *dtp, uint64_t *data) 277 { 278 uint64_t pid = data[0]; 279 uint64_t *pc = &data[1]; 280 struct ps_prochandle *P; 281 const prmap_t *map; 282 283 if (dtp->dt_vector != NULL) 284 return; 285 286 if ((P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0)) == NULL) 287 return; 288 289 dt_proc_lock(dtp, P); 290 291 if ((map = Paddr_to_map(P, *pc)) != NULL) 292 *pc = map->pr_vaddr; 293 294 dt_proc_unlock(dtp, P); 295 dt_proc_release(dtp, P); 296 } 297 298 static void 299 dt_aggregate_sym(dtrace_hdl_t *dtp, uint64_t *data) 300 { 301 GElf_Sym sym; 302 uint64_t *pc = data; 303 304 if (dtrace_lookup_by_addr(dtp, *pc, &sym, NULL) == 0) 305 *pc = sym.st_value; 306 } 307 308 static void 309 dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *data) 310 { 311 uint64_t *pc = data; 312 dt_module_t *dmp; 313 314 if (dtp->dt_vector != NULL) { 315 /* 316 * We don't have a way of just getting the module for a 317 * vectored open, and it doesn't seem to be worth defining 318 * one. This means that use of mod() won't get true 319 * aggregation in the postmortem case (some modules may 320 * appear more than once in aggregation output). It seems 321 * unlikely that anyone will ever notice or care... 322 */ 323 return; 324 } 325 326 for (dmp = dt_list_next(&dtp->dt_modlist); dmp != NULL; 327 dmp = dt_list_next(dmp)) { 328 if (*pc - dmp->dm_text_va < dmp->dm_text_size) { 329 *pc = dmp->dm_text_va; 330 return; 331 } 332 } 333 } 334 335 static dtrace_aggvarid_t 336 dt_aggregate_aggvarid(dt_ahashent_t *ent) 337 { 338 dtrace_aggdesc_t *agg = ent->dtahe_data.dtada_desc; 339 caddr_t data = ent->dtahe_data.dtada_data; 340 dtrace_recdesc_t *rec = agg->dtagd_rec; 341 342 /* 343 * First, we'll check the variable ID in the aggdesc. If it's valid, 344 * we'll return it. If not, we'll use the compiler-generated ID 345 * present as the first record. 346 */ 347 if (agg->dtagd_varid != DTRACE_AGGVARIDNONE) 348 return (agg->dtagd_varid); 349 350 agg->dtagd_varid = *((dtrace_aggvarid_t *)(uintptr_t)(data + 351 rec->dtrd_offset)); 352 353 return (agg->dtagd_varid); 354 } 355 356 357 static int 358 dt_aggregate_snap_cpu(dtrace_hdl_t *dtp, processorid_t cpu) 359 { 360 dtrace_epid_t id; 361 uint64_t hashval; 362 size_t offs, roffs, size, ndx; 363 int i, j, rval; 364 caddr_t addr, data; 365 dtrace_recdesc_t *rec; 366 dt_aggregate_t *agp = &dtp->dt_aggregate; 367 dtrace_aggdesc_t *agg; 368 dt_ahash_t *hash = &agp->dtat_hash; 369 dt_ahashent_t *h; 370 dtrace_bufdesc_t b = agp->dtat_buf, *buf = &b; 371 dtrace_aggdata_t *aggdata; 372 int flags = agp->dtat_flags; 373 374 buf->dtbd_cpu = cpu; 375 376 #if defined(sun) 377 if (dt_ioctl(dtp, DTRACEIOC_AGGSNAP, buf) == -1) { 378 #else 379 if (dt_ioctl(dtp, DTRACEIOC_AGGSNAP, &buf) == -1) { 380 #endif 381 if (errno == ENOENT) { 382 /* 383 * If that failed with ENOENT, it may be because the 384 * CPU was unconfigured. This is okay; we'll just 385 * do nothing but return success. 386 */ 387 return (0); 388 } 389 390 return (dt_set_errno(dtp, errno)); 391 } 392 393 if (buf->dtbd_drops != 0) { 394 if (dt_handle_cpudrop(dtp, cpu, 395 DTRACEDROP_AGGREGATION, buf->dtbd_drops) == -1) 396 return (-1); 397 } 398 399 if (buf->dtbd_size == 0) 400 return (0); 401 402 if (hash->dtah_hash == NULL) { 403 size_t size; 404 405 hash->dtah_size = DTRACE_AHASHSIZE; 406 size = hash->dtah_size * sizeof (dt_ahashent_t *); 407 408 if ((hash->dtah_hash = malloc(size)) == NULL) 409 return (dt_set_errno(dtp, EDT_NOMEM)); 410 411 bzero(hash->dtah_hash, size); 412 } 413 414 for (offs = 0; offs < buf->dtbd_size; ) { 415 /* 416 * We're guaranteed to have an ID. 417 */ 418 id = *((dtrace_epid_t *)((uintptr_t)buf->dtbd_data + 419 (uintptr_t)offs)); 420 421 if (id == DTRACE_AGGIDNONE) { 422 /* 423 * This is filler to assure proper alignment of the 424 * next record; we simply ignore it. 425 */ 426 offs += sizeof (id); 427 continue; 428 } 429 430 if ((rval = dt_aggid_lookup(dtp, id, &agg)) != 0) 431 return (rval); 432 433 addr = buf->dtbd_data + offs; 434 size = agg->dtagd_size; 435 hashval = 0; 436 437 for (j = 0; j < agg->dtagd_nrecs - 1; j++) { 438 rec = &agg->dtagd_rec[j]; 439 roffs = rec->dtrd_offset; 440 441 switch (rec->dtrd_action) { 442 case DTRACEACT_USYM: 443 dt_aggregate_usym(dtp, 444 /* LINTED - alignment */ 445 (uint64_t *)&addr[roffs]); 446 break; 447 448 case DTRACEACT_UMOD: 449 dt_aggregate_umod(dtp, 450 /* LINTED - alignment */ 451 (uint64_t *)&addr[roffs]); 452 break; 453 454 case DTRACEACT_SYM: 455 /* LINTED - alignment */ 456 dt_aggregate_sym(dtp, (uint64_t *)&addr[roffs]); 457 break; 458 459 case DTRACEACT_MOD: 460 /* LINTED - alignment */ 461 dt_aggregate_mod(dtp, (uint64_t *)&addr[roffs]); 462 break; 463 464 default: 465 break; 466 } 467 468 for (i = 0; i < rec->dtrd_size; i++) 469 hashval += addr[roffs + i]; 470 } 471 472 ndx = hashval % hash->dtah_size; 473 474 for (h = hash->dtah_hash[ndx]; h != NULL; h = h->dtahe_next) { 475 if (h->dtahe_hashval != hashval) 476 continue; 477 478 if (h->dtahe_size != size) 479 continue; 480 481 aggdata = &h->dtahe_data; 482 data = aggdata->dtada_data; 483 484 for (j = 0; j < agg->dtagd_nrecs - 1; j++) { 485 rec = &agg->dtagd_rec[j]; 486 roffs = rec->dtrd_offset; 487 488 for (i = 0; i < rec->dtrd_size; i++) 489 if (addr[roffs + i] != data[roffs + i]) 490 goto hashnext; 491 } 492 493 /* 494 * We found it. Now we need to apply the aggregating 495 * action on the data here. 496 */ 497 rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1]; 498 roffs = rec->dtrd_offset; 499 /* LINTED - alignment */ 500 h->dtahe_aggregate((int64_t *)&data[roffs], 501 /* LINTED - alignment */ 502 (int64_t *)&addr[roffs], rec->dtrd_size); 503 504 /* 505 * If we're keeping per CPU data, apply the aggregating 506 * action there as well. 507 */ 508 if (aggdata->dtada_percpu != NULL) { 509 data = aggdata->dtada_percpu[cpu]; 510 511 /* LINTED - alignment */ 512 h->dtahe_aggregate((int64_t *)data, 513 /* LINTED - alignment */ 514 (int64_t *)&addr[roffs], rec->dtrd_size); 515 } 516 517 goto bufnext; 518 hashnext: 519 continue; 520 } 521 522 /* 523 * If we're here, we couldn't find an entry for this record. 524 */ 525 if ((h = malloc(sizeof (dt_ahashent_t))) == NULL) 526 return (dt_set_errno(dtp, EDT_NOMEM)); 527 bzero(h, sizeof (dt_ahashent_t)); 528 aggdata = &h->dtahe_data; 529 530 if ((aggdata->dtada_data = malloc(size)) == NULL) { 531 free(h); 532 return (dt_set_errno(dtp, EDT_NOMEM)); 533 } 534 535 bcopy(addr, aggdata->dtada_data, size); 536 aggdata->dtada_size = size; 537 aggdata->dtada_desc = agg; 538 aggdata->dtada_handle = dtp; 539 (void) dt_epid_lookup(dtp, agg->dtagd_epid, 540 &aggdata->dtada_edesc, &aggdata->dtada_pdesc); 541 aggdata->dtada_normal = 1; 542 543 h->dtahe_hashval = hashval; 544 h->dtahe_size = size; 545 (void) dt_aggregate_aggvarid(h); 546 547 rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1]; 548 549 if (flags & DTRACE_A_PERCPU) { 550 int max_cpus = agp->dtat_maxcpu; 551 caddr_t *percpu = malloc(max_cpus * sizeof (caddr_t)); 552 553 if (percpu == NULL) { 554 free(aggdata->dtada_data); 555 free(h); 556 return (dt_set_errno(dtp, EDT_NOMEM)); 557 } 558 559 for (j = 0; j < max_cpus; j++) { 560 percpu[j] = malloc(rec->dtrd_size); 561 562 if (percpu[j] == NULL) { 563 while (--j >= 0) 564 free(percpu[j]); 565 566 free(aggdata->dtada_data); 567 free(h); 568 return (dt_set_errno(dtp, EDT_NOMEM)); 569 } 570 571 if (j == cpu) { 572 bcopy(&addr[rec->dtrd_offset], 573 percpu[j], rec->dtrd_size); 574 } else { 575 bzero(percpu[j], rec->dtrd_size); 576 } 577 } 578 579 aggdata->dtada_percpu = percpu; 580 } 581 582 switch (rec->dtrd_action) { 583 case DTRACEAGG_MIN: 584 h->dtahe_aggregate = dt_aggregate_min; 585 break; 586 587 case DTRACEAGG_MAX: 588 h->dtahe_aggregate = dt_aggregate_max; 589 break; 590 591 case DTRACEAGG_LQUANTIZE: 592 h->dtahe_aggregate = dt_aggregate_lquantize; 593 break; 594 595 case DTRACEAGG_COUNT: 596 case DTRACEAGG_SUM: 597 case DTRACEAGG_AVG: 598 case DTRACEAGG_STDDEV: 599 case DTRACEAGG_QUANTIZE: 600 h->dtahe_aggregate = dt_aggregate_count; 601 break; 602 603 default: 604 return (dt_set_errno(dtp, EDT_BADAGG)); 605 } 606 607 if (hash->dtah_hash[ndx] != NULL) 608 hash->dtah_hash[ndx]->dtahe_prev = h; 609 610 h->dtahe_next = hash->dtah_hash[ndx]; 611 hash->dtah_hash[ndx] = h; 612 613 if (hash->dtah_all != NULL) 614 hash->dtah_all->dtahe_prevall = h; 615 616 h->dtahe_nextall = hash->dtah_all; 617 hash->dtah_all = h; 618 bufnext: 619 offs += agg->dtagd_size; 620 } 621 622 return (0); 623 } 624 625 int 626 dtrace_aggregate_snap(dtrace_hdl_t *dtp) 627 { 628 int i, rval; 629 dt_aggregate_t *agp = &dtp->dt_aggregate; 630 hrtime_t now = gethrtime(); 631 dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_AGGRATE]; 632 633 if (dtp->dt_lastagg != 0) { 634 if (now - dtp->dt_lastagg < interval) 635 return (0); 636 637 dtp->dt_lastagg += interval; 638 } else { 639 dtp->dt_lastagg = now; 640 } 641 642 if (!dtp->dt_active) 643 return (dt_set_errno(dtp, EINVAL)); 644 645 if (agp->dtat_buf.dtbd_size == 0) 646 return (0); 647 648 for (i = 0; i < agp->dtat_ncpus; i++) { 649 if ((rval = dt_aggregate_snap_cpu(dtp, agp->dtat_cpus[i]))) 650 return (rval); 651 } 652 653 return (0); 654 } 655 656 static int 657 dt_aggregate_hashcmp(const void *lhs, const void *rhs) 658 { 659 dt_ahashent_t *lh = *((dt_ahashent_t **)lhs); 660 dt_ahashent_t *rh = *((dt_ahashent_t **)rhs); 661 dtrace_aggdesc_t *lagg = lh->dtahe_data.dtada_desc; 662 dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc; 663 664 if (lagg->dtagd_nrecs < ragg->dtagd_nrecs) 665 return (DT_LESSTHAN); 666 667 if (lagg->dtagd_nrecs > ragg->dtagd_nrecs) 668 return (DT_GREATERTHAN); 669 670 return (0); 671 } 672 673 static int 674 dt_aggregate_varcmp(const void *lhs, const void *rhs) 675 { 676 dt_ahashent_t *lh = *((dt_ahashent_t **)lhs); 677 dt_ahashent_t *rh = *((dt_ahashent_t **)rhs); 678 dtrace_aggvarid_t lid, rid; 679 680 lid = dt_aggregate_aggvarid(lh); 681 rid = dt_aggregate_aggvarid(rh); 682 683 if (lid < rid) 684 return (DT_LESSTHAN); 685 686 if (lid > rid) 687 return (DT_GREATERTHAN); 688 689 return (0); 690 } 691 692 static int 693 dt_aggregate_keycmp(const void *lhs, const void *rhs) 694 { 695 dt_ahashent_t *lh = *((dt_ahashent_t **)lhs); 696 dt_ahashent_t *rh = *((dt_ahashent_t **)rhs); 697 dtrace_aggdesc_t *lagg = lh->dtahe_data.dtada_desc; 698 dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc; 699 dtrace_recdesc_t *lrec, *rrec; 700 char *ldata, *rdata; 701 int rval, i, j, keypos, nrecs; 702 703 if ((rval = dt_aggregate_hashcmp(lhs, rhs)) != 0) 704 return (rval); 705 706 nrecs = lagg->dtagd_nrecs - 1; 707 assert(nrecs == ragg->dtagd_nrecs - 1); 708 709 keypos = dt_keypos + 1 >= nrecs ? 0 : dt_keypos; 710 711 for (i = 1; i < nrecs; i++) { 712 uint64_t lval, rval; 713 int ndx = i + keypos; 714 715 if (ndx >= nrecs) 716 ndx = ndx - nrecs + 1; 717 718 lrec = &lagg->dtagd_rec[ndx]; 719 rrec = &ragg->dtagd_rec[ndx]; 720 721 ldata = lh->dtahe_data.dtada_data + lrec->dtrd_offset; 722 rdata = rh->dtahe_data.dtada_data + rrec->dtrd_offset; 723 724 if (lrec->dtrd_size < rrec->dtrd_size) 725 return (DT_LESSTHAN); 726 727 if (lrec->dtrd_size > rrec->dtrd_size) 728 return (DT_GREATERTHAN); 729 730 switch (lrec->dtrd_size) { 731 case sizeof (uint64_t): 732 /* LINTED - alignment */ 733 lval = *((uint64_t *)ldata); 734 /* LINTED - alignment */ 735 rval = *((uint64_t *)rdata); 736 break; 737 738 case sizeof (uint32_t): 739 /* LINTED - alignment */ 740 lval = *((uint32_t *)ldata); 741 /* LINTED - alignment */ 742 rval = *((uint32_t *)rdata); 743 break; 744 745 case sizeof (uint16_t): 746 /* LINTED - alignment */ 747 lval = *((uint16_t *)ldata); 748 /* LINTED - alignment */ 749 rval = *((uint16_t *)rdata); 750 break; 751 752 case sizeof (uint8_t): 753 lval = *((uint8_t *)ldata); 754 rval = *((uint8_t *)rdata); 755 break; 756 757 default: 758 switch (lrec->dtrd_action) { 759 case DTRACEACT_UMOD: 760 case DTRACEACT_UADDR: 761 case DTRACEACT_USYM: 762 for (j = 0; j < 2; j++) { 763 /* LINTED - alignment */ 764 lval = ((uint64_t *)ldata)[j]; 765 /* LINTED - alignment */ 766 rval = ((uint64_t *)rdata)[j]; 767 768 if (lval < rval) 769 return (DT_LESSTHAN); 770 771 if (lval > rval) 772 return (DT_GREATERTHAN); 773 } 774 775 break; 776 777 default: 778 for (j = 0; j < lrec->dtrd_size; j++) { 779 lval = ((uint8_t *)ldata)[j]; 780 rval = ((uint8_t *)rdata)[j]; 781 782 if (lval < rval) 783 return (DT_LESSTHAN); 784 785 if (lval > rval) 786 return (DT_GREATERTHAN); 787 } 788 } 789 790 continue; 791 } 792 793 if (lval < rval) 794 return (DT_LESSTHAN); 795 796 if (lval > rval) 797 return (DT_GREATERTHAN); 798 } 799 800 return (0); 801 } 802 803 static int 804 dt_aggregate_valcmp(const void *lhs, const void *rhs) 805 { 806 dt_ahashent_t *lh = *((dt_ahashent_t **)lhs); 807 dt_ahashent_t *rh = *((dt_ahashent_t **)rhs); 808 dtrace_aggdesc_t *lagg = lh->dtahe_data.dtada_desc; 809 dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc; 810 caddr_t ldata = lh->dtahe_data.dtada_data; 811 caddr_t rdata = rh->dtahe_data.dtada_data; 812 dtrace_recdesc_t *lrec, *rrec; 813 int64_t *laddr, *raddr; 814 int rval, i; 815 816 if ((rval = dt_aggregate_hashcmp(lhs, rhs)) != 0) 817 return (rval); 818 819 if (lagg->dtagd_nrecs > ragg->dtagd_nrecs) 820 return (DT_GREATERTHAN); 821 822 if (lagg->dtagd_nrecs < ragg->dtagd_nrecs) 823 return (DT_LESSTHAN); 824 825 for (i = 0; i < lagg->dtagd_nrecs; i++) { 826 lrec = &lagg->dtagd_rec[i]; 827 rrec = &ragg->dtagd_rec[i]; 828 829 if (lrec->dtrd_offset < rrec->dtrd_offset) 830 return (DT_LESSTHAN); 831 832 if (lrec->dtrd_offset > rrec->dtrd_offset) 833 return (DT_GREATERTHAN); 834 835 if (lrec->dtrd_action < rrec->dtrd_action) 836 return (DT_LESSTHAN); 837 838 if (lrec->dtrd_action > rrec->dtrd_action) 839 return (DT_GREATERTHAN); 840 } 841 842 laddr = (int64_t *)(uintptr_t)(ldata + lrec->dtrd_offset); 843 raddr = (int64_t *)(uintptr_t)(rdata + rrec->dtrd_offset); 844 845 switch (lrec->dtrd_action) { 846 case DTRACEAGG_AVG: 847 rval = dt_aggregate_averagecmp(laddr, raddr); 848 break; 849 850 case DTRACEAGG_STDDEV: 851 rval = dt_aggregate_stddevcmp(laddr, raddr); 852 break; 853 854 case DTRACEAGG_QUANTIZE: 855 rval = dt_aggregate_quantizedcmp(laddr, raddr); 856 break; 857 858 case DTRACEAGG_LQUANTIZE: 859 rval = dt_aggregate_lquantizedcmp(laddr, raddr); 860 break; 861 862 case DTRACEAGG_COUNT: 863 case DTRACEAGG_SUM: 864 case DTRACEAGG_MIN: 865 case DTRACEAGG_MAX: 866 rval = dt_aggregate_countcmp(laddr, raddr); 867 break; 868 869 default: 870 assert(0); 871 } 872 873 return (rval); 874 } 875 876 static int 877 dt_aggregate_valkeycmp(const void *lhs, const void *rhs) 878 { 879 int rval; 880 881 if ((rval = dt_aggregate_valcmp(lhs, rhs)) != 0) 882 return (rval); 883 884 /* 885 * If we're here, the values for the two aggregation elements are 886 * equal. We already know that the key layout is the same for the two 887 * elements; we must now compare the keys themselves as a tie-breaker. 888 */ 889 return (dt_aggregate_keycmp(lhs, rhs)); 890 } 891 892 static int 893 dt_aggregate_keyvarcmp(const void *lhs, const void *rhs) 894 { 895 int rval; 896 897 if ((rval = dt_aggregate_keycmp(lhs, rhs)) != 0) 898 return (rval); 899 900 return (dt_aggregate_varcmp(lhs, rhs)); 901 } 902 903 static int 904 dt_aggregate_varkeycmp(const void *lhs, const void *rhs) 905 { 906 int rval; 907 908 if ((rval = dt_aggregate_varcmp(lhs, rhs)) != 0) 909 return (rval); 910 911 return (dt_aggregate_keycmp(lhs, rhs)); 912 } 913 914 static int 915 dt_aggregate_valvarcmp(const void *lhs, const void *rhs) 916 { 917 int rval; 918 919 if ((rval = dt_aggregate_valkeycmp(lhs, rhs)) != 0) 920 return (rval); 921 922 return (dt_aggregate_varcmp(lhs, rhs)); 923 } 924 925 static int 926 dt_aggregate_varvalcmp(const void *lhs, const void *rhs) 927 { 928 int rval; 929 930 if ((rval = dt_aggregate_varcmp(lhs, rhs)) != 0) 931 return (rval); 932 933 return (dt_aggregate_valkeycmp(lhs, rhs)); 934 } 935 936 static int 937 dt_aggregate_keyvarrevcmp(const void *lhs, const void *rhs) 938 { 939 return (dt_aggregate_keyvarcmp(rhs, lhs)); 940 } 941 942 static int 943 dt_aggregate_varkeyrevcmp(const void *lhs, const void *rhs) 944 { 945 return (dt_aggregate_varkeycmp(rhs, lhs)); 946 } 947 948 static int 949 dt_aggregate_valvarrevcmp(const void *lhs, const void *rhs) 950 { 951 return (dt_aggregate_valvarcmp(rhs, lhs)); 952 } 953 954 static int 955 dt_aggregate_varvalrevcmp(const void *lhs, const void *rhs) 956 { 957 return (dt_aggregate_varvalcmp(rhs, lhs)); 958 } 959 960 static int 961 dt_aggregate_bundlecmp(const void *lhs, const void *rhs) 962 { 963 dt_ahashent_t **lh = *((dt_ahashent_t ***)lhs); 964 dt_ahashent_t **rh = *((dt_ahashent_t ***)rhs); 965 int i, rval; 966 967 if (dt_keysort) { 968 /* 969 * If we're sorting on keys, we need to scan until we find the 970 * last entry -- that's the representative key. (The order of 971 * the bundle is values followed by key to accommodate the 972 * default behavior of sorting by value.) If the keys are 973 * equal, we'll fall into the value comparison loop, below. 974 */ 975 for (i = 0; lh[i + 1] != NULL; i++) 976 continue; 977 978 assert(i != 0); 979 assert(rh[i + 1] == NULL); 980 981 if ((rval = dt_aggregate_keycmp(&lh[i], &rh[i])) != 0) 982 return (rval); 983 } 984 985 for (i = 0; ; i++) { 986 if (lh[i + 1] == NULL) { 987 /* 988 * All of the values are equal; if we're sorting on 989 * keys, then we're only here because the keys were 990 * found to be equal and these records are therefore 991 * equal. If we're not sorting on keys, we'll use the 992 * key comparison from the representative key as the 993 * tie-breaker. 994 */ 995 if (dt_keysort) 996 return (0); 997 998 assert(i != 0); 999 assert(rh[i + 1] == NULL); 1000 return (dt_aggregate_keycmp(&lh[i], &rh[i])); 1001 } else { 1002 if ((rval = dt_aggregate_valcmp(&lh[i], &rh[i])) != 0) 1003 return (rval); 1004 } 1005 } 1006 } 1007 1008 int 1009 dt_aggregate_go(dtrace_hdl_t *dtp) 1010 { 1011 dt_aggregate_t *agp = &dtp->dt_aggregate; 1012 dtrace_optval_t size, cpu; 1013 dtrace_bufdesc_t *buf = &agp->dtat_buf; 1014 int rval, i; 1015 1016 assert(agp->dtat_maxcpu == 0); 1017 assert(agp->dtat_ncpu == 0); 1018 assert(agp->dtat_cpus == NULL); 1019 1020 agp->dtat_maxcpu = dt_sysconf(dtp, _SC_CPUID_MAX) + 1; 1021 agp->dtat_ncpu = dt_sysconf(dtp, _SC_NPROCESSORS_MAX); 1022 agp->dtat_cpus = malloc(agp->dtat_ncpu * sizeof (processorid_t)); 1023 1024 if (agp->dtat_cpus == NULL) 1025 return (dt_set_errno(dtp, EDT_NOMEM)); 1026 1027 /* 1028 * Use the aggregation buffer size as reloaded from the kernel. 1029 */ 1030 size = dtp->dt_options[DTRACEOPT_AGGSIZE]; 1031 1032 rval = dtrace_getopt(dtp, "aggsize", &size); 1033 assert(rval == 0); 1034 1035 if (size == 0 || size == DTRACEOPT_UNSET) 1036 return (0); 1037 1038 buf = &agp->dtat_buf; 1039 buf->dtbd_size = size; 1040 1041 if ((buf->dtbd_data = malloc(buf->dtbd_size)) == NULL) 1042 return (dt_set_errno(dtp, EDT_NOMEM)); 1043 1044 /* 1045 * Now query for the CPUs enabled. 1046 */ 1047 rval = dtrace_getopt(dtp, "cpu", &cpu); 1048 assert(rval == 0 && cpu != DTRACEOPT_UNSET); 1049 1050 if (cpu != DTRACE_CPUALL) { 1051 assert(cpu < agp->dtat_ncpu); 1052 agp->dtat_cpus[agp->dtat_ncpus++] = (processorid_t)cpu; 1053 1054 return (0); 1055 } 1056 1057 agp->dtat_ncpus = 0; 1058 for (i = 0; i < agp->dtat_maxcpu; i++) { 1059 if (dt_status(dtp, i) == -1) 1060 continue; 1061 1062 agp->dtat_cpus[agp->dtat_ncpus++] = i; 1063 } 1064 1065 return (0); 1066 } 1067 1068 static int 1069 dt_aggwalk_rval(dtrace_hdl_t *dtp, dt_ahashent_t *h, int rval) 1070 { 1071 dt_aggregate_t *agp = &dtp->dt_aggregate; 1072 dtrace_aggdata_t *data; 1073 dtrace_aggdesc_t *aggdesc; 1074 dtrace_recdesc_t *rec; 1075 int i; 1076 1077 switch (rval) { 1078 case DTRACE_AGGWALK_NEXT: 1079 break; 1080 1081 case DTRACE_AGGWALK_CLEAR: { 1082 uint32_t size, offs = 0; 1083 1084 aggdesc = h->dtahe_data.dtada_desc; 1085 rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1]; 1086 size = rec->dtrd_size; 1087 data = &h->dtahe_data; 1088 1089 if (rec->dtrd_action == DTRACEAGG_LQUANTIZE) { 1090 offs = sizeof (uint64_t); 1091 size -= sizeof (uint64_t); 1092 } 1093 1094 bzero(&data->dtada_data[rec->dtrd_offset] + offs, size); 1095 1096 if (data->dtada_percpu == NULL) 1097 break; 1098 1099 for (i = 0; i < dtp->dt_aggregate.dtat_maxcpu; i++) 1100 bzero(data->dtada_percpu[i] + offs, size); 1101 break; 1102 } 1103 1104 case DTRACE_AGGWALK_ERROR: 1105 /* 1106 * We assume that errno is already set in this case. 1107 */ 1108 return (dt_set_errno(dtp, errno)); 1109 1110 case DTRACE_AGGWALK_ABORT: 1111 return (dt_set_errno(dtp, EDT_DIRABORT)); 1112 1113 case DTRACE_AGGWALK_DENORMALIZE: 1114 h->dtahe_data.dtada_normal = 1; 1115 return (0); 1116 1117 case DTRACE_AGGWALK_NORMALIZE: 1118 if (h->dtahe_data.dtada_normal == 0) { 1119 h->dtahe_data.dtada_normal = 1; 1120 return (dt_set_errno(dtp, EDT_BADRVAL)); 1121 } 1122 1123 return (0); 1124 1125 case DTRACE_AGGWALK_REMOVE: { 1126 dtrace_aggdata_t *aggdata = &h->dtahe_data; 1127 int max_cpus = agp->dtat_maxcpu; 1128 1129 /* 1130 * First, remove this hash entry from its hash chain. 1131 */ 1132 if (h->dtahe_prev != NULL) { 1133 h->dtahe_prev->dtahe_next = h->dtahe_next; 1134 } else { 1135 dt_ahash_t *hash = &agp->dtat_hash; 1136 size_t ndx = h->dtahe_hashval % hash->dtah_size; 1137 1138 assert(hash->dtah_hash[ndx] == h); 1139 hash->dtah_hash[ndx] = h->dtahe_next; 1140 } 1141 1142 if (h->dtahe_next != NULL) 1143 h->dtahe_next->dtahe_prev = h->dtahe_prev; 1144 1145 /* 1146 * Now remove it from the list of all hash entries. 1147 */ 1148 if (h->dtahe_prevall != NULL) { 1149 h->dtahe_prevall->dtahe_nextall = h->dtahe_nextall; 1150 } else { 1151 dt_ahash_t *hash = &agp->dtat_hash; 1152 1153 assert(hash->dtah_all == h); 1154 hash->dtah_all = h->dtahe_nextall; 1155 } 1156 1157 if (h->dtahe_nextall != NULL) 1158 h->dtahe_nextall->dtahe_prevall = h->dtahe_prevall; 1159 1160 /* 1161 * We're unlinked. We can safely destroy the data. 1162 */ 1163 if (aggdata->dtada_percpu != NULL) { 1164 for (i = 0; i < max_cpus; i++) 1165 free(aggdata->dtada_percpu[i]); 1166 free(aggdata->dtada_percpu); 1167 } 1168 1169 free(aggdata->dtada_data); 1170 free(h); 1171 1172 return (0); 1173 } 1174 1175 default: 1176 return (dt_set_errno(dtp, EDT_BADRVAL)); 1177 } 1178 1179 return (0); 1180 } 1181 1182 void 1183 dt_aggregate_qsort(dtrace_hdl_t *dtp, void *base, size_t nel, size_t width, 1184 int (*compar)(const void *, const void *)) 1185 { 1186 int rev = dt_revsort, key = dt_keysort, keypos = dt_keypos; 1187 dtrace_optval_t keyposopt = dtp->dt_options[DTRACEOPT_AGGSORTKEYPOS]; 1188 1189 dt_revsort = (dtp->dt_options[DTRACEOPT_AGGSORTREV] != DTRACEOPT_UNSET); 1190 dt_keysort = (dtp->dt_options[DTRACEOPT_AGGSORTKEY] != DTRACEOPT_UNSET); 1191 1192 if (keyposopt != DTRACEOPT_UNSET && keyposopt <= INT_MAX) { 1193 dt_keypos = (int)keyposopt; 1194 } else { 1195 dt_keypos = 0; 1196 } 1197 1198 if (compar == NULL) { 1199 if (!dt_keysort) { 1200 compar = dt_aggregate_varvalcmp; 1201 } else { 1202 compar = dt_aggregate_varkeycmp; 1203 } 1204 } 1205 1206 qsort(base, nel, width, compar); 1207 1208 dt_revsort = rev; 1209 dt_keysort = key; 1210 dt_keypos = keypos; 1211 } 1212 1213 int 1214 dtrace_aggregate_walk(dtrace_hdl_t *dtp, dtrace_aggregate_f *func, void *arg) 1215 { 1216 dt_ahashent_t *h, *next; 1217 dt_ahash_t *hash = &dtp->dt_aggregate.dtat_hash; 1218 1219 for (h = hash->dtah_all; h != NULL; h = next) { 1220 /* 1221 * dt_aggwalk_rval() can potentially remove the current hash 1222 * entry; we need to load the next hash entry before calling 1223 * into it. 1224 */ 1225 next = h->dtahe_nextall; 1226 1227 if (dt_aggwalk_rval(dtp, h, func(&h->dtahe_data, arg)) == -1) 1228 return (-1); 1229 } 1230 1231 return (0); 1232 } 1233 1234 static int 1235 dt_aggregate_walk_sorted(dtrace_hdl_t *dtp, 1236 dtrace_aggregate_f *func, void *arg, 1237 int (*sfunc)(const void *, const void *)) 1238 { 1239 dt_aggregate_t *agp = &dtp->dt_aggregate; 1240 dt_ahashent_t *h, **sorted; 1241 dt_ahash_t *hash = &agp->dtat_hash; 1242 size_t i, nentries = 0; 1243 1244 for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) 1245 nentries++; 1246 1247 sorted = dt_alloc(dtp, nentries * sizeof (dt_ahashent_t *)); 1248 1249 if (sorted == NULL) 1250 return (-1); 1251 1252 for (h = hash->dtah_all, i = 0; h != NULL; h = h->dtahe_nextall) 1253 sorted[i++] = h; 1254 1255 (void) pthread_mutex_lock(&dt_qsort_lock); 1256 1257 if (sfunc == NULL) { 1258 dt_aggregate_qsort(dtp, sorted, nentries, 1259 sizeof (dt_ahashent_t *), NULL); 1260 } else { 1261 /* 1262 * If we've been explicitly passed a sorting function, 1263 * we'll use that -- ignoring the values of the "aggsortrev", 1264 * "aggsortkey" and "aggsortkeypos" options. 1265 */ 1266 qsort(sorted, nentries, sizeof (dt_ahashent_t *), sfunc); 1267 } 1268 1269 (void) pthread_mutex_unlock(&dt_qsort_lock); 1270 1271 for (i = 0; i < nentries; i++) { 1272 h = sorted[i]; 1273 1274 if (dt_aggwalk_rval(dtp, h, func(&h->dtahe_data, arg)) == -1) { 1275 dt_free(dtp, sorted); 1276 return (-1); 1277 } 1278 } 1279 1280 dt_free(dtp, sorted); 1281 return (0); 1282 } 1283 1284 int 1285 dtrace_aggregate_walk_sorted(dtrace_hdl_t *dtp, 1286 dtrace_aggregate_f *func, void *arg) 1287 { 1288 return (dt_aggregate_walk_sorted(dtp, func, arg, NULL)); 1289 } 1290 1291 int 1292 dtrace_aggregate_walk_keysorted(dtrace_hdl_t *dtp, 1293 dtrace_aggregate_f *func, void *arg) 1294 { 1295 return (dt_aggregate_walk_sorted(dtp, func, 1296 arg, dt_aggregate_varkeycmp)); 1297 } 1298 1299 int 1300 dtrace_aggregate_walk_valsorted(dtrace_hdl_t *dtp, 1301 dtrace_aggregate_f *func, void *arg) 1302 { 1303 return (dt_aggregate_walk_sorted(dtp, func, 1304 arg, dt_aggregate_varvalcmp)); 1305 } 1306 1307 int 1308 dtrace_aggregate_walk_keyvarsorted(dtrace_hdl_t *dtp, 1309 dtrace_aggregate_f *func, void *arg) 1310 { 1311 return (dt_aggregate_walk_sorted(dtp, func, 1312 arg, dt_aggregate_keyvarcmp)); 1313 } 1314 1315 int 1316 dtrace_aggregate_walk_valvarsorted(dtrace_hdl_t *dtp, 1317 dtrace_aggregate_f *func, void *arg) 1318 { 1319 return (dt_aggregate_walk_sorted(dtp, func, 1320 arg, dt_aggregate_valvarcmp)); 1321 } 1322 1323 int 1324 dtrace_aggregate_walk_keyrevsorted(dtrace_hdl_t *dtp, 1325 dtrace_aggregate_f *func, void *arg) 1326 { 1327 return (dt_aggregate_walk_sorted(dtp, func, 1328 arg, dt_aggregate_varkeyrevcmp)); 1329 } 1330 1331 int 1332 dtrace_aggregate_walk_valrevsorted(dtrace_hdl_t *dtp, 1333 dtrace_aggregate_f *func, void *arg) 1334 { 1335 return (dt_aggregate_walk_sorted(dtp, func, 1336 arg, dt_aggregate_varvalrevcmp)); 1337 } 1338 1339 int 1340 dtrace_aggregate_walk_keyvarrevsorted(dtrace_hdl_t *dtp, 1341 dtrace_aggregate_f *func, void *arg) 1342 { 1343 return (dt_aggregate_walk_sorted(dtp, func, 1344 arg, dt_aggregate_keyvarrevcmp)); 1345 } 1346 1347 int 1348 dtrace_aggregate_walk_valvarrevsorted(dtrace_hdl_t *dtp, 1349 dtrace_aggregate_f *func, void *arg) 1350 { 1351 return (dt_aggregate_walk_sorted(dtp, func, 1352 arg, dt_aggregate_valvarrevcmp)); 1353 } 1354 1355 int 1356 dtrace_aggregate_walk_joined(dtrace_hdl_t *dtp, dtrace_aggvarid_t *aggvars, 1357 int naggvars, dtrace_aggregate_walk_joined_f *func, void *arg) 1358 { 1359 dt_aggregate_t *agp = &dtp->dt_aggregate; 1360 dt_ahashent_t *h, **sorted = NULL, ***bundle, **nbundle; 1361 const dtrace_aggdata_t **data; 1362 dt_ahashent_t *zaggdata = NULL; 1363 dt_ahash_t *hash = &agp->dtat_hash; 1364 size_t nentries = 0, nbundles = 0, start, zsize = 0, bundlesize; 1365 dtrace_aggvarid_t max = 0, aggvar; 1366 int rval = -1, *map, *remap = NULL; 1367 int i, j; 1368 dtrace_optval_t sortpos = dtp->dt_options[DTRACEOPT_AGGSORTPOS]; 1369 1370 /* 1371 * If the sorting position is greater than the number of aggregation 1372 * variable IDs, we silently set it to 0. 1373 */ 1374 if (sortpos == DTRACEOPT_UNSET || sortpos >= naggvars) 1375 sortpos = 0; 1376 1377 /* 1378 * First we need to translate the specified aggregation variable IDs 1379 * into a linear map that will allow us to translate an aggregation 1380 * variable ID into its position in the specified aggvars. 1381 */ 1382 for (i = 0; i < naggvars; i++) { 1383 if (aggvars[i] == DTRACE_AGGVARIDNONE || aggvars[i] < 0) 1384 return (dt_set_errno(dtp, EDT_BADAGGVAR)); 1385 1386 if (aggvars[i] > max) 1387 max = aggvars[i]; 1388 } 1389 1390 if ((map = dt_zalloc(dtp, (max + 1) * sizeof (int))) == NULL) 1391 return (-1); 1392 1393 zaggdata = dt_zalloc(dtp, naggvars * sizeof (dt_ahashent_t)); 1394 1395 if (zaggdata == NULL) 1396 goto out; 1397 1398 for (i = 0; i < naggvars; i++) { 1399 int ndx = i + sortpos; 1400 1401 if (ndx >= naggvars) 1402 ndx -= naggvars; 1403 1404 aggvar = aggvars[ndx]; 1405 assert(aggvar <= max); 1406 1407 if (map[aggvar]) { 1408 /* 1409 * We have an aggregation variable that is present 1410 * more than once in the array of aggregation 1411 * variables. While it's unclear why one might want 1412 * to do this, it's legal. To support this construct, 1413 * we will allocate a remap that will indicate the 1414 * position from which this aggregation variable 1415 * should be pulled. (That is, where the remap will 1416 * map from one position to another.) 1417 */ 1418 if (remap == NULL) { 1419 remap = dt_zalloc(dtp, naggvars * sizeof (int)); 1420 1421 if (remap == NULL) 1422 goto out; 1423 } 1424 1425 /* 1426 * Given that the variable is already present, assert 1427 * that following through the mapping and adjusting 1428 * for the sort position yields the same aggregation 1429 * variable ID. 1430 */ 1431 assert(aggvars[(map[aggvar] - 1 + sortpos) % 1432 naggvars] == aggvars[ndx]); 1433 1434 remap[i] = map[aggvar]; 1435 continue; 1436 } 1437 1438 map[aggvar] = i + 1; 1439 } 1440 1441 /* 1442 * We need to take two passes over the data to size our allocation, so 1443 * we'll use the first pass to also fill in the zero-filled data to be 1444 * used to properly format a zero-valued aggregation. 1445 */ 1446 for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) { 1447 dtrace_aggvarid_t id; 1448 int ndx; 1449 1450 if ((id = dt_aggregate_aggvarid(h)) > max || !(ndx = map[id])) 1451 continue; 1452 1453 if (zaggdata[ndx - 1].dtahe_size == 0) { 1454 zaggdata[ndx - 1].dtahe_size = h->dtahe_size; 1455 zaggdata[ndx - 1].dtahe_data = h->dtahe_data; 1456 } 1457 1458 nentries++; 1459 } 1460 1461 if (nentries == 0) { 1462 /* 1463 * We couldn't find any entries; there is nothing else to do. 1464 */ 1465 rval = 0; 1466 goto out; 1467 } 1468 1469 /* 1470 * Before we sort the data, we're going to look for any holes in our 1471 * zero-filled data. This will occur if an aggregation variable that 1472 * we are being asked to print has not yet been assigned the result of 1473 * any aggregating action for _any_ tuple. The issue becomes that we 1474 * would like a zero value to be printed for all columns for this 1475 * aggregation, but without any record description, we don't know the 1476 * aggregating action that corresponds to the aggregation variable. To 1477 * try to find a match, we're simply going to lookup aggregation IDs 1478 * (which are guaranteed to be contiguous and to start from 1), looking 1479 * for the specified aggregation variable ID. If we find a match, 1480 * we'll use that. If we iterate over all aggregation IDs and don't 1481 * find a match, then we must be an anonymous enabling. (Anonymous 1482 * enablings can't currently derive either aggregation variable IDs or 1483 * aggregation variable names given only an aggregation ID.) In this 1484 * obscure case (anonymous enabling, multiple aggregation printa() with 1485 * some aggregations not represented for any tuple), our defined 1486 * behavior is that the zero will be printed in the format of the first 1487 * aggregation variable that contains any non-zero value. 1488 */ 1489 for (i = 0; i < naggvars; i++) { 1490 if (zaggdata[i].dtahe_size == 0) { 1491 dtrace_aggvarid_t aggvar; 1492 1493 aggvar = aggvars[(i - sortpos + naggvars) % naggvars]; 1494 assert(zaggdata[i].dtahe_data.dtada_data == NULL); 1495 1496 for (j = DTRACE_AGGIDNONE + 1; ; j++) { 1497 dtrace_aggdesc_t *agg; 1498 dtrace_aggdata_t *aggdata; 1499 1500 if (dt_aggid_lookup(dtp, j, &agg) != 0) 1501 break; 1502 1503 if (agg->dtagd_varid != aggvar) 1504 continue; 1505 1506 /* 1507 * We have our description -- now we need to 1508 * cons up the zaggdata entry for it. 1509 */ 1510 aggdata = &zaggdata[i].dtahe_data; 1511 aggdata->dtada_size = agg->dtagd_size; 1512 aggdata->dtada_desc = agg; 1513 aggdata->dtada_handle = dtp; 1514 (void) dt_epid_lookup(dtp, agg->dtagd_epid, 1515 &aggdata->dtada_edesc, 1516 &aggdata->dtada_pdesc); 1517 aggdata->dtada_normal = 1; 1518 zaggdata[i].dtahe_hashval = 0; 1519 zaggdata[i].dtahe_size = agg->dtagd_size; 1520 break; 1521 } 1522 1523 if (zaggdata[i].dtahe_size == 0) { 1524 caddr_t data; 1525 1526 /* 1527 * We couldn't find this aggregation, meaning 1528 * that we have never seen it before for any 1529 * tuple _and_ this is an anonymous enabling. 1530 * That is, we're in the obscure case outlined 1531 * above. In this case, our defined behavior 1532 * is to format the data in the format of the 1533 * first non-zero aggregation -- of which, of 1534 * course, we know there to be at least one 1535 * (or nentries would have been zero). 1536 */ 1537 for (j = 0; j < naggvars; j++) { 1538 if (zaggdata[j].dtahe_size != 0) 1539 break; 1540 } 1541 1542 assert(j < naggvars); 1543 zaggdata[i] = zaggdata[j]; 1544 1545 data = zaggdata[i].dtahe_data.dtada_data; 1546 assert(data != NULL); 1547 } 1548 } 1549 } 1550 1551 /* 1552 * Now we need to allocate our zero-filled data for use for 1553 * aggregations that don't have a value corresponding to a given key. 1554 */ 1555 for (i = 0; i < naggvars; i++) { 1556 dtrace_aggdata_t *aggdata = &zaggdata[i].dtahe_data; 1557 dtrace_aggdesc_t *aggdesc = aggdata->dtada_desc; 1558 dtrace_recdesc_t *rec; 1559 uint64_t larg; 1560 caddr_t zdata; 1561 1562 zsize = zaggdata[i].dtahe_size; 1563 assert(zsize != 0); 1564 1565 if ((zdata = dt_zalloc(dtp, zsize)) == NULL) { 1566 /* 1567 * If we failed to allocated some zero-filled data, we 1568 * need to zero out the remaining dtada_data pointers 1569 * to prevent the wrong data from being freed below. 1570 */ 1571 for (j = i; j < naggvars; j++) 1572 zaggdata[j].dtahe_data.dtada_data = NULL; 1573 goto out; 1574 } 1575 1576 aggvar = aggvars[(i - sortpos + naggvars) % naggvars]; 1577 1578 /* 1579 * First, the easy bit. To maintain compatibility with 1580 * consumers that pull the compiler-generated ID out of the 1581 * data, we put that ID at the top of the zero-filled data. 1582 */ 1583 rec = &aggdesc->dtagd_rec[0]; 1584 /* LINTED - alignment */ 1585 *((dtrace_aggvarid_t *)(zdata + rec->dtrd_offset)) = aggvar; 1586 1587 rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1]; 1588 1589 /* 1590 * Now for the more complicated part. If (and only if) this 1591 * is an lquantize() aggregating action, zero-filled data is 1592 * not equivalent to an empty record: we must also get the 1593 * parameters for the lquantize(). 1594 */ 1595 if (rec->dtrd_action == DTRACEAGG_LQUANTIZE) { 1596 if (aggdata->dtada_data != NULL) { 1597 /* 1598 * The easier case here is if we actually have 1599 * some prototype data -- in which case we 1600 * manually dig it out of the aggregation 1601 * record. 1602 */ 1603 /* LINTED - alignment */ 1604 larg = *((uint64_t *)(aggdata->dtada_data + 1605 rec->dtrd_offset)); 1606 } else { 1607 /* 1608 * We don't have any prototype data. As a 1609 * result, we know that we _do_ have the 1610 * compiler-generated information. (If this 1611 * were an anonymous enabling, all of our 1612 * zero-filled data would have prototype data 1613 * -- either directly or indirectly.) So as 1614 * gross as it is, we'll grovel around in the 1615 * compiler-generated information to find the 1616 * lquantize() parameters. 1617 */ 1618 dtrace_stmtdesc_t *sdp; 1619 dt_ident_t *aid; 1620 dt_idsig_t *isp; 1621 1622 sdp = (dtrace_stmtdesc_t *)(uintptr_t) 1623 aggdesc->dtagd_rec[0].dtrd_uarg; 1624 aid = sdp->dtsd_aggdata; 1625 isp = (dt_idsig_t *)aid->di_data; 1626 assert(isp->dis_auxinfo != 0); 1627 larg = isp->dis_auxinfo; 1628 } 1629 1630 /* LINTED - alignment */ 1631 *((uint64_t *)(zdata + rec->dtrd_offset)) = larg; 1632 } 1633 1634 aggdata->dtada_data = zdata; 1635 } 1636 1637 /* 1638 * Now that we've dealt with setting up our zero-filled data, we can 1639 * allocate our sorted array, and take another pass over the data to 1640 * fill it. 1641 */ 1642 sorted = dt_alloc(dtp, nentries * sizeof (dt_ahashent_t *)); 1643 1644 if (sorted == NULL) 1645 goto out; 1646 1647 for (h = hash->dtah_all, i = 0; h != NULL; h = h->dtahe_nextall) { 1648 dtrace_aggvarid_t id; 1649 1650 if ((id = dt_aggregate_aggvarid(h)) > max || !map[id]) 1651 continue; 1652 1653 sorted[i++] = h; 1654 } 1655 1656 assert(i == nentries); 1657 1658 /* 1659 * We've loaded our array; now we need to sort by value to allow us 1660 * to create bundles of like value. We're going to acquire the 1661 * dt_qsort_lock here, and hold it across all of our subsequent 1662 * comparison and sorting. 1663 */ 1664 (void) pthread_mutex_lock(&dt_qsort_lock); 1665 1666 qsort(sorted, nentries, sizeof (dt_ahashent_t *), 1667 dt_aggregate_keyvarcmp); 1668 1669 /* 1670 * Now we need to go through and create bundles. Because the number 1671 * of bundles is bounded by the size of the sorted array, we're going 1672 * to reuse the underlying storage. And note that "bundle" is an 1673 * array of pointers to arrays of pointers to dt_ahashent_t -- making 1674 * its type (regrettably) "dt_ahashent_t ***". (Regrettable because 1675 * '*' -- like '_' and 'X' -- should never appear in triplicate in 1676 * an ideal world.) 1677 */ 1678 bundle = (dt_ahashent_t ***)sorted; 1679 1680 for (i = 1, start = 0; i <= nentries; i++) { 1681 if (i < nentries && 1682 dt_aggregate_keycmp(&sorted[i], &sorted[i - 1]) == 0) 1683 continue; 1684 1685 /* 1686 * We have a bundle boundary. Everything from start to 1687 * (i - 1) belongs in one bundle. 1688 */ 1689 assert(i - start <= naggvars); 1690 bundlesize = (naggvars + 2) * sizeof (dt_ahashent_t *); 1691 1692 if ((nbundle = dt_zalloc(dtp, bundlesize)) == NULL) { 1693 (void) pthread_mutex_unlock(&dt_qsort_lock); 1694 goto out; 1695 } 1696 1697 for (j = start; j < i; j++) { 1698 dtrace_aggvarid_t id = dt_aggregate_aggvarid(sorted[j]); 1699 1700 assert(id <= max); 1701 assert(map[id] != 0); 1702 assert(map[id] - 1 < naggvars); 1703 assert(nbundle[map[id] - 1] == NULL); 1704 nbundle[map[id] - 1] = sorted[j]; 1705 1706 if (nbundle[naggvars] == NULL) 1707 nbundle[naggvars] = sorted[j]; 1708 } 1709 1710 for (j = 0; j < naggvars; j++) { 1711 if (nbundle[j] != NULL) 1712 continue; 1713 1714 /* 1715 * Before we assume that this aggregation variable 1716 * isn't present (and fall back to using the 1717 * zero-filled data allocated earlier), check the 1718 * remap. If we have a remapping, we'll drop it in 1719 * here. Note that we might be remapping an 1720 * aggregation variable that isn't present for this 1721 * key; in this case, the aggregation data that we 1722 * copy will point to the zeroed data. 1723 */ 1724 if (remap != NULL && remap[j]) { 1725 assert(remap[j] - 1 < j); 1726 assert(nbundle[remap[j] - 1] != NULL); 1727 nbundle[j] = nbundle[remap[j] - 1]; 1728 } else { 1729 nbundle[j] = &zaggdata[j]; 1730 } 1731 } 1732 1733 bundle[nbundles++] = nbundle; 1734 start = i; 1735 } 1736 1737 /* 1738 * Now we need to re-sort based on the first value. 1739 */ 1740 dt_aggregate_qsort(dtp, bundle, nbundles, sizeof (dt_ahashent_t **), 1741 dt_aggregate_bundlecmp); 1742 1743 (void) pthread_mutex_unlock(&dt_qsort_lock); 1744 1745 /* 1746 * We're done! Now we just need to go back over the sorted bundles, 1747 * calling the function. 1748 */ 1749 data = alloca((naggvars + 1) * sizeof (dtrace_aggdata_t *)); 1750 1751 for (i = 0; i < nbundles; i++) { 1752 for (j = 0; j < naggvars; j++) 1753 data[j + 1] = NULL; 1754 1755 for (j = 0; j < naggvars; j++) { 1756 int ndx = j - sortpos; 1757 1758 if (ndx < 0) 1759 ndx += naggvars; 1760 1761 assert(bundle[i][ndx] != NULL); 1762 data[j + 1] = &bundle[i][ndx]->dtahe_data; 1763 } 1764 1765 for (j = 0; j < naggvars; j++) 1766 assert(data[j + 1] != NULL); 1767 1768 /* 1769 * The representative key is the last element in the bundle. 1770 * Assert that we have one, and then set it to be the first 1771 * element of data. 1772 */ 1773 assert(bundle[i][j] != NULL); 1774 data[0] = &bundle[i][j]->dtahe_data; 1775 1776 if ((rval = func(data, naggvars + 1, arg)) == -1) 1777 goto out; 1778 } 1779 1780 rval = 0; 1781 out: 1782 for (i = 0; i < nbundles; i++) 1783 dt_free(dtp, bundle[i]); 1784 1785 if (zaggdata != NULL) { 1786 for (i = 0; i < naggvars; i++) 1787 dt_free(dtp, zaggdata[i].dtahe_data.dtada_data); 1788 } 1789 1790 dt_free(dtp, zaggdata); 1791 dt_free(dtp, sorted); 1792 dt_free(dtp, remap); 1793 dt_free(dtp, map); 1794 1795 return (rval); 1796 } 1797 1798 int 1799 dtrace_aggregate_print(dtrace_hdl_t *dtp, FILE *fp, 1800 dtrace_aggregate_walk_f *func) 1801 { 1802 dt_print_aggdata_t pd; 1803 1804 pd.dtpa_dtp = dtp; 1805 pd.dtpa_fp = fp; 1806 pd.dtpa_allunprint = 1; 1807 1808 if (func == NULL) 1809 func = dtrace_aggregate_walk_sorted; 1810 1811 if ((*func)(dtp, dt_print_agg, &pd) == -1) 1812 return (dt_set_errno(dtp, dtp->dt_errno)); 1813 1814 return (0); 1815 } 1816 1817 void 1818 dtrace_aggregate_clear(dtrace_hdl_t *dtp) 1819 { 1820 dt_aggregate_t *agp = &dtp->dt_aggregate; 1821 dt_ahash_t *hash = &agp->dtat_hash; 1822 dt_ahashent_t *h; 1823 dtrace_aggdata_t *data; 1824 dtrace_aggdesc_t *aggdesc; 1825 dtrace_recdesc_t *rec; 1826 int i, max_cpus = agp->dtat_maxcpu; 1827 1828 for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) { 1829 aggdesc = h->dtahe_data.dtada_desc; 1830 rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1]; 1831 data = &h->dtahe_data; 1832 1833 bzero(&data->dtada_data[rec->dtrd_offset], rec->dtrd_size); 1834 1835 if (data->dtada_percpu == NULL) 1836 continue; 1837 1838 for (i = 0; i < max_cpus; i++) 1839 bzero(data->dtada_percpu[i], rec->dtrd_size); 1840 } 1841 } 1842 1843 void 1844 dt_aggregate_destroy(dtrace_hdl_t *dtp) 1845 { 1846 dt_aggregate_t *agp = &dtp->dt_aggregate; 1847 dt_ahash_t *hash = &agp->dtat_hash; 1848 dt_ahashent_t *h, *next; 1849 dtrace_aggdata_t *aggdata; 1850 int i, max_cpus = agp->dtat_maxcpu; 1851 1852 if (hash->dtah_hash == NULL) { 1853 assert(hash->dtah_all == NULL); 1854 } else { 1855 free(hash->dtah_hash); 1856 1857 for (h = hash->dtah_all; h != NULL; h = next) { 1858 next = h->dtahe_nextall; 1859 1860 aggdata = &h->dtahe_data; 1861 1862 if (aggdata->dtada_percpu != NULL) { 1863 for (i = 0; i < max_cpus; i++) 1864 free(aggdata->dtada_percpu[i]); 1865 free(aggdata->dtada_percpu); 1866 } 1867 1868 free(aggdata->dtada_data); 1869 free(h); 1870 } 1871 1872 hash->dtah_hash = NULL; 1873 hash->dtah_all = NULL; 1874 hash->dtah_size = 0; 1875 } 1876 1877 free(agp->dtat_buf.dtbd_data); 1878 free(agp->dtat_cpus); 1879 } 1880