1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * CPU Performance Counter system calls and device driver. 28 * 29 * This module uses a combination of thread context operators, and 30 * thread-specific data to export CPU performance counters 31 * via both a system call and a driver interface. 32 * 33 * There are three access methods exported - the 'shared' device 34 * and the 'private' and 'agent' variants of the system call. 35 * 36 * The shared device treats the performance counter registers as 37 * a processor metric, regardless of the work scheduled on them. 38 * The private system call treats the performance counter registers 39 * as a property of a single lwp. This is achieved by using the 40 * thread context operators to virtualize the contents of the 41 * performance counter registers between lwps. 42 * 43 * The agent method is like the private method, except that it must 44 * be accessed via /proc's agent lwp to allow the counter context of 45 * other threads to be examined safely. 46 * 47 * The shared usage fundamentally conflicts with the agent and private usage; 48 * almost all of the complexity of the module is needed to allow these two 49 * models to co-exist in a reasonable way. 50 */ 51 52 #include <sys/types.h> 53 #include <sys/file.h> 54 #include <sys/errno.h> 55 #include <sys/open.h> 56 #include <sys/cred.h> 57 #include <sys/conf.h> 58 #include <sys/stat.h> 59 #include <sys/processor.h> 60 #include <sys/cpuvar.h> 61 #include <sys/disp.h> 62 #include <sys/kmem.h> 63 #include <sys/modctl.h> 64 #include <sys/ddi.h> 65 #include <sys/sunddi.h> 66 #include <sys/nvpair.h> 67 #include <sys/policy.h> 68 #include <sys/machsystm.h> 69 #include <sys/cpc_impl.h> 70 #include <sys/cpc_pcbe.h> 71 #include <sys/kcpc.h> 72 73 static int kcpc_copyin_set(kcpc_set_t **set, void *ubuf, size_t len); 74 static int kcpc_verify_set(kcpc_set_t *set); 75 static uint32_t kcpc_nvlist_npairs(nvlist_t *list); 76 77 /* 78 * Generic attributes supported regardless of processor. 79 */ 80 81 #define ATTRLIST "picnum" 82 #define SEPARATOR "," 83 84 /* 85 * System call to access CPU performance counters. 86 */ 87 static int 88 cpc(int cmd, id_t lwpid, void *udata1, void *udata2, void *udata3) 89 { 90 kthread_t *t; 91 int error; 92 int size; 93 const char *str; 94 int code; 95 96 /* 97 * This CPC syscall should only be loaded if it found a PCBE to use. 98 */ 99 ASSERT(pcbe_ops != NULL); 100 101 if (curproc->p_agenttp == curthread) { 102 /* 103 * Only if /proc is invoking this system call from 104 * the agent thread do we allow the caller to examine 105 * the contexts of other lwps in the process. And 106 * because we know we're the agent, we know we don't 107 * have to grab p_lock because no-one else can change 108 * the state of the process. 109 */ 110 if ((t = idtot(curproc, lwpid)) == NULL || t == curthread) 111 return (set_errno(ESRCH)); 112 ASSERT(t->t_tid == lwpid && ttolwp(t) != NULL); 113 } else 114 t = curthread; 115 116 if (t->t_cpc_set == NULL && (cmd == CPC_SAMPLE || cmd == CPC_RELE)) 117 return (set_errno(EINVAL)); 118 119 switch (cmd) { 120 case CPC_BIND: 121 /* 122 * udata1 = pointer to packed nvlist buffer 123 * udata2 = size of packed nvlist buffer 124 * udata3 = User addr to return error subcode in. 125 */ 126 127 rw_enter(&kcpc_cpuctx_lock, RW_READER); 128 if (kcpc_cpuctx || dtrace_cpc_in_use) { 129 rw_exit(&kcpc_cpuctx_lock); 130 return (set_errno(EAGAIN)); 131 } 132 133 if (kcpc_hw_lwp_hook() != 0) { 134 rw_exit(&kcpc_cpuctx_lock); 135 return (set_errno(EACCES)); 136 } 137 138 /* 139 * An LWP may only have one set bound to it at a time; if there 140 * is a set bound to this LWP already, we unbind it here. 141 */ 142 if (t->t_cpc_set != NULL) 143 (void) kcpc_unbind(t->t_cpc_set); 144 ASSERT(t->t_cpc_set == NULL); 145 146 if ((error = kcpc_copyin_set(&t->t_cpc_set, udata1, 147 (size_t)udata2)) != 0) { 148 rw_exit(&kcpc_cpuctx_lock); 149 return (set_errno(error)); 150 } 151 152 if ((error = kcpc_verify_set(t->t_cpc_set)) != 0) { 153 rw_exit(&kcpc_cpuctx_lock); 154 kcpc_free_set(t->t_cpc_set); 155 t->t_cpc_set = NULL; 156 if (copyout(&error, udata3, sizeof (error)) == -1) 157 return (set_errno(EFAULT)); 158 return (set_errno(EINVAL)); 159 } 160 161 if ((error = kcpc_bind_thread(t->t_cpc_set, t, &code)) != 0) { 162 rw_exit(&kcpc_cpuctx_lock); 163 kcpc_free_set(t->t_cpc_set); 164 t->t_cpc_set = NULL; 165 /* 166 * EINVAL and EACCES are the only errors with more 167 * specific subcodes. 168 */ 169 if ((error == EINVAL || error == EACCES) && 170 copyout(&code, udata3, sizeof (code)) == -1) 171 return (set_errno(EFAULT)); 172 return (set_errno(error)); 173 } 174 175 rw_exit(&kcpc_cpuctx_lock); 176 return (0); 177 case CPC_SAMPLE: 178 /* 179 * udata1 = pointer to user's buffer 180 * udata2 = pointer to user's hrtime 181 * udata3 = pointer to user's tick 182 */ 183 /* 184 * We only allow thread-bound sets to be sampled via the 185 * syscall, so if this set has a CPU-bound context, return an 186 * error. 187 */ 188 if (t->t_cpc_set->ks_ctx->kc_cpuid != -1) 189 return (set_errno(EINVAL)); 190 if ((error = kcpc_sample(t->t_cpc_set, udata1, udata2, 191 udata3)) != 0) 192 return (set_errno(error)); 193 194 return (0); 195 case CPC_PRESET: 196 case CPC_RESTART: 197 /* 198 * These are valid only if this lwp has a bound set. 199 */ 200 if (t->t_cpc_set == NULL) 201 return (set_errno(EINVAL)); 202 if (cmd == CPC_PRESET) { 203 /* 204 * The preset is shipped up to us from userland in two 205 * parts. This lets us handle 64-bit values from 32-bit 206 * and 64-bit applications in the same manner. 207 * 208 * udata1 = index of request to preset 209 * udata2 = new 64-bit preset (most sig. 32 bits) 210 * udata3 = new 64-bit preset (least sig. 32 bits) 211 */ 212 if ((error = kcpc_preset(t->t_cpc_set, (intptr_t)udata1, 213 ((uint64_t)(uintptr_t)udata2 << 32ULL) | 214 (uint64_t)(uintptr_t)udata3)) != 0) 215 return (set_errno(error)); 216 } else { 217 /* 218 * udata[1-3] = unused 219 */ 220 if ((error = kcpc_restart(t->t_cpc_set)) != 0) 221 return (set_errno(error)); 222 } 223 return (0); 224 case CPC_ENABLE: 225 case CPC_DISABLE: 226 udata1 = 0; 227 /*FALLTHROUGH*/ 228 case CPC_USR_EVENTS: 229 case CPC_SYS_EVENTS: 230 if (t != curthread || t->t_cpc_set == NULL) 231 return (set_errno(EINVAL)); 232 /* 233 * Provided for backwards compatibility with CPCv1. 234 * 235 * Stop the counters and record the current counts. Use the 236 * counts as the preset to rebind a new set with the requests 237 * reconfigured as requested. 238 * 239 * udata1: 1 == enable; 0 == disable 240 * udata{2,3}: unused 241 */ 242 rw_enter(&kcpc_cpuctx_lock, RW_READER); 243 if ((error = kcpc_enable(t, 244 cmd, (int)(uintptr_t)udata1)) != 0) { 245 rw_exit(&kcpc_cpuctx_lock); 246 return (set_errno(error)); 247 } 248 rw_exit(&kcpc_cpuctx_lock); 249 return (0); 250 case CPC_NPIC: 251 return (cpc_ncounters); 252 case CPC_CAPS: 253 return (pcbe_ops->pcbe_caps); 254 case CPC_EVLIST_SIZE: 255 case CPC_LIST_EVENTS: 256 /* 257 * udata1 = pointer to user's int or buffer 258 * udata2 = picnum 259 * udata3 = unused 260 */ 261 if ((uintptr_t)udata2 >= cpc_ncounters) 262 return (set_errno(EINVAL)); 263 264 size = strlen( 265 pcbe_ops->pcbe_list_events((uintptr_t)udata2)) + 1; 266 267 if (cmd == CPC_EVLIST_SIZE) { 268 if (suword32(udata1, size) == -1) 269 return (set_errno(EFAULT)); 270 } else { 271 if (copyout( 272 pcbe_ops->pcbe_list_events((uintptr_t)udata2), 273 udata1, size) == -1) 274 return (set_errno(EFAULT)); 275 } 276 return (0); 277 case CPC_ATTRLIST_SIZE: 278 case CPC_LIST_ATTRS: 279 /* 280 * udata1 = pointer to user's int or buffer 281 * udata2 = unused 282 * udata3 = unused 283 * 284 * attrlist size is length of PCBE-supported attributes, plus 285 * room for "picnum\0" plus an optional ',' separator char. 286 */ 287 str = pcbe_ops->pcbe_list_attrs(); 288 size = strlen(str) + sizeof (SEPARATOR ATTRLIST) + 1; 289 if (str[0] != '\0') 290 /* 291 * A ',' separator character is necessary. 292 */ 293 size += 1; 294 295 if (cmd == CPC_ATTRLIST_SIZE) { 296 if (suword32(udata1, size) == -1) 297 return (set_errno(EFAULT)); 298 } else { 299 /* 300 * Copyout the PCBE attributes, and then append the 301 * generic attribute list (with separator if necessary). 302 */ 303 if (copyout(str, udata1, strlen(str)) == -1) 304 return (set_errno(EFAULT)); 305 if (str[0] != '\0') { 306 if (copyout(SEPARATOR ATTRLIST, 307 ((char *)udata1) + strlen(str), 308 strlen(SEPARATOR ATTRLIST) + 1) 309 == -1) 310 return (set_errno(EFAULT)); 311 } else 312 if (copyout(ATTRLIST, 313 (char *)udata1 + strlen(str), 314 strlen(ATTRLIST) + 1) == -1) 315 return (set_errno(EFAULT)); 316 } 317 return (0); 318 case CPC_IMPL_NAME: 319 case CPC_CPUREF: 320 /* 321 * udata1 = pointer to user's buffer 322 * udata2 = unused 323 * udata3 = unused 324 */ 325 if (cmd == CPC_IMPL_NAME) { 326 str = pcbe_ops->pcbe_impl_name(); 327 ASSERT(strlen(str) < CPC_MAX_IMPL_NAME); 328 } else { 329 str = pcbe_ops->pcbe_cpuref(); 330 ASSERT(strlen(str) < CPC_MAX_CPUREF); 331 } 332 333 if (copyout(str, udata1, strlen(str) + 1) != 0) 334 return (set_errno(EFAULT)); 335 return (0); 336 case CPC_INVALIDATE: 337 kcpc_invalidate(t); 338 return (0); 339 case CPC_RELE: 340 if ((error = kcpc_unbind(t->t_cpc_set)) != 0) 341 return (set_errno(error)); 342 return (0); 343 default: 344 return (set_errno(EINVAL)); 345 } 346 } 347 348 /* 349 * The 'shared' device allows direct access to the 350 * performance counter control register of the current CPU. 351 * The major difference between the contexts created here and those 352 * above is that the context handlers are -not- installed, thus 353 * no context switching behaviour occurs. 354 * 355 * Because they manipulate per-cpu state, these ioctls can 356 * only be invoked from a bound lwp, by a caller with the cpc_cpu privilege 357 * who can open the relevant entry in /devices (the act of holding it open 358 * causes other uses of the counters to be suspended). 359 * 360 * Note that for correct results, the caller -must- ensure that 361 * all existing per-lwp contexts are either inactive or marked invalid; 362 * that's what the open routine does. 363 */ 364 /*ARGSUSED*/ 365 static int 366 kcpc_ioctl(dev_t dev, int cmd, intptr_t data, int flags, cred_t *cr, int *rvp) 367 { 368 kthread_t *t = curthread; 369 processorid_t cpuid; 370 void *udata1 = NULL; 371 void *udata2 = NULL; 372 void *udata3 = NULL; 373 int error; 374 int code; 375 376 STRUCT_DECL(__cpc_args, args); 377 378 STRUCT_INIT(args, flags); 379 380 if (curthread->t_bind_cpu != getminor(dev)) 381 return (EAGAIN); /* someone unbound it? */ 382 383 cpuid = getminor(dev); 384 385 if (cmd == CPCIO_BIND || cmd == CPCIO_SAMPLE) { 386 if (copyin((void *)data, STRUCT_BUF(args), 387 STRUCT_SIZE(args)) == -1) 388 return (EFAULT); 389 390 udata1 = STRUCT_FGETP(args, udata1); 391 udata2 = STRUCT_FGETP(args, udata2); 392 udata3 = STRUCT_FGETP(args, udata3); 393 } 394 395 switch (cmd) { 396 case CPCIO_BIND: 397 /* 398 * udata1 = pointer to packed nvlist buffer 399 * udata2 = size of packed nvlist buffer 400 * udata3 = User addr to return error subcode in. 401 */ 402 if (t->t_cpc_set != NULL) { 403 (void) kcpc_unbind(t->t_cpc_set); 404 ASSERT(t->t_cpc_set == NULL); 405 } 406 407 if ((error = kcpc_copyin_set(&t->t_cpc_set, udata1, 408 (size_t)udata2)) != 0) { 409 return (error); 410 } 411 412 if ((error = kcpc_verify_set(t->t_cpc_set)) != 0) { 413 kcpc_free_set(t->t_cpc_set); 414 t->t_cpc_set = NULL; 415 if (copyout(&error, udata3, sizeof (error)) == -1) 416 return (EFAULT); 417 return (EINVAL); 418 } 419 420 if ((error = kcpc_bind_cpu(t->t_cpc_set, cpuid, &code)) != 0) { 421 kcpc_free_set(t->t_cpc_set); 422 t->t_cpc_set = NULL; 423 /* 424 * Subcodes are only returned for EINVAL and EACCESS. 425 */ 426 if ((error == EINVAL || error == EACCES) && 427 copyout(&code, udata3, sizeof (code)) == -1) 428 return (EFAULT); 429 return (error); 430 } 431 432 return (0); 433 case CPCIO_SAMPLE: 434 /* 435 * udata1 = pointer to user's buffer 436 * udata2 = pointer to user's hrtime 437 * udata3 = pointer to user's tick 438 */ 439 /* 440 * Only CPU-bound sets may be sampled via the ioctl(). If this 441 * set has no CPU-bound context, return an error. 442 */ 443 if (t->t_cpc_set == NULL) 444 return (EINVAL); 445 if ((error = kcpc_sample(t->t_cpc_set, udata1, udata2, 446 udata3)) != 0) 447 return (error); 448 return (0); 449 case CPCIO_RELE: 450 if (t->t_cpc_set == NULL) 451 return (EINVAL); 452 return (kcpc_unbind(t->t_cpc_set)); 453 default: 454 return (EINVAL); 455 } 456 } 457 458 /* 459 * The device supports multiple opens, but only one open 460 * is allowed per processor. This is to enable multiple 461 * instances of tools looking at different processors. 462 */ 463 #define KCPC_MINOR_SHARED ((minor_t)0x3fffful) 464 465 static ulong_t *kcpc_cpumap; /* bitmap of cpus */ 466 467 /*ARGSUSED1*/ 468 static int 469 kcpc_open(dev_t *dev, int flags, int otyp, cred_t *cr) 470 { 471 processorid_t cpuid; 472 int error; 473 474 ASSERT(pcbe_ops != NULL); 475 476 if ((error = secpolicy_cpc_cpu(cr)) != 0) 477 return (error); 478 if (getminor(*dev) != KCPC_MINOR_SHARED) 479 return (ENXIO); 480 if ((cpuid = curthread->t_bind_cpu) == PBIND_NONE) 481 return (EINVAL); 482 if (cpuid > max_cpuid) 483 return (EINVAL); 484 485 rw_enter(&kcpc_cpuctx_lock, RW_WRITER); 486 if (++kcpc_cpuctx == 1) { 487 ASSERT(kcpc_cpumap == NULL); 488 489 /* 490 * Bail out if DTrace is already using the counters. 491 */ 492 if (dtrace_cpc_in_use) { 493 kcpc_cpuctx--; 494 rw_exit(&kcpc_cpuctx_lock); 495 return (EAGAIN); 496 } 497 kcpc_cpumap = kmem_zalloc(BT_SIZEOFMAP(max_cpuid + 1), 498 KM_SLEEP); 499 /* 500 * When this device is open for processor-based contexts, 501 * no further lwp-based contexts can be created. 502 * 503 * Since this is the first open, ensure that all existing 504 * contexts are invalidated. 505 */ 506 kcpc_invalidate_all(); 507 } else if (BT_TEST(kcpc_cpumap, cpuid)) { 508 kcpc_cpuctx--; 509 rw_exit(&kcpc_cpuctx_lock); 510 return (EAGAIN); 511 } else if (kcpc_hw_cpu_hook(cpuid, kcpc_cpumap) != 0) { 512 kcpc_cpuctx--; 513 rw_exit(&kcpc_cpuctx_lock); 514 return (EACCES); 515 } 516 BT_SET(kcpc_cpumap, cpuid); 517 rw_exit(&kcpc_cpuctx_lock); 518 519 *dev = makedevice(getmajor(*dev), (minor_t)cpuid); 520 521 return (0); 522 } 523 524 /*ARGSUSED1*/ 525 static int 526 kcpc_close(dev_t dev, int flags, int otyp, cred_t *cr) 527 { 528 rw_enter(&kcpc_cpuctx_lock, RW_WRITER); 529 BT_CLEAR(kcpc_cpumap, getminor(dev)); 530 if (--kcpc_cpuctx == 0) { 531 kmem_free(kcpc_cpumap, BT_SIZEOFMAP(max_cpuid + 1)); 532 kcpc_cpumap = NULL; 533 } 534 ASSERT(kcpc_cpuctx >= 0); 535 rw_exit(&kcpc_cpuctx_lock); 536 537 return (0); 538 } 539 540 /* 541 * Sane boundaries on the size of packed lists. In bytes. 542 */ 543 #define CPC_MIN_PACKSIZE 4 544 #define CPC_MAX_PACKSIZE 10000 545 546 /* 547 * Sane boundary on the number of requests a set can contain. 548 */ 549 #define CPC_MAX_NREQS 100 550 551 /* 552 * Sane boundary on the number of attributes a request can contain. 553 */ 554 #define CPC_MAX_ATTRS 50 555 556 /* 557 * Copy in a packed nvlist from the user and create a request set out of it. 558 * If successful, return 0 and store a pointer to the set we've created. Returns 559 * error code on error. 560 */ 561 int 562 kcpc_copyin_set(kcpc_set_t **inset, void *ubuf, size_t len) 563 { 564 kcpc_set_t *set; 565 int i; 566 int j; 567 char *packbuf; 568 569 nvlist_t *nvl; 570 nvpair_t *nvp = NULL; 571 572 nvlist_t *attrs; 573 nvpair_t *nvp_attr; 574 kcpc_attr_t *attrp; 575 576 nvlist_t **reqlist; 577 uint_t nreqs; 578 uint64_t uint64; 579 uint32_t uint32; 580 uint32_t setflags = (uint32_t)-1; 581 char *string; 582 char *name; 583 584 if (len < CPC_MIN_PACKSIZE || len > CPC_MAX_PACKSIZE) 585 return (EINVAL); 586 587 packbuf = kmem_alloc(len, KM_SLEEP); 588 589 if (copyin(ubuf, packbuf, len) == -1) { 590 kmem_free(packbuf, len); 591 return (EFAULT); 592 } 593 594 if (nvlist_unpack(packbuf, len, &nvl, KM_SLEEP) != 0) { 595 kmem_free(packbuf, len); 596 return (EINVAL); 597 } 598 599 /* 600 * The nvlist has been unpacked so there is no need for the packed 601 * representation from this point on. 602 */ 603 kmem_free(packbuf, len); 604 605 i = 0; 606 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 607 switch (nvpair_type(nvp)) { 608 case DATA_TYPE_UINT32: 609 if (strcmp(nvpair_name(nvp), "flags") != 0 || 610 nvpair_value_uint32(nvp, &setflags) != 0) { 611 nvlist_free(nvl); 612 return (EINVAL); 613 } 614 break; 615 case DATA_TYPE_NVLIST_ARRAY: 616 if (strcmp(nvpair_name(nvp), "reqs") != 0 || 617 nvpair_value_nvlist_array(nvp, &reqlist, 618 &nreqs) != 0) { 619 nvlist_free(nvl); 620 return (EINVAL); 621 } 622 break; 623 default: 624 nvlist_free(nvl); 625 return (EINVAL); 626 } 627 i++; 628 } 629 630 /* 631 * There should be two members in the top-level nvlist: 632 * an array of nvlists consisting of the requests, and flags. 633 * Anything else is an invalid set. 634 */ 635 if (i != 2) { 636 nvlist_free(nvl); 637 return (EINVAL); 638 } 639 640 if (nreqs > CPC_MAX_NREQS) { 641 nvlist_free(nvl); 642 return (EINVAL); 643 } 644 645 /* 646 * The requests are now stored in the nvlist array at reqlist. 647 * Note that the use of kmem_zalloc() to alloc the kcpc_set_t means 648 * we don't need to call the init routines for ks_lock and ks_condv. 649 */ 650 set = kmem_zalloc(sizeof (kcpc_set_t), KM_SLEEP); 651 set->ks_req = (kcpc_request_t *)kmem_zalloc(sizeof (kcpc_request_t) * 652 nreqs, KM_SLEEP); 653 set->ks_nreqs = nreqs; 654 /* 655 * If the nvlist didn't contain a flags member, setflags was initialized 656 * with an illegal value and this set will fail sanity checks later on. 657 */ 658 set->ks_flags = setflags; 659 /* 660 * Initialize bind/unbind set synchronization. 661 */ 662 set->ks_state &= ~KCPC_SET_BOUND; 663 664 /* 665 * Build the set up one request at a time, always keeping it self- 666 * consistent so we can give it to kcpc_free_set() if we need to back 667 * out and return and error. 668 */ 669 for (i = 0; i < nreqs; i++) { 670 nvp = NULL; 671 set->ks_req[i].kr_picnum = -1; 672 while ((nvp = nvlist_next_nvpair(reqlist[i], nvp)) != NULL) { 673 name = nvpair_name(nvp); 674 switch (nvpair_type(nvp)) { 675 case DATA_TYPE_UINT32: 676 if (nvpair_value_uint32(nvp, &uint32) == EINVAL) 677 goto inval; 678 if (strcmp(name, "cr_flags") == 0) 679 set->ks_req[i].kr_flags = uint32; 680 if (strcmp(name, "cr_index") == 0) 681 set->ks_req[i].kr_index = uint32; 682 break; 683 case DATA_TYPE_UINT64: 684 if (nvpair_value_uint64(nvp, &uint64) == EINVAL) 685 goto inval; 686 if (strcmp(name, "cr_preset") == 0) 687 set->ks_req[i].kr_preset = uint64; 688 break; 689 case DATA_TYPE_STRING: 690 if (nvpair_value_string(nvp, &string) == EINVAL) 691 goto inval; 692 if (strcmp(name, "cr_event") == 0) 693 (void) strncpy(set->ks_req[i].kr_event, 694 string, CPC_MAX_EVENT_LEN); 695 break; 696 case DATA_TYPE_NVLIST: 697 if (strcmp(name, "cr_attr") != 0) 698 goto inval; 699 if (nvpair_value_nvlist(nvp, &attrs) == EINVAL) 700 goto inval; 701 nvp_attr = NULL; 702 /* 703 * If the picnum has been specified as an 704 * attribute, consume that attribute here and 705 * remove it from the list of attributes. 706 */ 707 if (nvlist_lookup_uint64(attrs, "picnum", 708 &uint64) == 0) { 709 if (nvlist_remove(attrs, "picnum", 710 DATA_TYPE_UINT64) != 0) 711 panic("nvlist %p faulty", 712 (void *)attrs); 713 set->ks_req[i].kr_picnum = uint64; 714 } 715 716 if ((set->ks_req[i].kr_nattrs = 717 kcpc_nvlist_npairs(attrs)) == 0) 718 break; 719 720 if (set->ks_req[i].kr_nattrs > CPC_MAX_ATTRS) 721 goto inval; 722 723 set->ks_req[i].kr_attr = 724 kmem_alloc(set->ks_req[i].kr_nattrs * 725 sizeof (kcpc_attr_t), KM_SLEEP); 726 j = 0; 727 728 while ((nvp_attr = nvlist_next_nvpair(attrs, 729 nvp_attr)) != NULL) { 730 attrp = &set->ks_req[i].kr_attr[j]; 731 732 if (nvpair_type(nvp_attr) != 733 DATA_TYPE_UINT64) 734 goto inval; 735 736 (void) strncpy(attrp->ka_name, 737 nvpair_name(nvp_attr), 738 CPC_MAX_ATTR_LEN); 739 740 if (nvpair_value_uint64(nvp_attr, 741 &(attrp->ka_val)) == EINVAL) 742 goto inval; 743 j++; 744 } 745 ASSERT(j == set->ks_req[i].kr_nattrs); 746 default: 747 break; 748 } 749 } 750 } 751 752 nvlist_free(nvl); 753 *inset = set; 754 return (0); 755 756 inval: 757 nvlist_free(nvl); 758 kcpc_free_set(set); 759 return (EINVAL); 760 } 761 762 /* 763 * Count the number of nvpairs in the supplied nvlist. 764 */ 765 static uint32_t 766 kcpc_nvlist_npairs(nvlist_t *list) 767 { 768 nvpair_t *nvp = NULL; 769 uint32_t n = 0; 770 771 while ((nvp = nvlist_next_nvpair(list, nvp)) != NULL) 772 n++; 773 774 return (n); 775 } 776 777 /* 778 * Performs sanity checks on the given set. 779 * Returns 0 if the set checks out OK. 780 * Returns a detailed error subcode, or -1 if there is no applicable subcode. 781 */ 782 static int 783 kcpc_verify_set(kcpc_set_t *set) 784 { 785 kcpc_request_t *rp; 786 int i; 787 uint64_t bitmap = 0; 788 int n; 789 790 if (set->ks_nreqs > cpc_ncounters) 791 return (-1); 792 793 if (CPC_SET_VALID_FLAGS(set->ks_flags) == 0) 794 return (-1); 795 796 for (i = 0; i < set->ks_nreqs; i++) { 797 rp = &set->ks_req[i]; 798 799 /* 800 * The following comparison must cast cpc_ncounters to an int, 801 * because kr_picnum will be -1 if the request didn't explicitly 802 * choose a PIC. 803 */ 804 if (rp->kr_picnum >= (int)cpc_ncounters) 805 return (CPC_INVALID_PICNUM); 806 807 /* 808 * Of the pics whose physical picnum has been specified, make 809 * sure each PIC appears only once in set. 810 */ 811 if ((n = set->ks_req[i].kr_picnum) != -1) { 812 if ((bitmap & (1 << n)) != 0) 813 return (-1); 814 bitmap |= (1 << n); 815 } 816 817 /* 818 * Make sure the requested index falls within the range of all 819 * requests. 820 */ 821 if (rp->kr_index < 0 || rp->kr_index >= set->ks_nreqs) 822 return (-1); 823 824 /* 825 * Make sure there are no unknown flags. 826 */ 827 if (KCPC_REQ_VALID_FLAGS(rp->kr_flags) == 0) 828 return (CPC_REQ_INVALID_FLAGS); 829 } 830 831 return (0); 832 } 833 834 static struct cb_ops cb_ops = { 835 kcpc_open, 836 kcpc_close, 837 nodev, /* strategy */ 838 nodev, /* print */ 839 nodev, /* dump */ 840 nodev, /* read */ 841 nodev, /* write */ 842 kcpc_ioctl, 843 nodev, /* devmap */ 844 nodev, /* mmap */ 845 nodev, /* segmap */ 846 nochpoll, /* poll */ 847 ddi_prop_op, 848 NULL, 849 D_NEW | D_MP 850 }; 851 852 /*ARGSUSED*/ 853 static int 854 kcpc_probe(dev_info_t *devi) 855 { 856 return (DDI_PROBE_SUCCESS); 857 } 858 859 static dev_info_t *kcpc_devi; 860 861 static int 862 kcpc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 863 { 864 if (cmd != DDI_ATTACH) 865 return (DDI_FAILURE); 866 kcpc_devi = devi; 867 return (ddi_create_minor_node(devi, "shared", S_IFCHR, 868 KCPC_MINOR_SHARED, DDI_PSEUDO, 0)); 869 } 870 871 /*ARGSUSED*/ 872 static int 873 kcpc_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result) 874 { 875 switch (cmd) { 876 case DDI_INFO_DEVT2DEVINFO: 877 switch (getminor((dev_t)arg)) { 878 case KCPC_MINOR_SHARED: 879 *result = kcpc_devi; 880 return (DDI_SUCCESS); 881 default: 882 break; 883 } 884 break; 885 case DDI_INFO_DEVT2INSTANCE: 886 *result = 0; 887 return (DDI_SUCCESS); 888 default: 889 break; 890 } 891 892 return (DDI_FAILURE); 893 } 894 895 static struct dev_ops dev_ops = { 896 DEVO_REV, 897 0, 898 kcpc_getinfo, 899 nulldev, /* identify */ 900 kcpc_probe, 901 kcpc_attach, 902 nodev, /* detach */ 903 nodev, /* reset */ 904 &cb_ops, 905 (struct bus_ops *)0, 906 NULL, 907 ddi_quiesce_not_needed, /* quiesce */ 908 }; 909 910 static struct modldrv modldrv = { 911 &mod_driverops, 912 "cpc sampling driver", 913 &dev_ops 914 }; 915 916 static struct sysent cpc_sysent = { 917 5, 918 SE_NOUNLOAD | SE_ARGC | SE_32RVAL1, 919 cpc 920 }; 921 922 static struct modlsys modlsys = { 923 &mod_syscallops, 924 "cpc sampling system call", 925 &cpc_sysent 926 }; 927 928 #ifdef _SYSCALL32_IMPL 929 static struct modlsys modlsys32 = { 930 &mod_syscallops32, 931 "32-bit cpc sampling system call", 932 &cpc_sysent 933 }; 934 #endif 935 936 static struct modlinkage modl = { 937 MODREV_1, 938 &modldrv, 939 &modlsys, 940 #ifdef _SYSCALL32_IMPL 941 &modlsys32, 942 #endif 943 }; 944 945 static void 946 kcpc_init(void) 947 { 948 long hash; 949 950 rw_init(&kcpc_cpuctx_lock, NULL, RW_DEFAULT, NULL); 951 for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) 952 mutex_init(&kcpc_ctx_llock[hash], 953 NULL, MUTEX_DRIVER, (void *)(uintptr_t)15); 954 } 955 956 static void 957 kcpc_fini(void) 958 { 959 long hash; 960 961 for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) 962 mutex_destroy(&kcpc_ctx_llock[hash]); 963 rw_destroy(&kcpc_cpuctx_lock); 964 } 965 966 int 967 _init(void) 968 { 969 int ret; 970 971 if (kcpc_hw_load_pcbe() != 0) 972 return (ENOTSUP); 973 974 kcpc_init(); 975 if ((ret = mod_install(&modl)) != 0) 976 kcpc_fini(); 977 return (ret); 978 } 979 980 int 981 _fini(void) 982 { 983 int ret; 984 985 if ((ret = mod_remove(&modl)) == 0) 986 kcpc_fini(); 987 return (ret); 988 } 989 990 int 991 _info(struct modinfo *mi) 992 { 993 return (mod_info(&modl, mi)); 994 } 995