1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 27 /* 28 * CPU Performance Counter system calls and device driver. 29 * 30 * This module uses a combination of thread context operators, and 31 * thread-specific data to export CPU performance counters 32 * via both a system call and a driver interface. 33 * 34 * There are three access methods exported - the 'shared' device 35 * and the 'private' and 'agent' variants of the system call. 36 * 37 * The shared device treats the performance counter registers as 38 * a processor metric, regardless of the work scheduled on them. 39 * The private system call treats the performance counter registers 40 * as a property of a single lwp. This is achieved by using the 41 * thread context operators to virtualize the contents of the 42 * performance counter registers between lwps. 43 * 44 * The agent method is like the private method, except that it must 45 * be accessed via /proc's agent lwp to allow the counter context of 46 * other threads to be examined safely. 
 *
 * The shared usage fundamentally conflicts with the agent and private usage;
 * almost all of the complexity of the module is needed to allow these two
 * models to co-exist in a reasonable way.
 */

#include <sys/types.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/cred.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/processor.h>
#include <sys/cpuvar.h>
#include <sys/disp.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/nvpair.h>
#include <sys/policy.h>
#include <sys/machsystm.h>
#include <sys/cpc_impl.h>
#include <sys/cpc_pcbe.h>
#include <sys/kcpc.h>

static int kcpc_copyin_set(kcpc_set_t **set, void *ubuf, size_t len);
static int kcpc_verify_set(kcpc_set_t *set);
static uint32_t kcpc_nvlist_npairs(nvlist_t *list);

/*
 * Generic attributes supported regardless of processor.
 */

#define	ATTRLIST "picnum"
#define	SEPARATOR ","

/*
 * System call to access CPU performance counters.
 *
 * cmd selects the operation; lwpid names the target lwp (only honoured
 * when called via the /proc agent lwp); the meaning of udata1..3 is
 * per-command and documented at each case below.  Returns 0 or sets
 * errno via set_errno().
 */
static int
cpc(int cmd, id_t lwpid, void *udata1, void *udata2, void *udata3)
{
	kthread_t	*t;
	int		error;
	int		size;
	const char	*str;
	int		code;

	/*
	 * This CPC syscall should only be loaded if it found a PCBE to use.
	 */
	ASSERT(pcbe_ops != NULL);

	if (curproc->p_agenttp == curthread) {
		/*
		 * Only if /proc is invoking this system call from
		 * the agent thread do we allow the caller to examine
		 * the contexts of other lwps in the process. And
		 * because we know we're the agent, we know we don't
		 * have to grab p_lock because no-one else can change
		 * the state of the process.
		 */
		if ((t = idtot(curproc, lwpid)) == NULL || t == curthread)
			return (set_errno(ESRCH));
		ASSERT(t->t_tid == lwpid && ttolwp(t) != NULL);
	} else
		t = curthread;

	if (t->t_cpc_set == NULL && (cmd == CPC_SAMPLE || cmd == CPC_RELE))
		return (set_errno(EINVAL));

	switch (cmd) {
	case CPC_BIND:
		/*
		 * udata1 = pointer to packed nvlist buffer
		 * udata2 = size of packed nvlist buffer
		 * udata3 = User addr to return error subcode in.
		 */

		/*
		 * Taken as reader: lwp-based bindings may proceed in
		 * parallel, but are mutually exclusive with the shared
		 * device, whose open routine takes this lock as writer
		 * and bumps kcpc_cpuctx.
		 */
		rw_enter(&kcpc_cpuctx_lock, RW_READER);
		if (kcpc_cpuctx) {
			rw_exit(&kcpc_cpuctx_lock);
			return (set_errno(EAGAIN));
		}

		if (kcpc_hw_lwp_hook() != 0) {
			rw_exit(&kcpc_cpuctx_lock);
			return (set_errno(EACCES));
		}

		/*
		 * An LWP may only have one set bound to it at a time; if there
		 * is a set bound to this LWP already, we unbind it here.
		 */
		if (t->t_cpc_set != NULL)
			(void) kcpc_unbind(t->t_cpc_set);
		ASSERT(t->t_cpc_set == NULL);

		if ((error = kcpc_copyin_set(&t->t_cpc_set, udata1,
		    (size_t)udata2)) != 0) {
			rw_exit(&kcpc_cpuctx_lock);
			return (set_errno(error));
		}

		if ((error = kcpc_verify_set(t->t_cpc_set)) != 0) {
			rw_exit(&kcpc_cpuctx_lock);
			kcpc_free_set(t->t_cpc_set);
			t->t_cpc_set = NULL;
			/* hand the detailed subcode back to userland */
			if (copyout(&error, udata3, sizeof (error)) == -1)
				return (set_errno(EFAULT));
			return (set_errno(EINVAL));
		}

		if ((error = kcpc_bind_thread(t->t_cpc_set, t, &code)) != 0) {
			rw_exit(&kcpc_cpuctx_lock);
			kcpc_free_set(t->t_cpc_set);
			t->t_cpc_set = NULL;
			/*
			 * EINVAL and EACCES are the only errors with more
			 * specific subcodes.
			 */
			if ((error == EINVAL || error == EACCES) &&
			    copyout(&code, udata3, sizeof (code)) == -1)
				return (set_errno(EFAULT));
			return (set_errno(error));
		}

		rw_exit(&kcpc_cpuctx_lock);
		return (0);
	case CPC_SAMPLE:
		/*
		 * udata1 = pointer to user's buffer
		 * udata2 = pointer to user's hrtime
		 * udata3 = pointer to user's tick
		 */
		/*
		 * We only allow thread-bound sets to be sampled via the
		 * syscall, so if this set has a CPU-bound context, return an
		 * error.
		 */
		if (t->t_cpc_set->ks_ctx->kc_cpuid != -1)
			return (set_errno(EINVAL));
		if ((error = kcpc_sample(t->t_cpc_set, udata1, udata2,
		    udata3)) != 0)
			return (set_errno(error));

		return (0);
	case CPC_PRESET:
	case CPC_RESTART:
		/*
		 * These are valid only if this lwp has a bound set.
		 */
		if (t->t_cpc_set == NULL)
			return (set_errno(EINVAL));
		if (cmd == CPC_PRESET) {
			/*
			 * The preset is shipped up to us from userland in two
			 * parts. This lets us handle 64-bit values from 32-bit
			 * and 64-bit applications in the same manner.
			 *
			 * udata1 = index of request to preset
			 * udata2 = new 64-bit preset (most sig. 32 bits)
			 * udata3 = new 64-bit preset (least sig. 32 bits)
			 */
			if ((error = kcpc_preset(t->t_cpc_set, (intptr_t)udata1,
			    ((uint64_t)(uintptr_t)udata2 << 32ULL) |
			    (uint64_t)(uintptr_t)udata3)) != 0)
				return (set_errno(error));
		} else {
			/*
			 * udata[1-3] = unused
			 */
			if ((error = kcpc_restart(t->t_cpc_set)) != 0)
				return (set_errno(error));
		}
		return (0);
	case CPC_ENABLE:
	case CPC_DISABLE:
		udata1 = 0;
		/*FALLTHROUGH*/
	case CPC_USR_EVENTS:
	case CPC_SYS_EVENTS:
		if (t != curthread || t->t_cpc_set == NULL)
			return (set_errno(EINVAL));
		/*
		 * Provided for backwards compatibility with CPCv1.
		 *
		 * Stop the counters and record the current counts. Use the
		 * counts as the preset to rebind a new set with the requests
		 * reconfigured as requested.
		 *
		 * udata1: 1 == enable; 0 == disable
		 * udata{2,3}: unused
		 */
		rw_enter(&kcpc_cpuctx_lock, RW_READER);
		if ((error = kcpc_enable(t,
		    cmd, (int)(uintptr_t)udata1)) != 0) {
			rw_exit(&kcpc_cpuctx_lock);
			return (set_errno(error));
		}
		rw_exit(&kcpc_cpuctx_lock);
		return (0);
	case CPC_NPIC:
		/* number of counters is the direct (non-errno) return value */
		return (cpc_ncounters);
	case CPC_CAPS:
		return (pcbe_ops->pcbe_caps);
	case CPC_EVLIST_SIZE:
	case CPC_LIST_EVENTS:
		/*
		 * udata1 = pointer to user's int or buffer
		 * udata2 = picnum
		 * udata3 = unused
		 */
		if ((uintptr_t)udata2 >= cpc_ncounters)
			return (set_errno(EINVAL));

		size = strlen(
		    pcbe_ops->pcbe_list_events((uintptr_t)udata2)) + 1;

		if (cmd == CPC_EVLIST_SIZE) {
			if (suword32(udata1, size) == -1)
				return (set_errno(EFAULT));
		} else {
			if (copyout(
			    pcbe_ops->pcbe_list_events((uintptr_t)udata2),
			    udata1, size) == -1)
				return (set_errno(EFAULT));
		}
		return (0);
	case CPC_ATTRLIST_SIZE:
	case CPC_LIST_ATTRS:
		/*
		 * udata1 = pointer to user's int or buffer
		 * udata2 = unused
		 * udata3 = unused
		 *
		 * attrlist size is length of PCBE-supported attributes, plus
		 * room for "picnum\0" plus an optional ',' separator char.
		 */
		str = pcbe_ops->pcbe_list_attrs();
		size = strlen(str) + sizeof (SEPARATOR ATTRLIST) + 1;
		if (str[0] != '\0')
			/*
			 * A ',' separator character is necessary.
			 */
			size += 1;

		if (cmd == CPC_ATTRLIST_SIZE) {
			if (suword32(udata1, size) == -1)
				return (set_errno(EFAULT));
		} else {
			/*
			 * Copyout the PCBE attributes, and then append the
			 * generic attribute list (with separator if necessary).
			 */
			if (copyout(str, udata1, strlen(str)) == -1)
				return (set_errno(EFAULT));
			if (str[0] != '\0') {
				if (copyout(SEPARATOR ATTRLIST,
				    ((char *)udata1) + strlen(str),
				    strlen(SEPARATOR ATTRLIST) + 1)
				    == -1)
					return (set_errno(EFAULT));
			} else
				if (copyout(ATTRLIST,
				    (char *)udata1 + strlen(str),
				    strlen(ATTRLIST) + 1) == -1)
					return (set_errno(EFAULT));
		}
		return (0);
	case CPC_IMPL_NAME:
	case CPC_CPUREF:
		/*
		 * udata1 = pointer to user's buffer
		 * udata2 = unused
		 * udata3 = unused
		 */
		if (cmd == CPC_IMPL_NAME) {
			str = pcbe_ops->pcbe_impl_name();
			ASSERT(strlen(str) < CPC_MAX_IMPL_NAME);
		} else {
			str = pcbe_ops->pcbe_cpuref();
			ASSERT(strlen(str) < CPC_MAX_CPUREF);
		}

		if (copyout(str, udata1, strlen(str) + 1) != 0)
			return (set_errno(EFAULT));
		return (0);
	case CPC_INVALIDATE:
		kcpc_invalidate(t);
		return (0);
	case CPC_RELE:
		if ((error = kcpc_unbind(t->t_cpc_set)) != 0)
			return (set_errno(error));
		return (0);
	default:
		return (set_errno(EINVAL));
	}
}

/*
 * The 'shared' device allows direct access to the
 * performance counter control register of the current CPU.
 * The major difference between the contexts created here and those
 * above is that the context handlers are -not- installed, thus
 * no context switching behaviour occurs.
 *
 * Because they manipulate per-cpu state, these ioctls can
 * only be invoked from a bound lwp, by a caller with the cpc_cpu privilege
 * who can open the relevant entry in /devices (the act of holding it open
 * causes other uses of the counters to be suspended).
 *
 * Note that for correct results, the caller -must- ensure that
 * all existing per-lwp contexts are either inactive or marked invalid;
 * that's what the open routine does.
 */
/*ARGSUSED*/
static int
kcpc_ioctl(dev_t dev, int cmd, intptr_t data, int flags, cred_t *cr, int *rvp)
{
	kthread_t	*t = curthread;
	processorid_t	cpuid;
	void		*udata1 = NULL;
	void		*udata2 = NULL;
	void		*udata3 = NULL;
	int		error;
	int		code;

	STRUCT_DECL(__cpc_args, args);

	STRUCT_INIT(args, flags);

	/*
	 * The minor number encodes the CPU this open is bound to; the
	 * caller must still be bound to that CPU for the results to be
	 * meaningful.
	 */
	if (curthread->t_bind_cpu != getminor(dev))
		return (EAGAIN);  /* someone unbound it? */

	cpuid = getminor(dev);

	if (cmd == CPCIO_BIND || cmd == CPCIO_SAMPLE) {
		if (copyin((void *)data, STRUCT_BUF(args),
		    STRUCT_SIZE(args)) == -1)
			return (EFAULT);

		udata1 = STRUCT_FGETP(args, udata1);
		udata2 = STRUCT_FGETP(args, udata2);
		udata3 = STRUCT_FGETP(args, udata3);
	}

	switch (cmd) {
	case CPCIO_BIND:
		/*
		 * udata1 = pointer to packed nvlist buffer
		 * udata2 = size of packed nvlist buffer
		 * udata3 = User addr to return error subcode in.
		 */
		if (t->t_cpc_set != NULL) {
			(void) kcpc_unbind(t->t_cpc_set);
			ASSERT(t->t_cpc_set == NULL);
		}

		if ((error = kcpc_copyin_set(&t->t_cpc_set, udata1,
		    (size_t)udata2)) != 0) {
			return (error);
		}

		if ((error = kcpc_verify_set(t->t_cpc_set)) != 0) {
			kcpc_free_set(t->t_cpc_set);
			t->t_cpc_set = NULL;
			if (copyout(&error, udata3, sizeof (error)) == -1)
				return (EFAULT);
			return (EINVAL);
		}

		if ((error = kcpc_bind_cpu(t->t_cpc_set, cpuid, &code)) != 0) {
			kcpc_free_set(t->t_cpc_set);
			t->t_cpc_set = NULL;
			/*
			 * Subcodes are only returned for EINVAL and EACCES.
			 */
			if ((error == EINVAL || error == EACCES) &&
			    copyout(&code, udata3, sizeof (code)) == -1)
				return (EFAULT);
			return (error);
		}

		return (0);
	case CPCIO_SAMPLE:
		/*
		 * udata1 = pointer to user's buffer
		 * udata2 = pointer to user's hrtime
		 * udata3 = pointer to user's tick
		 */
		/*
		 * Only CPU-bound sets may be sampled via the ioctl(). If this
		 * set has no CPU-bound context, return an error.
		 */
		if (t->t_cpc_set == NULL)
			return (EINVAL);
		if ((error = kcpc_sample(t->t_cpc_set, udata1, udata2,
		    udata3)) != 0)
			return (error);
		return (0);
	case CPCIO_RELE:
		if (t->t_cpc_set == NULL)
			return (EINVAL);
		return (kcpc_unbind(t->t_cpc_set));
	default:
		return (EINVAL);
	}
}

/*
 * The device supports multiple opens, but only one open
 * is allowed per processor. This is to enable multiple
 * instances of tools looking at different processors.
 */
#define	KCPC_MINOR_SHARED		((minor_t)0x3fffful)

static ulong_t	*kcpc_cpumap;		/* bitmap of cpus with open contexts */

/*
 * Open routine for the shared device.  Requires the cpc_cpu privilege
 * and a CPU-bound caller; on success the returned dev_t's minor number
 * is rewritten to the bound CPU's id so subsequent ioctls can recover it.
 */
/*ARGSUSED1*/
static int
kcpc_open(dev_t *dev, int flags, int otyp, cred_t *cr)
{
	processorid_t	cpuid;
	int		error;

	ASSERT(pcbe_ops != NULL);

	if ((error = secpolicy_cpc_cpu(cr)) != 0)
		return (error);
	if (getminor(*dev) != KCPC_MINOR_SHARED)
		return (ENXIO);
	if ((cpuid = curthread->t_bind_cpu) == PBIND_NONE)
		return (EINVAL);
	if (cpuid > max_cpuid)
		return (EINVAL);

	/*
	 * Writer lock: kcpc_cpuctx counts shared-device opens and,
	 * while non-zero, blocks all lwp-based binding (see cpc()).
	 */
	rw_enter(&kcpc_cpuctx_lock, RW_WRITER);
	if (++kcpc_cpuctx == 1) {
		ASSERT(kcpc_cpumap == NULL);
		kcpc_cpumap = kmem_zalloc(BT_SIZEOFMAP(max_cpuid + 1),
		    KM_SLEEP);
		/*
		 * When this device is open for processor-based contexts,
		 * no further lwp-based contexts can be created.
		 *
		 * Since this is the first open, ensure that all existing
		 * contexts are invalidated.
		 */
		kcpc_invalidate_all();
	} else if (BT_TEST(kcpc_cpumap, cpuid)) {
		/* this CPU is already claimed by another open */
		kcpc_cpuctx--;
		rw_exit(&kcpc_cpuctx_lock);
		return (EAGAIN);
	} else if (kcpc_hw_cpu_hook(cpuid, kcpc_cpumap) != 0) {
		kcpc_cpuctx--;
		rw_exit(&kcpc_cpuctx_lock);
		return (EACCES);
	}
	BT_SET(kcpc_cpumap, cpuid);
	rw_exit(&kcpc_cpuctx_lock);

	*dev = makedevice(getmajor(*dev), (minor_t)cpuid);

	return (0);
}

/*
 * Release this open's claim on its CPU; free the bitmap when the last
 * shared-device open goes away, re-enabling lwp-based binding.
 */
/*ARGSUSED1*/
static int
kcpc_close(dev_t dev, int flags, int otyp, cred_t *cr)
{
	rw_enter(&kcpc_cpuctx_lock, RW_WRITER);
	BT_CLEAR(kcpc_cpumap, getminor(dev));
	if (--kcpc_cpuctx == 0) {
		kmem_free(kcpc_cpumap, BT_SIZEOFMAP(max_cpuid + 1));
		kcpc_cpumap = NULL;
	}
	ASSERT(kcpc_cpuctx >= 0);
	rw_exit(&kcpc_cpuctx_lock);

	return (0);
}

/*
 * Sane boundaries on the size of packed lists. In bytes.
 */
#define	CPC_MIN_PACKSIZE	4
#define	CPC_MAX_PACKSIZE	10000

/*
 * Sane boundary on the number of requests a set can contain.
 */
#define	CPC_MAX_NREQS		100

/*
 * Sane boundary on the number of attributes a request can contain.
 */
#define	CPC_MAX_ATTRS		50

/*
 * Copy in a packed nvlist from the user and create a request set out of it.
 * If successful, return 0 and store a pointer to the set we've created. Returns
 * error code on error.
 */
int
kcpc_copyin_set(kcpc_set_t **inset, void *ubuf, size_t len)
{
	kcpc_set_t	*set;
	int		i;
	int		j;
	char		*packbuf;

	nvlist_t	*nvl;
	nvpair_t	*nvp = NULL;

	nvlist_t	*attrs;
	nvpair_t	*nvp_attr;
	kcpc_attr_t	*attrp;

	nvlist_t	**reqlist;
	uint_t		nreqs;
	uint64_t	uint64;
	uint32_t	uint32;
	uint32_t	setflags = (uint32_t)-1;	/* illegal sentinel */
	char		*string;
	char		*name;

	if (len < CPC_MIN_PACKSIZE || len > CPC_MAX_PACKSIZE)
		return (EINVAL);

	packbuf = kmem_alloc(len, KM_SLEEP);

	if (copyin(ubuf, packbuf, len) == -1) {
		kmem_free(packbuf, len);
		return (EFAULT);
	}

	if (nvlist_unpack(packbuf, len, &nvl, KM_SLEEP) != 0) {
		kmem_free(packbuf, len);
		return (EINVAL);
	}

	/*
	 * The nvlist has been unpacked so there is no need for the packed
	 * representation from this point on.
	 */
	kmem_free(packbuf, len);

	/*
	 * Walk the top-level pairs, picking up "flags" (uint32) and
	 * "reqs" (nvlist array); i counts the pairs seen.
	 */
	i = 0;
	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		switch (nvpair_type(nvp)) {
		case DATA_TYPE_UINT32:
			if (strcmp(nvpair_name(nvp), "flags") != 0 ||
			    nvpair_value_uint32(nvp, &setflags) != 0) {
				nvlist_free(nvl);
				return (EINVAL);
			}
			break;
		case DATA_TYPE_NVLIST_ARRAY:
			if (strcmp(nvpair_name(nvp), "reqs") != 0 ||
			    nvpair_value_nvlist_array(nvp, &reqlist,
			    &nreqs) != 0) {
				nvlist_free(nvl);
				return (EINVAL);
			}
			break;
		default:
			nvlist_free(nvl);
			return (EINVAL);
		}
		i++;
	}

	/*
	 * There should be two members in the top-level nvlist:
	 * an array of nvlists consisting of the requests, and flags.
	 * Anything else is an invalid set.
	 */
	if (i != 2) {
		nvlist_free(nvl);
		return (EINVAL);
	}

	if (nreqs > CPC_MAX_NREQS) {
		nvlist_free(nvl);
		return (EINVAL);
	}

	/*
	 * The requests are now stored in the nvlist array at reqlist.
	 * Note that the use of kmem_zalloc() to alloc the kcpc_set_t means
	 * we don't need to call the init routines for ks_lock and ks_condv.
	 */
	set = kmem_zalloc(sizeof (kcpc_set_t), KM_SLEEP);
	set->ks_req = (kcpc_request_t *)kmem_zalloc(sizeof (kcpc_request_t) *
	    nreqs, KM_SLEEP);
	set->ks_nreqs = nreqs;
	/*
	 * If the nvlist didn't contain a flags member, setflags was initialized
	 * with an illegal value and this set will fail sanity checks later on.
	 */
	set->ks_flags = setflags;
	/*
	 * Initialize bind/unbind set synchronization.
	 */
	set->ks_state &= ~KCPC_SET_BOUND;

	/*
	 * Build the set up one request at a time, always keeping it self-
	 * consistent so we can give it to kcpc_free_set() if we need to back
	 * out and return an error.
	 */
	for (i = 0; i < nreqs; i++) {
		nvp = NULL;
		set->ks_req[i].kr_picnum = -1;
		while ((nvp = nvlist_next_nvpair(reqlist[i], nvp)) != NULL) {
			name = nvpair_name(nvp);
			switch (nvpair_type(nvp)) {
			case DATA_TYPE_UINT32:
				if (nvpair_value_uint32(nvp, &uint32) == EINVAL)
					goto inval;
				if (strcmp(name, "cr_flags") == 0)
					set->ks_req[i].kr_flags = uint32;
				if (strcmp(name, "cr_index") == 0)
					set->ks_req[i].kr_index = uint32;
				break;
			case DATA_TYPE_UINT64:
				if (nvpair_value_uint64(nvp, &uint64) == EINVAL)
					goto inval;
				if (strcmp(name, "cr_preset") == 0)
					set->ks_req[i].kr_preset = uint64;
				break;
			case DATA_TYPE_STRING:
				if (nvpair_value_string(nvp, &string) == EINVAL)
					goto inval;
				/*
				 * NOTE(review): strncpy() does not guarantee
				 * NUL-termination if the event name is exactly
				 * CPC_MAX_EVENT_LEN long — presumably kr_event
				 * is sized to hold the terminator; confirm
				 * against the kcpc_request_t declaration.
				 */
				if (strcmp(name, "cr_event") == 0)
					(void) strncpy(set->ks_req[i].kr_event,
					    string, CPC_MAX_EVENT_LEN);
				break;
			case DATA_TYPE_NVLIST:
				if (strcmp(name, "cr_attr") != 0)
					goto inval;
				if (nvpair_value_nvlist(nvp, &attrs) == EINVAL)
					goto inval;
				nvp_attr = NULL;
				/*
				 * If the picnum has been specified as an
				 * attribute, consume that attribute here and
				 * remove it from the list of attributes.
				 */
				if (nvlist_lookup_uint64(attrs, "picnum",
				    &uint64) == 0) {
					if (nvlist_remove(attrs, "picnum",
					    DATA_TYPE_UINT64) != 0)
						panic("nvlist %p faulty",
						    (void *)attrs);
					set->ks_req[i].kr_picnum = uint64;
				}

				if ((set->ks_req[i].kr_nattrs =
				    kcpc_nvlist_npairs(attrs)) == 0)
					break;

				if (set->ks_req[i].kr_nattrs > CPC_MAX_ATTRS)
					goto inval;

				set->ks_req[i].kr_attr =
				    kmem_alloc(set->ks_req[i].kr_nattrs *
				    sizeof (kcpc_attr_t), KM_SLEEP);
				j = 0;

				while ((nvp_attr = nvlist_next_nvpair(attrs,
				    nvp_attr)) != NULL) {
					attrp = &set->ks_req[i].kr_attr[j];

					if (nvpair_type(nvp_attr) !=
					    DATA_TYPE_UINT64)
						goto inval;

					(void) strncpy(attrp->ka_name,
					    nvpair_name(nvp_attr),
					    CPC_MAX_ATTR_LEN);

					if (nvpair_value_uint64(nvp_attr,
					    &(attrp->ka_val)) == EINVAL)
						goto inval;
					j++;
				}
				ASSERT(j == set->ks_req[i].kr_nattrs);
				/*FALLTHROUGH*/
			default:
				break;
			}
		}
	}

	nvlist_free(nvl);
	*inset = set;
	return (0);

inval:
	nvlist_free(nvl);
	kcpc_free_set(set);
	return (EINVAL);
}

/*
 * Count the number of nvpairs in the supplied nvlist.
 */
static uint32_t
kcpc_nvlist_npairs(nvlist_t *list)
{
	nvpair_t	*nvp = NULL;
	uint32_t	n = 0;

	while ((nvp = nvlist_next_nvpair(list, nvp)) != NULL)
		n++;

	return (n);
}

/*
 * Performs sanity checks on the given set.
 * Returns 0 if the set checks out OK.
 * Returns a detailed error subcode, or -1 if there is no applicable subcode.
773 */ 774 static int 775 kcpc_verify_set(kcpc_set_t *set) 776 { 777 kcpc_request_t *rp; 778 int i; 779 uint64_t bitmap = 0; 780 int n; 781 782 if (set->ks_nreqs > cpc_ncounters) 783 return (-1); 784 785 if (CPC_SET_VALID_FLAGS(set->ks_flags) == 0) 786 return (-1); 787 788 for (i = 0; i < set->ks_nreqs; i++) { 789 rp = &set->ks_req[i]; 790 791 /* 792 * The following comparison must cast cpc_ncounters to an int, 793 * because kr_picnum will be -1 if the request didn't explicitly 794 * choose a PIC. 795 */ 796 if (rp->kr_picnum >= (int)cpc_ncounters) 797 return (CPC_INVALID_PICNUM); 798 799 /* 800 * Of the pics whose physical picnum has been specified, make 801 * sure each PIC appears only once in set. 802 */ 803 if ((n = set->ks_req[i].kr_picnum) != -1) { 804 if ((bitmap & (1 << n)) != 0) 805 return (-1); 806 bitmap |= (1 << n); 807 } 808 809 /* 810 * Make sure the requested index falls within the range of all 811 * requests. 812 */ 813 if (rp->kr_index < 0 || rp->kr_index >= set->ks_nreqs) 814 return (-1); 815 816 /* 817 * Make sure there are no unknown flags. 
818 */ 819 if (KCPC_REQ_VALID_FLAGS(rp->kr_flags) == 0) 820 return (CPC_REQ_INVALID_FLAGS); 821 } 822 823 return (0); 824 } 825 826 static struct cb_ops cb_ops = { 827 kcpc_open, 828 kcpc_close, 829 nodev, /* strategy */ 830 nodev, /* print */ 831 nodev, /* dump */ 832 nodev, /* read */ 833 nodev, /* write */ 834 kcpc_ioctl, 835 nodev, /* devmap */ 836 nodev, /* mmap */ 837 nodev, /* segmap */ 838 nochpoll, /* poll */ 839 ddi_prop_op, 840 NULL, 841 D_NEW | D_MP 842 }; 843 844 /*ARGSUSED*/ 845 static int 846 kcpc_probe(dev_info_t *devi) 847 { 848 return (DDI_PROBE_SUCCESS); 849 } 850 851 static dev_info_t *kcpc_devi; 852 853 static int 854 kcpc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 855 { 856 if (cmd != DDI_ATTACH) 857 return (DDI_FAILURE); 858 kcpc_devi = devi; 859 return (ddi_create_minor_node(devi, "shared", S_IFCHR, 860 KCPC_MINOR_SHARED, DDI_PSEUDO, 0)); 861 } 862 863 /*ARGSUSED*/ 864 static int 865 kcpc_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result) 866 { 867 switch (cmd) { 868 case DDI_INFO_DEVT2DEVINFO: 869 switch (getminor((dev_t)arg)) { 870 case KCPC_MINOR_SHARED: 871 *result = kcpc_devi; 872 return (DDI_SUCCESS); 873 default: 874 break; 875 } 876 break; 877 case DDI_INFO_DEVT2INSTANCE: 878 *result = 0; 879 return (DDI_SUCCESS); 880 default: 881 break; 882 } 883 884 return (DDI_FAILURE); 885 } 886 887 static struct dev_ops dev_ops = { 888 DEVO_REV, 889 0, 890 kcpc_getinfo, 891 nulldev, /* identify */ 892 kcpc_probe, 893 kcpc_attach, 894 nodev, /* detach */ 895 nodev, /* reset */ 896 &cb_ops, 897 (struct bus_ops *)0, 898 NULL, 899 ddi_quiesce_not_needed, /* quiesce */ 900 }; 901 902 static struct modldrv modldrv = { 903 &mod_driverops, 904 "cpc sampling driver", 905 &dev_ops 906 }; 907 908 static struct sysent cpc_sysent = { 909 5, 910 SE_NOUNLOAD | SE_ARGC | SE_32RVAL1, 911 cpc 912 }; 913 914 static struct modlsys modlsys = { 915 &mod_syscallops, 916 "cpc sampling system call", 917 &cpc_sysent 918 }; 919 920 #ifdef 
_SYSCALL32_IMPL 921 static struct modlsys modlsys32 = { 922 &mod_syscallops32, 923 "32-bit cpc sampling system call", 924 &cpc_sysent 925 }; 926 #endif 927 928 static struct modlinkage modl = { 929 MODREV_1, 930 &modldrv, 931 &modlsys, 932 #ifdef _SYSCALL32_IMPL 933 &modlsys32, 934 #endif 935 }; 936 937 static void 938 kcpc_init(void) 939 { 940 long hash; 941 942 rw_init(&kcpc_cpuctx_lock, NULL, RW_DEFAULT, NULL); 943 for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) 944 mutex_init(&kcpc_ctx_llock[hash], 945 NULL, MUTEX_DRIVER, (void *)(uintptr_t)15); 946 } 947 948 static void 949 kcpc_fini(void) 950 { 951 long hash; 952 953 for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) 954 mutex_destroy(&kcpc_ctx_llock[hash]); 955 rw_destroy(&kcpc_cpuctx_lock); 956 } 957 958 int 959 _init(void) 960 { 961 int ret; 962 963 if (kcpc_hw_load_pcbe() != 0) 964 return (ENOTSUP); 965 966 kcpc_init(); 967 if ((ret = mod_install(&modl)) != 0) 968 kcpc_fini(); 969 return (ret); 970 } 971 972 int 973 _fini(void) 974 { 975 int ret; 976 977 if ((ret = mod_remove(&modl)) == 0) 978 kcpc_fini(); 979 return (ret); 980 } 981 982 int 983 _info(struct modinfo *mi) 984 { 985 return (mod_info(&modl, mi)); 986 } 987