1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 * 21 * $FreeBSD$ 22 */ 23 24 /* 25 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 26 * Copyright (c) 2016, Joyent, Inc. All rights reserved. 27 * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 28 */ 29 30 /* 31 * DTrace - Dynamic Tracing for Solaris 32 * 33 * This is the implementation of the Solaris Dynamic Tracing framework 34 * (DTrace). The user-visible interface to DTrace is described at length in 35 * the "Solaris Dynamic Tracing Guide". The interfaces between the libdtrace 36 * library, the in-kernel DTrace framework, and the DTrace providers are 37 * described in the block comments in the <sys/dtrace.h> header file. The 38 * internal architecture of DTrace is described in the block comments in the 39 * <sys/dtrace_impl.h> header file. The comments contained within the DTrace 40 * implementation very much assume mastery of all of these sources; if one has 41 * an unanswered question about the implementation, one should consult them 42 * first. 
43 * 44 * The functions here are ordered roughly as follows: 45 * 46 * - Probe context functions 47 * - Probe hashing functions 48 * - Non-probe context utility functions 49 * - Matching functions 50 * - Provider-to-Framework API functions 51 * - Probe management functions 52 * - DIF object functions 53 * - Format functions 54 * - Predicate functions 55 * - ECB functions 56 * - Buffer functions 57 * - Enabling functions 58 * - DOF functions 59 * - Anonymous enabling functions 60 * - Consumer state functions 61 * - Helper functions 62 * - Hook functions 63 * - Driver cookbook functions 64 * 65 * Each group of functions begins with a block comment labelled the "DTrace 66 * [Group] Functions", allowing one to find each block by searching forward 67 * on capital-f functions. 68 */ 69 #include <sys/errno.h> 70 #ifndef illumos 71 #include <sys/time.h> 72 #endif 73 #include <sys/stat.h> 74 #include <sys/modctl.h> 75 #include <sys/conf.h> 76 #include <sys/systm.h> 77 #ifdef illumos 78 #include <sys/ddi.h> 79 #include <sys/sunddi.h> 80 #endif 81 #include <sys/cpuvar.h> 82 #include <sys/kmem.h> 83 #ifdef illumos 84 #include <sys/strsubr.h> 85 #endif 86 #include <sys/sysmacros.h> 87 #include <sys/dtrace_impl.h> 88 #include <sys/atomic.h> 89 #include <sys/cmn_err.h> 90 #ifdef illumos 91 #include <sys/mutex_impl.h> 92 #include <sys/rwlock_impl.h> 93 #endif 94 #include <sys/ctf_api.h> 95 #ifdef illumos 96 #include <sys/panic.h> 97 #include <sys/priv_impl.h> 98 #endif 99 #include <sys/policy.h> 100 #ifdef illumos 101 #include <sys/cred_impl.h> 102 #include <sys/procfs_isa.h> 103 #endif 104 #include <sys/taskq.h> 105 #ifdef illumos 106 #include <sys/mkdev.h> 107 #include <sys/kdi.h> 108 #endif 109 #include <sys/zone.h> 110 #include <sys/socket.h> 111 #include <netinet/in.h> 112 #include "strtolctype.h" 113 114 /* FreeBSD includes: */ 115 #ifndef illumos 116 #include <sys/callout.h> 117 #include <sys/ctype.h> 118 #include <sys/eventhandler.h> 119 #include <sys/limits.h> 120 
#include <sys/linker.h> 121 #include <sys/kdb.h> 122 #include <sys/kernel.h> 123 #include <sys/malloc.h> 124 #include <sys/lock.h> 125 #include <sys/mutex.h> 126 #include <sys/ptrace.h> 127 #include <sys/random.h> 128 #include <sys/rwlock.h> 129 #include <sys/sx.h> 130 #include <sys/sysctl.h> 131 132 #include <sys/dtrace_bsd.h> 133 134 #include <netinet/in.h> 135 136 #include "dtrace_cddl.h" 137 #include "dtrace_debug.c" 138 #endif 139 140 #include "dtrace_xoroshiro128_plus.h" 141 142 /* 143 * DTrace Tunable Variables 144 * 145 * The following variables may be tuned by adding a line to /etc/system that 146 * includes both the name of the DTrace module ("dtrace") and the name of the 147 * variable. For example: 148 * 149 * set dtrace:dtrace_destructive_disallow = 1 150 * 151 * In general, the only variables that one should be tuning this way are those 152 * that affect system-wide DTrace behavior, and for which the default behavior 153 * is undesirable. Most of these variables are tunable on a per-consumer 154 * basis using DTrace options, and need not be tuned on a system-wide basis. 155 * When tuning these variables, avoid pathological values; while some attempt 156 * is made to verify the integrity of these variables, they are not considered 157 * part of the supported interface to DTrace, and they are therefore not 158 * checked comprehensively. Further, these variables should not be tuned 159 * dynamically via "mdb -kw" or other means; they should only be tuned via 160 * /etc/system. 
161 */ 162 int dtrace_destructive_disallow = 0; 163 #ifndef illumos 164 /* Positive logic version of dtrace_destructive_disallow for loader tunable */ 165 int dtrace_allow_destructive = 1; 166 #endif 167 dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024); 168 size_t dtrace_difo_maxsize = (256 * 1024); 169 dtrace_optval_t dtrace_dof_maxsize = (8 * 1024 * 1024); 170 size_t dtrace_statvar_maxsize = (16 * 1024); 171 size_t dtrace_actions_max = (16 * 1024); 172 size_t dtrace_retain_max = 1024; 173 dtrace_optval_t dtrace_helper_actions_max = 128; 174 dtrace_optval_t dtrace_helper_providers_max = 32; 175 dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024); 176 size_t dtrace_strsize_default = 256; 177 dtrace_optval_t dtrace_cleanrate_default = 9900990; /* 101 hz */ 178 dtrace_optval_t dtrace_cleanrate_min = 200000; /* 5000 hz */ 179 dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */ 180 dtrace_optval_t dtrace_aggrate_default = NANOSEC; /* 1 hz */ 181 dtrace_optval_t dtrace_statusrate_default = NANOSEC; /* 1 hz */ 182 dtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC; /* 6/minute */ 183 dtrace_optval_t dtrace_switchrate_default = NANOSEC; /* 1 hz */ 184 dtrace_optval_t dtrace_nspec_default = 1; 185 dtrace_optval_t dtrace_specsize_default = 32 * 1024; 186 dtrace_optval_t dtrace_stackframes_default = 20; 187 dtrace_optval_t dtrace_ustackframes_default = 20; 188 dtrace_optval_t dtrace_jstackframes_default = 50; 189 dtrace_optval_t dtrace_jstackstrsize_default = 512; 190 int dtrace_msgdsize_max = 128; 191 hrtime_t dtrace_chill_max = MSEC2NSEC(500); /* 500 ms */ 192 hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */ 193 int dtrace_devdepth_max = 32; 194 int dtrace_err_verbose; 195 hrtime_t dtrace_deadman_interval = NANOSEC; 196 hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC; 197 hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC; 198 hrtime_t dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC; 199 
#ifndef illumos 200 int dtrace_memstr_max = 4096; 201 #endif 202 203 /* 204 * DTrace External Variables 205 * 206 * As dtrace(7D) is a kernel module, any DTrace variables are obviously 207 * available to DTrace consumers via the backtick (`) syntax. One of these, 208 * dtrace_zero, is made deliberately so: it is provided as a source of 209 * well-known, zero-filled memory. While this variable is not documented, 210 * it is used by some translators as an implementation detail. 211 */ 212 const char dtrace_zero[256] = { 0 }; /* zero-filled memory */ 213 214 /* 215 * DTrace Internal Variables 216 */ 217 #ifdef illumos 218 static dev_info_t *dtrace_devi; /* device info */ 219 #endif 220 #ifdef illumos 221 static vmem_t *dtrace_arena; /* probe ID arena */ 222 static vmem_t *dtrace_minor; /* minor number arena */ 223 #else 224 static taskq_t *dtrace_taskq; /* task queue */ 225 static struct unrhdr *dtrace_arena; /* Probe ID number. */ 226 #endif 227 static dtrace_probe_t **dtrace_probes; /* array of all probes */ 228 static int dtrace_nprobes; /* number of probes */ 229 static dtrace_provider_t *dtrace_provider; /* provider list */ 230 static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */ 231 static int dtrace_opens; /* number of opens */ 232 static int dtrace_helpers; /* number of helpers */ 233 static int dtrace_getf; /* number of unpriv getf()s */ 234 #ifdef illumos 235 static void *dtrace_softstate; /* softstate pointer */ 236 #endif 237 static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */ 238 static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */ 239 static dtrace_hash_t *dtrace_byname; /* probes hashed by name */ 240 static dtrace_toxrange_t *dtrace_toxrange; /* toxic range array */ 241 static int dtrace_toxranges; /* number of toxic ranges */ 242 static int dtrace_toxranges_max; /* size of toxic range array */ 243 static dtrace_anon_t dtrace_anon; /* anonymous enabling */ 244 static kmem_cache_t *dtrace_state_cache; /* 
cache for dynamic state */ 245 static uint64_t dtrace_vtime_references; /* number of vtimestamp refs */ 246 static kthread_t *dtrace_panicked; /* panicking thread */ 247 static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */ 248 static dtrace_genid_t dtrace_probegen; /* current probe generation */ 249 static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */ 250 static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */ 251 static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */ 252 static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */ 253 static int dtrace_dynvar_failclean; /* dynvars failed to clean */ 254 #ifndef illumos 255 static struct mtx dtrace_unr_mtx; 256 MTX_SYSINIT(dtrace_unr_mtx, &dtrace_unr_mtx, "Unique resource identifier", MTX_DEF); 257 static eventhandler_tag dtrace_kld_load_tag; 258 static eventhandler_tag dtrace_kld_unload_try_tag; 259 #endif 260 261 /* 262 * DTrace Locking 263 * DTrace is protected by three (relatively coarse-grained) locks: 264 * 265 * (1) dtrace_lock is required to manipulate essentially any DTrace state, 266 * including enabling state, probes, ECBs, consumer state, helper state, 267 * etc. Importantly, dtrace_lock is _not_ required when in probe context; 268 * probe context is lock-free -- synchronization is handled via the 269 * dtrace_sync() cross call mechanism. 270 * 271 * (2) dtrace_provider_lock is required when manipulating provider state, or 272 * when provider state must be held constant. 273 * 274 * (3) dtrace_meta_lock is required when manipulating meta provider state, or 275 * when meta provider state must be held constant. 276 * 277 * The lock ordering between these three locks is dtrace_meta_lock before 278 * dtrace_provider_lock before dtrace_lock. 
(In particular, there are 279 * several places where dtrace_provider_lock is held by the framework as it 280 * calls into the providers -- which then call back into the framework, 281 * grabbing dtrace_lock.) 282 * 283 * There are two other locks in the mix: mod_lock and cpu_lock. With respect 284 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical 285 * role as a coarse-grained lock; it is acquired before both of these locks. 286 * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must 287 * be acquired _between_ dtrace_meta_lock and any other DTrace locks. 288 * mod_lock is similar with respect to dtrace_provider_lock in that it must be 289 * acquired _between_ dtrace_provider_lock and dtrace_lock. 290 */ 291 static kmutex_t dtrace_lock; /* probe state lock */ 292 static kmutex_t dtrace_provider_lock; /* provider state lock */ 293 static kmutex_t dtrace_meta_lock; /* meta-provider state lock */ 294 295 #ifndef illumos 296 /* XXX FreeBSD hacks. */ 297 #define cr_suid cr_svuid 298 #define cr_sgid cr_svgid 299 #define ipaddr_t in_addr_t 300 #define mod_modname pathname 301 #define vuprintf vprintf 302 #define ttoproc(_a) ((_a)->td_proc) 303 #define crgetzoneid(_a) 0 304 #define SNOCD 0 305 #define CPU_ON_INTR(_a) 0 306 307 #define PRIV_EFFECTIVE (1 << 0) 308 #define PRIV_DTRACE_KERNEL (1 << 1) 309 #define PRIV_DTRACE_PROC (1 << 2) 310 #define PRIV_DTRACE_USER (1 << 3) 311 #define PRIV_PROC_OWNER (1 << 4) 312 #define PRIV_PROC_ZONE (1 << 5) 313 #define PRIV_ALL ~0 314 315 SYSCTL_DECL(_debug_dtrace); 316 SYSCTL_DECL(_kern_dtrace); 317 #endif 318 319 #ifdef illumos 320 #define curcpu CPU->cpu_id 321 #endif 322 323 324 /* 325 * DTrace Provider Variables 326 * 327 * These are the variables relating to DTrace as a provider (that is, the 328 * provider of the BEGIN, END, and ERROR probes). 
329 */ 330 static dtrace_pattr_t dtrace_provider_attr = { 331 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, 332 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 333 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 334 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, 335 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, 336 }; 337 338 static void 339 dtrace_nullop(void) 340 {} 341 342 static dtrace_pops_t dtrace_provider_ops = { 343 (void (*)(void *, dtrace_probedesc_t *))dtrace_nullop, 344 (void (*)(void *, modctl_t *))dtrace_nullop, 345 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, 346 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, 347 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, 348 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, 349 NULL, 350 NULL, 351 NULL, 352 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop 353 }; 354 355 static dtrace_id_t dtrace_probeid_begin; /* special BEGIN probe */ 356 static dtrace_id_t dtrace_probeid_end; /* special END probe */ 357 dtrace_id_t dtrace_probeid_error; /* special ERROR probe */ 358 359 /* 360 * DTrace Helper Tracing Variables 361 * 362 * These variables should be set dynamically to enable helper tracing. The 363 * only variables that should be set are dtrace_helptrace_enable (which should 364 * be set to a non-zero value to allocate helper tracing buffers on the next 365 * open of /dev/dtrace) and dtrace_helptrace_disable (which should be set to a 366 * non-zero value to deallocate helper tracing buffers on the next close of 367 * /dev/dtrace). When (and only when) helper tracing is disabled, the 368 * buffer size may also be set via dtrace_helptrace_bufsize. 
369 */ 370 int dtrace_helptrace_enable = 0; 371 int dtrace_helptrace_disable = 0; 372 int dtrace_helptrace_bufsize = 16 * 1024 * 1024; 373 uint32_t dtrace_helptrace_nlocals; 374 static dtrace_helptrace_t *dtrace_helptrace_buffer; 375 static uint32_t dtrace_helptrace_next = 0; 376 static int dtrace_helptrace_wrapped = 0; 377 378 /* 379 * DTrace Error Hashing 380 * 381 * On DEBUG kernels, DTrace will track the errors that has seen in a hash 382 * table. This is very useful for checking coverage of tests that are 383 * expected to induce DIF or DOF processing errors, and may be useful for 384 * debugging problems in the DIF code generator or in DOF generation . The 385 * error hash may be examined with the ::dtrace_errhash MDB dcmd. 386 */ 387 #ifdef DEBUG 388 static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ]; 389 static const char *dtrace_errlast; 390 static kthread_t *dtrace_errthread; 391 static kmutex_t dtrace_errlock; 392 #endif 393 394 /* 395 * DTrace Macros and Constants 396 * 397 * These are various macros that are useful in various spots in the 398 * implementation, along with a few random constants that have no meaning 399 * outside of the implementation. There is no real structure to this cpp 400 * mishmash -- but is there ever? 
401 */ 402 #define DTRACE_HASHSTR(hash, probe) \ 403 dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs))) 404 405 #define DTRACE_HASHNEXT(hash, probe) \ 406 (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs) 407 408 #define DTRACE_HASHPREV(hash, probe) \ 409 (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs) 410 411 #define DTRACE_HASHEQ(hash, lhs, rhs) \ 412 (strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \ 413 *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0) 414 415 #define DTRACE_AGGHASHSIZE_SLEW 17 416 417 #define DTRACE_V4MAPPED_OFFSET (sizeof (uint32_t) * 3) 418 419 /* 420 * The key for a thread-local variable consists of the lower 61 bits of the 421 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL. 422 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never 423 * equal to a variable identifier. This is necessary (but not sufficient) to 424 * assure that global associative arrays never collide with thread-local 425 * variables. To guarantee that they cannot collide, we must also define the 426 * order for keying dynamic variables. That order is: 427 * 428 * [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ] 429 * 430 * Because the variable-key and the tls-key are in orthogonal spaces, there is 431 * no way for a global variable key signature to match a thread-local key 432 * signature. 
433 */ 434 #ifdef illumos 435 #define DTRACE_TLS_THRKEY(where) { \ 436 uint_t intr = 0; \ 437 uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \ 438 for (; actv; actv >>= 1) \ 439 intr++; \ 440 ASSERT(intr < (1 << 3)); \ 441 (where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \ 442 (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ 443 } 444 #else 445 #define DTRACE_TLS_THRKEY(where) { \ 446 solaris_cpu_t *_c = &solaris_cpu[curcpu]; \ 447 uint_t intr = 0; \ 448 uint_t actv = _c->cpu_intr_actv; \ 449 for (; actv; actv >>= 1) \ 450 intr++; \ 451 ASSERT(intr < (1 << 3)); \ 452 (where) = ((curthread->td_tid + DIF_VARIABLE_MAX) & \ 453 (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ 454 } 455 #endif 456 457 #define DT_BSWAP_8(x) ((x) & 0xff) 458 #define DT_BSWAP_16(x) ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8)) 459 #define DT_BSWAP_32(x) ((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16)) 460 #define DT_BSWAP_64(x) ((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32)) 461 462 #define DT_MASK_LO 0x00000000FFFFFFFFULL 463 464 #define DTRACE_STORE(type, tomax, offset, what) \ 465 *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what); 466 467 #ifndef __x86 468 #define DTRACE_ALIGNCHECK(addr, size, flags) \ 469 if (addr & (size - 1)) { \ 470 *flags |= CPU_DTRACE_BADALIGN; \ 471 cpu_core[curcpu].cpuc_dtrace_illval = addr; \ 472 return (0); \ 473 } 474 #else 475 #define DTRACE_ALIGNCHECK(addr, size, flags) 476 #endif 477 478 /* 479 * Test whether a range of memory starting at testaddr of size testsz falls 480 * within the range of memory described by addr, sz. We take care to avoid 481 * problems with overflow and underflow of the unsigned quantities, and 482 * disallow all negative sizes. Ranges of size 0 are allowed. 
483 */ 484 #define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \ 485 ((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \ 486 (testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \ 487 (testaddr) + (testsz) >= (testaddr)) 488 489 #define DTRACE_RANGE_REMAIN(remp, addr, baseaddr, basesz) \ 490 do { \ 491 if ((remp) != NULL) { \ 492 *(remp) = (uintptr_t)(baseaddr) + (basesz) - (addr); \ 493 } \ 494 _NOTE(CONSTCOND) } while (0) 495 496 497 /* 498 * Test whether alloc_sz bytes will fit in the scratch region. We isolate 499 * alloc_sz on the righthand side of the comparison in order to avoid overflow 500 * or underflow in the comparison with it. This is simpler than the INRANGE 501 * check above, because we know that the dtms_scratch_ptr is valid in the 502 * range. Allocations of size zero are allowed. 503 */ 504 #define DTRACE_INSCRATCH(mstate, alloc_sz) \ 505 ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \ 506 (mstate)->dtms_scratch_ptr >= (alloc_sz)) 507 508 #define DTRACE_LOADFUNC(bits) \ 509 /*CSTYLED*/ \ 510 uint##bits##_t \ 511 dtrace_load##bits(uintptr_t addr) \ 512 { \ 513 size_t size = bits / NBBY; \ 514 /*CSTYLED*/ \ 515 uint##bits##_t rval; \ 516 int i; \ 517 volatile uint16_t *flags = (volatile uint16_t *) \ 518 &cpu_core[curcpu].cpuc_dtrace_flags; \ 519 \ 520 DTRACE_ALIGNCHECK(addr, size, flags); \ 521 \ 522 for (i = 0; i < dtrace_toxranges; i++) { \ 523 if (addr >= dtrace_toxrange[i].dtt_limit) \ 524 continue; \ 525 \ 526 if (addr + size <= dtrace_toxrange[i].dtt_base) \ 527 continue; \ 528 \ 529 /* \ 530 * This address falls within a toxic region; return 0. \ 531 */ \ 532 *flags |= CPU_DTRACE_BADADDR; \ 533 cpu_core[curcpu].cpuc_dtrace_illval = addr; \ 534 return (0); \ 535 } \ 536 \ 537 *flags |= CPU_DTRACE_NOFAULT; \ 538 /*CSTYLED*/ \ 539 rval = *((volatile uint##bits##_t *)addr); \ 540 *flags &= ~CPU_DTRACE_NOFAULT; \ 541 \ 542 return (!(*flags & CPU_DTRACE_FAULT) ? 
rval : 0); \ 543 } 544 545 #ifdef _LP64 546 #define dtrace_loadptr dtrace_load64 547 #else 548 #define dtrace_loadptr dtrace_load32 549 #endif 550 551 #define DTRACE_DYNHASH_FREE 0 552 #define DTRACE_DYNHASH_SINK 1 553 #define DTRACE_DYNHASH_VALID 2 554 555 #define DTRACE_MATCH_NEXT 0 556 #define DTRACE_MATCH_DONE 1 557 #define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0') 558 #define DTRACE_STATE_ALIGN 64 559 560 #define DTRACE_FLAGS2FLT(flags) \ 561 (((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR : \ 562 ((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP : \ 563 ((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO : \ 564 ((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV : \ 565 ((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV : \ 566 ((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \ 567 ((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \ 568 ((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \ 569 ((flags) & CPU_DTRACE_BADSTACK) ? DTRACEFLT_BADSTACK : \ 570 DTRACEFLT_UNKNOWN) 571 572 #define DTRACEACT_ISSTRING(act) \ 573 ((act)->dta_kind == DTRACEACT_DIFEXPR && \ 574 (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) 575 576 /* Function prototype definitions: */ 577 static size_t dtrace_strlen(const char *, size_t); 578 static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id); 579 static void dtrace_enabling_provide(dtrace_provider_t *); 580 static int dtrace_enabling_match(dtrace_enabling_t *, int *); 581 static void dtrace_enabling_matchall(void); 582 static void dtrace_enabling_reap(void); 583 static dtrace_state_t *dtrace_anon_grab(void); 584 static uint64_t dtrace_helper(int, dtrace_mstate_t *, 585 dtrace_state_t *, uint64_t, uint64_t); 586 static dtrace_helpers_t *dtrace_helpers_create(proc_t *); 587 static void dtrace_buffer_drop(dtrace_buffer_t *); 588 static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when); 589 static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t, 590 dtrace_state_t *, 
dtrace_mstate_t *); 591 static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t, 592 dtrace_optval_t); 593 static int dtrace_ecb_create_enable(dtrace_probe_t *, void *); 594 static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *); 595 uint16_t dtrace_load16(uintptr_t); 596 uint32_t dtrace_load32(uintptr_t); 597 uint64_t dtrace_load64(uintptr_t); 598 uint8_t dtrace_load8(uintptr_t); 599 void dtrace_dynvar_clean(dtrace_dstate_t *); 600 dtrace_dynvar_t *dtrace_dynvar(dtrace_dstate_t *, uint_t, dtrace_key_t *, 601 size_t, dtrace_dynvar_op_t, dtrace_mstate_t *, dtrace_vstate_t *); 602 uintptr_t dtrace_dif_varstr(uintptr_t, dtrace_state_t *, dtrace_mstate_t *); 603 static int dtrace_priv_proc(dtrace_state_t *); 604 static void dtrace_getf_barrier(void); 605 static int dtrace_canload_remains(uint64_t, size_t, size_t *, 606 dtrace_mstate_t *, dtrace_vstate_t *); 607 static int dtrace_canstore_remains(uint64_t, size_t, size_t *, 608 dtrace_mstate_t *, dtrace_vstate_t *); 609 610 /* 611 * DTrace Probe Context Functions 612 * 613 * These functions are called from probe context. Because probe context is 614 * any context in which C may be called, arbitrarily locks may be held, 615 * interrupts may be disabled, we may be in arbitrary dispatched state, etc. 616 * As a result, functions called from probe context may only call other DTrace 617 * support functions -- they may not interact at all with the system at large. 618 * (Note that the ASSERT macro is made probe-context safe by redefining it in 619 * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary 620 * loads are to be performed from probe context, they _must_ be in terms of 621 * the safe dtrace_load*() variants. 622 * 623 * Some functions in this block are not actually called from probe context; 624 * for these functions, there will be a comment above the function reading 625 * "Note: not called from probe context." 626 */ 627 void 628 dtrace_panic(const char *format, ...) 
629 { 630 va_list alist; 631 632 va_start(alist, format); 633 #ifdef __FreeBSD__ 634 vpanic(format, alist); 635 #else 636 dtrace_vpanic(format, alist); 637 #endif 638 va_end(alist); 639 } 640 641 int 642 dtrace_assfail(const char *a, const char *f, int l) 643 { 644 dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l); 645 646 /* 647 * We just need something here that even the most clever compiler 648 * cannot optimize away. 649 */ 650 return (a[(uintptr_t)f]); 651 } 652 653 /* 654 * Atomically increment a specified error counter from probe context. 655 */ 656 static void 657 dtrace_error(uint32_t *counter) 658 { 659 /* 660 * Most counters stored to in probe context are per-CPU counters. 661 * However, there are some error conditions that are sufficiently 662 * arcane that they don't merit per-CPU storage. If these counters 663 * are incremented concurrently on different CPUs, scalability will be 664 * adversely affected -- but we don't expect them to be white-hot in a 665 * correctly constructed enabling... 666 */ 667 uint32_t oval, nval; 668 669 do { 670 oval = *counter; 671 672 if ((nval = oval + 1) == 0) { 673 /* 674 * If the counter would wrap, set it to 1 -- assuring 675 * that the counter is never zero when we have seen 676 * errors. (The counter must be 32-bits because we 677 * aren't guaranteed a 64-bit compare&swap operation.) 678 * To save this code both the infamy of being fingered 679 * by a priggish news story and the indignity of being 680 * the target of a neo-puritan witch trial, we're 681 * carefully avoiding any colorful description of the 682 * likelihood of this condition -- but suffice it to 683 * say that it is only slightly more likely than the 684 * overflow of predicate cache IDs, as discussed in 685 * dtrace_predicate_create(). 
686 */ 687 nval = 1; 688 } 689 } while (dtrace_cas32(counter, oval, nval) != oval); 690 } 691 692 /* 693 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a 694 * uint8_t, a uint16_t, a uint32_t and a uint64_t. 695 */ 696 /* BEGIN CSTYLED */ 697 DTRACE_LOADFUNC(8) 698 DTRACE_LOADFUNC(16) 699 DTRACE_LOADFUNC(32) 700 DTRACE_LOADFUNC(64) 701 /* END CSTYLED */ 702 703 static int 704 dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate) 705 { 706 if (dest < mstate->dtms_scratch_base) 707 return (0); 708 709 if (dest + size < dest) 710 return (0); 711 712 if (dest + size > mstate->dtms_scratch_ptr) 713 return (0); 714 715 return (1); 716 } 717 718 static int 719 dtrace_canstore_statvar(uint64_t addr, size_t sz, size_t *remain, 720 dtrace_statvar_t **svars, int nsvars) 721 { 722 int i; 723 size_t maxglobalsize, maxlocalsize; 724 725 if (nsvars == 0) 726 return (0); 727 728 maxglobalsize = dtrace_statvar_maxsize + sizeof (uint64_t); 729 maxlocalsize = maxglobalsize * NCPU; 730 731 for (i = 0; i < nsvars; i++) { 732 dtrace_statvar_t *svar = svars[i]; 733 uint8_t scope; 734 size_t size; 735 736 if (svar == NULL || (size = svar->dtsv_size) == 0) 737 continue; 738 739 scope = svar->dtsv_var.dtdv_scope; 740 741 /* 742 * We verify that our size is valid in the spirit of providing 743 * defense in depth: we want to prevent attackers from using 744 * DTrace to escalate an orthogonal kernel heap corruption bug 745 * into the ability to store to arbitrary locations in memory. 746 */ 747 VERIFY((scope == DIFV_SCOPE_GLOBAL && size <= maxglobalsize) || 748 (scope == DIFV_SCOPE_LOCAL && size <= maxlocalsize)); 749 750 if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, 751 svar->dtsv_size)) { 752 DTRACE_RANGE_REMAIN(remain, addr, svar->dtsv_data, 753 svar->dtsv_size); 754 return (1); 755 } 756 } 757 758 return (0); 759 } 760 761 /* 762 * Check to see if the address is within a memory region to which a store may 763 * be issued. 
This includes the DTrace scratch areas, and any DTrace variable 764 * region. The caller of dtrace_canstore() is responsible for performing any 765 * alignment checks that are needed before stores are actually executed. 766 */ 767 static int 768 dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, 769 dtrace_vstate_t *vstate) 770 { 771 return (dtrace_canstore_remains(addr, sz, NULL, mstate, vstate)); 772 } 773 774 /* 775 * Implementation of dtrace_canstore which communicates the upper bound of the 776 * allowed memory region. 777 */ 778 static int 779 dtrace_canstore_remains(uint64_t addr, size_t sz, size_t *remain, 780 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) 781 { 782 /* 783 * First, check to see if the address is in scratch space... 784 */ 785 if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base, 786 mstate->dtms_scratch_size)) { 787 DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_scratch_base, 788 mstate->dtms_scratch_size); 789 return (1); 790 } 791 792 /* 793 * Now check to see if it's a dynamic variable. This check will pick 794 * up both thread-local variables and any global dynamically-allocated 795 * variables. 796 */ 797 if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base, 798 vstate->dtvs_dynvars.dtds_size)) { 799 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; 800 uintptr_t base = (uintptr_t)dstate->dtds_base + 801 (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t)); 802 uintptr_t chunkoffs; 803 dtrace_dynvar_t *dvar; 804 805 /* 806 * Before we assume that we can store here, we need to make 807 * sure that it isn't in our metadata -- storing to our 808 * dynamic variable metadata would corrupt our state. 
For 809 * the range to not include any dynamic variable metadata, 810 * it must: 811 * 812 * (1) Start above the hash table that is at the base of 813 * the dynamic variable space 814 * 815 * (2) Have a starting chunk offset that is beyond the 816 * dtrace_dynvar_t that is at the base of every chunk 817 * 818 * (3) Not span a chunk boundary 819 * 820 * (4) Not be in the tuple space of a dynamic variable 821 * 822 */ 823 if (addr < base) 824 return (0); 825 826 chunkoffs = (addr - base) % dstate->dtds_chunksize; 827 828 if (chunkoffs < sizeof (dtrace_dynvar_t)) 829 return (0); 830 831 if (chunkoffs + sz > dstate->dtds_chunksize) 832 return (0); 833 834 dvar = (dtrace_dynvar_t *)((uintptr_t)addr - chunkoffs); 835 836 if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) 837 return (0); 838 839 if (chunkoffs < sizeof (dtrace_dynvar_t) + 840 ((dvar->dtdv_tuple.dtt_nkeys - 1) * sizeof (dtrace_key_t))) 841 return (0); 842 843 DTRACE_RANGE_REMAIN(remain, addr, dvar, dstate->dtds_chunksize); 844 return (1); 845 } 846 847 /* 848 * Finally, check the static local and global variables. These checks 849 * take the longest, so we perform them last. 850 */ 851 if (dtrace_canstore_statvar(addr, sz, remain, 852 vstate->dtvs_locals, vstate->dtvs_nlocals)) 853 return (1); 854 855 if (dtrace_canstore_statvar(addr, sz, remain, 856 vstate->dtvs_globals, vstate->dtvs_nglobals)) 857 return (1); 858 859 return (0); 860 } 861 862 863 /* 864 * Convenience routine to check to see if the address is within a memory 865 * region in which a load may be issued given the user's privilege level; 866 * if not, it sets the appropriate error flags and loads 'addr' into the 867 * illegal value slot. 868 * 869 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement 870 * appropriate memory access protection. 
871 */ 872 static int 873 dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, 874 dtrace_vstate_t *vstate) 875 { 876 return (dtrace_canload_remains(addr, sz, NULL, mstate, vstate)); 877 } 878 879 /* 880 * Implementation of dtrace_canload which communicates the uppoer bound of the 881 * allowed memory region. 882 */ 883 static int 884 dtrace_canload_remains(uint64_t addr, size_t sz, size_t *remain, 885 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) 886 { 887 volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval; 888 file_t *fp; 889 890 /* 891 * If we hold the privilege to read from kernel memory, then 892 * everything is readable. 893 */ 894 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { 895 DTRACE_RANGE_REMAIN(remain, addr, addr, sz); 896 return (1); 897 } 898 899 /* 900 * You can obviously read that which you can store. 901 */ 902 if (dtrace_canstore_remains(addr, sz, remain, mstate, vstate)) 903 return (1); 904 905 /* 906 * We're allowed to read from our own string table. 907 */ 908 if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab, 909 mstate->dtms_difo->dtdo_strlen)) { 910 DTRACE_RANGE_REMAIN(remain, addr, 911 mstate->dtms_difo->dtdo_strtab, 912 mstate->dtms_difo->dtdo_strlen); 913 return (1); 914 } 915 916 if (vstate->dtvs_state != NULL && 917 dtrace_priv_proc(vstate->dtvs_state)) { 918 proc_t *p; 919 920 /* 921 * When we have privileges to the current process, there are 922 * several context-related kernel structures that are safe to 923 * read, even absent the privilege to read from kernel memory. 924 * These reads are safe because these structures contain only 925 * state that (1) we're permitted to read, (2) is harmless or 926 * (3) contains pointers to additional kernel state that we're 927 * not permitted to read (and as such, do not present an 928 * opportunity for privilege escalation). 
Finally (and 929 * critically), because of the nature of their relation with 930 * the current thread context, the memory associated with these 931 * structures cannot change over the duration of probe context, 932 * and it is therefore impossible for this memory to be 933 * deallocated and reallocated as something else while it's 934 * being operated upon. 935 */ 936 if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t))) { 937 DTRACE_RANGE_REMAIN(remain, addr, curthread, 938 sizeof (kthread_t)); 939 return (1); 940 } 941 942 if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr, 943 sz, curthread->t_procp, sizeof (proc_t))) { 944 DTRACE_RANGE_REMAIN(remain, addr, curthread->t_procp, 945 sizeof (proc_t)); 946 return (1); 947 } 948 949 if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz, 950 curthread->t_cred, sizeof (cred_t))) { 951 DTRACE_RANGE_REMAIN(remain, addr, curthread->t_cred, 952 sizeof (cred_t)); 953 return (1); 954 } 955 956 #ifdef illumos 957 if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz, 958 &(p->p_pidp->pid_id), sizeof (pid_t))) { 959 DTRACE_RANGE_REMAIN(remain, addr, &(p->p_pidp->pid_id), 960 sizeof (pid_t)); 961 return (1); 962 } 963 964 if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz, 965 curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) { 966 DTRACE_RANGE_REMAIN(remain, addr, curthread->t_cpu, 967 offsetof(cpu_t, cpu_pause_thread)); 968 return (1); 969 } 970 #endif 971 } 972 973 if ((fp = mstate->dtms_getf) != NULL) { 974 uintptr_t psz = sizeof (void *); 975 vnode_t *vp; 976 vnodeops_t *op; 977 978 /* 979 * When getf() returns a file_t, the enabling is implicitly 980 * granted the (transient) right to read the returned file_t 981 * as well as the v_path and v_op->vnop_name of the underlying 982 * vnode. 
These accesses are allowed after a successful 983 * getf() because the members that they refer to cannot change 984 * once set -- and the barrier logic in the kernel's closef() 985 * path assures that the file_t and its referenced vode_t 986 * cannot themselves be stale (that is, it impossible for 987 * either dtms_getf itself or its f_vnode member to reference 988 * freed memory). 989 */ 990 if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t))) { 991 DTRACE_RANGE_REMAIN(remain, addr, fp, sizeof (file_t)); 992 return (1); 993 } 994 995 if ((vp = fp->f_vnode) != NULL) { 996 size_t slen; 997 #ifdef illumos 998 if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz)) { 999 DTRACE_RANGE_REMAIN(remain, addr, &vp->v_path, 1000 psz); 1001 return (1); 1002 } 1003 slen = strlen(vp->v_path) + 1; 1004 if (DTRACE_INRANGE(addr, sz, vp->v_path, slen)) { 1005 DTRACE_RANGE_REMAIN(remain, addr, vp->v_path, 1006 slen); 1007 return (1); 1008 } 1009 #endif 1010 1011 if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz)) { 1012 DTRACE_RANGE_REMAIN(remain, addr, &vp->v_op, 1013 psz); 1014 return (1); 1015 } 1016 1017 #ifdef illumos 1018 if ((op = vp->v_op) != NULL && 1019 DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) { 1020 DTRACE_RANGE_REMAIN(remain, addr, 1021 &op->vnop_name, psz); 1022 return (1); 1023 } 1024 1025 if (op != NULL && op->vnop_name != NULL && 1026 DTRACE_INRANGE(addr, sz, op->vnop_name, 1027 (slen = strlen(op->vnop_name) + 1))) { 1028 DTRACE_RANGE_REMAIN(remain, addr, 1029 op->vnop_name, slen); 1030 return (1); 1031 } 1032 #endif 1033 } 1034 } 1035 1036 DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); 1037 *illval = addr; 1038 return (0); 1039 } 1040 1041 /* 1042 * Convenience routine to check to see if a given string is within a memory 1043 * region in which a load may be issued given the user's privilege level; 1044 * this exists so that we don't need to issue unnecessary dtrace_strlen() 1045 * calls in the event that the user has all privileges. 
1046 */ 1047 static int 1048 dtrace_strcanload(uint64_t addr, size_t sz, size_t *remain, 1049 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) 1050 { 1051 size_t rsize; 1052 1053 /* 1054 * If we hold the privilege to read from kernel memory, then 1055 * everything is readable. 1056 */ 1057 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { 1058 DTRACE_RANGE_REMAIN(remain, addr, addr, sz); 1059 return (1); 1060 } 1061 1062 /* 1063 * Even if the caller is uninterested in querying the remaining valid 1064 * range, it is required to ensure that the access is allowed. 1065 */ 1066 if (remain == NULL) { 1067 remain = &rsize; 1068 } 1069 if (dtrace_canload_remains(addr, 0, remain, mstate, vstate)) { 1070 size_t strsz; 1071 /* 1072 * Perform the strlen after determining the length of the 1073 * memory region which is accessible. This prevents timing 1074 * information from being used to find NULs in memory which is 1075 * not accessible to the caller. 1076 */ 1077 strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, 1078 MIN(sz, *remain)); 1079 if (strsz <= *remain) { 1080 return (1); 1081 } 1082 } 1083 1084 return (0); 1085 } 1086 1087 /* 1088 * Convenience routine to check to see if a given variable is within a memory 1089 * region in which a load may be issued given the user's privilege level. 1090 */ 1091 static int 1092 dtrace_vcanload(void *src, dtrace_diftype_t *type, size_t *remain, 1093 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) 1094 { 1095 size_t sz; 1096 ASSERT(type->dtdt_flags & DIF_TF_BYREF); 1097 1098 /* 1099 * Calculate the max size before performing any checks since even 1100 * DTRACE_ACCESS_KERNEL-credentialed callers expect that this function 1101 * return the max length via 'remain'. 
1102 */ 1103 if (type->dtdt_kind == DIF_TYPE_STRING) { 1104 dtrace_state_t *state = vstate->dtvs_state; 1105 1106 if (state != NULL) { 1107 sz = state->dts_options[DTRACEOPT_STRSIZE]; 1108 } else { 1109 /* 1110 * In helper context, we have a NULL state; fall back 1111 * to using the system-wide default for the string size 1112 * in this case. 1113 */ 1114 sz = dtrace_strsize_default; 1115 } 1116 } else { 1117 sz = type->dtdt_size; 1118 } 1119 1120 /* 1121 * If we hold the privilege to read from kernel memory, then 1122 * everything is readable. 1123 */ 1124 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { 1125 DTRACE_RANGE_REMAIN(remain, (uintptr_t)src, src, sz); 1126 return (1); 1127 } 1128 1129 if (type->dtdt_kind == DIF_TYPE_STRING) { 1130 return (dtrace_strcanload((uintptr_t)src, sz, remain, mstate, 1131 vstate)); 1132 } 1133 return (dtrace_canload_remains((uintptr_t)src, sz, remain, mstate, 1134 vstate)); 1135 } 1136 1137 /* 1138 * Convert a string to a signed integer using safe loads. 1139 * 1140 * NOTE: This function uses various macros from strtolctype.h to manipulate 1141 * digit values, etc -- these have all been checked to ensure they make 1142 * no additional function calls. 1143 */ 1144 static int64_t 1145 dtrace_strtoll(char *input, int base, size_t limit) 1146 { 1147 uintptr_t pos = (uintptr_t)input; 1148 int64_t val = 0; 1149 int x; 1150 boolean_t neg = B_FALSE; 1151 char c, cc, ccc; 1152 uintptr_t end = pos + limit; 1153 1154 /* 1155 * Consume any whitespace preceding digits. 1156 */ 1157 while ((c = dtrace_load8(pos)) == ' ' || c == '\t') 1158 pos++; 1159 1160 /* 1161 * Handle an explicit sign if one is present. 1162 */ 1163 if (c == '-' || c == '+') { 1164 if (c == '-') 1165 neg = B_TRUE; 1166 c = dtrace_load8(++pos); 1167 } 1168 1169 /* 1170 * Check for an explicit hexadecimal prefix ("0x" or "0X") and skip it 1171 * if present. 
1172 */ 1173 if (base == 16 && c == '0' && ((cc = dtrace_load8(pos + 1)) == 'x' || 1174 cc == 'X') && isxdigit(ccc = dtrace_load8(pos + 2))) { 1175 pos += 2; 1176 c = ccc; 1177 } 1178 1179 /* 1180 * Read in contiguous digits until the first non-digit character. 1181 */ 1182 for (; pos < end && c != '\0' && lisalnum(c) && (x = DIGIT(c)) < base; 1183 c = dtrace_load8(++pos)) 1184 val = val * base + x; 1185 1186 return (neg ? -val : val); 1187 } 1188 1189 /* 1190 * Compare two strings using safe loads. 1191 */ 1192 static int 1193 dtrace_strncmp(char *s1, char *s2, size_t limit) 1194 { 1195 uint8_t c1, c2; 1196 volatile uint16_t *flags; 1197 1198 if (s1 == s2 || limit == 0) 1199 return (0); 1200 1201 flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; 1202 1203 do { 1204 if (s1 == NULL) { 1205 c1 = '\0'; 1206 } else { 1207 c1 = dtrace_load8((uintptr_t)s1++); 1208 } 1209 1210 if (s2 == NULL) { 1211 c2 = '\0'; 1212 } else { 1213 c2 = dtrace_load8((uintptr_t)s2++); 1214 } 1215 1216 if (c1 != c2) 1217 return (c1 - c2); 1218 } while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT)); 1219 1220 return (0); 1221 } 1222 1223 /* 1224 * Compute strlen(s) for a string using safe memory accesses. The additional 1225 * len parameter is used to specify a maximum length to ensure completion. 1226 */ 1227 static size_t 1228 dtrace_strlen(const char *s, size_t lim) 1229 { 1230 uint_t len; 1231 1232 for (len = 0; len != lim; len++) { 1233 if (dtrace_load8((uintptr_t)s++) == '\0') 1234 break; 1235 } 1236 1237 return (len); 1238 } 1239 1240 /* 1241 * Check if an address falls within a toxic region. 
1242 */ 1243 static int 1244 dtrace_istoxic(uintptr_t kaddr, size_t size) 1245 { 1246 uintptr_t taddr, tsize; 1247 int i; 1248 1249 for (i = 0; i < dtrace_toxranges; i++) { 1250 taddr = dtrace_toxrange[i].dtt_base; 1251 tsize = dtrace_toxrange[i].dtt_limit - taddr; 1252 1253 if (kaddr - taddr < tsize) { 1254 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 1255 cpu_core[curcpu].cpuc_dtrace_illval = kaddr; 1256 return (1); 1257 } 1258 1259 if (taddr - kaddr < size) { 1260 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 1261 cpu_core[curcpu].cpuc_dtrace_illval = taddr; 1262 return (1); 1263 } 1264 } 1265 1266 return (0); 1267 } 1268 1269 /* 1270 * Copy src to dst using safe memory accesses. The src is assumed to be unsafe 1271 * memory specified by the DIF program. The dst is assumed to be safe memory 1272 * that we can store to directly because it is managed by DTrace. As with 1273 * standard bcopy, overlapping copies are handled properly. 1274 */ 1275 static void 1276 dtrace_bcopy(const void *src, void *dst, size_t len) 1277 { 1278 if (len != 0) { 1279 uint8_t *s1 = dst; 1280 const uint8_t *s2 = src; 1281 1282 if (s1 <= s2) { 1283 do { 1284 *s1++ = dtrace_load8((uintptr_t)s2++); 1285 } while (--len != 0); 1286 } else { 1287 s2 += len; 1288 s1 += len; 1289 1290 do { 1291 *--s1 = dtrace_load8((uintptr_t)--s2); 1292 } while (--len != 0); 1293 } 1294 } 1295 } 1296 1297 /* 1298 * Copy src to dst using safe memory accesses, up to either the specified 1299 * length, or the point that a nul byte is encountered. The src is assumed to 1300 * be unsafe memory specified by the DIF program. The dst is assumed to be 1301 * safe memory that we can store to directly because it is managed by DTrace. 1302 * Unlike dtrace_bcopy(), overlapping regions are not handled. 
1303 */ 1304 static void 1305 dtrace_strcpy(const void *src, void *dst, size_t len) 1306 { 1307 if (len != 0) { 1308 uint8_t *s1 = dst, c; 1309 const uint8_t *s2 = src; 1310 1311 do { 1312 *s1++ = c = dtrace_load8((uintptr_t)s2++); 1313 } while (--len != 0 && c != '\0'); 1314 } 1315 } 1316 1317 /* 1318 * Copy src to dst, deriving the size and type from the specified (BYREF) 1319 * variable type. The src is assumed to be unsafe memory specified by the DIF 1320 * program. The dst is assumed to be DTrace variable memory that is of the 1321 * specified type; we assume that we can store to directly. 1322 */ 1323 static void 1324 dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type, size_t limit) 1325 { 1326 ASSERT(type->dtdt_flags & DIF_TF_BYREF); 1327 1328 if (type->dtdt_kind == DIF_TYPE_STRING) { 1329 dtrace_strcpy(src, dst, MIN(type->dtdt_size, limit)); 1330 } else { 1331 dtrace_bcopy(src, dst, MIN(type->dtdt_size, limit)); 1332 } 1333 } 1334 1335 /* 1336 * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be 1337 * unsafe memory specified by the DIF program. The s2 data is assumed to be 1338 * safe memory that we can access directly because it is managed by DTrace. 1339 */ 1340 static int 1341 dtrace_bcmp(const void *s1, const void *s2, size_t len) 1342 { 1343 volatile uint16_t *flags; 1344 1345 flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; 1346 1347 if (s1 == s2) 1348 return (0); 1349 1350 if (s1 == NULL || s2 == NULL) 1351 return (1); 1352 1353 if (s1 != s2 && len != 0) { 1354 const uint8_t *ps1 = s1; 1355 const uint8_t *ps2 = s2; 1356 1357 do { 1358 if (dtrace_load8((uintptr_t)ps1++) != *ps2++) 1359 return (1); 1360 } while (--len != 0 && !(*flags & CPU_DTRACE_FAULT)); 1361 } 1362 return (0); 1363 } 1364 1365 /* 1366 * Zero the specified region using a simple byte-by-byte loop. Note that this 1367 * is for safe DTrace-managed memory only. 
1368 */ 1369 static void 1370 dtrace_bzero(void *dst, size_t len) 1371 { 1372 uchar_t *cp; 1373 1374 for (cp = dst; len != 0; len--) 1375 *cp++ = 0; 1376 } 1377 1378 static void 1379 dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum) 1380 { 1381 uint64_t result[2]; 1382 1383 result[0] = addend1[0] + addend2[0]; 1384 result[1] = addend1[1] + addend2[1] + 1385 (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0); 1386 1387 sum[0] = result[0]; 1388 sum[1] = result[1]; 1389 } 1390 1391 /* 1392 * Shift the 128-bit value in a by b. If b is positive, shift left. 1393 * If b is negative, shift right. 1394 */ 1395 static void 1396 dtrace_shift_128(uint64_t *a, int b) 1397 { 1398 uint64_t mask; 1399 1400 if (b == 0) 1401 return; 1402 1403 if (b < 0) { 1404 b = -b; 1405 if (b >= 64) { 1406 a[0] = a[1] >> (b - 64); 1407 a[1] = 0; 1408 } else { 1409 a[0] >>= b; 1410 mask = 1LL << (64 - b); 1411 mask -= 1; 1412 a[0] |= ((a[1] & mask) << (64 - b)); 1413 a[1] >>= b; 1414 } 1415 } else { 1416 if (b >= 64) { 1417 a[1] = a[0] << (b - 64); 1418 a[0] = 0; 1419 } else { 1420 a[1] <<= b; 1421 mask = a[0] >> (64 - b); 1422 a[1] |= mask; 1423 a[0] <<= b; 1424 } 1425 } 1426 } 1427 1428 /* 1429 * The basic idea is to break the 2 64-bit values into 4 32-bit values, 1430 * use native multiplication on those, and then re-combine into the 1431 * resulting 128-bit value. 
1432 * 1433 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) = 1434 * hi1 * hi2 << 64 + 1435 * hi1 * lo2 << 32 + 1436 * hi2 * lo1 << 32 + 1437 * lo1 * lo2 1438 */ 1439 static void 1440 dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product) 1441 { 1442 uint64_t hi1, hi2, lo1, lo2; 1443 uint64_t tmp[2]; 1444 1445 hi1 = factor1 >> 32; 1446 hi2 = factor2 >> 32; 1447 1448 lo1 = factor1 & DT_MASK_LO; 1449 lo2 = factor2 & DT_MASK_LO; 1450 1451 product[0] = lo1 * lo2; 1452 product[1] = hi1 * hi2; 1453 1454 tmp[0] = hi1 * lo2; 1455 tmp[1] = 0; 1456 dtrace_shift_128(tmp, 32); 1457 dtrace_add_128(product, tmp, product); 1458 1459 tmp[0] = hi2 * lo1; 1460 tmp[1] = 0; 1461 dtrace_shift_128(tmp, 32); 1462 dtrace_add_128(product, tmp, product); 1463 } 1464 1465 /* 1466 * This privilege check should be used by actions and subroutines to 1467 * verify that the user credentials of the process that enabled the 1468 * invoking ECB match the target credentials 1469 */ 1470 static int 1471 dtrace_priv_proc_common_user(dtrace_state_t *state) 1472 { 1473 cred_t *cr, *s_cr = state->dts_cred.dcr_cred; 1474 1475 /* 1476 * We should always have a non-NULL state cred here, since if cred 1477 * is null (anonymous tracing), we fast-path bypass this routine. 
1478 */ 1479 ASSERT(s_cr != NULL); 1480 1481 if ((cr = CRED()) != NULL && 1482 s_cr->cr_uid == cr->cr_uid && 1483 s_cr->cr_uid == cr->cr_ruid && 1484 s_cr->cr_uid == cr->cr_suid && 1485 s_cr->cr_gid == cr->cr_gid && 1486 s_cr->cr_gid == cr->cr_rgid && 1487 s_cr->cr_gid == cr->cr_sgid) 1488 return (1); 1489 1490 return (0); 1491 } 1492 1493 /* 1494 * This privilege check should be used by actions and subroutines to 1495 * verify that the zone of the process that enabled the invoking ECB 1496 * matches the target credentials 1497 */ 1498 static int 1499 dtrace_priv_proc_common_zone(dtrace_state_t *state) 1500 { 1501 #ifdef illumos 1502 cred_t *cr, *s_cr = state->dts_cred.dcr_cred; 1503 1504 /* 1505 * We should always have a non-NULL state cred here, since if cred 1506 * is null (anonymous tracing), we fast-path bypass this routine. 1507 */ 1508 ASSERT(s_cr != NULL); 1509 1510 if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone) 1511 return (1); 1512 1513 return (0); 1514 #else 1515 return (1); 1516 #endif 1517 } 1518 1519 /* 1520 * This privilege check should be used by actions and subroutines to 1521 * verify that the process has not setuid or changed credentials. 
1522 */ 1523 static int 1524 dtrace_priv_proc_common_nocd(void) 1525 { 1526 proc_t *proc; 1527 1528 if ((proc = ttoproc(curthread)) != NULL && 1529 !(proc->p_flag & SNOCD)) 1530 return (1); 1531 1532 return (0); 1533 } 1534 1535 static int 1536 dtrace_priv_proc_destructive(dtrace_state_t *state) 1537 { 1538 int action = state->dts_cred.dcr_action; 1539 1540 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) && 1541 dtrace_priv_proc_common_zone(state) == 0) 1542 goto bad; 1543 1544 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) && 1545 dtrace_priv_proc_common_user(state) == 0) 1546 goto bad; 1547 1548 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) && 1549 dtrace_priv_proc_common_nocd() == 0) 1550 goto bad; 1551 1552 return (1); 1553 1554 bad: 1555 cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; 1556 1557 return (0); 1558 } 1559 1560 static int 1561 dtrace_priv_proc_control(dtrace_state_t *state) 1562 { 1563 if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL) 1564 return (1); 1565 1566 if (dtrace_priv_proc_common_zone(state) && 1567 dtrace_priv_proc_common_user(state) && 1568 dtrace_priv_proc_common_nocd()) 1569 return (1); 1570 1571 cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; 1572 1573 return (0); 1574 } 1575 1576 static int 1577 dtrace_priv_proc(dtrace_state_t *state) 1578 { 1579 if (state->dts_cred.dcr_action & DTRACE_CRA_PROC) 1580 return (1); 1581 1582 cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; 1583 1584 return (0); 1585 } 1586 1587 static int 1588 dtrace_priv_kernel(dtrace_state_t *state) 1589 { 1590 if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL) 1591 return (1); 1592 1593 cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV; 1594 1595 return (0); 1596 } 1597 1598 static int 1599 dtrace_priv_kernel_destructive(dtrace_state_t *state) 1600 { 1601 if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE) 1602 return (1); 1603 1604 cpu_core[curcpu].cpuc_dtrace_flags |= 
CPU_DTRACE_KPRIV; 1605 1606 return (0); 1607 } 1608 1609 /* 1610 * Determine if the dte_cond of the specified ECB allows for processing of 1611 * the current probe to continue. Note that this routine may allow continued 1612 * processing, but with access(es) stripped from the mstate's dtms_access 1613 * field. 1614 */ 1615 static int 1616 dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate, 1617 dtrace_ecb_t *ecb) 1618 { 1619 dtrace_probe_t *probe = ecb->dte_probe; 1620 dtrace_provider_t *prov = probe->dtpr_provider; 1621 dtrace_pops_t *pops = &prov->dtpv_pops; 1622 int mode = DTRACE_MODE_NOPRIV_DROP; 1623 1624 ASSERT(ecb->dte_cond); 1625 1626 #ifdef illumos 1627 if (pops->dtps_mode != NULL) { 1628 mode = pops->dtps_mode(prov->dtpv_arg, 1629 probe->dtpr_id, probe->dtpr_arg); 1630 1631 ASSERT((mode & DTRACE_MODE_USER) || 1632 (mode & DTRACE_MODE_KERNEL)); 1633 ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) || 1634 (mode & DTRACE_MODE_NOPRIV_DROP)); 1635 } 1636 1637 /* 1638 * If the dte_cond bits indicate that this consumer is only allowed to 1639 * see user-mode firings of this probe, call the provider's dtps_mode() 1640 * entry point to check that the probe was fired while in a user 1641 * context. If that's not the case, use the policy specified by the 1642 * provider to determine if we drop the probe or merely restrict 1643 * operation. 1644 */ 1645 if (ecb->dte_cond & DTRACE_COND_USERMODE) { 1646 ASSERT(mode != DTRACE_MODE_NOPRIV_DROP); 1647 1648 if (!(mode & DTRACE_MODE_USER)) { 1649 if (mode & DTRACE_MODE_NOPRIV_DROP) 1650 return (0); 1651 1652 mstate->dtms_access &= ~DTRACE_ACCESS_ARGS; 1653 } 1654 } 1655 #endif 1656 1657 /* 1658 * This is more subtle than it looks. We have to be absolutely certain 1659 * that CRED() isn't going to change out from under us so it's only 1660 * legit to examine that structure if we're in constrained situations. 
1661 * Currently, the only times we'll this check is if a non-super-user 1662 * has enabled the profile or syscall providers -- providers that 1663 * allow visibility of all processes. For the profile case, the check 1664 * above will ensure that we're examining a user context. 1665 */ 1666 if (ecb->dte_cond & DTRACE_COND_OWNER) { 1667 cred_t *cr; 1668 cred_t *s_cr = state->dts_cred.dcr_cred; 1669 proc_t *proc; 1670 1671 ASSERT(s_cr != NULL); 1672 1673 if ((cr = CRED()) == NULL || 1674 s_cr->cr_uid != cr->cr_uid || 1675 s_cr->cr_uid != cr->cr_ruid || 1676 s_cr->cr_uid != cr->cr_suid || 1677 s_cr->cr_gid != cr->cr_gid || 1678 s_cr->cr_gid != cr->cr_rgid || 1679 s_cr->cr_gid != cr->cr_sgid || 1680 (proc = ttoproc(curthread)) == NULL || 1681 (proc->p_flag & SNOCD)) { 1682 if (mode & DTRACE_MODE_NOPRIV_DROP) 1683 return (0); 1684 1685 #ifdef illumos 1686 mstate->dtms_access &= ~DTRACE_ACCESS_PROC; 1687 #endif 1688 } 1689 } 1690 1691 #ifdef illumos 1692 /* 1693 * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not 1694 * in our zone, check to see if our mode policy is to restrict rather 1695 * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC 1696 * and DTRACE_ACCESS_ARGS 1697 */ 1698 if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { 1699 cred_t *cr; 1700 cred_t *s_cr = state->dts_cred.dcr_cred; 1701 1702 ASSERT(s_cr != NULL); 1703 1704 if ((cr = CRED()) == NULL || 1705 s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) { 1706 if (mode & DTRACE_MODE_NOPRIV_DROP) 1707 return (0); 1708 1709 mstate->dtms_access &= 1710 ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS); 1711 } 1712 } 1713 #endif 1714 1715 return (1); 1716 } 1717 1718 /* 1719 * Note: not called from probe context. This function is called 1720 * asynchronously (and at a regular interval) from outside of probe context to 1721 * clean the dirty dynamic variable lists on all CPUs. Dynamic variable 1722 * cleaning is explained in detail in <sys/dtrace_impl.h>. 
1723 */ 1724 void 1725 dtrace_dynvar_clean(dtrace_dstate_t *dstate) 1726 { 1727 dtrace_dynvar_t *dirty; 1728 dtrace_dstate_percpu_t *dcpu; 1729 dtrace_dynvar_t **rinsep; 1730 int i, j, work = 0; 1731 1732 for (i = 0; i < NCPU; i++) { 1733 dcpu = &dstate->dtds_percpu[i]; 1734 rinsep = &dcpu->dtdsc_rinsing; 1735 1736 /* 1737 * If the dirty list is NULL, there is no dirty work to do. 1738 */ 1739 if (dcpu->dtdsc_dirty == NULL) 1740 continue; 1741 1742 if (dcpu->dtdsc_rinsing != NULL) { 1743 /* 1744 * If the rinsing list is non-NULL, then it is because 1745 * this CPU was selected to accept another CPU's 1746 * dirty list -- and since that time, dirty buffers 1747 * have accumulated. This is a highly unlikely 1748 * condition, but we choose to ignore the dirty 1749 * buffers -- they'll be picked up a future cleanse. 1750 */ 1751 continue; 1752 } 1753 1754 if (dcpu->dtdsc_clean != NULL) { 1755 /* 1756 * If the clean list is non-NULL, then we're in a 1757 * situation where a CPU has done deallocations (we 1758 * have a non-NULL dirty list) but no allocations (we 1759 * also have a non-NULL clean list). We can't simply 1760 * move the dirty list into the clean list on this 1761 * CPU, yet we also don't want to allow this condition 1762 * to persist, lest a short clean list prevent a 1763 * massive dirty list from being cleaned (which in 1764 * turn could lead to otherwise avoidable dynamic 1765 * drops). To deal with this, we look for some CPU 1766 * with a NULL clean list, NULL dirty list, and NULL 1767 * rinsing list -- and then we borrow this CPU to 1768 * rinse our dirty list. 
1769 */ 1770 for (j = 0; j < NCPU; j++) { 1771 dtrace_dstate_percpu_t *rinser; 1772 1773 rinser = &dstate->dtds_percpu[j]; 1774 1775 if (rinser->dtdsc_rinsing != NULL) 1776 continue; 1777 1778 if (rinser->dtdsc_dirty != NULL) 1779 continue; 1780 1781 if (rinser->dtdsc_clean != NULL) 1782 continue; 1783 1784 rinsep = &rinser->dtdsc_rinsing; 1785 break; 1786 } 1787 1788 if (j == NCPU) { 1789 /* 1790 * We were unable to find another CPU that 1791 * could accept this dirty list -- we are 1792 * therefore unable to clean it now. 1793 */ 1794 dtrace_dynvar_failclean++; 1795 continue; 1796 } 1797 } 1798 1799 work = 1; 1800 1801 /* 1802 * Atomically move the dirty list aside. 1803 */ 1804 do { 1805 dirty = dcpu->dtdsc_dirty; 1806 1807 /* 1808 * Before we zap the dirty list, set the rinsing list. 1809 * (This allows for a potential assertion in 1810 * dtrace_dynvar(): if a free dynamic variable appears 1811 * on a hash chain, either the dirty list or the 1812 * rinsing list for some CPU must be non-NULL.) 1813 */ 1814 *rinsep = dirty; 1815 dtrace_membar_producer(); 1816 } while (dtrace_casptr(&dcpu->dtdsc_dirty, 1817 dirty, NULL) != dirty); 1818 } 1819 1820 if (!work) { 1821 /* 1822 * We have no work to do; we can simply return. 1823 */ 1824 return; 1825 } 1826 1827 dtrace_sync(); 1828 1829 for (i = 0; i < NCPU; i++) { 1830 dcpu = &dstate->dtds_percpu[i]; 1831 1832 if (dcpu->dtdsc_rinsing == NULL) 1833 continue; 1834 1835 /* 1836 * We are now guaranteed that no hash chain contains a pointer 1837 * into this dirty list; we can make it clean. 1838 */ 1839 ASSERT(dcpu->dtdsc_clean == NULL); 1840 dcpu->dtdsc_clean = dcpu->dtdsc_rinsing; 1841 dcpu->dtdsc_rinsing = NULL; 1842 } 1843 1844 /* 1845 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make 1846 * sure that all CPUs have seen all of the dtdsc_clean pointers. 
1847 * This prevents a race whereby a CPU incorrectly decides that 1848 * the state should be something other than DTRACE_DSTATE_CLEAN 1849 * after dtrace_dynvar_clean() has completed. 1850 */ 1851 dtrace_sync(); 1852 1853 dstate->dtds_state = DTRACE_DSTATE_CLEAN; 1854 } 1855 1856 /* 1857 * Depending on the value of the op parameter, this function looks-up, 1858 * allocates or deallocates an arbitrarily-keyed dynamic variable. If an 1859 * allocation is requested, this function will return a pointer to a 1860 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no 1861 * variable can be allocated. If NULL is returned, the appropriate counter 1862 * will be incremented. 1863 */ 1864 dtrace_dynvar_t * 1865 dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, 1866 dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op, 1867 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) 1868 { 1869 uint64_t hashval = DTRACE_DYNHASH_VALID; 1870 dtrace_dynhash_t *hash = dstate->dtds_hash; 1871 dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL; 1872 processorid_t me = curcpu, cpu = me; 1873 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me]; 1874 size_t bucket, ksize; 1875 size_t chunksize = dstate->dtds_chunksize; 1876 uintptr_t kdata, lock, nstate; 1877 uint_t i; 1878 1879 ASSERT(nkeys != 0); 1880 1881 /* 1882 * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time" 1883 * algorithm. For the by-value portions, we perform the algorithm in 1884 * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a 1885 * bit, and seems to have only a minute effect on distribution. For 1886 * the by-reference data, we perform "One-at-a-time" iterating (safely) 1887 * over each referenced byte. It's painful to do this, but it's much 1888 * better than pathological hash distribution. The efficacy of the 1889 * hashing algorithm (and a comparison with other algorithms) may be 1890 * found by running the ::dtrace_dynstat MDB dcmd. 
1891 */ 1892 for (i = 0; i < nkeys; i++) { 1893 if (key[i].dttk_size == 0) { 1894 uint64_t val = key[i].dttk_value; 1895 1896 hashval += (val >> 48) & 0xffff; 1897 hashval += (hashval << 10); 1898 hashval ^= (hashval >> 6); 1899 1900 hashval += (val >> 32) & 0xffff; 1901 hashval += (hashval << 10); 1902 hashval ^= (hashval >> 6); 1903 1904 hashval += (val >> 16) & 0xffff; 1905 hashval += (hashval << 10); 1906 hashval ^= (hashval >> 6); 1907 1908 hashval += val & 0xffff; 1909 hashval += (hashval << 10); 1910 hashval ^= (hashval >> 6); 1911 } else { 1912 /* 1913 * This is incredibly painful, but it beats the hell 1914 * out of the alternative. 1915 */ 1916 uint64_t j, size = key[i].dttk_size; 1917 uintptr_t base = (uintptr_t)key[i].dttk_value; 1918 1919 if (!dtrace_canload(base, size, mstate, vstate)) 1920 break; 1921 1922 for (j = 0; j < size; j++) { 1923 hashval += dtrace_load8(base + j); 1924 hashval += (hashval << 10); 1925 hashval ^= (hashval >> 6); 1926 } 1927 } 1928 } 1929 1930 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) 1931 return (NULL); 1932 1933 hashval += (hashval << 3); 1934 hashval ^= (hashval >> 11); 1935 hashval += (hashval << 15); 1936 1937 /* 1938 * There is a remote chance (ideally, 1 in 2^31) that our hashval 1939 * comes out to be one of our two sentinel hash values. If this 1940 * actually happens, we set the hashval to be a value known to be a 1941 * non-sentinel value. 1942 */ 1943 if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK) 1944 hashval = DTRACE_DYNHASH_VALID; 1945 1946 /* 1947 * Yes, it's painful to do a divide here. If the cycle count becomes 1948 * important here, tricks can be pulled to reduce it. (However, it's 1949 * critical that hash collisions be kept to an absolute minimum; 1950 * they're much more painful than a divide.) It's better to have a 1951 * solution that generates few collisions and still keeps things 1952 * relatively simple. 
1953 */ 1954 bucket = hashval % dstate->dtds_hashsize; 1955 1956 if (op == DTRACE_DYNVAR_DEALLOC) { 1957 volatile uintptr_t *lockp = &hash[bucket].dtdh_lock; 1958 1959 for (;;) { 1960 while ((lock = *lockp) & 1) 1961 continue; 1962 1963 if (dtrace_casptr((volatile void *)lockp, 1964 (volatile void *)lock, (volatile void *)(lock + 1)) == (void *)lock) 1965 break; 1966 } 1967 1968 dtrace_membar_producer(); 1969 } 1970 1971 top: 1972 prev = NULL; 1973 lock = hash[bucket].dtdh_lock; 1974 1975 dtrace_membar_consumer(); 1976 1977 start = hash[bucket].dtdh_chain; 1978 ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK || 1979 start->dtdv_hashval != DTRACE_DYNHASH_FREE || 1980 op != DTRACE_DYNVAR_DEALLOC)); 1981 1982 for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) { 1983 dtrace_tuple_t *dtuple = &dvar->dtdv_tuple; 1984 dtrace_key_t *dkey = &dtuple->dtt_key[0]; 1985 1986 if (dvar->dtdv_hashval != hashval) { 1987 if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) { 1988 /* 1989 * We've reached the sink, and therefore the 1990 * end of the hash chain; we can kick out of 1991 * the loop knowing that we have seen a valid 1992 * snapshot of state. 1993 */ 1994 ASSERT(dvar->dtdv_next == NULL); 1995 ASSERT(dvar == &dtrace_dynhash_sink); 1996 break; 1997 } 1998 1999 if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) { 2000 /* 2001 * We've gone off the rails: somewhere along 2002 * the line, one of the members of this hash 2003 * chain was deleted. Note that we could also 2004 * detect this by simply letting this loop run 2005 * to completion, as we would eventually hit 2006 * the end of the dirty list. However, we 2007 * want to avoid running the length of the 2008 * dirty list unnecessarily (it might be quite 2009 * long), so we catch this as early as 2010 * possible by detecting the hash marker. In 2011 * this case, we simply set dvar to NULL and 2012 * break; the conditional after the loop will 2013 * send us back to top. 
2014 */ 2015 dvar = NULL; 2016 break; 2017 } 2018 2019 goto next; 2020 } 2021 2022 if (dtuple->dtt_nkeys != nkeys) 2023 goto next; 2024 2025 for (i = 0; i < nkeys; i++, dkey++) { 2026 if (dkey->dttk_size != key[i].dttk_size) 2027 goto next; /* size or type mismatch */ 2028 2029 if (dkey->dttk_size != 0) { 2030 if (dtrace_bcmp( 2031 (void *)(uintptr_t)key[i].dttk_value, 2032 (void *)(uintptr_t)dkey->dttk_value, 2033 dkey->dttk_size)) 2034 goto next; 2035 } else { 2036 if (dkey->dttk_value != key[i].dttk_value) 2037 goto next; 2038 } 2039 } 2040 2041 if (op != DTRACE_DYNVAR_DEALLOC) 2042 return (dvar); 2043 2044 ASSERT(dvar->dtdv_next == NULL || 2045 dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE); 2046 2047 if (prev != NULL) { 2048 ASSERT(hash[bucket].dtdh_chain != dvar); 2049 ASSERT(start != dvar); 2050 ASSERT(prev->dtdv_next == dvar); 2051 prev->dtdv_next = dvar->dtdv_next; 2052 } else { 2053 if (dtrace_casptr(&hash[bucket].dtdh_chain, 2054 start, dvar->dtdv_next) != start) { 2055 /* 2056 * We have failed to atomically swing the 2057 * hash table head pointer, presumably because 2058 * of a conflicting allocation on another CPU. 2059 * We need to reread the hash chain and try 2060 * again. 2061 */ 2062 goto top; 2063 } 2064 } 2065 2066 dtrace_membar_producer(); 2067 2068 /* 2069 * Now set the hash value to indicate that it's free. 2070 */ 2071 ASSERT(hash[bucket].dtdh_chain != dvar); 2072 dvar->dtdv_hashval = DTRACE_DYNHASH_FREE; 2073 2074 dtrace_membar_producer(); 2075 2076 /* 2077 * Set the next pointer to point at the dirty list, and 2078 * atomically swing the dirty pointer to the newly freed dvar. 2079 */ 2080 do { 2081 next = dcpu->dtdsc_dirty; 2082 dvar->dtdv_next = next; 2083 } while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next); 2084 2085 /* 2086 * Finally, unlock this hash bucket. 
2087 */ 2088 ASSERT(hash[bucket].dtdh_lock == lock); 2089 ASSERT(lock & 1); 2090 hash[bucket].dtdh_lock++; 2091 2092 return (NULL); 2093 next: 2094 prev = dvar; 2095 continue; 2096 } 2097 2098 if (dvar == NULL) { 2099 /* 2100 * If dvar is NULL, it is because we went off the rails: 2101 * one of the elements that we traversed in the hash chain 2102 * was deleted while we were traversing it. In this case, 2103 * we assert that we aren't doing a dealloc (deallocs lock 2104 * the hash bucket to prevent themselves from racing with 2105 * one another), and retry the hash chain traversal. 2106 */ 2107 ASSERT(op != DTRACE_DYNVAR_DEALLOC); 2108 goto top; 2109 } 2110 2111 if (op != DTRACE_DYNVAR_ALLOC) { 2112 /* 2113 * If we are not to allocate a new variable, we want to 2114 * return NULL now. Before we return, check that the value 2115 * of the lock word hasn't changed. If it has, we may have 2116 * seen an inconsistent snapshot. 2117 */ 2118 if (op == DTRACE_DYNVAR_NOALLOC) { 2119 if (hash[bucket].dtdh_lock != lock) 2120 goto top; 2121 } else { 2122 ASSERT(op == DTRACE_DYNVAR_DEALLOC); 2123 ASSERT(hash[bucket].dtdh_lock == lock); 2124 ASSERT(lock & 1); 2125 hash[bucket].dtdh_lock++; 2126 } 2127 2128 return (NULL); 2129 } 2130 2131 /* 2132 * We need to allocate a new dynamic variable. The size we need is the 2133 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the 2134 * size of any auxiliary key data (rounded up to 8-byte alignment) plus 2135 * the size of any referred-to data (dsize). We then round the final 2136 * size up to the chunksize for allocation. 2137 */ 2138 for (ksize = 0, i = 0; i < nkeys; i++) 2139 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t)); 2140 2141 /* 2142 * This should be pretty much impossible, but could happen if, say, 2143 * strange DIF specified the tuple. 
Ideally, this should be an 2144 * assertion and not an error condition -- but that requires that the 2145 * chunksize calculation in dtrace_difo_chunksize() be absolutely 2146 * bullet-proof. (That is, it must not be able to be fooled by 2147 * malicious DIF.) Given the lack of backwards branches in DIF, 2148 * solving this would presumably not amount to solving the Halting 2149 * Problem -- but it still seems awfully hard. 2150 */ 2151 if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) + 2152 ksize + dsize > chunksize) { 2153 dcpu->dtdsc_drops++; 2154 return (NULL); 2155 } 2156 2157 nstate = DTRACE_DSTATE_EMPTY; 2158 2159 do { 2160 retry: 2161 free = dcpu->dtdsc_free; 2162 2163 if (free == NULL) { 2164 dtrace_dynvar_t *clean = dcpu->dtdsc_clean; 2165 void *rval; 2166 2167 if (clean == NULL) { 2168 /* 2169 * We're out of dynamic variable space on 2170 * this CPU. Unless we have tried all CPUs, 2171 * we'll try to allocate from a different 2172 * CPU. 2173 */ 2174 switch (dstate->dtds_state) { 2175 case DTRACE_DSTATE_CLEAN: { 2176 void *sp = &dstate->dtds_state; 2177 2178 if (++cpu >= NCPU) 2179 cpu = 0; 2180 2181 if (dcpu->dtdsc_dirty != NULL && 2182 nstate == DTRACE_DSTATE_EMPTY) 2183 nstate = DTRACE_DSTATE_DIRTY; 2184 2185 if (dcpu->dtdsc_rinsing != NULL) 2186 nstate = DTRACE_DSTATE_RINSING; 2187 2188 dcpu = &dstate->dtds_percpu[cpu]; 2189 2190 if (cpu != me) 2191 goto retry; 2192 2193 (void) dtrace_cas32(sp, 2194 DTRACE_DSTATE_CLEAN, nstate); 2195 2196 /* 2197 * To increment the correct bean 2198 * counter, take another lap. 2199 */ 2200 goto retry; 2201 } 2202 2203 case DTRACE_DSTATE_DIRTY: 2204 dcpu->dtdsc_dirty_drops++; 2205 break; 2206 2207 case DTRACE_DSTATE_RINSING: 2208 dcpu->dtdsc_rinsing_drops++; 2209 break; 2210 2211 case DTRACE_DSTATE_EMPTY: 2212 dcpu->dtdsc_drops++; 2213 break; 2214 } 2215 2216 DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP); 2217 return (NULL); 2218 } 2219 2220 /* 2221 * The clean list appears to be non-empty. 
We want to 2222 * move the clean list to the free list; we start by 2223 * moving the clean pointer aside. 2224 */ 2225 if (dtrace_casptr(&dcpu->dtdsc_clean, 2226 clean, NULL) != clean) { 2227 /* 2228 * We are in one of two situations: 2229 * 2230 * (a) The clean list was switched to the 2231 * free list by another CPU. 2232 * 2233 * (b) The clean list was added to by the 2234 * cleansing cyclic. 2235 * 2236 * In either of these situations, we can 2237 * just reattempt the free list allocation. 2238 */ 2239 goto retry; 2240 } 2241 2242 ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE); 2243 2244 /* 2245 * Now we'll move the clean list to our free list. 2246 * It's impossible for this to fail: the only way 2247 * the free list can be updated is through this 2248 * code path, and only one CPU can own the clean list. 2249 * Thus, it would only be possible for this to fail if 2250 * this code were racing with dtrace_dynvar_clean(). 2251 * (That is, if dtrace_dynvar_clean() updated the clean 2252 * list, and we ended up racing to update the free 2253 * list.) This race is prevented by the dtrace_sync() 2254 * in dtrace_dynvar_clean() -- which flushes the 2255 * owners of the clean lists out before resetting 2256 * the clean lists. 2257 */ 2258 dcpu = &dstate->dtds_percpu[me]; 2259 rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean); 2260 ASSERT(rval == NULL); 2261 goto retry; 2262 } 2263 2264 dvar = free; 2265 new_free = dvar->dtdv_next; 2266 } while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free); 2267 2268 /* 2269 * We have now allocated a new chunk. We copy the tuple keys into the 2270 * tuple array and copy any referenced key data into the data space 2271 * following the tuple array. As we do this, we relocate dttk_value 2272 * in the final tuple to point to the key data address in the chunk. 
2273 */ 2274 kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys]; 2275 dvar->dtdv_data = (void *)(kdata + ksize); 2276 dvar->dtdv_tuple.dtt_nkeys = nkeys; 2277 2278 for (i = 0; i < nkeys; i++) { 2279 dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i]; 2280 size_t kesize = key[i].dttk_size; 2281 2282 if (kesize != 0) { 2283 dtrace_bcopy( 2284 (const void *)(uintptr_t)key[i].dttk_value, 2285 (void *)kdata, kesize); 2286 dkey->dttk_value = kdata; 2287 kdata += P2ROUNDUP(kesize, sizeof (uint64_t)); 2288 } else { 2289 dkey->dttk_value = key[i].dttk_value; 2290 } 2291 2292 dkey->dttk_size = kesize; 2293 } 2294 2295 ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE); 2296 dvar->dtdv_hashval = hashval; 2297 dvar->dtdv_next = start; 2298 2299 if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start) 2300 return (dvar); 2301 2302 /* 2303 * The cas has failed. Either another CPU is adding an element to 2304 * this hash chain, or another CPU is deleting an element from this 2305 * hash chain. The simplest way to deal with both of these cases 2306 * (though not necessarily the most efficient) is to free our 2307 * allocated block and re-attempt it all. Note that the free is 2308 * to the dirty list and _not_ to the free list. This is to prevent 2309 * races with allocators, above. 
2310 */ 2311 dvar->dtdv_hashval = DTRACE_DYNHASH_FREE; 2312 2313 dtrace_membar_producer(); 2314 2315 do { 2316 free = dcpu->dtdsc_dirty; 2317 dvar->dtdv_next = free; 2318 } while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free); 2319 2320 goto top; 2321 } 2322 2323 /*ARGSUSED*/ 2324 static void 2325 dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg) 2326 { 2327 if ((int64_t)nval < (int64_t)*oval) 2328 *oval = nval; 2329 } 2330 2331 /*ARGSUSED*/ 2332 static void 2333 dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg) 2334 { 2335 if ((int64_t)nval > (int64_t)*oval) 2336 *oval = nval; 2337 } 2338 2339 static void 2340 dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr) 2341 { 2342 int i, zero = DTRACE_QUANTIZE_ZEROBUCKET; 2343 int64_t val = (int64_t)nval; 2344 2345 if (val < 0) { 2346 for (i = 0; i < zero; i++) { 2347 if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) { 2348 quanta[i] += incr; 2349 return; 2350 } 2351 } 2352 } else { 2353 for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) { 2354 if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) { 2355 quanta[i - 1] += incr; 2356 return; 2357 } 2358 } 2359 2360 quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr; 2361 return; 2362 } 2363 2364 ASSERT(0); 2365 } 2366 2367 static void 2368 dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr) 2369 { 2370 uint64_t arg = *lquanta++; 2371 int32_t base = DTRACE_LQUANTIZE_BASE(arg); 2372 uint16_t step = DTRACE_LQUANTIZE_STEP(arg); 2373 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg); 2374 int32_t val = (int32_t)nval, level; 2375 2376 ASSERT(step != 0); 2377 ASSERT(levels != 0); 2378 2379 if (val < base) { 2380 /* 2381 * This is an underflow. 2382 */ 2383 lquanta[0] += incr; 2384 return; 2385 } 2386 2387 level = (val - base) / step; 2388 2389 if (level < levels) { 2390 lquanta[level + 1] += incr; 2391 return; 2392 } 2393 2394 /* 2395 * This is an overflow. 
2396 */ 2397 lquanta[levels + 1] += incr; 2398 } 2399 2400 static int 2401 dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low, 2402 uint16_t high, uint16_t nsteps, int64_t value) 2403 { 2404 int64_t this = 1, last, next; 2405 int base = 1, order; 2406 2407 ASSERT(factor <= nsteps); 2408 ASSERT(nsteps % factor == 0); 2409 2410 for (order = 0; order < low; order++) 2411 this *= factor; 2412 2413 /* 2414 * If our value is less than our factor taken to the power of the 2415 * low order of magnitude, it goes into the zeroth bucket. 2416 */ 2417 if (value < (last = this)) 2418 return (0); 2419 2420 for (this *= factor; order <= high; order++) { 2421 int nbuckets = this > nsteps ? nsteps : this; 2422 2423 if ((next = this * factor) < this) { 2424 /* 2425 * We should not generally get log/linear quantizations 2426 * with a high magnitude that allows 64-bits to 2427 * overflow, but we nonetheless protect against this 2428 * by explicitly checking for overflow, and clamping 2429 * our value accordingly. 2430 */ 2431 value = this - 1; 2432 } 2433 2434 if (value < this) { 2435 /* 2436 * If our value lies within this order of magnitude, 2437 * determine its position by taking the offset within 2438 * the order of magnitude, dividing by the bucket 2439 * width, and adding to our (accumulated) base. 2440 */ 2441 return (base + (value - last) / (this / nbuckets)); 2442 } 2443 2444 base += nbuckets - (nbuckets / factor); 2445 last = this; 2446 this = next; 2447 } 2448 2449 /* 2450 * Our value is greater than or equal to our factor taken to the 2451 * power of one plus the high magnitude -- return the top bucket. 
2452 */ 2453 return (base); 2454 } 2455 2456 static void 2457 dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr) 2458 { 2459 uint64_t arg = *llquanta++; 2460 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg); 2461 uint16_t low = DTRACE_LLQUANTIZE_LOW(arg); 2462 uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg); 2463 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg); 2464 2465 llquanta[dtrace_aggregate_llquantize_bucket(factor, 2466 low, high, nsteps, nval)] += incr; 2467 } 2468 2469 /*ARGSUSED*/ 2470 static void 2471 dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg) 2472 { 2473 data[0]++; 2474 data[1] += nval; 2475 } 2476 2477 /*ARGSUSED*/ 2478 static void 2479 dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg) 2480 { 2481 int64_t snval = (int64_t)nval; 2482 uint64_t tmp[2]; 2483 2484 data[0]++; 2485 data[1] += nval; 2486 2487 /* 2488 * What we want to say here is: 2489 * 2490 * data[2] += nval * nval; 2491 * 2492 * But given that nval is 64-bit, we could easily overflow, so 2493 * we do this as 128-bit arithmetic. 2494 */ 2495 if (snval < 0) 2496 snval = -snval; 2497 2498 dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp); 2499 dtrace_add_128(data + 2, tmp, data + 2); 2500 } 2501 2502 /*ARGSUSED*/ 2503 static void 2504 dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg) 2505 { 2506 *oval = *oval + 1; 2507 } 2508 2509 /*ARGSUSED*/ 2510 static void 2511 dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg) 2512 { 2513 *oval += nval; 2514 } 2515 2516 /* 2517 * Aggregate given the tuple in the principal data buffer, and the aggregating 2518 * action denoted by the specified dtrace_aggregation_t. The aggregation 2519 * buffer is specified as the buf parameter. This routine does not return 2520 * failure; if there is no space in the aggregation buffer, the data will be 2521 * dropped, and a corresponding counter incremented. 
2522 */ 2523 static void 2524 dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf, 2525 intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg) 2526 { 2527 dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec; 2528 uint32_t i, ndx, size, fsize; 2529 uint32_t align = sizeof (uint64_t) - 1; 2530 dtrace_aggbuffer_t *agb; 2531 dtrace_aggkey_t *key; 2532 uint32_t hashval = 0, limit, isstr; 2533 caddr_t tomax, data, kdata; 2534 dtrace_actkind_t action; 2535 dtrace_action_t *act; 2536 uintptr_t offs; 2537 2538 if (buf == NULL) 2539 return; 2540 2541 if (!agg->dtag_hasarg) { 2542 /* 2543 * Currently, only quantize() and lquantize() take additional 2544 * arguments, and they have the same semantics: an increment 2545 * value that defaults to 1 when not present. If additional 2546 * aggregating actions take arguments, the setting of the 2547 * default argument value will presumably have to become more 2548 * sophisticated... 2549 */ 2550 arg = 1; 2551 } 2552 2553 action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION; 2554 size = rec->dtrd_offset - agg->dtag_base; 2555 fsize = size + rec->dtrd_size; 2556 2557 ASSERT(dbuf->dtb_tomax != NULL); 2558 data = dbuf->dtb_tomax + offset + agg->dtag_base; 2559 2560 if ((tomax = buf->dtb_tomax) == NULL) { 2561 dtrace_buffer_drop(buf); 2562 return; 2563 } 2564 2565 /* 2566 * The metastructure is always at the bottom of the buffer. 2567 */ 2568 agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size - 2569 sizeof (dtrace_aggbuffer_t)); 2570 2571 if (buf->dtb_offset == 0) { 2572 /* 2573 * We just kludge up approximately 1/8th of the size to be 2574 * buckets. If this guess ends up being routinely 2575 * off-the-mark, we may need to dynamically readjust this 2576 * based on past performance. 
2577 */ 2578 uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t); 2579 2580 if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) < 2581 (uintptr_t)tomax || hashsize == 0) { 2582 /* 2583 * We've been given a ludicrously small buffer; 2584 * increment our drop count and leave. 2585 */ 2586 dtrace_buffer_drop(buf); 2587 return; 2588 } 2589 2590 /* 2591 * And now, a pathetic attempt to try to get a an odd (or 2592 * perchance, a prime) hash size for better hash distribution. 2593 */ 2594 if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3)) 2595 hashsize -= DTRACE_AGGHASHSIZE_SLEW; 2596 2597 agb->dtagb_hashsize = hashsize; 2598 agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb - 2599 agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *)); 2600 agb->dtagb_free = (uintptr_t)agb->dtagb_hash; 2601 2602 for (i = 0; i < agb->dtagb_hashsize; i++) 2603 agb->dtagb_hash[i] = NULL; 2604 } 2605 2606 ASSERT(agg->dtag_first != NULL); 2607 ASSERT(agg->dtag_first->dta_intuple); 2608 2609 /* 2610 * Calculate the hash value based on the key. Note that we _don't_ 2611 * include the aggid in the hashing (but we will store it as part of 2612 * the key). The hashing algorithm is Bob Jenkins' "One-at-a-time" 2613 * algorithm: a simple, quick algorithm that has no known funnels, and 2614 * gets good distribution in practice. The efficacy of the hashing 2615 * algorithm (and a comparison with other algorithms) may be found by 2616 * running the ::dtrace_aggstat MDB dcmd. 
2617 */ 2618 for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) { 2619 i = act->dta_rec.dtrd_offset - agg->dtag_base; 2620 limit = i + act->dta_rec.dtrd_size; 2621 ASSERT(limit <= size); 2622 isstr = DTRACEACT_ISSTRING(act); 2623 2624 for (; i < limit; i++) { 2625 hashval += data[i]; 2626 hashval += (hashval << 10); 2627 hashval ^= (hashval >> 6); 2628 2629 if (isstr && data[i] == '\0') 2630 break; 2631 } 2632 } 2633 2634 hashval += (hashval << 3); 2635 hashval ^= (hashval >> 11); 2636 hashval += (hashval << 15); 2637 2638 /* 2639 * Yes, the divide here is expensive -- but it's generally the least 2640 * of the performance issues given the amount of data that we iterate 2641 * over to compute hash values, compare data, etc. 2642 */ 2643 ndx = hashval % agb->dtagb_hashsize; 2644 2645 for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) { 2646 ASSERT((caddr_t)key >= tomax); 2647 ASSERT((caddr_t)key < tomax + buf->dtb_size); 2648 2649 if (hashval != key->dtak_hashval || key->dtak_size != size) 2650 continue; 2651 2652 kdata = key->dtak_data; 2653 ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size); 2654 2655 for (act = agg->dtag_first; act->dta_intuple; 2656 act = act->dta_next) { 2657 i = act->dta_rec.dtrd_offset - agg->dtag_base; 2658 limit = i + act->dta_rec.dtrd_size; 2659 ASSERT(limit <= size); 2660 isstr = DTRACEACT_ISSTRING(act); 2661 2662 for (; i < limit; i++) { 2663 if (kdata[i] != data[i]) 2664 goto next; 2665 2666 if (isstr && data[i] == '\0') 2667 break; 2668 } 2669 } 2670 2671 if (action != key->dtak_action) { 2672 /* 2673 * We are aggregating on the same value in the same 2674 * aggregation with two different aggregating actions. 2675 * (This should have been picked up in the compiler, 2676 * so we may be dealing with errant or devious DIF.) 2677 * This is an error condition; we indicate as much, 2678 * and return. 
2679 */ 2680 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 2681 return; 2682 } 2683 2684 /* 2685 * This is a hit: we need to apply the aggregator to 2686 * the value at this key. 2687 */ 2688 agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg); 2689 return; 2690 next: 2691 continue; 2692 } 2693 2694 /* 2695 * We didn't find it. We need to allocate some zero-filled space, 2696 * link it into the hash table appropriately, and apply the aggregator 2697 * to the (zero-filled) value. 2698 */ 2699 offs = buf->dtb_offset; 2700 while (offs & (align - 1)) 2701 offs += sizeof (uint32_t); 2702 2703 /* 2704 * If we don't have enough room to both allocate a new key _and_ 2705 * its associated data, increment the drop count and return. 2706 */ 2707 if ((uintptr_t)tomax + offs + fsize > 2708 agb->dtagb_free - sizeof (dtrace_aggkey_t)) { 2709 dtrace_buffer_drop(buf); 2710 return; 2711 } 2712 2713 /*CONSTCOND*/ 2714 ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1))); 2715 key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t)); 2716 agb->dtagb_free -= sizeof (dtrace_aggkey_t); 2717 2718 key->dtak_data = kdata = tomax + offs; 2719 buf->dtb_offset = offs + fsize; 2720 2721 /* 2722 * Now copy the data across. 2723 */ 2724 *((dtrace_aggid_t *)kdata) = agg->dtag_id; 2725 2726 for (i = sizeof (dtrace_aggid_t); i < size; i++) 2727 kdata[i] = data[i]; 2728 2729 /* 2730 * Because strings are not zeroed out by default, we need to iterate 2731 * looking for actions that store strings, and we need to explicitly 2732 * pad these strings out with zeroes. 
2733 */ 2734 for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) { 2735 int nul; 2736 2737 if (!DTRACEACT_ISSTRING(act)) 2738 continue; 2739 2740 i = act->dta_rec.dtrd_offset - agg->dtag_base; 2741 limit = i + act->dta_rec.dtrd_size; 2742 ASSERT(limit <= size); 2743 2744 for (nul = 0; i < limit; i++) { 2745 if (nul) { 2746 kdata[i] = '\0'; 2747 continue; 2748 } 2749 2750 if (data[i] != '\0') 2751 continue; 2752 2753 nul = 1; 2754 } 2755 } 2756 2757 for (i = size; i < fsize; i++) 2758 kdata[i] = 0; 2759 2760 key->dtak_hashval = hashval; 2761 key->dtak_size = size; 2762 key->dtak_action = action; 2763 key->dtak_next = agb->dtagb_hash[ndx]; 2764 agb->dtagb_hash[ndx] = key; 2765 2766 /* 2767 * Finally, apply the aggregator. 2768 */ 2769 *((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial; 2770 agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg); 2771 } 2772 2773 /* 2774 * Given consumer state, this routine finds a speculation in the INACTIVE 2775 * state and transitions it into the ACTIVE state. If there is no speculation 2776 * in the INACTIVE state, 0 is returned. In this case, no error counter is 2777 * incremented -- it is up to the caller to take appropriate action. 
2778 */ 2779 static int 2780 dtrace_speculation(dtrace_state_t *state) 2781 { 2782 int i = 0; 2783 dtrace_speculation_state_t current; 2784 uint32_t *stat = &state->dts_speculations_unavail, count; 2785 2786 while (i < state->dts_nspeculations) { 2787 dtrace_speculation_t *spec = &state->dts_speculations[i]; 2788 2789 current = spec->dtsp_state; 2790 2791 if (current != DTRACESPEC_INACTIVE) { 2792 if (current == DTRACESPEC_COMMITTINGMANY || 2793 current == DTRACESPEC_COMMITTING || 2794 current == DTRACESPEC_DISCARDING) 2795 stat = &state->dts_speculations_busy; 2796 i++; 2797 continue; 2798 } 2799 2800 if (dtrace_cas32((uint32_t *)&spec->dtsp_state, 2801 current, DTRACESPEC_ACTIVE) == current) 2802 return (i + 1); 2803 } 2804 2805 /* 2806 * We couldn't find a speculation. If we found as much as a single 2807 * busy speculation buffer, we'll attribute this failure as "busy" 2808 * instead of "unavail". 2809 */ 2810 do { 2811 count = *stat; 2812 } while (dtrace_cas32(stat, count, count + 1) != count); 2813 2814 return (0); 2815 } 2816 2817 /* 2818 * This routine commits an active speculation. If the specified speculation 2819 * is not in a valid state to perform a commit(), this routine will silently do 2820 * nothing. 
The state of the specified speculation is transitioned according
 * to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_buffer_t *src, *dest;
	uintptr_t daddr, saddr, dlimit, slimit;
	dtrace_speculation_state_t current, new = 0;
	intptr_t offs;
	uint64_t timestamp;

	/*
	 * Speculation identifiers are one-based; zero denotes "no
	 * speculation" and is silently ignored, while an identifier beyond
	 * the number of speculations is flagged as an illegal operation.
	 */
	if (which == 0)
		return;

	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	/*
	 * src is this CPU's buffer for the speculation; dest is this CPU's
	 * buffer in the consumer state.
	 */
	spec = &state->dts_speculations[which - 1];
	src = &spec->dtsp_buffer[cpu];
	dest = &state->dts_buffer[cpu];

	do {
		current = spec->dtsp_state;

		/*
		 * If the speculation is already COMMITTINGMANY, there is no
		 * state transition to perform:  we leave the loop without
		 * attempting the compare-and-swap and go on to commit this
		 * CPU's buffer.
		 */
		if (current == DTRACESPEC_COMMITTINGMANY)
			break;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_COMMITTING:
			/*
			 * This is only possible if we are (a) commit()'ing
			 * without having done a prior speculate() on this CPU
			 * and (b) racing with another commit() on a different
			 * CPU. There's nothing to do -- we just assert that
			 * our offset is 0.
			 */
			ASSERT(src->dtb_offset == 0);
			return;

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_COMMITTING;
			break;

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is active on one CPU. If our
			 * buffer offset is non-zero, we know that the one CPU
			 * must be us. Otherwise, we are committing on a
			 * different CPU from the speculate(), and we must
			 * rely on being asynchronously cleaned.
			 */
			if (src->dtb_offset != 0) {
				new = DTRACESPEC_COMMITTING;
				break;
			}
			/*FALLTHROUGH*/

		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_COMMITTINGMANY;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	/*
	 * We have set the state to indicate that we are committing this
	 * speculation. Now reserve the necessary space in the destination
	 * buffer.
	 */
	if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
	    sizeof (uint64_t), state, NULL)) < 0) {
		dtrace_buffer_drop(dest);
		goto out;
	}

	/*
	 * We have sufficient space to copy the speculative buffer into the
	 * primary buffer. First, modify the speculative buffer, filling
	 * in the timestamp of all entries with the current time. The data
	 * must have the commit() time rather than the time it was traced,
	 * so that all entries in the primary buffer are in timestamp order.
	 */
	timestamp = dtrace_gethrtime();
	saddr = (uintptr_t)src->dtb_tomax;
	slimit = saddr + src->dtb_offset;
	while (saddr < slimit) {
		size_t size;
		dtrace_rechdr_t *dtrh = (dtrace_rechdr_t *)saddr;

		/*
		 * A DTRACE_EPIDNONE identifier is not a record; step over
		 * just the identifier and look again.
		 */
		if (dtrh->dtrh_epid == DTRACE_EPIDNONE) {
			saddr += sizeof (dtrace_epid_t);
			continue;
		}
		ASSERT3U(dtrh->dtrh_epid, <=, state->dts_necbs);
		size = state->dts_ecbs[dtrh->dtrh_epid - 1]->dte_size;

		ASSERT3U(saddr + size, <=, slimit);
		ASSERT3U(size, >=, sizeof (dtrace_rechdr_t));
		ASSERT3U(DTRACE_RECORD_LOAD_TIMESTAMP(dtrh), ==, UINT64_MAX);

		DTRACE_RECORD_STORE_TIMESTAMP(dtrh, timestamp);

		saddr += size;
	}

	/*
	 * Copy the buffer across. (Note that this is a
	 * highly suboptimal bcopy(); in the unlikely event that this becomes
	 * a serious performance issue, a high-performance DTrace-specific
	 * bcopy() should obviously be invented.)
	 */
	daddr = (uintptr_t)dest->dtb_tomax + offs;
	dlimit = daddr + src->dtb_offset;
	saddr = (uintptr_t)src->dtb_tomax;

	/*
	 * First, the aligned portion.
	 */
	while (dlimit - daddr >= sizeof (uint64_t)) {
		*((uint64_t *)daddr) = *((uint64_t *)saddr);

		daddr += sizeof (uint64_t);
		saddr += sizeof (uint64_t);
	}

	/*
	 * Now any left-over bit...
	 */
	while (dlimit - daddr)
		*((uint8_t *)daddr++) = *((uint8_t *)saddr++);

	/*
	 * Finally, commit the reserved space in the destination buffer.
	 */
	dest->dtb_offset = offs + src->dtb_offset;

out:
	/*
	 * If we're lucky enough to be the only active CPU on this speculation
	 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
	 */
	if (current == DTRACESPEC_ACTIVE ||
	    (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
		uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);

		ASSERT(rval == DTRACESPEC_COMMITTING);
	}

	/*
	 * Whether or not the copy happened, the speculative buffer is now
	 * empty; its drop count is folded into dtb_xamot_drops before being
	 * reset.
	 */
	src->dtb_offset = 0;
	src->dtb_xamot_drops += src->dtb_drops;
	src->dtb_drops = 0;
}

/*
 * This routine discards an active speculation. If the specified speculation
 * is not in a valid state to perform a discard(), this routine will silently
 * do nothing.
The state of the specified speculation is transitioned
 * according to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_buffer_t *specbuf;
	dtrace_speculation_state_t seen, target = 0;

	/*
	 * Identifier zero is silently ignored; an identifier beyond the
	 * number of speculations is flagged as an illegal operation.
	 */
	if (which == 0)
		return;

	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	specbuf = &spec->dtsp_buffer[cpu];

	/*
	 * Pick the target state from the state we observe, and retry until
	 * the transition is installed atomically.
	 */
	for (;;) {
		seen = spec->dtsp_state;

		switch (seen) {
		case DTRACESPEC_ACTIVEONE:
			/*
			 * With a non-zero offset in this CPU's buffer the
			 * speculation goes straight to INACTIVE; otherwise
			 * it is marked DISCARDING.
			 */
			target = (specbuf->dtb_offset != 0) ?
			    DTRACESPEC_INACTIVE : DTRACESPEC_DISCARDING;
			break;

		case DTRACESPEC_ACTIVE:
		case DTRACESPEC_ACTIVEMANY:
			target = DTRACESPEC_DISCARDING;
			break;

		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_COMMITTING:
		case DTRACESPEC_DISCARDING:
			return;

		default:
			ASSERT(0);
		}

		if (dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    seen, target) == seen)
			break;
	}

	specbuf->dtb_offset = 0;
	specbuf->dtb_drops = 0;
}

/*
 * Note: not called from probe context. This function is called
 * asynchronously from cross call context to clean any speculations that are
 * in the COMMITTINGMANY or DISCARDING states. These speculations may not be
 * transitioned back to the INACTIVE state until all CPUs have cleaned the
 * speculation.
3050 */ 3051 static void 3052 dtrace_speculation_clean_here(dtrace_state_t *state) 3053 { 3054 dtrace_icookie_t cookie; 3055 processorid_t cpu = curcpu; 3056 dtrace_buffer_t *dest = &state->dts_buffer[cpu]; 3057 dtrace_specid_t i; 3058 3059 cookie = dtrace_interrupt_disable(); 3060 3061 if (dest->dtb_tomax == NULL) { 3062 dtrace_interrupt_enable(cookie); 3063 return; 3064 } 3065 3066 for (i = 0; i < state->dts_nspeculations; i++) { 3067 dtrace_speculation_t *spec = &state->dts_speculations[i]; 3068 dtrace_buffer_t *src = &spec->dtsp_buffer[cpu]; 3069 3070 if (src->dtb_tomax == NULL) 3071 continue; 3072 3073 if (spec->dtsp_state == DTRACESPEC_DISCARDING) { 3074 src->dtb_offset = 0; 3075 continue; 3076 } 3077 3078 if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY) 3079 continue; 3080 3081 if (src->dtb_offset == 0) 3082 continue; 3083 3084 dtrace_speculation_commit(state, cpu, i + 1); 3085 } 3086 3087 dtrace_interrupt_enable(cookie); 3088 } 3089 3090 /* 3091 * Note: not called from probe context. This function is called 3092 * asynchronously (and at a regular interval) to clean any speculations that 3093 * are in the COMMITTINGMANY or DISCARDING states. If it discovers that there 3094 * is work to be done, it cross calls all CPUs to perform that work; 3095 * COMMITMANY and DISCARDING speculations may not be transitioned back to the 3096 * INACTIVE state until they have been cleaned by all CPUs. 
3097 */ 3098 static void 3099 dtrace_speculation_clean(dtrace_state_t *state) 3100 { 3101 int work = 0, rv; 3102 dtrace_specid_t i; 3103 3104 for (i = 0; i < state->dts_nspeculations; i++) { 3105 dtrace_speculation_t *spec = &state->dts_speculations[i]; 3106 3107 ASSERT(!spec->dtsp_cleaning); 3108 3109 if (spec->dtsp_state != DTRACESPEC_DISCARDING && 3110 spec->dtsp_state != DTRACESPEC_COMMITTINGMANY) 3111 continue; 3112 3113 work++; 3114 spec->dtsp_cleaning = 1; 3115 } 3116 3117 if (!work) 3118 return; 3119 3120 dtrace_xcall(DTRACE_CPUALL, 3121 (dtrace_xcall_t)dtrace_speculation_clean_here, state); 3122 3123 /* 3124 * We now know that all CPUs have committed or discarded their 3125 * speculation buffers, as appropriate. We can now set the state 3126 * to inactive. 3127 */ 3128 for (i = 0; i < state->dts_nspeculations; i++) { 3129 dtrace_speculation_t *spec = &state->dts_speculations[i]; 3130 dtrace_speculation_state_t current, new; 3131 3132 if (!spec->dtsp_cleaning) 3133 continue; 3134 3135 current = spec->dtsp_state; 3136 ASSERT(current == DTRACESPEC_DISCARDING || 3137 current == DTRACESPEC_COMMITTINGMANY); 3138 3139 new = DTRACESPEC_INACTIVE; 3140 3141 rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new); 3142 ASSERT(rv == current); 3143 spec->dtsp_cleaning = 0; 3144 } 3145 } 3146 3147 /* 3148 * Called as part of a speculate() to get the speculative buffer associated 3149 * with a given speculation. Returns NULL if the specified speculation is not 3150 * in an ACTIVE state. If the speculation is in the ACTIVEONE state -- and 3151 * the active CPU is not the specified CPU -- the speculation will be 3152 * atomically transitioned into the ACTIVEMANY state. 
3153 */ 3154 static dtrace_buffer_t * 3155 dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid, 3156 dtrace_specid_t which) 3157 { 3158 dtrace_speculation_t *spec; 3159 dtrace_speculation_state_t current, new = 0; 3160 dtrace_buffer_t *buf; 3161 3162 if (which == 0) 3163 return (NULL); 3164 3165 if (which > state->dts_nspeculations) { 3166 cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; 3167 return (NULL); 3168 } 3169 3170 spec = &state->dts_speculations[which - 1]; 3171 buf = &spec->dtsp_buffer[cpuid]; 3172 3173 do { 3174 current = spec->dtsp_state; 3175 3176 switch (current) { 3177 case DTRACESPEC_INACTIVE: 3178 case DTRACESPEC_COMMITTINGMANY: 3179 case DTRACESPEC_DISCARDING: 3180 return (NULL); 3181 3182 case DTRACESPEC_COMMITTING: 3183 ASSERT(buf->dtb_offset == 0); 3184 return (NULL); 3185 3186 case DTRACESPEC_ACTIVEONE: 3187 /* 3188 * This speculation is currently active on one CPU. 3189 * Check the offset in the buffer; if it's non-zero, 3190 * that CPU must be us (and we leave the state alone). 3191 * If it's zero, assume that we're starting on a new 3192 * CPU -- and change the state to indicate that the 3193 * speculation is active on more than one CPU. 3194 */ 3195 if (buf->dtb_offset != 0) 3196 return (buf); 3197 3198 new = DTRACESPEC_ACTIVEMANY; 3199 break; 3200 3201 case DTRACESPEC_ACTIVEMANY: 3202 return (buf); 3203 3204 case DTRACESPEC_ACTIVE: 3205 new = DTRACESPEC_ACTIVEONE; 3206 break; 3207 3208 default: 3209 ASSERT(0); 3210 } 3211 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, 3212 current, new) != current); 3213 3214 ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY); 3215 return (buf); 3216 } 3217 3218 /* 3219 * Return a string. In the event that the user lacks the privilege to access 3220 * arbitrary kernel memory, we copy the string out to scratch memory so that we 3221 * don't fail access checking. 
3222 * 3223 * dtrace_dif_variable() uses this routine as a helper for various 3224 * builtin values such as 'execname' and 'probefunc.' 3225 */ 3226 uintptr_t 3227 dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state, 3228 dtrace_mstate_t *mstate) 3229 { 3230 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 3231 uintptr_t ret; 3232 size_t strsz; 3233 3234 /* 3235 * The easy case: this probe is allowed to read all of memory, so 3236 * we can just return this as a vanilla pointer. 3237 */ 3238 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) 3239 return (addr); 3240 3241 /* 3242 * This is the tougher case: we copy the string in question from 3243 * kernel memory into scratch memory and return it that way: this 3244 * ensures that we won't trip up when access checking tests the 3245 * BYREF return value. 3246 */ 3247 strsz = dtrace_strlen((char *)addr, size) + 1; 3248 3249 if (mstate->dtms_scratch_ptr + strsz > 3250 mstate->dtms_scratch_base + mstate->dtms_scratch_size) { 3251 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3252 return (0); 3253 } 3254 3255 dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr, 3256 strsz); 3257 ret = mstate->dtms_scratch_ptr; 3258 mstate->dtms_scratch_ptr += strsz; 3259 return (ret); 3260 } 3261 3262 /* 3263 * Return a string from a memoy address which is known to have one or 3264 * more concatenated, individually zero terminated, sub-strings. 3265 * In the event that the user lacks the privilege to access 3266 * arbitrary kernel memory, we copy the string out to scratch memory so that we 3267 * don't fail access checking. 3268 * 3269 * dtrace_dif_variable() uses this routine as a helper for various 3270 * builtin values such as 'execargs'. 
3271 */ 3272 static uintptr_t 3273 dtrace_dif_varstrz(uintptr_t addr, size_t strsz, dtrace_state_t *state, 3274 dtrace_mstate_t *mstate) 3275 { 3276 char *p; 3277 size_t i; 3278 uintptr_t ret; 3279 3280 if (mstate->dtms_scratch_ptr + strsz > 3281 mstate->dtms_scratch_base + mstate->dtms_scratch_size) { 3282 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3283 return (0); 3284 } 3285 3286 dtrace_bcopy((const void *)addr, (void *)mstate->dtms_scratch_ptr, 3287 strsz); 3288 3289 /* Replace sub-string termination characters with a space. */ 3290 for (p = (char *) mstate->dtms_scratch_ptr, i = 0; i < strsz - 1; 3291 p++, i++) 3292 if (*p == '\0') 3293 *p = ' '; 3294 3295 ret = mstate->dtms_scratch_ptr; 3296 mstate->dtms_scratch_ptr += strsz; 3297 return (ret); 3298 } 3299 3300 /* 3301 * This function implements the DIF emulator's variable lookups. The emulator 3302 * passes a reserved variable identifier and optional built-in array index. 3303 */ 3304 static uint64_t 3305 dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, 3306 uint64_t ndx) 3307 { 3308 /* 3309 * If we're accessing one of the uncached arguments, we'll turn this 3310 * into a reference in the args array. 
3311 */ 3312 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) { 3313 ndx = v - DIF_VAR_ARG0; 3314 v = DIF_VAR_ARGS; 3315 } 3316 3317 switch (v) { 3318 case DIF_VAR_ARGS: 3319 ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS); 3320 if (ndx >= sizeof (mstate->dtms_arg) / 3321 sizeof (mstate->dtms_arg[0])) { 3322 int aframes = mstate->dtms_probe->dtpr_aframes + 2; 3323 dtrace_provider_t *pv; 3324 uint64_t val; 3325 3326 pv = mstate->dtms_probe->dtpr_provider; 3327 if (pv->dtpv_pops.dtps_getargval != NULL) 3328 val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg, 3329 mstate->dtms_probe->dtpr_id, 3330 mstate->dtms_probe->dtpr_arg, ndx, aframes); 3331 else 3332 val = dtrace_getarg(ndx, aframes); 3333 3334 /* 3335 * This is regrettably required to keep the compiler 3336 * from tail-optimizing the call to dtrace_getarg(). 3337 * The condition always evaluates to true, but the 3338 * compiler has no way of figuring that out a priori. 3339 * (None of this would be necessary if the compiler 3340 * could be relied upon to _always_ tail-optimize 3341 * the call to dtrace_getarg() -- but it can't.) 
3342 */ 3343 if (mstate->dtms_probe != NULL) 3344 return (val); 3345 3346 ASSERT(0); 3347 } 3348 3349 return (mstate->dtms_arg[ndx]); 3350 3351 #ifdef illumos 3352 case DIF_VAR_UREGS: { 3353 klwp_t *lwp; 3354 3355 if (!dtrace_priv_proc(state)) 3356 return (0); 3357 3358 if ((lwp = curthread->t_lwp) == NULL) { 3359 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 3360 cpu_core[curcpu].cpuc_dtrace_illval = NULL; 3361 return (0); 3362 } 3363 3364 return (dtrace_getreg(lwp->lwp_regs, ndx)); 3365 return (0); 3366 } 3367 #else 3368 case DIF_VAR_UREGS: { 3369 struct trapframe *tframe; 3370 3371 if (!dtrace_priv_proc(state)) 3372 return (0); 3373 3374 if ((tframe = curthread->td_frame) == NULL) { 3375 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 3376 cpu_core[curcpu].cpuc_dtrace_illval = 0; 3377 return (0); 3378 } 3379 3380 return (dtrace_getreg(tframe, ndx)); 3381 } 3382 #endif 3383 3384 case DIF_VAR_CURTHREAD: 3385 if (!dtrace_priv_proc(state)) 3386 return (0); 3387 return ((uint64_t)(uintptr_t)curthread); 3388 3389 case DIF_VAR_TIMESTAMP: 3390 if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) { 3391 mstate->dtms_timestamp = dtrace_gethrtime(); 3392 mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP; 3393 } 3394 return (mstate->dtms_timestamp); 3395 3396 case DIF_VAR_VTIMESTAMP: 3397 ASSERT(dtrace_vtime_references != 0); 3398 return (curthread->t_dtrace_vtime); 3399 3400 case DIF_VAR_WALLTIMESTAMP: 3401 if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) { 3402 mstate->dtms_walltimestamp = dtrace_gethrestime(); 3403 mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP; 3404 } 3405 return (mstate->dtms_walltimestamp); 3406 3407 #ifdef illumos 3408 case DIF_VAR_IPL: 3409 if (!dtrace_priv_kernel(state)) 3410 return (0); 3411 if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) { 3412 mstate->dtms_ipl = dtrace_getipl(); 3413 mstate->dtms_present |= DTRACE_MSTATE_IPL; 3414 } 3415 return (mstate->dtms_ipl); 3416 #endif 3417 3418 case DIF_VAR_EPID: 3419 ASSERT(mstate->dtms_present & 
DTRACE_MSTATE_EPID); 3420 return (mstate->dtms_epid); 3421 3422 case DIF_VAR_ID: 3423 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3424 return (mstate->dtms_probe->dtpr_id); 3425 3426 case DIF_VAR_STACKDEPTH: 3427 if (!dtrace_priv_kernel(state)) 3428 return (0); 3429 if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) { 3430 int aframes = mstate->dtms_probe->dtpr_aframes + 2; 3431 3432 mstate->dtms_stackdepth = dtrace_getstackdepth(aframes); 3433 mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH; 3434 } 3435 return (mstate->dtms_stackdepth); 3436 3437 case DIF_VAR_USTACKDEPTH: 3438 if (!dtrace_priv_proc(state)) 3439 return (0); 3440 if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) { 3441 /* 3442 * See comment in DIF_VAR_PID. 3443 */ 3444 if (DTRACE_ANCHORED(mstate->dtms_probe) && 3445 CPU_ON_INTR(CPU)) { 3446 mstate->dtms_ustackdepth = 0; 3447 } else { 3448 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3449 mstate->dtms_ustackdepth = 3450 dtrace_getustackdepth(); 3451 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3452 } 3453 mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH; 3454 } 3455 return (mstate->dtms_ustackdepth); 3456 3457 case DIF_VAR_CALLER: 3458 if (!dtrace_priv_kernel(state)) 3459 return (0); 3460 if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) { 3461 int aframes = mstate->dtms_probe->dtpr_aframes + 2; 3462 3463 if (!DTRACE_ANCHORED(mstate->dtms_probe)) { 3464 /* 3465 * If this is an unanchored probe, we are 3466 * required to go through the slow path: 3467 * dtrace_caller() only guarantees correct 3468 * results for anchored probes. 3469 */ 3470 pc_t caller[2] = {0, 0}; 3471 3472 dtrace_getpcstack(caller, 2, aframes, 3473 (uint32_t *)(uintptr_t)mstate->dtms_arg[0]); 3474 mstate->dtms_caller = caller[1]; 3475 } else if ((mstate->dtms_caller = 3476 dtrace_caller(aframes)) == -1) { 3477 /* 3478 * We have failed to do this the quick way; 3479 * we must resort to the slower approach of 3480 * calling dtrace_getpcstack(). 
3481 */ 3482 pc_t caller = 0; 3483 3484 dtrace_getpcstack(&caller, 1, aframes, NULL); 3485 mstate->dtms_caller = caller; 3486 } 3487 3488 mstate->dtms_present |= DTRACE_MSTATE_CALLER; 3489 } 3490 return (mstate->dtms_caller); 3491 3492 case DIF_VAR_UCALLER: 3493 if (!dtrace_priv_proc(state)) 3494 return (0); 3495 3496 if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) { 3497 uint64_t ustack[3]; 3498 3499 /* 3500 * dtrace_getupcstack() fills in the first uint64_t 3501 * with the current PID. The second uint64_t will 3502 * be the program counter at user-level. The third 3503 * uint64_t will contain the caller, which is what 3504 * we're after. 3505 */ 3506 ustack[2] = 0; 3507 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3508 dtrace_getupcstack(ustack, 3); 3509 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3510 mstate->dtms_ucaller = ustack[2]; 3511 mstate->dtms_present |= DTRACE_MSTATE_UCALLER; 3512 } 3513 3514 return (mstate->dtms_ucaller); 3515 3516 case DIF_VAR_PROBEPROV: 3517 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3518 return (dtrace_dif_varstr( 3519 (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name, 3520 state, mstate)); 3521 3522 case DIF_VAR_PROBEMOD: 3523 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3524 return (dtrace_dif_varstr( 3525 (uintptr_t)mstate->dtms_probe->dtpr_mod, 3526 state, mstate)); 3527 3528 case DIF_VAR_PROBEFUNC: 3529 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3530 return (dtrace_dif_varstr( 3531 (uintptr_t)mstate->dtms_probe->dtpr_func, 3532 state, mstate)); 3533 3534 case DIF_VAR_PROBENAME: 3535 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3536 return (dtrace_dif_varstr( 3537 (uintptr_t)mstate->dtms_probe->dtpr_name, 3538 state, mstate)); 3539 3540 case DIF_VAR_PID: 3541 if (!dtrace_priv_proc(state)) 3542 return (0); 3543 3544 #ifdef illumos 3545 /* 3546 * Note that we are assuming that an unanchored probe is 3547 * always due to a high-level interrupt. 
(And we're assuming 3548 * that there is only a single high level interrupt.) 3549 */ 3550 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3551 return (pid0.pid_id); 3552 3553 /* 3554 * It is always safe to dereference one's own t_procp pointer: 3555 * it always points to a valid, allocated proc structure. 3556 * Further, it is always safe to dereference the p_pidp member 3557 * of one's own proc structure. (These are truisms becuase 3558 * threads and processes don't clean up their own state -- 3559 * they leave that task to whomever reaps them.) 3560 */ 3561 return ((uint64_t)curthread->t_procp->p_pidp->pid_id); 3562 #else 3563 return ((uint64_t)curproc->p_pid); 3564 #endif 3565 3566 case DIF_VAR_PPID: 3567 if (!dtrace_priv_proc(state)) 3568 return (0); 3569 3570 #ifdef illumos 3571 /* 3572 * See comment in DIF_VAR_PID. 3573 */ 3574 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3575 return (pid0.pid_id); 3576 3577 /* 3578 * It is always safe to dereference one's own t_procp pointer: 3579 * it always points to a valid, allocated proc structure. 3580 * (This is true because threads don't clean up their own 3581 * state -- they leave that task to whomever reaps them.) 3582 */ 3583 return ((uint64_t)curthread->t_procp->p_ppid); 3584 #else 3585 if (curproc->p_pid == proc0.p_pid) 3586 return (curproc->p_pid); 3587 else 3588 return (curproc->p_pptr->p_pid); 3589 #endif 3590 3591 case DIF_VAR_TID: 3592 #ifdef illumos 3593 /* 3594 * See comment in DIF_VAR_PID. 
3595 */ 3596 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3597 return (0); 3598 #endif 3599 3600 return ((uint64_t)curthread->t_tid); 3601 3602 case DIF_VAR_EXECARGS: { 3603 struct pargs *p_args = curthread->td_proc->p_args; 3604 3605 if (p_args == NULL) 3606 return(0); 3607 3608 return (dtrace_dif_varstrz( 3609 (uintptr_t) p_args->ar_args, p_args->ar_length, state, mstate)); 3610 } 3611 3612 case DIF_VAR_EXECNAME: 3613 #ifdef illumos 3614 if (!dtrace_priv_proc(state)) 3615 return (0); 3616 3617 /* 3618 * See comment in DIF_VAR_PID. 3619 */ 3620 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3621 return ((uint64_t)(uintptr_t)p0.p_user.u_comm); 3622 3623 /* 3624 * It is always safe to dereference one's own t_procp pointer: 3625 * it always points to a valid, allocated proc structure. 3626 * (This is true because threads don't clean up their own 3627 * state -- they leave that task to whomever reaps them.) 3628 */ 3629 return (dtrace_dif_varstr( 3630 (uintptr_t)curthread->t_procp->p_user.u_comm, 3631 state, mstate)); 3632 #else 3633 return (dtrace_dif_varstr( 3634 (uintptr_t) curthread->td_proc->p_comm, state, mstate)); 3635 #endif 3636 3637 case DIF_VAR_ZONENAME: 3638 #ifdef illumos 3639 if (!dtrace_priv_proc(state)) 3640 return (0); 3641 3642 /* 3643 * See comment in DIF_VAR_PID. 3644 */ 3645 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3646 return ((uint64_t)(uintptr_t)p0.p_zone->zone_name); 3647 3648 /* 3649 * It is always safe to dereference one's own t_procp pointer: 3650 * it always points to a valid, allocated proc structure. 3651 * (This is true because threads don't clean up their own 3652 * state -- they leave that task to whomever reaps them.) 
3653 */ 3654 return (dtrace_dif_varstr( 3655 (uintptr_t)curthread->t_procp->p_zone->zone_name, 3656 state, mstate)); 3657 #else 3658 return (0); 3659 #endif 3660 3661 case DIF_VAR_UID: 3662 if (!dtrace_priv_proc(state)) 3663 return (0); 3664 3665 #ifdef illumos 3666 /* 3667 * See comment in DIF_VAR_PID. 3668 */ 3669 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3670 return ((uint64_t)p0.p_cred->cr_uid); 3671 3672 /* 3673 * It is always safe to dereference one's own t_procp pointer: 3674 * it always points to a valid, allocated proc structure. 3675 * (This is true because threads don't clean up their own 3676 * state -- they leave that task to whomever reaps them.) 3677 * 3678 * Additionally, it is safe to dereference one's own process 3679 * credential, since this is never NULL after process birth. 3680 */ 3681 return ((uint64_t)curthread->t_procp->p_cred->cr_uid); 3682 #else 3683 return ((uint64_t)curthread->td_ucred->cr_uid); 3684 #endif 3685 3686 case DIF_VAR_GID: 3687 if (!dtrace_priv_proc(state)) 3688 return (0); 3689 3690 #ifdef illumos 3691 /* 3692 * See comment in DIF_VAR_PID. 3693 */ 3694 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3695 return ((uint64_t)p0.p_cred->cr_gid); 3696 3697 /* 3698 * It is always safe to dereference one's own t_procp pointer: 3699 * it always points to a valid, allocated proc structure. 3700 * (This is true because threads don't clean up their own 3701 * state -- they leave that task to whomever reaps them.) 3702 * 3703 * Additionally, it is safe to dereference one's own process 3704 * credential, since this is never NULL after process birth. 3705 */ 3706 return ((uint64_t)curthread->t_procp->p_cred->cr_gid); 3707 #else 3708 return ((uint64_t)curthread->td_ucred->cr_gid); 3709 #endif 3710 3711 case DIF_VAR_ERRNO: { 3712 #ifdef illumos 3713 klwp_t *lwp; 3714 if (!dtrace_priv_proc(state)) 3715 return (0); 3716 3717 /* 3718 * See comment in DIF_VAR_PID. 
3719 */ 3720 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3721 return (0); 3722 3723 /* 3724 * It is always safe to dereference one's own t_lwp pointer in 3725 * the event that this pointer is non-NULL. (This is true 3726 * because threads and lwps don't clean up their own state -- 3727 * they leave that task to whomever reaps them.) 3728 */ 3729 if ((lwp = curthread->t_lwp) == NULL) 3730 return (0); 3731 3732 return ((uint64_t)lwp->lwp_errno); 3733 #else 3734 return (curthread->td_errno); 3735 #endif 3736 } 3737 #ifndef illumos 3738 case DIF_VAR_CPU: { 3739 return curcpu; 3740 } 3741 #endif 3742 default: 3743 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 3744 return (0); 3745 } 3746 } 3747 3748 3749 typedef enum dtrace_json_state { 3750 DTRACE_JSON_REST = 1, 3751 DTRACE_JSON_OBJECT, 3752 DTRACE_JSON_STRING, 3753 DTRACE_JSON_STRING_ESCAPE, 3754 DTRACE_JSON_STRING_ESCAPE_UNICODE, 3755 DTRACE_JSON_COLON, 3756 DTRACE_JSON_COMMA, 3757 DTRACE_JSON_VALUE, 3758 DTRACE_JSON_IDENTIFIER, 3759 DTRACE_JSON_NUMBER, 3760 DTRACE_JSON_NUMBER_FRAC, 3761 DTRACE_JSON_NUMBER_EXP, 3762 DTRACE_JSON_COLLECT_OBJECT 3763 } dtrace_json_state_t; 3764 3765 /* 3766 * This function possesses just enough knowledge about JSON to extract a single 3767 * value from a JSON string and store it in the scratch buffer. It is able 3768 * to extract nested object values, and members of arrays by index. 3769 * 3770 * elemlist is a list of JSON keys, stored as packed NUL-terminated strings, to 3771 * be looked up as we descend into the object tree. e.g. 3772 * 3773 * foo[0].bar.baz[32] --> "foo" NUL "0" NUL "bar" NUL "baz" NUL "32" NUL 3774 * with nelems = 5. 3775 * 3776 * The run time of this function must be bounded above by strsize to limit the 3777 * amount of work done in probe context. 
As such, it is implemented as a 3778 * simple state machine, reading one character at a time using safe loads 3779 * until we find the requested element, hit a parsing error or run off the 3780 * end of the object or string. 3781 * 3782 * As there is no way for a subroutine to return an error without interrupting 3783 * clause execution, we simply return NULL in the event of a missing key or any 3784 * other error condition. Each NULL return in this function is commented with 3785 * the error condition it represents -- parsing or otherwise. 3786 * 3787 * The set of states for the state machine closely matches the JSON 3788 * specification (http://json.org/). Briefly: 3789 * 3790 * DTRACE_JSON_REST: 3791 * Skip whitespace until we find either a top-level Object, moving 3792 * to DTRACE_JSON_OBJECT; or an Array, moving to DTRACE_JSON_VALUE. 3793 * 3794 * DTRACE_JSON_OBJECT: 3795 * Locate the next key String in an Object. Sets a flag to denote 3796 * the next String as a key string and moves to DTRACE_JSON_STRING. 3797 * 3798 * DTRACE_JSON_COLON: 3799 * Skip whitespace until we find the colon that separates key Strings 3800 * from their values. Once found, move to DTRACE_JSON_VALUE. 3801 * 3802 * DTRACE_JSON_VALUE: 3803 * Detects the type of the next value (String, Number, Identifier, Object 3804 * or Array) and routes to the states that process that type. Here we also 3805 * deal with the element selector list if we are requested to traverse down 3806 * into the object tree. 3807 * 3808 * DTRACE_JSON_COMMA: 3809 * Skip whitespace until we find the comma that separates key-value pairs 3810 * in Objects (returning to DTRACE_JSON_OBJECT) or values in Arrays 3811 * (similarly DTRACE_JSON_VALUE). All following literal value processing 3812 * states return to this state at the end of their value, unless otherwise 3813 * noted. 
3814 * 3815 * DTRACE_JSON_NUMBER, DTRACE_JSON_NUMBER_FRAC, DTRACE_JSON_NUMBER_EXP: 3816 * Processes a Number literal from the JSON, including any exponent 3817 * component that may be present. Numbers are returned as strings, which 3818 * may be passed to strtoll() if an integer is required. 3819 * 3820 * DTRACE_JSON_IDENTIFIER: 3821 * Processes a "true", "false" or "null" literal in the JSON. 3822 * 3823 * DTRACE_JSON_STRING, DTRACE_JSON_STRING_ESCAPE, 3824 * DTRACE_JSON_STRING_ESCAPE_UNICODE: 3825 * Processes a String literal from the JSON, whether the String denotes 3826 * a key, a value or part of a larger Object. Handles all escape sequences 3827 * present in the specification, including four-digit unicode characters, 3828 * but merely includes the escape sequence without converting it to the 3829 * actual escaped character. If the String is flagged as a key, we 3830 * move to DTRACE_JSON_COLON rather than DTRACE_JSON_COMMA. 3831 * 3832 * DTRACE_JSON_COLLECT_OBJECT: 3833 * This state collects an entire Object (or Array), correctly handling 3834 * embedded strings. If the full element selector list matches this nested 3835 * object, we return the Object in full as a string. If not, we use this 3836 * state to skip to the next value at this level and continue processing. 3837 * 3838 * NOTE: This function uses various macros from strtolctype.h to manipulate 3839 * digit values, etc -- these have all been checked to ensure they make 3840 * no additional function calls. 
 */
/*
 * Parameters, as used by the code below:
 *
 *   size      upper bound on the number of bytes examined, starting at json
 *             (it is also passed as the limit to dtrace_strtoll() and
 *             dtrace_strncmp())
 *   json      address of the JSON text; read one byte at a time with
 *             dtrace_load8()
 *   elemlist  packed, NUL-separated element selectors
 *   nelems    number of selectors remaining in elemlist
 *   dest      buffer into which candidate values are accumulated; this is
 *             also the pointer returned on success
 *
 * Returns dest (holding a NUL-terminated value) on success, or NULL on any
 * error, including reaching a NUL byte or the end of the size bytes before
 * the requested element has been found.
 */
static char *
dtrace_json(uint64_t size, uintptr_t json, char *elemlist, int nelems,
    char *dest)
{
	dtrace_json_state_t state = DTRACE_JSON_REST;
	int64_t array_elem = INT64_MIN;
	int64_t array_pos = 0;
	uint8_t escape_unicount = 0;
	boolean_t string_is_key = B_FALSE;
	boolean_t collect_object = B_FALSE;
	boolean_t found_key = B_FALSE;
	boolean_t in_array = B_FALSE;
	uint32_t braces = 0, brackets = 0;
	char *elem = elemlist;
	char *dd = dest;
	uintptr_t cur;

	for (cur = json; cur < json + size; cur++) {
		char cc = dtrace_load8(cur);
		/* A NUL byte ends the input before a value was returned. */
		if (cc == '\0')
			return (NULL);

		switch (state) {
		case DTRACE_JSON_REST:
			if (isspace(cc))
				break;

			if (cc == '{') {
				state = DTRACE_JSON_OBJECT;
				break;
			}

			if (cc == '[') {
				/*
				 * The current selector is an array index;
				 * index 0 matches the first value at once.
				 */
				in_array = B_TRUE;
				array_pos = 0;
				array_elem = dtrace_strtoll(elem, 10, size);
				found_key = array_elem == 0 ? B_TRUE : B_FALSE;
				state = DTRACE_JSON_VALUE;
				break;
			}

			/*
			 * ERROR: expected to find a top-level object or array.
			 */
			return (NULL);
		case DTRACE_JSON_OBJECT:
			if (isspace(cc))
				break;

			if (cc == '"') {
				state = DTRACE_JSON_STRING;
				string_is_key = B_TRUE;
				break;
			}

			/*
			 * ERROR: either the object did not start with a key
			 * string, or we've run off the end of the object
			 * without finding the requested key.
			 */
			return (NULL);
		case DTRACE_JSON_STRING:
			if (cc == '\\') {
				*dd++ = '\\';
				state = DTRACE_JSON_STRING_ESCAPE;
				break;
			}

			if (cc == '"') {
				if (collect_object) {
					/*
					 * We don't reset the dest here, as
					 * the string is part of a larger
					 * object being collected.
					 */
					*dd++ = cc;
					collect_object = B_FALSE;
					state = DTRACE_JSON_COLLECT_OBJECT;
					break;
				}
				*dd = '\0';
				dd = dest; /* reset string buffer */
				if (string_is_key) {
					if (dtrace_strncmp(dest, elem,
					    size) == 0)
						found_key = B_TRUE;
				} else if (found_key) {
					if (nelems > 1) {
						/*
						 * We expected an object, not
						 * this string.
						 */
						return (NULL);
					}
					return (dest);
				}
				state = string_is_key ? DTRACE_JSON_COLON :
				    DTRACE_JSON_COMMA;
				string_is_key = B_FALSE;
				break;
			}

			*dd++ = cc;
			break;
		case DTRACE_JSON_STRING_ESCAPE:
			/* The escape is copied verbatim, not decoded. */
			*dd++ = cc;
			if (cc == 'u') {
				escape_unicount = 0;
				state = DTRACE_JSON_STRING_ESCAPE_UNICODE;
			} else {
				state = DTRACE_JSON_STRING;
			}
			break;
		case DTRACE_JSON_STRING_ESCAPE_UNICODE:
			if (!isxdigit(cc)) {
				/*
				 * ERROR: invalid unicode escape, expected
				 * four valid hexadecimal digits.
				 */
				return (NULL);
			}

			*dd++ = cc;
			if (++escape_unicount == 4)
				state = DTRACE_JSON_STRING;
			break;
		case DTRACE_JSON_COLON:
			if (isspace(cc))
				break;

			if (cc == ':') {
				state = DTRACE_JSON_VALUE;
				break;
			}

			/*
			 * ERROR: expected a colon.
			 */
			return (NULL);
		case DTRACE_JSON_COMMA:
			if (isspace(cc))
				break;

			if (cc == ',') {
				if (in_array) {
					state = DTRACE_JSON_VALUE;
					if (++array_pos == array_elem)
						found_key = B_TRUE;
				} else {
					state = DTRACE_JSON_OBJECT;
				}
				break;
			}

			/*
			 * ERROR: either we hit an unexpected character, or
			 * we reached the end of the object or array without
			 * finding the requested key.
			 */
			return (NULL);
		case DTRACE_JSON_IDENTIFIER:
			if (islower(cc)) {
				*dd++ = cc;
				break;
			}

			*dd = '\0';
			dd = dest; /* reset string buffer */

			if (dtrace_strncmp(dest, "true", 5) == 0 ||
			    dtrace_strncmp(dest, "false", 6) == 0 ||
			    dtrace_strncmp(dest, "null", 5) == 0) {
				if (found_key) {
					if (nelems > 1) {
						/*
						 * ERROR: We expected an object,
						 * not this identifier.
						 */
						return (NULL);
					}
					return (dest);
				} else {
					/*
					 * Step back so that the character
					 * that ended the identifier is
					 * examined again in the COMMA state.
					 */
					cur--;
					state = DTRACE_JSON_COMMA;
					break;
				}
			}

			/*
			 * ERROR: we did not recognise the identifier as one
			 * of those in the JSON specification.
			 */
			return (NULL);
		case DTRACE_JSON_NUMBER:
			if (cc == '.') {
				*dd++ = cc;
				state = DTRACE_JSON_NUMBER_FRAC;
				break;
			}

			if (cc == 'x' || cc == 'X') {
				/*
				 * ERROR: specification explicitly excludes
				 * hexadecimal or octal numbers.
				 */
				return (NULL);
			}

			/* FALLTHRU */
		case DTRACE_JSON_NUMBER_FRAC:
			if (cc == 'e' || cc == 'E') {
				*dd++ = cc;
				state = DTRACE_JSON_NUMBER_EXP;
				break;
			}

			if (cc == '+' || cc == '-') {
				/*
				 * ERROR: expect sign as part of exponent only.
				 */
				return (NULL);
			}
			/* FALLTHRU */
		case DTRACE_JSON_NUMBER_EXP:
			if (isdigit(cc) || cc == '+' || cc == '-') {
				*dd++ = cc;
				break;
			}

			*dd = '\0';
			dd = dest; /* reset string buffer */
			if (found_key) {
				if (nelems > 1) {
					/*
					 * ERROR: We expected an object, not
					 * this number.
					 */
					return (NULL);
				}
				return (dest);
			}

			/*
			 * Step back so that the character that ended the
			 * number is examined again in the COMMA state.
			 */
			cur--;
			state = DTRACE_JSON_COMMA;
			break;
		case DTRACE_JSON_VALUE:
			if (isspace(cc))
				break;

			if (cc == '{' || cc == '[') {
				if (nelems > 1 && found_key) {
					in_array = cc == '[' ? B_TRUE : B_FALSE;
					/*
					 * If our element selector directs us
					 * to descend into this nested object,
					 * then move to the next selector
					 * element in the list and restart the
					 * state machine.
					 */
					while (*elem != '\0')
						elem++;
					elem++; /* skip the inter-element NUL */
					nelems--;
					dd = dest;
					if (in_array) {
						state = DTRACE_JSON_VALUE;
						array_pos = 0;
						array_elem = dtrace_strtoll(
						    elem, 10, size);
						found_key = array_elem == 0 ?
						    B_TRUE : B_FALSE;
					} else {
						found_key = B_FALSE;
						state = DTRACE_JSON_OBJECT;
					}
					break;
				}

				/*
				 * Otherwise, we wish to either skip this
				 * nested object or return it in full.
				 */
				if (cc == '[')
					brackets = 1;
				else
					braces = 1;
				*dd++ = cc;
				state = DTRACE_JSON_COLLECT_OBJECT;
				break;
			}

			if (cc == '"') {
				state = DTRACE_JSON_STRING;
				break;
			}

			if (islower(cc)) {
				/*
				 * Here we deal with true, false and null.
				 */
				*dd++ = cc;
				state = DTRACE_JSON_IDENTIFIER;
				break;
			}

			if (cc == '-' || isdigit(cc)) {
				*dd++ = cc;
				state = DTRACE_JSON_NUMBER;
				break;
			}

			/*
			 * ERROR: unexpected character at start of value.
			 */
			return (NULL);
		case DTRACE_JSON_COLLECT_OBJECT:
			if (cc == '\0')
				/*
				 * ERROR: unexpected end of input.
				 */
				return (NULL);

			*dd++ = cc;
			if (cc == '"') {
				/*
				 * Strings are handed to the STRING state so
				 * that braces and brackets inside them are
				 * not counted.
				 */
				collect_object = B_TRUE;
				state = DTRACE_JSON_STRING;
				break;
			}

			if (cc == ']') {
				if (brackets-- == 0) {
					/*
					 * ERROR: unbalanced brackets.
					 */
					return (NULL);
				}
			} else if (cc == '}') {
				if (braces-- == 0) {
					/*
					 * ERROR: unbalanced braces.
					 */
					return (NULL);
				}
			} else if (cc == '{') {
				braces++;
			} else if (cc == '[') {
				brackets++;
			}

			/* Both counters at zero: the nested value is done. */
			if (brackets == 0 && braces == 0) {
				if (found_key) {
					*dd = '\0';
					return (dest);
				}
				dd = dest; /* reset string buffer */
				state = DTRACE_JSON_COMMA;
			}
			break;
		}
	}
	return (NULL);
}

/*
 * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
 * Notice that we don't bother validating the proper number of arguments or
 * their types in the tuple stack.  This isn't needed because all argument
 * interpretation is safe because of our load safety -- the worst that can
 * happen is that a bogus program can obtain bogus results.
 */
static void
dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
    dtrace_key_t *tupregs, int nargs,
    dtrace_mstate_t *mstate, dtrace_state_t *state)
{
	volatile uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags;
	volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval;
	dtrace_vstate_t *vstate = &state->dts_vstate;

#ifdef illumos
	union {
		mutex_impl_t mi;
		uint64_t mx;
	} m;

	union {
		krwlock_t ri;
		uintptr_t rw;
	} r;
#else
	struct thread *lowner;
	union {
		struct lock_object *li;
		uintptr_t lx;
	} l;
#endif

	switch (subr) {
	case DIF_SUBR_RAND:
		regs[rd] = dtrace_xoroshiro128_plus_next(
		    state->dts_rstate[curcpu]);
		break;

#ifdef illumos
	case DIF_SUBR_MUTEX_OWNED:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		m.mx = dtrace_load64(tupregs[0].dttk_value);
		if (MUTEX_TYPE_ADAPTIVE(&m.mi))
			regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER;
		else
			regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock);
		break;

	case
DIF_SUBR_MUTEX_OWNER: 4261 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), 4262 mstate, vstate)) { 4263 regs[rd] = 0; 4264 break; 4265 } 4266 4267 m.mx = dtrace_load64(tupregs[0].dttk_value); 4268 if (MUTEX_TYPE_ADAPTIVE(&m.mi) && 4269 MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER) 4270 regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi); 4271 else 4272 regs[rd] = 0; 4273 break; 4274 4275 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: 4276 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), 4277 mstate, vstate)) { 4278 regs[rd] = 0; 4279 break; 4280 } 4281 4282 m.mx = dtrace_load64(tupregs[0].dttk_value); 4283 regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi); 4284 break; 4285 4286 case DIF_SUBR_MUTEX_TYPE_SPIN: 4287 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), 4288 mstate, vstate)) { 4289 regs[rd] = 0; 4290 break; 4291 } 4292 4293 m.mx = dtrace_load64(tupregs[0].dttk_value); 4294 regs[rd] = MUTEX_TYPE_SPIN(&m.mi); 4295 break; 4296 4297 case DIF_SUBR_RW_READ_HELD: { 4298 uintptr_t tmp; 4299 4300 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), 4301 mstate, vstate)) { 4302 regs[rd] = 0; 4303 break; 4304 } 4305 4306 r.rw = dtrace_loadptr(tupregs[0].dttk_value); 4307 regs[rd] = _RW_READ_HELD(&r.ri, tmp); 4308 break; 4309 } 4310 4311 case DIF_SUBR_RW_WRITE_HELD: 4312 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), 4313 mstate, vstate)) { 4314 regs[rd] = 0; 4315 break; 4316 } 4317 4318 r.rw = dtrace_loadptr(tupregs[0].dttk_value); 4319 regs[rd] = _RW_WRITE_HELD(&r.ri); 4320 break; 4321 4322 case DIF_SUBR_RW_ISWRITER: 4323 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), 4324 mstate, vstate)) { 4325 regs[rd] = 0; 4326 break; 4327 } 4328 4329 r.rw = dtrace_loadptr(tupregs[0].dttk_value); 4330 regs[rd] = _RW_ISWRITER(&r.ri); 4331 break; 4332 4333 #else /* !illumos */ 4334 case DIF_SUBR_MUTEX_OWNED: 4335 if (!dtrace_canload(tupregs[0].dttk_value, 4336 sizeof (struct lock_object), mstate, vstate)) { 4337 regs[rd] = 0; 4338 break; 4339 
} 4340 l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); 4341 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4342 regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner); 4343 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4344 break; 4345 4346 case DIF_SUBR_MUTEX_OWNER: 4347 if (!dtrace_canload(tupregs[0].dttk_value, 4348 sizeof (struct lock_object), mstate, vstate)) { 4349 regs[rd] = 0; 4350 break; 4351 } 4352 l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); 4353 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4354 LOCK_CLASS(l.li)->lc_owner(l.li, &lowner); 4355 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4356 regs[rd] = (uintptr_t)lowner; 4357 break; 4358 4359 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: 4360 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (struct mtx), 4361 mstate, vstate)) { 4362 regs[rd] = 0; 4363 break; 4364 } 4365 l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); 4366 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4367 regs[rd] = (LOCK_CLASS(l.li)->lc_flags & LC_SLEEPLOCK) != 0; 4368 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4369 break; 4370 4371 case DIF_SUBR_MUTEX_TYPE_SPIN: 4372 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (struct mtx), 4373 mstate, vstate)) { 4374 regs[rd] = 0; 4375 break; 4376 } 4377 l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); 4378 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4379 regs[rd] = (LOCK_CLASS(l.li)->lc_flags & LC_SPINLOCK) != 0; 4380 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4381 break; 4382 4383 case DIF_SUBR_RW_READ_HELD: 4384 case DIF_SUBR_SX_SHARED_HELD: 4385 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), 4386 mstate, vstate)) { 4387 regs[rd] = 0; 4388 break; 4389 } 4390 l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); 4391 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4392 regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner) && 4393 lowner == NULL; 4394 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4395 break; 4396 4397 case DIF_SUBR_RW_WRITE_HELD: 4398 case DIF_SUBR_SX_EXCLUSIVE_HELD: 
4399 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), 4400 mstate, vstate)) { 4401 regs[rd] = 0; 4402 break; 4403 } 4404 l.lx = dtrace_loadptr(tupregs[0].dttk_value); 4405 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4406 regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner) && 4407 lowner != NULL; 4408 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4409 break; 4410 4411 case DIF_SUBR_RW_ISWRITER: 4412 case DIF_SUBR_SX_ISEXCLUSIVE: 4413 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), 4414 mstate, vstate)) { 4415 regs[rd] = 0; 4416 break; 4417 } 4418 l.lx = dtrace_loadptr(tupregs[0].dttk_value); 4419 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4420 LOCK_CLASS(l.li)->lc_owner(l.li, &lowner); 4421 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4422 regs[rd] = (lowner == curthread); 4423 break; 4424 #endif /* illumos */ 4425 4426 case DIF_SUBR_BCOPY: { 4427 /* 4428 * We need to be sure that the destination is in the scratch 4429 * region -- no other region is allowed. 4430 */ 4431 uintptr_t src = tupregs[0].dttk_value; 4432 uintptr_t dest = tupregs[1].dttk_value; 4433 size_t size = tupregs[2].dttk_value; 4434 4435 if (!dtrace_inscratch(dest, size, mstate)) { 4436 *flags |= CPU_DTRACE_BADADDR; 4437 *illval = regs[rd]; 4438 break; 4439 } 4440 4441 if (!dtrace_canload(src, size, mstate, vstate)) { 4442 regs[rd] = 0; 4443 break; 4444 } 4445 4446 dtrace_bcopy((void *)src, (void *)dest, size); 4447 break; 4448 } 4449 4450 case DIF_SUBR_ALLOCA: 4451 case DIF_SUBR_COPYIN: { 4452 uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); 4453 uint64_t size = 4454 tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value; 4455 size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size; 4456 4457 /* 4458 * This action doesn't require any credential checks since 4459 * probes will not activate in user contexts to which the 4460 * enabling user does not have permissions. 
4461 */ 4462 4463 /* 4464 * Rounding up the user allocation size could have overflowed 4465 * a large, bogus allocation (like -1ULL) to 0. 4466 */ 4467 if (scratch_size < size || 4468 !DTRACE_INSCRATCH(mstate, scratch_size)) { 4469 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4470 regs[rd] = 0; 4471 break; 4472 } 4473 4474 if (subr == DIF_SUBR_COPYIN) { 4475 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4476 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); 4477 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4478 } 4479 4480 mstate->dtms_scratch_ptr += scratch_size; 4481 regs[rd] = dest; 4482 break; 4483 } 4484 4485 case DIF_SUBR_COPYINTO: { 4486 uint64_t size = tupregs[1].dttk_value; 4487 uintptr_t dest = tupregs[2].dttk_value; 4488 4489 /* 4490 * This action doesn't require any credential checks since 4491 * probes will not activate in user contexts to which the 4492 * enabling user does not have permissions. 4493 */ 4494 if (!dtrace_inscratch(dest, size, mstate)) { 4495 *flags |= CPU_DTRACE_BADADDR; 4496 *illval = regs[rd]; 4497 break; 4498 } 4499 4500 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4501 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); 4502 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4503 break; 4504 } 4505 4506 case DIF_SUBR_COPYINSTR: { 4507 uintptr_t dest = mstate->dtms_scratch_ptr; 4508 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4509 4510 if (nargs > 1 && tupregs[1].dttk_value < size) 4511 size = tupregs[1].dttk_value + 1; 4512 4513 /* 4514 * This action doesn't require any credential checks since 4515 * probes will not activate in user contexts to which the 4516 * enabling user does not have permissions. 
4517 */ 4518 if (!DTRACE_INSCRATCH(mstate, size)) { 4519 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4520 regs[rd] = 0; 4521 break; 4522 } 4523 4524 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4525 dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags); 4526 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4527 4528 ((char *)dest)[size - 1] = '\0'; 4529 mstate->dtms_scratch_ptr += size; 4530 regs[rd] = dest; 4531 break; 4532 } 4533 4534 #ifdef illumos 4535 case DIF_SUBR_MSGSIZE: 4536 case DIF_SUBR_MSGDSIZE: { 4537 uintptr_t baddr = tupregs[0].dttk_value, daddr; 4538 uintptr_t wptr, rptr; 4539 size_t count = 0; 4540 int cont = 0; 4541 4542 while (baddr != 0 && !(*flags & CPU_DTRACE_FAULT)) { 4543 4544 if (!dtrace_canload(baddr, sizeof (mblk_t), mstate, 4545 vstate)) { 4546 regs[rd] = 0; 4547 break; 4548 } 4549 4550 wptr = dtrace_loadptr(baddr + 4551 offsetof(mblk_t, b_wptr)); 4552 4553 rptr = dtrace_loadptr(baddr + 4554 offsetof(mblk_t, b_rptr)); 4555 4556 if (wptr < rptr) { 4557 *flags |= CPU_DTRACE_BADADDR; 4558 *illval = tupregs[0].dttk_value; 4559 break; 4560 } 4561 4562 daddr = dtrace_loadptr(baddr + 4563 offsetof(mblk_t, b_datap)); 4564 4565 baddr = dtrace_loadptr(baddr + 4566 offsetof(mblk_t, b_cont)); 4567 4568 /* 4569 * We want to prevent against denial-of-service here, 4570 * so we're only going to search the list for 4571 * dtrace_msgdsize_max mblks. 
4572 */ 4573 if (cont++ > dtrace_msgdsize_max) { 4574 *flags |= CPU_DTRACE_ILLOP; 4575 break; 4576 } 4577 4578 if (subr == DIF_SUBR_MSGDSIZE) { 4579 if (dtrace_load8(daddr + 4580 offsetof(dblk_t, db_type)) != M_DATA) 4581 continue; 4582 } 4583 4584 count += wptr - rptr; 4585 } 4586 4587 if (!(*flags & CPU_DTRACE_FAULT)) 4588 regs[rd] = count; 4589 4590 break; 4591 } 4592 #endif 4593 4594 case DIF_SUBR_PROGENYOF: { 4595 pid_t pid = tupregs[0].dttk_value; 4596 proc_t *p; 4597 int rval = 0; 4598 4599 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4600 4601 for (p = curthread->t_procp; p != NULL; p = p->p_parent) { 4602 #ifdef illumos 4603 if (p->p_pidp->pid_id == pid) { 4604 #else 4605 if (p->p_pid == pid) { 4606 #endif 4607 rval = 1; 4608 break; 4609 } 4610 } 4611 4612 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4613 4614 regs[rd] = rval; 4615 break; 4616 } 4617 4618 case DIF_SUBR_SPECULATION: 4619 regs[rd] = dtrace_speculation(state); 4620 break; 4621 4622 case DIF_SUBR_COPYOUT: { 4623 uintptr_t kaddr = tupregs[0].dttk_value; 4624 uintptr_t uaddr = tupregs[1].dttk_value; 4625 uint64_t size = tupregs[2].dttk_value; 4626 4627 if (!dtrace_destructive_disallow && 4628 dtrace_priv_proc_control(state) && 4629 !dtrace_istoxic(kaddr, size) && 4630 dtrace_canload(kaddr, size, mstate, vstate)) { 4631 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4632 dtrace_copyout(kaddr, uaddr, size, flags); 4633 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4634 } 4635 break; 4636 } 4637 4638 case DIF_SUBR_COPYOUTSTR: { 4639 uintptr_t kaddr = tupregs[0].dttk_value; 4640 uintptr_t uaddr = tupregs[1].dttk_value; 4641 uint64_t size = tupregs[2].dttk_value; 4642 size_t lim; 4643 4644 if (!dtrace_destructive_disallow && 4645 dtrace_priv_proc_control(state) && 4646 !dtrace_istoxic(kaddr, size) && 4647 dtrace_strcanload(kaddr, size, &lim, mstate, vstate)) { 4648 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4649 dtrace_copyoutstr(kaddr, uaddr, lim, flags); 4650 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4651 } 4652 break; 
4653 } 4654 4655 case DIF_SUBR_STRLEN: { 4656 size_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4657 uintptr_t addr = (uintptr_t)tupregs[0].dttk_value; 4658 size_t lim; 4659 4660 if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) { 4661 regs[rd] = 0; 4662 break; 4663 } 4664 4665 regs[rd] = dtrace_strlen((char *)addr, lim); 4666 break; 4667 } 4668 4669 case DIF_SUBR_STRCHR: 4670 case DIF_SUBR_STRRCHR: { 4671 /* 4672 * We're going to iterate over the string looking for the 4673 * specified character. We will iterate until we have reached 4674 * the string length or we have found the character. If this 4675 * is DIF_SUBR_STRRCHR, we will look for the last occurrence 4676 * of the specified character instead of the first. 4677 */ 4678 uintptr_t addr = tupregs[0].dttk_value; 4679 uintptr_t addr_limit; 4680 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4681 size_t lim; 4682 char c, target = (char)tupregs[1].dttk_value; 4683 4684 if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) { 4685 regs[rd] = 0; 4686 break; 4687 } 4688 addr_limit = addr + lim; 4689 4690 for (regs[rd] = 0; addr < addr_limit; addr++) { 4691 if ((c = dtrace_load8(addr)) == target) { 4692 regs[rd] = addr; 4693 4694 if (subr == DIF_SUBR_STRCHR) 4695 break; 4696 } 4697 4698 if (c == '\0') 4699 break; 4700 } 4701 break; 4702 } 4703 4704 case DIF_SUBR_STRSTR: 4705 case DIF_SUBR_INDEX: 4706 case DIF_SUBR_RINDEX: { 4707 /* 4708 * We're going to iterate over the string looking for the 4709 * specified string. We will iterate until we have reached 4710 * the string length or we have found the string. (Yes, this 4711 * is done in the most naive way possible -- but considering 4712 * that the string we're searching for is likely to be 4713 * relatively short, the complexity of Rabin-Karp or similar 4714 * hardly seems merited.) 
4715 */ 4716 char *addr = (char *)(uintptr_t)tupregs[0].dttk_value; 4717 char *substr = (char *)(uintptr_t)tupregs[1].dttk_value; 4718 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4719 size_t len = dtrace_strlen(addr, size); 4720 size_t sublen = dtrace_strlen(substr, size); 4721 char *limit = addr + len, *orig = addr; 4722 int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1; 4723 int inc = 1; 4724 4725 regs[rd] = notfound; 4726 4727 if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) { 4728 regs[rd] = 0; 4729 break; 4730 } 4731 4732 if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate, 4733 vstate)) { 4734 regs[rd] = 0; 4735 break; 4736 } 4737 4738 /* 4739 * strstr() and index()/rindex() have similar semantics if 4740 * both strings are the empty string: strstr() returns a 4741 * pointer to the (empty) string, and index() and rindex() 4742 * both return index 0 (regardless of any position argument). 4743 */ 4744 if (sublen == 0 && len == 0) { 4745 if (subr == DIF_SUBR_STRSTR) 4746 regs[rd] = (uintptr_t)addr; 4747 else 4748 regs[rd] = 0; 4749 break; 4750 } 4751 4752 if (subr != DIF_SUBR_STRSTR) { 4753 if (subr == DIF_SUBR_RINDEX) { 4754 limit = orig - 1; 4755 addr += len; 4756 inc = -1; 4757 } 4758 4759 /* 4760 * Both index() and rindex() take an optional position 4761 * argument that denotes the starting position. 4762 */ 4763 if (nargs == 3) { 4764 int64_t pos = (int64_t)tupregs[2].dttk_value; 4765 4766 /* 4767 * If the position argument to index() is 4768 * negative, Perl implicitly clamps it at 4769 * zero. This semantic is a little surprising 4770 * given the special meaning of negative 4771 * positions to similar Perl functions like 4772 * substr(), but it appears to reflect a 4773 * notion that index() can start from a 4774 * negative index and increment its way up to 4775 * the string. 
Given this notion, Perl's 4776 * rindex() is at least self-consistent in 4777 * that it implicitly clamps positions greater 4778 * than the string length to be the string 4779 * length. Where Perl completely loses 4780 * coherence, however, is when the specified 4781 * substring is the empty string (""). In 4782 * this case, even if the position is 4783 * negative, rindex() returns 0 -- and even if 4784 * the position is greater than the length, 4785 * index() returns the string length. These 4786 * semantics violate the notion that index() 4787 * should never return a value less than the 4788 * specified position and that rindex() should 4789 * never return a value greater than the 4790 * specified position. (One assumes that 4791 * these semantics are artifacts of Perl's 4792 * implementation and not the results of 4793 * deliberate design -- it beggars belief that 4794 * even Larry Wall could desire such oddness.) 4795 * While in the abstract one would wish for 4796 * consistent position semantics across 4797 * substr(), index() and rindex() -- or at the 4798 * very least self-consistent position 4799 * semantics for index() and rindex() -- we 4800 * instead opt to keep with the extant Perl 4801 * semantics, in all their broken glory. (Do 4802 * we have more desire to maintain Perl's 4803 * semantics than Perl does? Probably.) 
4804 */ 4805 if (subr == DIF_SUBR_RINDEX) { 4806 if (pos < 0) { 4807 if (sublen == 0) 4808 regs[rd] = 0; 4809 break; 4810 } 4811 4812 if (pos > len) 4813 pos = len; 4814 } else { 4815 if (pos < 0) 4816 pos = 0; 4817 4818 if (pos >= len) { 4819 if (sublen == 0) 4820 regs[rd] = len; 4821 break; 4822 } 4823 } 4824 4825 addr = orig + pos; 4826 } 4827 } 4828 4829 for (regs[rd] = notfound; addr != limit; addr += inc) { 4830 if (dtrace_strncmp(addr, substr, sublen) == 0) { 4831 if (subr != DIF_SUBR_STRSTR) { 4832 /* 4833 * As D index() and rindex() are 4834 * modeled on Perl (and not on awk), 4835 * we return a zero-based (and not a 4836 * one-based) index. (For you Perl 4837 * weenies: no, we're not going to add 4838 * $[ -- and shouldn't you be at a con 4839 * or something?) 4840 */ 4841 regs[rd] = (uintptr_t)(addr - orig); 4842 break; 4843 } 4844 4845 ASSERT(subr == DIF_SUBR_STRSTR); 4846 regs[rd] = (uintptr_t)addr; 4847 break; 4848 } 4849 } 4850 4851 break; 4852 } 4853 4854 case DIF_SUBR_STRTOK: { 4855 uintptr_t addr = tupregs[0].dttk_value; 4856 uintptr_t tokaddr = tupregs[1].dttk_value; 4857 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4858 uintptr_t limit, toklimit; 4859 size_t clim; 4860 uint8_t c = 0, tokmap[32]; /* 256 / 8 */ 4861 char *dest = (char *)mstate->dtms_scratch_ptr; 4862 int i; 4863 4864 /* 4865 * Check both the token buffer and (later) the input buffer, 4866 * since both could be non-scratch addresses. 4867 */ 4868 if (!dtrace_strcanload(tokaddr, size, &clim, mstate, vstate)) { 4869 regs[rd] = 0; 4870 break; 4871 } 4872 toklimit = tokaddr + clim; 4873 4874 if (!DTRACE_INSCRATCH(mstate, size)) { 4875 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4876 regs[rd] = 0; 4877 break; 4878 } 4879 4880 if (addr == 0) { 4881 /* 4882 * If the address specified is NULL, we use our saved 4883 * strtok pointer from the mstate. 
Note that this 4884 * means that the saved strtok pointer is _only_ 4885 * valid within multiple enablings of the same probe -- 4886 * it behaves like an implicit clause-local variable. 4887 */ 4888 addr = mstate->dtms_strtok; 4889 limit = mstate->dtms_strtok_limit; 4890 } else { 4891 /* 4892 * If the user-specified address is non-NULL we must 4893 * access check it. This is the only time we have 4894 * a chance to do so, since this address may reside 4895 * in the string table of this clause-- future calls 4896 * (when we fetch addr from mstate->dtms_strtok) 4897 * would fail this access check. 4898 */ 4899 if (!dtrace_strcanload(addr, size, &clim, mstate, 4900 vstate)) { 4901 regs[rd] = 0; 4902 break; 4903 } 4904 limit = addr + clim; 4905 } 4906 4907 /* 4908 * First, zero the token map, and then process the token 4909 * string -- setting a bit in the map for every character 4910 * found in the token string. 4911 */ 4912 for (i = 0; i < sizeof (tokmap); i++) 4913 tokmap[i] = 0; 4914 4915 for (; tokaddr < toklimit; tokaddr++) { 4916 if ((c = dtrace_load8(tokaddr)) == '\0') 4917 break; 4918 4919 ASSERT((c >> 3) < sizeof (tokmap)); 4920 tokmap[c >> 3] |= (1 << (c & 0x7)); 4921 } 4922 4923 for (; addr < limit; addr++) { 4924 /* 4925 * We're looking for a character that is _not_ 4926 * contained in the token string. 4927 */ 4928 if ((c = dtrace_load8(addr)) == '\0') 4929 break; 4930 4931 if (!(tokmap[c >> 3] & (1 << (c & 0x7)))) 4932 break; 4933 } 4934 4935 if (c == '\0') { 4936 /* 4937 * We reached the end of the string without finding 4938 * any character that was not in the token string. 4939 * We return NULL in this case, and we set the saved 4940 * address to NULL as well. 4941 */ 4942 regs[rd] = 0; 4943 mstate->dtms_strtok = 0; 4944 mstate->dtms_strtok_limit = 0; 4945 break; 4946 } 4947 4948 /* 4949 * From here on, we're copying into the destination string. 
4950 */ 4951 for (i = 0; addr < limit && i < size - 1; addr++) { 4952 if ((c = dtrace_load8(addr)) == '\0') 4953 break; 4954 4955 if (tokmap[c >> 3] & (1 << (c & 0x7))) 4956 break; 4957 4958 ASSERT(i < size); 4959 dest[i++] = c; 4960 } 4961 4962 ASSERT(i < size); 4963 dest[i] = '\0'; 4964 regs[rd] = (uintptr_t)dest; 4965 mstate->dtms_scratch_ptr += size; 4966 mstate->dtms_strtok = addr; 4967 mstate->dtms_strtok_limit = limit; 4968 break; 4969 } 4970 4971 case DIF_SUBR_SUBSTR: { 4972 uintptr_t s = tupregs[0].dttk_value; 4973 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4974 char *d = (char *)mstate->dtms_scratch_ptr; 4975 int64_t index = (int64_t)tupregs[1].dttk_value; 4976 int64_t remaining = (int64_t)tupregs[2].dttk_value; 4977 size_t len = dtrace_strlen((char *)s, size); 4978 int64_t i; 4979 4980 if (!dtrace_canload(s, len + 1, mstate, vstate)) { 4981 regs[rd] = 0; 4982 break; 4983 } 4984 4985 if (!DTRACE_INSCRATCH(mstate, size)) { 4986 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4987 regs[rd] = 0; 4988 break; 4989 } 4990 4991 if (nargs <= 2) 4992 remaining = (int64_t)size; 4993 4994 if (index < 0) { 4995 index += len; 4996 4997 if (index < 0 && index + remaining > 0) { 4998 remaining += index; 4999 index = 0; 5000 } 5001 } 5002 5003 if (index >= len || index < 0) { 5004 remaining = 0; 5005 } else if (remaining < 0) { 5006 remaining += len - index; 5007 } else if (index + remaining > size) { 5008 remaining = size - index; 5009 } 5010 5011 for (i = 0; i < remaining; i++) { 5012 if ((d[i] = dtrace_load8(s + index + i)) == '\0') 5013 break; 5014 } 5015 5016 d[i] = '\0'; 5017 5018 mstate->dtms_scratch_ptr += size; 5019 regs[rd] = (uintptr_t)d; 5020 break; 5021 } 5022 5023 case DIF_SUBR_JSON: { 5024 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 5025 uintptr_t json = tupregs[0].dttk_value; 5026 size_t jsonlen = dtrace_strlen((char *)json, size); 5027 uintptr_t elem = tupregs[1].dttk_value; 5028 size_t elemlen = dtrace_strlen((char *)elem, size); 5029 
5030 char *dest = (char *)mstate->dtms_scratch_ptr; 5031 char *elemlist = (char *)mstate->dtms_scratch_ptr + jsonlen + 1; 5032 char *ee = elemlist; 5033 int nelems = 1; 5034 uintptr_t cur; 5035 5036 if (!dtrace_canload(json, jsonlen + 1, mstate, vstate) || 5037 !dtrace_canload(elem, elemlen + 1, mstate, vstate)) { 5038 regs[rd] = 0; 5039 break; 5040 } 5041 5042 if (!DTRACE_INSCRATCH(mstate, jsonlen + 1 + elemlen + 1)) { 5043 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5044 regs[rd] = 0; 5045 break; 5046 } 5047 5048 /* 5049 * Read the element selector and split it up into a packed list 5050 * of strings. 5051 */ 5052 for (cur = elem; cur < elem + elemlen; cur++) { 5053 char cc = dtrace_load8(cur); 5054 5055 if (cur == elem && cc == '[') { 5056 /* 5057 * If the first element selector key is 5058 * actually an array index then ignore the 5059 * bracket. 5060 */ 5061 continue; 5062 } 5063 5064 if (cc == ']') 5065 continue; 5066 5067 if (cc == '.' || cc == '[') { 5068 nelems++; 5069 cc = '\0'; 5070 } 5071 5072 *ee++ = cc; 5073 } 5074 *ee++ = '\0'; 5075 5076 if ((regs[rd] = (uintptr_t)dtrace_json(size, json, elemlist, 5077 nelems, dest)) != 0) 5078 mstate->dtms_scratch_ptr += jsonlen + 1; 5079 break; 5080 } 5081 5082 case DIF_SUBR_TOUPPER: 5083 case DIF_SUBR_TOLOWER: { 5084 uintptr_t s = tupregs[0].dttk_value; 5085 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 5086 char *dest = (char *)mstate->dtms_scratch_ptr, c; 5087 size_t len = dtrace_strlen((char *)s, size); 5088 char lower, upper, convert; 5089 int64_t i; 5090 5091 if (subr == DIF_SUBR_TOUPPER) { 5092 lower = 'a'; 5093 upper = 'z'; 5094 convert = 'A'; 5095 } else { 5096 lower = 'A'; 5097 upper = 'Z'; 5098 convert = 'a'; 5099 } 5100 5101 if (!dtrace_canload(s, len + 1, mstate, vstate)) { 5102 regs[rd] = 0; 5103 break; 5104 } 5105 5106 if (!DTRACE_INSCRATCH(mstate, size)) { 5107 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5108 regs[rd] = 0; 5109 break; 5110 } 5111 5112 for (i = 0; i < size - 1; i++) { 5113 if 
((c = dtrace_load8(s + i)) == '\0') 5114 break; 5115 5116 if (c >= lower && c <= upper) 5117 c = convert + (c - lower); 5118 5119 dest[i] = c; 5120 } 5121 5122 ASSERT(i < size); 5123 dest[i] = '\0'; 5124 regs[rd] = (uintptr_t)dest; 5125 mstate->dtms_scratch_ptr += size; 5126 break; 5127 } 5128 5129 #ifdef illumos 5130 case DIF_SUBR_GETMAJOR: 5131 #ifdef _LP64 5132 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64; 5133 #else 5134 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ; 5135 #endif 5136 break; 5137 5138 case DIF_SUBR_GETMINOR: 5139 #ifdef _LP64 5140 regs[rd] = tupregs[0].dttk_value & MAXMIN64; 5141 #else 5142 regs[rd] = tupregs[0].dttk_value & MAXMIN; 5143 #endif 5144 break; 5145 5146 case DIF_SUBR_DDI_PATHNAME: { 5147 /* 5148 * This one is a galactic mess. We are going to roughly 5149 * emulate ddi_pathname(), but it's made more complicated 5150 * by the fact that we (a) want to include the minor name and 5151 * (b) must proceed iteratively instead of recursively. 5152 */ 5153 uintptr_t dest = mstate->dtms_scratch_ptr; 5154 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 5155 char *start = (char *)dest, *end = start + size - 1; 5156 uintptr_t daddr = tupregs[0].dttk_value; 5157 int64_t minor = (int64_t)tupregs[1].dttk_value; 5158 char *s; 5159 int i, len, depth = 0; 5160 5161 /* 5162 * Due to all the pointer jumping we do and context we must 5163 * rely upon, we just mandate that the user must have kernel 5164 * read privileges to use this routine. 5165 */ 5166 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) { 5167 *flags |= CPU_DTRACE_KPRIV; 5168 *illval = daddr; 5169 regs[rd] = 0; 5170 } 5171 5172 if (!DTRACE_INSCRATCH(mstate, size)) { 5173 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5174 regs[rd] = 0; 5175 break; 5176 } 5177 5178 *end = '\0'; 5179 5180 /* 5181 * We want to have a name for the minor. In order to do this, 5182 * we need to walk the minor list from the devinfo. 
We want 5183 * to be sure that we don't infinitely walk a circular list, 5184 * so we check for circularity by sending a scout pointer 5185 * ahead two elements for every element that we iterate over; 5186 * if the list is circular, these will ultimately point to the 5187 * same element. You may recognize this little trick as the 5188 * answer to a stupid interview question -- one that always 5189 * seems to be asked by those who had to have it laboriously 5190 * explained to them, and who can't even concisely describe 5191 * the conditions under which one would be forced to resort to 5192 * this technique. Needless to say, those conditions are 5193 * found here -- and probably only here. Is this the only use 5194 * of this infamous trick in shipping, production code? If it 5195 * isn't, it probably should be... 5196 */ 5197 if (minor != -1) { 5198 uintptr_t maddr = dtrace_loadptr(daddr + 5199 offsetof(struct dev_info, devi_minor)); 5200 5201 uintptr_t next = offsetof(struct ddi_minor_data, next); 5202 uintptr_t name = offsetof(struct ddi_minor_data, 5203 d_minor) + offsetof(struct ddi_minor, name); 5204 uintptr_t dev = offsetof(struct ddi_minor_data, 5205 d_minor) + offsetof(struct ddi_minor, dev); 5206 uintptr_t scout; 5207 5208 if (maddr != NULL) 5209 scout = dtrace_loadptr(maddr + next); 5210 5211 while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { 5212 uint64_t m; 5213 #ifdef _LP64 5214 m = dtrace_load64(maddr + dev) & MAXMIN64; 5215 #else 5216 m = dtrace_load32(maddr + dev) & MAXMIN; 5217 #endif 5218 if (m != minor) { 5219 maddr = dtrace_loadptr(maddr + next); 5220 5221 if (scout == NULL) 5222 continue; 5223 5224 scout = dtrace_loadptr(scout + next); 5225 5226 if (scout == NULL) 5227 continue; 5228 5229 scout = dtrace_loadptr(scout + next); 5230 5231 if (scout == NULL) 5232 continue; 5233 5234 if (scout == maddr) { 5235 *flags |= CPU_DTRACE_ILLOP; 5236 break; 5237 } 5238 5239 continue; 5240 } 5241 5242 /* 5243 * We have the minor data. 
Now we need to 5244 * copy the minor's name into the end of the 5245 * pathname. 5246 */ 5247 s = (char *)dtrace_loadptr(maddr + name); 5248 len = dtrace_strlen(s, size); 5249 5250 if (*flags & CPU_DTRACE_FAULT) 5251 break; 5252 5253 if (len != 0) { 5254 if ((end -= (len + 1)) < start) 5255 break; 5256 5257 *end = ':'; 5258 } 5259 5260 for (i = 1; i <= len; i++) 5261 end[i] = dtrace_load8((uintptr_t)s++); 5262 break; 5263 } 5264 } 5265 5266 while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { 5267 ddi_node_state_t devi_state; 5268 5269 devi_state = dtrace_load32(daddr + 5270 offsetof(struct dev_info, devi_node_state)); 5271 5272 if (*flags & CPU_DTRACE_FAULT) 5273 break; 5274 5275 if (devi_state >= DS_INITIALIZED) { 5276 s = (char *)dtrace_loadptr(daddr + 5277 offsetof(struct dev_info, devi_addr)); 5278 len = dtrace_strlen(s, size); 5279 5280 if (*flags & CPU_DTRACE_FAULT) 5281 break; 5282 5283 if (len != 0) { 5284 if ((end -= (len + 1)) < start) 5285 break; 5286 5287 *end = '@'; 5288 } 5289 5290 for (i = 1; i <= len; i++) 5291 end[i] = dtrace_load8((uintptr_t)s++); 5292 } 5293 5294 /* 5295 * Now for the node name... 5296 */ 5297 s = (char *)dtrace_loadptr(daddr + 5298 offsetof(struct dev_info, devi_node_name)); 5299 5300 daddr = dtrace_loadptr(daddr + 5301 offsetof(struct dev_info, devi_parent)); 5302 5303 /* 5304 * If our parent is NULL (that is, if we're the root 5305 * node), we're going to use the special path 5306 * "devices". 
5307 */ 5308 if (daddr == 0) 5309 s = "devices"; 5310 5311 len = dtrace_strlen(s, size); 5312 if (*flags & CPU_DTRACE_FAULT) 5313 break; 5314 5315 if ((end -= (len + 1)) < start) 5316 break; 5317 5318 for (i = 1; i <= len; i++) 5319 end[i] = dtrace_load8((uintptr_t)s++); 5320 *end = '/'; 5321 5322 if (depth++ > dtrace_devdepth_max) { 5323 *flags |= CPU_DTRACE_ILLOP; 5324 break; 5325 } 5326 } 5327 5328 if (end < start) 5329 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5330 5331 if (daddr == 0) { 5332 regs[rd] = (uintptr_t)end; 5333 mstate->dtms_scratch_ptr += size; 5334 } 5335 5336 break; 5337 } 5338 #endif 5339 5340 case DIF_SUBR_STRJOIN: { 5341 char *d = (char *)mstate->dtms_scratch_ptr; 5342 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 5343 uintptr_t s1 = tupregs[0].dttk_value; 5344 uintptr_t s2 = tupregs[1].dttk_value; 5345 int i = 0, j = 0; 5346 size_t lim1, lim2; 5347 char c; 5348 5349 if (!dtrace_strcanload(s1, size, &lim1, mstate, vstate) || 5350 !dtrace_strcanload(s2, size, &lim2, mstate, vstate)) { 5351 regs[rd] = 0; 5352 break; 5353 } 5354 5355 if (!DTRACE_INSCRATCH(mstate, size)) { 5356 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5357 regs[rd] = 0; 5358 break; 5359 } 5360 5361 for (;;) { 5362 if (i >= size) { 5363 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5364 regs[rd] = 0; 5365 break; 5366 } 5367 c = (i >= lim1) ? '\0' : dtrace_load8(s1++); 5368 if ((d[i++] = c) == '\0') { 5369 i--; 5370 break; 5371 } 5372 } 5373 5374 for (;;) { 5375 if (i >= size) { 5376 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5377 regs[rd] = 0; 5378 break; 5379 } 5380 5381 c = (j++ >= lim2) ? 
'\0' : dtrace_load8(s2++); 5382 if ((d[i++] = c) == '\0') 5383 break; 5384 } 5385 5386 if (i < size) { 5387 mstate->dtms_scratch_ptr += i; 5388 regs[rd] = (uintptr_t)d; 5389 } 5390 5391 break; 5392 } 5393 5394 case DIF_SUBR_STRTOLL: { 5395 uintptr_t s = tupregs[0].dttk_value; 5396 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 5397 size_t lim; 5398 int base = 10; 5399 5400 if (nargs > 1) { 5401 if ((base = tupregs[1].dttk_value) <= 1 || 5402 base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { 5403 *flags |= CPU_DTRACE_ILLOP; 5404 break; 5405 } 5406 } 5407 5408 if (!dtrace_strcanload(s, size, &lim, mstate, vstate)) { 5409 regs[rd] = INT64_MIN; 5410 break; 5411 } 5412 5413 regs[rd] = dtrace_strtoll((char *)s, base, lim); 5414 break; 5415 } 5416 5417 case DIF_SUBR_LLTOSTR: { 5418 int64_t i = (int64_t)tupregs[0].dttk_value; 5419 uint64_t val, digit; 5420 uint64_t size = 65; /* enough room for 2^64 in binary */ 5421 char *end = (char *)mstate->dtms_scratch_ptr + size - 1; 5422 int base = 10; 5423 5424 if (nargs > 1) { 5425 if ((base = tupregs[1].dttk_value) <= 1 || 5426 base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { 5427 *flags |= CPU_DTRACE_ILLOP; 5428 break; 5429 } 5430 } 5431 5432 val = (base == 10 && i < 0) ? 
i * -1 : i; 5433 5434 if (!DTRACE_INSCRATCH(mstate, size)) { 5435 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5436 regs[rd] = 0; 5437 break; 5438 } 5439 5440 for (*end-- = '\0'; val; val /= base) { 5441 if ((digit = val % base) <= '9' - '0') { 5442 *end-- = '0' + digit; 5443 } else { 5444 *end-- = 'a' + (digit - ('9' - '0') - 1); 5445 } 5446 } 5447 5448 if (i == 0 && base == 16) 5449 *end-- = '0'; 5450 5451 if (base == 16) 5452 *end-- = 'x'; 5453 5454 if (i == 0 || base == 8 || base == 16) 5455 *end-- = '0'; 5456 5457 if (i < 0 && base == 10) 5458 *end-- = '-'; 5459 5460 regs[rd] = (uintptr_t)end + 1; 5461 mstate->dtms_scratch_ptr += size; 5462 break; 5463 } 5464 5465 case DIF_SUBR_HTONS: 5466 case DIF_SUBR_NTOHS: 5467 #if BYTE_ORDER == BIG_ENDIAN 5468 regs[rd] = (uint16_t)tupregs[0].dttk_value; 5469 #else 5470 regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value); 5471 #endif 5472 break; 5473 5474 5475 case DIF_SUBR_HTONL: 5476 case DIF_SUBR_NTOHL: 5477 #if BYTE_ORDER == BIG_ENDIAN 5478 regs[rd] = (uint32_t)tupregs[0].dttk_value; 5479 #else 5480 regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value); 5481 #endif 5482 break; 5483 5484 5485 case DIF_SUBR_HTONLL: 5486 case DIF_SUBR_NTOHLL: 5487 #if BYTE_ORDER == BIG_ENDIAN 5488 regs[rd] = (uint64_t)tupregs[0].dttk_value; 5489 #else 5490 regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value); 5491 #endif 5492 break; 5493 5494 5495 case DIF_SUBR_DIRNAME: 5496 case DIF_SUBR_BASENAME: { 5497 char *dest = (char *)mstate->dtms_scratch_ptr; 5498 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 5499 uintptr_t src = tupregs[0].dttk_value; 5500 int i, j, len = dtrace_strlen((char *)src, size); 5501 int lastbase = -1, firstbase = -1, lastdir = -1; 5502 int start, end; 5503 5504 if (!dtrace_canload(src, len + 1, mstate, vstate)) { 5505 regs[rd] = 0; 5506 break; 5507 } 5508 5509 if (!DTRACE_INSCRATCH(mstate, size)) { 5510 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5511 regs[rd] = 0; 5512 break; 5513 } 5514 5515 /* 5516 * 
The basename and dirname for a zero-length string is 5517 * defined to be "." 5518 */ 5519 if (len == 0) { 5520 len = 1; 5521 src = (uintptr_t)"."; 5522 } 5523 5524 /* 5525 * Start from the back of the string, moving back toward the 5526 * front until we see a character that isn't a slash. That 5527 * character is the last character in the basename. 5528 */ 5529 for (i = len - 1; i >= 0; i--) { 5530 if (dtrace_load8(src + i) != '/') 5531 break; 5532 } 5533 5534 if (i >= 0) 5535 lastbase = i; 5536 5537 /* 5538 * Starting from the last character in the basename, move 5539 * towards the front until we find a slash. The character 5540 * that we processed immediately before that is the first 5541 * character in the basename. 5542 */ 5543 for (; i >= 0; i--) { 5544 if (dtrace_load8(src + i) == '/') 5545 break; 5546 } 5547 5548 if (i >= 0) 5549 firstbase = i + 1; 5550 5551 /* 5552 * Now keep going until we find a non-slash character. That 5553 * character is the last character in the dirname. 5554 */ 5555 for (; i >= 0; i--) { 5556 if (dtrace_load8(src + i) != '/') 5557 break; 5558 } 5559 5560 if (i >= 0) 5561 lastdir = i; 5562 5563 ASSERT(!(lastbase == -1 && firstbase != -1)); 5564 ASSERT(!(firstbase == -1 && lastdir != -1)); 5565 5566 if (lastbase == -1) { 5567 /* 5568 * We didn't find a non-slash character. We know that 5569 * the length is non-zero, so the whole string must be 5570 * slashes. In either the dirname or the basename 5571 * case, we return '/'. 5572 */ 5573 ASSERT(firstbase == -1); 5574 firstbase = lastbase = lastdir = 0; 5575 } 5576 5577 if (firstbase == -1) { 5578 /* 5579 * The entire string consists only of a basename 5580 * component. If we're looking for dirname, we need 5581 * to change our string to be just "."; if we're 5582 * looking for a basename, we'll just set the first 5583 * character of the basename to be 0. 
5584 */ 5585 if (subr == DIF_SUBR_DIRNAME) { 5586 ASSERT(lastdir == -1); 5587 src = (uintptr_t)"."; 5588 lastdir = 0; 5589 } else { 5590 firstbase = 0; 5591 } 5592 } 5593 5594 if (subr == DIF_SUBR_DIRNAME) { 5595 if (lastdir == -1) { 5596 /* 5597 * We know that we have a slash in the name -- 5598 * or lastdir would be set to 0, above. And 5599 * because lastdir is -1, we know that this 5600 * slash must be the first character. (That 5601 * is, the full string must be of the form 5602 * "/basename".) In this case, the last 5603 * character of the directory name is 0. 5604 */ 5605 lastdir = 0; 5606 } 5607 5608 start = 0; 5609 end = lastdir; 5610 } else { 5611 ASSERT(subr == DIF_SUBR_BASENAME); 5612 ASSERT(firstbase != -1 && lastbase != -1); 5613 start = firstbase; 5614 end = lastbase; 5615 } 5616 5617 for (i = start, j = 0; i <= end && j < size - 1; i++, j++) 5618 dest[j] = dtrace_load8(src + i); 5619 5620 dest[j] = '\0'; 5621 regs[rd] = (uintptr_t)dest; 5622 mstate->dtms_scratch_ptr += size; 5623 break; 5624 } 5625 5626 case DIF_SUBR_GETF: { 5627 uintptr_t fd = tupregs[0].dttk_value; 5628 struct filedesc *fdp; 5629 file_t *fp; 5630 5631 if (!dtrace_priv_proc(state)) { 5632 regs[rd] = 0; 5633 break; 5634 } 5635 fdp = curproc->p_fd; 5636 FILEDESC_SLOCK(fdp); 5637 fp = fget_locked(fdp, fd); 5638 mstate->dtms_getf = fp; 5639 regs[rd] = (uintptr_t)fp; 5640 FILEDESC_SUNLOCK(fdp); 5641 break; 5642 } 5643 5644 case DIF_SUBR_CLEANPATH: { 5645 char *dest = (char *)mstate->dtms_scratch_ptr, c; 5646 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 5647 uintptr_t src = tupregs[0].dttk_value; 5648 size_t lim; 5649 int i = 0, j = 0; 5650 #ifdef illumos 5651 zone_t *z; 5652 #endif 5653 5654 if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) { 5655 regs[rd] = 0; 5656 break; 5657 } 5658 5659 if (!DTRACE_INSCRATCH(mstate, size)) { 5660 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5661 regs[rd] = 0; 5662 break; 5663 } 5664 5665 /* 5666 * Move forward, loading each character. 
5667 */ 5668 do { 5669 c = (i >= lim) ? '\0' : dtrace_load8(src + i++); 5670 next: 5671 if (j + 5 >= size) /* 5 = strlen("/..c\0") */ 5672 break; 5673 5674 if (c != '/') { 5675 dest[j++] = c; 5676 continue; 5677 } 5678 5679 c = (i >= lim) ? '\0' : dtrace_load8(src + i++); 5680 5681 if (c == '/') { 5682 /* 5683 * We have two slashes -- we can just advance 5684 * to the next character. 5685 */ 5686 goto next; 5687 } 5688 5689 if (c != '.') { 5690 /* 5691 * This is not "." and it's not ".." -- we can 5692 * just store the "/" and this character and 5693 * drive on. 5694 */ 5695 dest[j++] = '/'; 5696 dest[j++] = c; 5697 continue; 5698 } 5699 5700 c = (i >= lim) ? '\0' : dtrace_load8(src + i++); 5701 5702 if (c == '/') { 5703 /* 5704 * This is a "/./" component. We're not going 5705 * to store anything in the destination buffer; 5706 * we're just going to go to the next component. 5707 */ 5708 goto next; 5709 } 5710 5711 if (c != '.') { 5712 /* 5713 * This is not ".." -- we can just store the 5714 * "/." and this character and continue 5715 * processing. 5716 */ 5717 dest[j++] = '/'; 5718 dest[j++] = '.'; 5719 dest[j++] = c; 5720 continue; 5721 } 5722 5723 c = (i >= lim) ? '\0' : dtrace_load8(src + i++); 5724 5725 if (c != '/' && c != '\0') { 5726 /* 5727 * This is not ".." -- it's "..[mumble]". 5728 * We'll store the "/.." and this character 5729 * and continue processing. 5730 */ 5731 dest[j++] = '/'; 5732 dest[j++] = '.'; 5733 dest[j++] = '.'; 5734 dest[j++] = c; 5735 continue; 5736 } 5737 5738 /* 5739 * This is "/../" or "/..\0". We need to back up 5740 * our destination pointer until we find a "/". 
5741 */ 5742 i--; 5743 while (j != 0 && dest[--j] != '/') 5744 continue; 5745 5746 if (c == '\0') 5747 dest[++j] = '/'; 5748 } while (c != '\0'); 5749 5750 dest[j] = '\0'; 5751 5752 #ifdef illumos 5753 if (mstate->dtms_getf != NULL && 5754 !(mstate->dtms_access & DTRACE_ACCESS_KERNEL) && 5755 (z = state->dts_cred.dcr_cred->cr_zone) != kcred->cr_zone) { 5756 /* 5757 * If we've done a getf() as a part of this ECB and we 5758 * don't have kernel access (and we're not in the global 5759 * zone), check if the path we cleaned up begins with 5760 * the zone's root path, and trim it off if so. Note 5761 * that this is an output cleanliness issue, not a 5762 * security issue: knowing one's zone root path does 5763 * not enable privilege escalation. 5764 */ 5765 if (strstr(dest, z->zone_rootpath) == dest) 5766 dest += strlen(z->zone_rootpath) - 1; 5767 } 5768 #endif 5769 5770 regs[rd] = (uintptr_t)dest; 5771 mstate->dtms_scratch_ptr += size; 5772 break; 5773 } 5774 5775 case DIF_SUBR_INET_NTOA: 5776 case DIF_SUBR_INET_NTOA6: 5777 case DIF_SUBR_INET_NTOP: { 5778 size_t size; 5779 int af, argi, i; 5780 char *base, *end; 5781 5782 if (subr == DIF_SUBR_INET_NTOP) { 5783 af = (int)tupregs[0].dttk_value; 5784 argi = 1; 5785 } else { 5786 af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6; 5787 argi = 0; 5788 } 5789 5790 if (af == AF_INET) { 5791 ipaddr_t ip4; 5792 uint8_t *ptr8, val; 5793 5794 if (!dtrace_canload(tupregs[argi].dttk_value, 5795 sizeof (ipaddr_t), mstate, vstate)) { 5796 regs[rd] = 0; 5797 break; 5798 } 5799 5800 /* 5801 * Safely load the IPv4 address. 5802 */ 5803 ip4 = dtrace_load32(tupregs[argi].dttk_value); 5804 5805 /* 5806 * Check an IPv4 string will fit in scratch. 
5807 */ 5808 size = INET_ADDRSTRLEN; 5809 if (!DTRACE_INSCRATCH(mstate, size)) { 5810 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5811 regs[rd] = 0; 5812 break; 5813 } 5814 base = (char *)mstate->dtms_scratch_ptr; 5815 end = (char *)mstate->dtms_scratch_ptr + size - 1; 5816 5817 /* 5818 * Stringify as a dotted decimal quad. 5819 */ 5820 *end-- = '\0'; 5821 ptr8 = (uint8_t *)&ip4; 5822 for (i = 3; i >= 0; i--) { 5823 val = ptr8[i]; 5824 5825 if (val == 0) { 5826 *end-- = '0'; 5827 } else { 5828 for (; val; val /= 10) { 5829 *end-- = '0' + (val % 10); 5830 } 5831 } 5832 5833 if (i > 0) 5834 *end-- = '.'; 5835 } 5836 ASSERT(end + 1 >= base); 5837 5838 } else if (af == AF_INET6) { 5839 struct in6_addr ip6; 5840 int firstzero, tryzero, numzero, v6end; 5841 uint16_t val; 5842 const char digits[] = "0123456789abcdef"; 5843 5844 /* 5845 * Stringify using RFC 1884 convention 2 - 16 bit 5846 * hexadecimal values with a zero-run compression. 5847 * Lower case hexadecimal digits are used. 5848 * eg, fe80::214:4fff:fe0b:76c8. 5849 * The IPv4 embedded form is returned for inet_ntop, 5850 * just the IPv4 string is returned for inet_ntoa6. 5851 */ 5852 5853 if (!dtrace_canload(tupregs[argi].dttk_value, 5854 sizeof (struct in6_addr), mstate, vstate)) { 5855 regs[rd] = 0; 5856 break; 5857 } 5858 5859 /* 5860 * Safely load the IPv6 address. 5861 */ 5862 dtrace_bcopy( 5863 (void *)(uintptr_t)tupregs[argi].dttk_value, 5864 (void *)(uintptr_t)&ip6, sizeof (struct in6_addr)); 5865 5866 /* 5867 * Check an IPv6 string will fit in scratch. 5868 */ 5869 size = INET6_ADDRSTRLEN; 5870 if (!DTRACE_INSCRATCH(mstate, size)) { 5871 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5872 regs[rd] = 0; 5873 break; 5874 } 5875 base = (char *)mstate->dtms_scratch_ptr; 5876 end = (char *)mstate->dtms_scratch_ptr + size - 1; 5877 *end-- = '\0'; 5878 5879 /* 5880 * Find the longest run of 16 bit zero values 5881 * for the single allowed zero compression - "::". 
5882 */ 5883 firstzero = -1; 5884 tryzero = -1; 5885 numzero = 1; 5886 for (i = 0; i < sizeof (struct in6_addr); i++) { 5887 #ifdef illumos 5888 if (ip6._S6_un._S6_u8[i] == 0 && 5889 #else 5890 if (ip6.__u6_addr.__u6_addr8[i] == 0 && 5891 #endif 5892 tryzero == -1 && i % 2 == 0) { 5893 tryzero = i; 5894 continue; 5895 } 5896 5897 if (tryzero != -1 && 5898 #ifdef illumos 5899 (ip6._S6_un._S6_u8[i] != 0 || 5900 #else 5901 (ip6.__u6_addr.__u6_addr8[i] != 0 || 5902 #endif 5903 i == sizeof (struct in6_addr) - 1)) { 5904 5905 if (i - tryzero <= numzero) { 5906 tryzero = -1; 5907 continue; 5908 } 5909 5910 firstzero = tryzero; 5911 numzero = i - i % 2 - tryzero; 5912 tryzero = -1; 5913 5914 #ifdef illumos 5915 if (ip6._S6_un._S6_u8[i] == 0 && 5916 #else 5917 if (ip6.__u6_addr.__u6_addr8[i] == 0 && 5918 #endif 5919 i == sizeof (struct in6_addr) - 1) 5920 numzero += 2; 5921 } 5922 } 5923 ASSERT(firstzero + numzero <= sizeof (struct in6_addr)); 5924 5925 /* 5926 * Check for an IPv4 embedded address. 5927 */ 5928 v6end = sizeof (struct in6_addr) - 2; 5929 if (IN6_IS_ADDR_V4MAPPED(&ip6) || 5930 IN6_IS_ADDR_V4COMPAT(&ip6)) { 5931 for (i = sizeof (struct in6_addr) - 1; 5932 i >= DTRACE_V4MAPPED_OFFSET; i--) { 5933 ASSERT(end >= base); 5934 5935 #ifdef illumos 5936 val = ip6._S6_un._S6_u8[i]; 5937 #else 5938 val = ip6.__u6_addr.__u6_addr8[i]; 5939 #endif 5940 5941 if (val == 0) { 5942 *end-- = '0'; 5943 } else { 5944 for (; val; val /= 10) { 5945 *end-- = '0' + val % 10; 5946 } 5947 } 5948 5949 if (i > DTRACE_V4MAPPED_OFFSET) 5950 *end-- = '.'; 5951 } 5952 5953 if (subr == DIF_SUBR_INET_NTOA6) 5954 goto inetout; 5955 5956 /* 5957 * Set v6end to skip the IPv4 address that 5958 * we have already stringified. 5959 */ 5960 v6end = 10; 5961 } 5962 5963 /* 5964 * Build the IPv6 string by working through the 5965 * address in reverse. 
5966 */ 5967 for (i = v6end; i >= 0; i -= 2) { 5968 ASSERT(end >= base); 5969 5970 if (i == firstzero + numzero - 2) { 5971 *end-- = ':'; 5972 *end-- = ':'; 5973 i -= numzero - 2; 5974 continue; 5975 } 5976 5977 if (i < 14 && i != firstzero - 2) 5978 *end-- = ':'; 5979 5980 #ifdef illumos 5981 val = (ip6._S6_un._S6_u8[i] << 8) + 5982 ip6._S6_un._S6_u8[i + 1]; 5983 #else 5984 val = (ip6.__u6_addr.__u6_addr8[i] << 8) + 5985 ip6.__u6_addr.__u6_addr8[i + 1]; 5986 #endif 5987 5988 if (val == 0) { 5989 *end-- = '0'; 5990 } else { 5991 for (; val; val /= 16) { 5992 *end-- = digits[val % 16]; 5993 } 5994 } 5995 } 5996 ASSERT(end + 1 >= base); 5997 5998 } else { 5999 /* 6000 * The user didn't use AH_INET or AH_INET6. 6001 */ 6002 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 6003 regs[rd] = 0; 6004 break; 6005 } 6006 6007 inetout: regs[rd] = (uintptr_t)end + 1; 6008 mstate->dtms_scratch_ptr += size; 6009 break; 6010 } 6011 6012 case DIF_SUBR_MEMREF: { 6013 uintptr_t size = 2 * sizeof(uintptr_t); 6014 uintptr_t *memref = (uintptr_t *) P2ROUNDUP(mstate->dtms_scratch_ptr, sizeof(uintptr_t)); 6015 size_t scratch_size = ((uintptr_t) memref - mstate->dtms_scratch_ptr) + size; 6016 6017 /* address and length */ 6018 memref[0] = tupregs[0].dttk_value; 6019 memref[1] = tupregs[1].dttk_value; 6020 6021 regs[rd] = (uintptr_t) memref; 6022 mstate->dtms_scratch_ptr += scratch_size; 6023 break; 6024 } 6025 6026 #ifndef illumos 6027 case DIF_SUBR_MEMSTR: { 6028 char *str = (char *)mstate->dtms_scratch_ptr; 6029 uintptr_t mem = tupregs[0].dttk_value; 6030 char c = tupregs[1].dttk_value; 6031 size_t size = tupregs[2].dttk_value; 6032 uint8_t n; 6033 int i; 6034 6035 regs[rd] = 0; 6036 6037 if (size == 0) 6038 break; 6039 6040 if (!dtrace_canload(mem, size - 1, mstate, vstate)) 6041 break; 6042 6043 if (!DTRACE_INSCRATCH(mstate, size)) { 6044 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 6045 break; 6046 } 6047 6048 if (dtrace_memstr_max != 0 && size > dtrace_memstr_max) { 6049 *flags |= 
CPU_DTRACE_ILLOP; 6050 break; 6051 } 6052 6053 for (i = 0; i < size - 1; i++) { 6054 n = dtrace_load8(mem++); 6055 str[i] = (n == 0) ? c : n; 6056 } 6057 str[size - 1] = 0; 6058 6059 regs[rd] = (uintptr_t)str; 6060 mstate->dtms_scratch_ptr += size; 6061 break; 6062 } 6063 #endif 6064 } 6065 } 6066 6067 /* 6068 * Emulate the execution of DTrace IR instructions specified by the given 6069 * DIF object. This function is deliberately void of assertions as all of 6070 * the necessary checks are handled by a call to dtrace_difo_validate(). 6071 */ 6072 static uint64_t 6073 dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, 6074 dtrace_vstate_t *vstate, dtrace_state_t *state) 6075 { 6076 const dif_instr_t *text = difo->dtdo_buf; 6077 const uint_t textlen = difo->dtdo_len; 6078 const char *strtab = difo->dtdo_strtab; 6079 const uint64_t *inttab = difo->dtdo_inttab; 6080 6081 uint64_t rval = 0; 6082 dtrace_statvar_t *svar; 6083 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; 6084 dtrace_difv_t *v; 6085 volatile uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags; 6086 volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval; 6087 6088 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ 6089 uint64_t regs[DIF_DIR_NREGS]; 6090 uint64_t *tmp; 6091 6092 uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0; 6093 int64_t cc_r; 6094 uint_t pc = 0, id, opc = 0; 6095 uint8_t ttop = 0; 6096 dif_instr_t instr; 6097 uint_t r1, r2, rd; 6098 6099 /* 6100 * We stash the current DIF object into the machine state: we need it 6101 * for subsequent access checking. 
6102 */ 6103 mstate->dtms_difo = difo; 6104 6105 regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */ 6106 6107 while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) { 6108 opc = pc; 6109 6110 instr = text[pc++]; 6111 r1 = DIF_INSTR_R1(instr); 6112 r2 = DIF_INSTR_R2(instr); 6113 rd = DIF_INSTR_RD(instr); 6114 6115 switch (DIF_INSTR_OP(instr)) { 6116 case DIF_OP_OR: 6117 regs[rd] = regs[r1] | regs[r2]; 6118 break; 6119 case DIF_OP_XOR: 6120 regs[rd] = regs[r1] ^ regs[r2]; 6121 break; 6122 case DIF_OP_AND: 6123 regs[rd] = regs[r1] & regs[r2]; 6124 break; 6125 case DIF_OP_SLL: 6126 regs[rd] = regs[r1] << regs[r2]; 6127 break; 6128 case DIF_OP_SRL: 6129 regs[rd] = regs[r1] >> regs[r2]; 6130 break; 6131 case DIF_OP_SUB: 6132 regs[rd] = regs[r1] - regs[r2]; 6133 break; 6134 case DIF_OP_ADD: 6135 regs[rd] = regs[r1] + regs[r2]; 6136 break; 6137 case DIF_OP_MUL: 6138 regs[rd] = regs[r1] * regs[r2]; 6139 break; 6140 case DIF_OP_SDIV: 6141 if (regs[r2] == 0) { 6142 regs[rd] = 0; 6143 *flags |= CPU_DTRACE_DIVZERO; 6144 } else { 6145 regs[rd] = (int64_t)regs[r1] / 6146 (int64_t)regs[r2]; 6147 } 6148 break; 6149 6150 case DIF_OP_UDIV: 6151 if (regs[r2] == 0) { 6152 regs[rd] = 0; 6153 *flags |= CPU_DTRACE_DIVZERO; 6154 } else { 6155 regs[rd] = regs[r1] / regs[r2]; 6156 } 6157 break; 6158 6159 case DIF_OP_SREM: 6160 if (regs[r2] == 0) { 6161 regs[rd] = 0; 6162 *flags |= CPU_DTRACE_DIVZERO; 6163 } else { 6164 regs[rd] = (int64_t)regs[r1] % 6165 (int64_t)regs[r2]; 6166 } 6167 break; 6168 6169 case DIF_OP_UREM: 6170 if (regs[r2] == 0) { 6171 regs[rd] = 0; 6172 *flags |= CPU_DTRACE_DIVZERO; 6173 } else { 6174 regs[rd] = regs[r1] % regs[r2]; 6175 } 6176 break; 6177 6178 case DIF_OP_NOT: 6179 regs[rd] = ~regs[r1]; 6180 break; 6181 case DIF_OP_MOV: 6182 regs[rd] = regs[r1]; 6183 break; 6184 case DIF_OP_CMP: 6185 cc_r = regs[r1] - regs[r2]; 6186 cc_n = cc_r < 0; 6187 cc_z = cc_r == 0; 6188 cc_v = 0; 6189 cc_c = regs[r1] < regs[r2]; 6190 break; 6191 case DIF_OP_TST: 6192 cc_n = cc_v = cc_c = 
0; 6193 cc_z = regs[r1] == 0; 6194 break; 6195 case DIF_OP_BA: 6196 pc = DIF_INSTR_LABEL(instr); 6197 break; 6198 case DIF_OP_BE: 6199 if (cc_z) 6200 pc = DIF_INSTR_LABEL(instr); 6201 break; 6202 case DIF_OP_BNE: 6203 if (cc_z == 0) 6204 pc = DIF_INSTR_LABEL(instr); 6205 break; 6206 case DIF_OP_BG: 6207 if ((cc_z | (cc_n ^ cc_v)) == 0) 6208 pc = DIF_INSTR_LABEL(instr); 6209 break; 6210 case DIF_OP_BGU: 6211 if ((cc_c | cc_z) == 0) 6212 pc = DIF_INSTR_LABEL(instr); 6213 break; 6214 case DIF_OP_BGE: 6215 if ((cc_n ^ cc_v) == 0) 6216 pc = DIF_INSTR_LABEL(instr); 6217 break; 6218 case DIF_OP_BGEU: 6219 if (cc_c == 0) 6220 pc = DIF_INSTR_LABEL(instr); 6221 break; 6222 case DIF_OP_BL: 6223 if (cc_n ^ cc_v) 6224 pc = DIF_INSTR_LABEL(instr); 6225 break; 6226 case DIF_OP_BLU: 6227 if (cc_c) 6228 pc = DIF_INSTR_LABEL(instr); 6229 break; 6230 case DIF_OP_BLE: 6231 if (cc_z | (cc_n ^ cc_v)) 6232 pc = DIF_INSTR_LABEL(instr); 6233 break; 6234 case DIF_OP_BLEU: 6235 if (cc_c | cc_z) 6236 pc = DIF_INSTR_LABEL(instr); 6237 break; 6238 case DIF_OP_RLDSB: 6239 if (!dtrace_canload(regs[r1], 1, mstate, vstate)) 6240 break; 6241 /*FALLTHROUGH*/ 6242 case DIF_OP_LDSB: 6243 regs[rd] = (int8_t)dtrace_load8(regs[r1]); 6244 break; 6245 case DIF_OP_RLDSH: 6246 if (!dtrace_canload(regs[r1], 2, mstate, vstate)) 6247 break; 6248 /*FALLTHROUGH*/ 6249 case DIF_OP_LDSH: 6250 regs[rd] = (int16_t)dtrace_load16(regs[r1]); 6251 break; 6252 case DIF_OP_RLDSW: 6253 if (!dtrace_canload(regs[r1], 4, mstate, vstate)) 6254 break; 6255 /*FALLTHROUGH*/ 6256 case DIF_OP_LDSW: 6257 regs[rd] = (int32_t)dtrace_load32(regs[r1]); 6258 break; 6259 case DIF_OP_RLDUB: 6260 if (!dtrace_canload(regs[r1], 1, mstate, vstate)) 6261 break; 6262 /*FALLTHROUGH*/ 6263 case DIF_OP_LDUB: 6264 regs[rd] = dtrace_load8(regs[r1]); 6265 break; 6266 case DIF_OP_RLDUH: 6267 if (!dtrace_canload(regs[r1], 2, mstate, vstate)) 6268 break; 6269 /*FALLTHROUGH*/ 6270 case DIF_OP_LDUH: 6271 regs[rd] = dtrace_load16(regs[r1]); 6272 break; 6273 
case DIF_OP_RLDUW: 6274 if (!dtrace_canload(regs[r1], 4, mstate, vstate)) 6275 break; 6276 /*FALLTHROUGH*/ 6277 case DIF_OP_LDUW: 6278 regs[rd] = dtrace_load32(regs[r1]); 6279 break; 6280 case DIF_OP_RLDX: 6281 if (!dtrace_canload(regs[r1], 8, mstate, vstate)) 6282 break; 6283 /*FALLTHROUGH*/ 6284 case DIF_OP_LDX: 6285 regs[rd] = dtrace_load64(regs[r1]); 6286 break; 6287 case DIF_OP_ULDSB: 6288 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 6289 regs[rd] = (int8_t) 6290 dtrace_fuword8((void *)(uintptr_t)regs[r1]); 6291 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 6292 break; 6293 case DIF_OP_ULDSH: 6294 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 6295 regs[rd] = (int16_t) 6296 dtrace_fuword16((void *)(uintptr_t)regs[r1]); 6297 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 6298 break; 6299 case DIF_OP_ULDSW: 6300 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 6301 regs[rd] = (int32_t) 6302 dtrace_fuword32((void *)(uintptr_t)regs[r1]); 6303 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 6304 break; 6305 case DIF_OP_ULDUB: 6306 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 6307 regs[rd] = 6308 dtrace_fuword8((void *)(uintptr_t)regs[r1]); 6309 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 6310 break; 6311 case DIF_OP_ULDUH: 6312 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 6313 regs[rd] = 6314 dtrace_fuword16((void *)(uintptr_t)regs[r1]); 6315 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 6316 break; 6317 case DIF_OP_ULDUW: 6318 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 6319 regs[rd] = 6320 dtrace_fuword32((void *)(uintptr_t)regs[r1]); 6321 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 6322 break; 6323 case DIF_OP_ULDX: 6324 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 6325 regs[rd] = 6326 dtrace_fuword64((void *)(uintptr_t)regs[r1]); 6327 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 6328 break; 6329 case DIF_OP_RET: 6330 rval = regs[rd]; 6331 pc = textlen; 6332 break; 6333 case DIF_OP_NOP: 6334 break; 6335 case DIF_OP_SETX: 6336 regs[rd] = inttab[DIF_INSTR_INTEGER(instr)]; 6337 break; 6338 case DIF_OP_SETS: 6339 regs[rd] = 
(uint64_t)(uintptr_t) 6340 (strtab + DIF_INSTR_STRING(instr)); 6341 break; 6342 case DIF_OP_SCMP: { 6343 size_t sz = state->dts_options[DTRACEOPT_STRSIZE]; 6344 uintptr_t s1 = regs[r1]; 6345 uintptr_t s2 = regs[r2]; 6346 size_t lim1, lim2; 6347 6348 if (s1 != 0 && 6349 !dtrace_strcanload(s1, sz, &lim1, mstate, vstate)) 6350 break; 6351 if (s2 != 0 && 6352 !dtrace_strcanload(s2, sz, &lim2, mstate, vstate)) 6353 break; 6354 6355 cc_r = dtrace_strncmp((char *)s1, (char *)s2, 6356 MIN(lim1, lim2)); 6357 6358 cc_n = cc_r < 0; 6359 cc_z = cc_r == 0; 6360 cc_v = cc_c = 0; 6361 break; 6362 } 6363 case DIF_OP_LDGA: 6364 regs[rd] = dtrace_dif_variable(mstate, state, 6365 r1, regs[r2]); 6366 break; 6367 case DIF_OP_LDGS: 6368 id = DIF_INSTR_VAR(instr); 6369 6370 if (id >= DIF_VAR_OTHER_UBASE) { 6371 uintptr_t a; 6372 6373 id -= DIF_VAR_OTHER_UBASE; 6374 svar = vstate->dtvs_globals[id]; 6375 ASSERT(svar != NULL); 6376 v = &svar->dtsv_var; 6377 6378 if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) { 6379 regs[rd] = svar->dtsv_data; 6380 break; 6381 } 6382 6383 a = (uintptr_t)svar->dtsv_data; 6384 6385 if (*(uint8_t *)a == UINT8_MAX) { 6386 /* 6387 * If the 0th byte is set to UINT8_MAX 6388 * then this is to be treated as a 6389 * reference to a NULL variable. 
6390 */ 6391 regs[rd] = 0; 6392 } else { 6393 regs[rd] = a + sizeof (uint64_t); 6394 } 6395 6396 break; 6397 } 6398 6399 regs[rd] = dtrace_dif_variable(mstate, state, id, 0); 6400 break; 6401 6402 case DIF_OP_STGS: 6403 id = DIF_INSTR_VAR(instr); 6404 6405 ASSERT(id >= DIF_VAR_OTHER_UBASE); 6406 id -= DIF_VAR_OTHER_UBASE; 6407 6408 VERIFY(id < vstate->dtvs_nglobals); 6409 svar = vstate->dtvs_globals[id]; 6410 ASSERT(svar != NULL); 6411 v = &svar->dtsv_var; 6412 6413 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 6414 uintptr_t a = (uintptr_t)svar->dtsv_data; 6415 size_t lim; 6416 6417 ASSERT(a != 0); 6418 ASSERT(svar->dtsv_size != 0); 6419 6420 if (regs[rd] == 0) { 6421 *(uint8_t *)a = UINT8_MAX; 6422 break; 6423 } else { 6424 *(uint8_t *)a = 0; 6425 a += sizeof (uint64_t); 6426 } 6427 if (!dtrace_vcanload( 6428 (void *)(uintptr_t)regs[rd], &v->dtdv_type, 6429 &lim, mstate, vstate)) 6430 break; 6431 6432 dtrace_vcopy((void *)(uintptr_t)regs[rd], 6433 (void *)a, &v->dtdv_type, lim); 6434 break; 6435 } 6436 6437 svar->dtsv_data = regs[rd]; 6438 break; 6439 6440 case DIF_OP_LDTA: 6441 /* 6442 * There are no DTrace built-in thread-local arrays at 6443 * present. This opcode is saved for future work. 6444 */ 6445 *flags |= CPU_DTRACE_ILLOP; 6446 regs[rd] = 0; 6447 break; 6448 6449 case DIF_OP_LDLS: 6450 id = DIF_INSTR_VAR(instr); 6451 6452 if (id < DIF_VAR_OTHER_UBASE) { 6453 /* 6454 * For now, this has no meaning. 
6455 */ 6456 regs[rd] = 0; 6457 break; 6458 } 6459 6460 id -= DIF_VAR_OTHER_UBASE; 6461 6462 ASSERT(id < vstate->dtvs_nlocals); 6463 ASSERT(vstate->dtvs_locals != NULL); 6464 6465 svar = vstate->dtvs_locals[id]; 6466 ASSERT(svar != NULL); 6467 v = &svar->dtsv_var; 6468 6469 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 6470 uintptr_t a = (uintptr_t)svar->dtsv_data; 6471 size_t sz = v->dtdv_type.dtdt_size; 6472 size_t lim; 6473 6474 sz += sizeof (uint64_t); 6475 ASSERT(svar->dtsv_size == NCPU * sz); 6476 a += curcpu * sz; 6477 6478 if (*(uint8_t *)a == UINT8_MAX) { 6479 /* 6480 * If the 0th byte is set to UINT8_MAX 6481 * then this is to be treated as a 6482 * reference to a NULL variable. 6483 */ 6484 regs[rd] = 0; 6485 } else { 6486 regs[rd] = a + sizeof (uint64_t); 6487 } 6488 6489 break; 6490 } 6491 6492 ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t)); 6493 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data; 6494 regs[rd] = tmp[curcpu]; 6495 break; 6496 6497 case DIF_OP_STLS: 6498 id = DIF_INSTR_VAR(instr); 6499 6500 ASSERT(id >= DIF_VAR_OTHER_UBASE); 6501 id -= DIF_VAR_OTHER_UBASE; 6502 VERIFY(id < vstate->dtvs_nlocals); 6503 6504 ASSERT(vstate->dtvs_locals != NULL); 6505 svar = vstate->dtvs_locals[id]; 6506 ASSERT(svar != NULL); 6507 v = &svar->dtsv_var; 6508 6509 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 6510 uintptr_t a = (uintptr_t)svar->dtsv_data; 6511 size_t sz = v->dtdv_type.dtdt_size; 6512 size_t lim; 6513 6514 sz += sizeof (uint64_t); 6515 ASSERT(svar->dtsv_size == NCPU * sz); 6516 a += curcpu * sz; 6517 6518 if (regs[rd] == 0) { 6519 *(uint8_t *)a = UINT8_MAX; 6520 break; 6521 } else { 6522 *(uint8_t *)a = 0; 6523 a += sizeof (uint64_t); 6524 } 6525 6526 if (!dtrace_vcanload( 6527 (void *)(uintptr_t)regs[rd], &v->dtdv_type, 6528 &lim, mstate, vstate)) 6529 break; 6530 6531 dtrace_vcopy((void *)(uintptr_t)regs[rd], 6532 (void *)a, &v->dtdv_type, lim); 6533 break; 6534 } 6535 6536 ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t)); 6537 tmp = 
(uint64_t *)(uintptr_t)svar->dtsv_data; 6538 tmp[curcpu] = regs[rd]; 6539 break; 6540 6541 case DIF_OP_LDTS: { 6542 dtrace_dynvar_t *dvar; 6543 dtrace_key_t *key; 6544 6545 id = DIF_INSTR_VAR(instr); 6546 ASSERT(id >= DIF_VAR_OTHER_UBASE); 6547 id -= DIF_VAR_OTHER_UBASE; 6548 v = &vstate->dtvs_tlocals[id]; 6549 6550 key = &tupregs[DIF_DTR_NREGS]; 6551 key[0].dttk_value = (uint64_t)id; 6552 key[0].dttk_size = 0; 6553 DTRACE_TLS_THRKEY(key[1].dttk_value); 6554 key[1].dttk_size = 0; 6555 6556 dvar = dtrace_dynvar(dstate, 2, key, 6557 sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC, 6558 mstate, vstate); 6559 6560 if (dvar == NULL) { 6561 regs[rd] = 0; 6562 break; 6563 } 6564 6565 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 6566 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data; 6567 } else { 6568 regs[rd] = *((uint64_t *)dvar->dtdv_data); 6569 } 6570 6571 break; 6572 } 6573 6574 case DIF_OP_STTS: { 6575 dtrace_dynvar_t *dvar; 6576 dtrace_key_t *key; 6577 6578 id = DIF_INSTR_VAR(instr); 6579 ASSERT(id >= DIF_VAR_OTHER_UBASE); 6580 id -= DIF_VAR_OTHER_UBASE; 6581 VERIFY(id < vstate->dtvs_ntlocals); 6582 6583 key = &tupregs[DIF_DTR_NREGS]; 6584 key[0].dttk_value = (uint64_t)id; 6585 key[0].dttk_size = 0; 6586 DTRACE_TLS_THRKEY(key[1].dttk_value); 6587 key[1].dttk_size = 0; 6588 v = &vstate->dtvs_tlocals[id]; 6589 6590 dvar = dtrace_dynvar(dstate, 2, key, 6591 v->dtdv_type.dtdt_size > sizeof (uint64_t) ? 6592 v->dtdv_type.dtdt_size : sizeof (uint64_t), 6593 regs[rd] ? DTRACE_DYNVAR_ALLOC : 6594 DTRACE_DYNVAR_DEALLOC, mstate, vstate); 6595 6596 /* 6597 * Given that we're storing to thread-local data, 6598 * we need to flush our predicate cache. 
6599 */ 6600 curthread->t_predcache = 0; 6601 6602 if (dvar == NULL) 6603 break; 6604 6605 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 6606 size_t lim; 6607 6608 if (!dtrace_vcanload( 6609 (void *)(uintptr_t)regs[rd], 6610 &v->dtdv_type, &lim, mstate, vstate)) 6611 break; 6612 6613 dtrace_vcopy((void *)(uintptr_t)regs[rd], 6614 dvar->dtdv_data, &v->dtdv_type, lim); 6615 } else { 6616 *((uint64_t *)dvar->dtdv_data) = regs[rd]; 6617 } 6618 6619 break; 6620 } 6621 6622 case DIF_OP_SRA: 6623 regs[rd] = (int64_t)regs[r1] >> regs[r2]; 6624 break; 6625 6626 case DIF_OP_CALL: 6627 dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd, 6628 regs, tupregs, ttop, mstate, state); 6629 break; 6630 6631 case DIF_OP_PUSHTR: 6632 if (ttop == DIF_DTR_NREGS) { 6633 *flags |= CPU_DTRACE_TUPOFLOW; 6634 break; 6635 } 6636 6637 if (r1 == DIF_TYPE_STRING) { 6638 /* 6639 * If this is a string type and the size is 0, 6640 * we'll use the system-wide default string 6641 * size. Note that we are _not_ looking at 6642 * the value of the DTRACEOPT_STRSIZE option; 6643 * had this been set, we would expect to have 6644 * a non-zero size value in the "pushtr". 6645 */ 6646 tupregs[ttop].dttk_size = 6647 dtrace_strlen((char *)(uintptr_t)regs[rd], 6648 regs[r2] ? 
regs[r2] : 6649 dtrace_strsize_default) + 1; 6650 } else { 6651 if (regs[r2] > LONG_MAX) { 6652 *flags |= CPU_DTRACE_ILLOP; 6653 break; 6654 } 6655 6656 tupregs[ttop].dttk_size = regs[r2]; 6657 } 6658 6659 tupregs[ttop++].dttk_value = regs[rd]; 6660 break; 6661 6662 case DIF_OP_PUSHTV: 6663 if (ttop == DIF_DTR_NREGS) { 6664 *flags |= CPU_DTRACE_TUPOFLOW; 6665 break; 6666 } 6667 6668 tupregs[ttop].dttk_value = regs[rd]; 6669 tupregs[ttop++].dttk_size = 0; 6670 break; 6671 6672 case DIF_OP_POPTS: 6673 if (ttop != 0) 6674 ttop--; 6675 break; 6676 6677 case DIF_OP_FLUSHTS: 6678 ttop = 0; 6679 break; 6680 6681 case DIF_OP_LDGAA: 6682 case DIF_OP_LDTAA: { 6683 dtrace_dynvar_t *dvar; 6684 dtrace_key_t *key = tupregs; 6685 uint_t nkeys = ttop; 6686 6687 id = DIF_INSTR_VAR(instr); 6688 ASSERT(id >= DIF_VAR_OTHER_UBASE); 6689 id -= DIF_VAR_OTHER_UBASE; 6690 6691 key[nkeys].dttk_value = (uint64_t)id; 6692 key[nkeys++].dttk_size = 0; 6693 6694 if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) { 6695 DTRACE_TLS_THRKEY(key[nkeys].dttk_value); 6696 key[nkeys++].dttk_size = 0; 6697 VERIFY(id < vstate->dtvs_ntlocals); 6698 v = &vstate->dtvs_tlocals[id]; 6699 } else { 6700 VERIFY(id < vstate->dtvs_nglobals); 6701 v = &vstate->dtvs_globals[id]->dtsv_var; 6702 } 6703 6704 dvar = dtrace_dynvar(dstate, nkeys, key, 6705 v->dtdv_type.dtdt_size > sizeof (uint64_t) ? 
6706 v->dtdv_type.dtdt_size : sizeof (uint64_t), 6707 DTRACE_DYNVAR_NOALLOC, mstate, vstate); 6708 6709 if (dvar == NULL) { 6710 regs[rd] = 0; 6711 break; 6712 } 6713 6714 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 6715 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data; 6716 } else { 6717 regs[rd] = *((uint64_t *)dvar->dtdv_data); 6718 } 6719 6720 break; 6721 } 6722 6723 case DIF_OP_STGAA: 6724 case DIF_OP_STTAA: { 6725 dtrace_dynvar_t *dvar; 6726 dtrace_key_t *key = tupregs; 6727 uint_t nkeys = ttop; 6728 6729 id = DIF_INSTR_VAR(instr); 6730 ASSERT(id >= DIF_VAR_OTHER_UBASE); 6731 id -= DIF_VAR_OTHER_UBASE; 6732 6733 key[nkeys].dttk_value = (uint64_t)id; 6734 key[nkeys++].dttk_size = 0; 6735 6736 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) { 6737 DTRACE_TLS_THRKEY(key[nkeys].dttk_value); 6738 key[nkeys++].dttk_size = 0; 6739 VERIFY(id < vstate->dtvs_ntlocals); 6740 v = &vstate->dtvs_tlocals[id]; 6741 } else { 6742 VERIFY(id < vstate->dtvs_nglobals); 6743 v = &vstate->dtvs_globals[id]->dtsv_var; 6744 } 6745 6746 dvar = dtrace_dynvar(dstate, nkeys, key, 6747 v->dtdv_type.dtdt_size > sizeof (uint64_t) ? 6748 v->dtdv_type.dtdt_size : sizeof (uint64_t), 6749 regs[rd] ? DTRACE_DYNVAR_ALLOC : 6750 DTRACE_DYNVAR_DEALLOC, mstate, vstate); 6751 6752 if (dvar == NULL) 6753 break; 6754 6755 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 6756 size_t lim; 6757 6758 if (!dtrace_vcanload( 6759 (void *)(uintptr_t)regs[rd], &v->dtdv_type, 6760 &lim, mstate, vstate)) 6761 break; 6762 6763 dtrace_vcopy((void *)(uintptr_t)regs[rd], 6764 dvar->dtdv_data, &v->dtdv_type, lim); 6765 } else { 6766 *((uint64_t *)dvar->dtdv_data) = regs[rd]; 6767 } 6768 6769 break; 6770 } 6771 6772 case DIF_OP_ALLOCS: { 6773 uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); 6774 size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1]; 6775 6776 /* 6777 * Rounding up the user allocation size could have 6778 * overflowed large, bogus allocations (like -1ULL) to 6779 * 0. 
6780 */ 6781 if (size < regs[r1] || 6782 !DTRACE_INSCRATCH(mstate, size)) { 6783 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 6784 regs[rd] = 0; 6785 break; 6786 } 6787 6788 dtrace_bzero((void *) mstate->dtms_scratch_ptr, size); 6789 mstate->dtms_scratch_ptr += size; 6790 regs[rd] = ptr; 6791 break; 6792 } 6793 6794 case DIF_OP_COPYS: 6795 if (!dtrace_canstore(regs[rd], regs[r2], 6796 mstate, vstate)) { 6797 *flags |= CPU_DTRACE_BADADDR; 6798 *illval = regs[rd]; 6799 break; 6800 } 6801 6802 if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate)) 6803 break; 6804 6805 dtrace_bcopy((void *)(uintptr_t)regs[r1], 6806 (void *)(uintptr_t)regs[rd], (size_t)regs[r2]); 6807 break; 6808 6809 case DIF_OP_STB: 6810 if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) { 6811 *flags |= CPU_DTRACE_BADADDR; 6812 *illval = regs[rd]; 6813 break; 6814 } 6815 *((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1]; 6816 break; 6817 6818 case DIF_OP_STH: 6819 if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) { 6820 *flags |= CPU_DTRACE_BADADDR; 6821 *illval = regs[rd]; 6822 break; 6823 } 6824 if (regs[rd] & 1) { 6825 *flags |= CPU_DTRACE_BADALIGN; 6826 *illval = regs[rd]; 6827 break; 6828 } 6829 *((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1]; 6830 break; 6831 6832 case DIF_OP_STW: 6833 if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) { 6834 *flags |= CPU_DTRACE_BADADDR; 6835 *illval = regs[rd]; 6836 break; 6837 } 6838 if (regs[rd] & 3) { 6839 *flags |= CPU_DTRACE_BADALIGN; 6840 *illval = regs[rd]; 6841 break; 6842 } 6843 *((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1]; 6844 break; 6845 6846 case DIF_OP_STX: 6847 if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) { 6848 *flags |= CPU_DTRACE_BADADDR; 6849 *illval = regs[rd]; 6850 break; 6851 } 6852 if (regs[rd] & 7) { 6853 *flags |= CPU_DTRACE_BADALIGN; 6854 *illval = regs[rd]; 6855 break; 6856 } 6857 *((uint64_t *)(uintptr_t)regs[rd]) = regs[r1]; 6858 break; 6859 } 6860 } 6861 6862 if (!(*flags & CPU_DTRACE_FAULT)) 6863 
return (rval); 6864 6865 mstate->dtms_fltoffs = opc * sizeof (dif_instr_t); 6866 mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS; 6867 6868 return (0); 6869 } 6870 6871 static void 6872 dtrace_action_breakpoint(dtrace_ecb_t *ecb) 6873 { 6874 dtrace_probe_t *probe = ecb->dte_probe; 6875 dtrace_provider_t *prov = probe->dtpr_provider; 6876 char c[DTRACE_FULLNAMELEN + 80], *str; 6877 char *msg = "dtrace: breakpoint action at probe "; 6878 char *ecbmsg = " (ecb "; 6879 uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4)); 6880 uintptr_t val = (uintptr_t)ecb; 6881 int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0; 6882 6883 if (dtrace_destructive_disallow) 6884 return; 6885 6886 /* 6887 * It's impossible to be taking action on the NULL probe. 6888 */ 6889 ASSERT(probe != NULL); 6890 6891 /* 6892 * This is a poor man's (destitute man's?) sprintf(): we want to 6893 * print the provider name, module name, function name and name of 6894 * the probe, along with the hex address of the ECB with the breakpoint 6895 * action -- all of which we must place in the character buffer by 6896 * hand. 
6897 */ 6898 while (*msg != '\0') 6899 c[i++] = *msg++; 6900 6901 for (str = prov->dtpv_name; *str != '\0'; str++) 6902 c[i++] = *str; 6903 c[i++] = ':'; 6904 6905 for (str = probe->dtpr_mod; *str != '\0'; str++) 6906 c[i++] = *str; 6907 c[i++] = ':'; 6908 6909 for (str = probe->dtpr_func; *str != '\0'; str++) 6910 c[i++] = *str; 6911 c[i++] = ':'; 6912 6913 for (str = probe->dtpr_name; *str != '\0'; str++) 6914 c[i++] = *str; 6915 6916 while (*ecbmsg != '\0') 6917 c[i++] = *ecbmsg++; 6918 6919 while (shift >= 0) { 6920 mask = (uintptr_t)0xf << shift; 6921 6922 if (val >= ((uintptr_t)1 << shift)) 6923 c[i++] = "0123456789abcdef"[(val & mask) >> shift]; 6924 shift -= 4; 6925 } 6926 6927 c[i++] = ')'; 6928 c[i] = '\0'; 6929 6930 #ifdef illumos 6931 debug_enter(c); 6932 #else 6933 kdb_enter(KDB_WHY_DTRACE, "breakpoint action"); 6934 #endif 6935 } 6936 6937 static void 6938 dtrace_action_panic(dtrace_ecb_t *ecb) 6939 { 6940 dtrace_probe_t *probe = ecb->dte_probe; 6941 6942 /* 6943 * It's impossible to be taking action on the NULL probe. 6944 */ 6945 ASSERT(probe != NULL); 6946 6947 if (dtrace_destructive_disallow) 6948 return; 6949 6950 if (dtrace_panicked != NULL) 6951 return; 6952 6953 if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL) 6954 return; 6955 6956 /* 6957 * We won the right to panic. (We want to be sure that only one 6958 * thread calls panic() from dtrace_probe(), and that panic() is 6959 * called exactly once.) 
6960 */ 6961 dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)", 6962 probe->dtpr_provider->dtpv_name, probe->dtpr_mod, 6963 probe->dtpr_func, probe->dtpr_name, (void *)ecb); 6964 } 6965 6966 static void 6967 dtrace_action_raise(uint64_t sig) 6968 { 6969 if (dtrace_destructive_disallow) 6970 return; 6971 6972 if (sig >= NSIG) { 6973 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 6974 return; 6975 } 6976 6977 #ifdef illumos 6978 /* 6979 * raise() has a queue depth of 1 -- we ignore all subsequent 6980 * invocations of the raise() action. 6981 */ 6982 if (curthread->t_dtrace_sig == 0) 6983 curthread->t_dtrace_sig = (uint8_t)sig; 6984 6985 curthread->t_sig_check = 1; 6986 aston(curthread); 6987 #else 6988 struct proc *p = curproc; 6989 PROC_LOCK(p); 6990 kern_psignal(p, sig); 6991 PROC_UNLOCK(p); 6992 #endif 6993 } 6994 6995 static void 6996 dtrace_action_stop(void) 6997 { 6998 if (dtrace_destructive_disallow) 6999 return; 7000 7001 #ifdef illumos 7002 if (!curthread->t_dtrace_stop) { 7003 curthread->t_dtrace_stop = 1; 7004 curthread->t_sig_check = 1; 7005 aston(curthread); 7006 } 7007 #else 7008 struct proc *p = curproc; 7009 PROC_LOCK(p); 7010 kern_psignal(p, SIGSTOP); 7011 PROC_UNLOCK(p); 7012 #endif 7013 } 7014 7015 static void 7016 dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val) 7017 { 7018 hrtime_t now; 7019 volatile uint16_t *flags; 7020 #ifdef illumos 7021 cpu_t *cpu = CPU; 7022 #else 7023 cpu_t *cpu = &solaris_cpu[curcpu]; 7024 #endif 7025 7026 if (dtrace_destructive_disallow) 7027 return; 7028 7029 flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; 7030 7031 now = dtrace_gethrtime(); 7032 7033 if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) { 7034 /* 7035 * We need to advance the mark to the current time. 
7036 */ 7037 cpu->cpu_dtrace_chillmark = now; 7038 cpu->cpu_dtrace_chilled = 0; 7039 } 7040 7041 /* 7042 * Now check to see if the requested chill time would take us over 7043 * the maximum amount of time allowed in the chill interval. (Or 7044 * worse, if the calculation itself induces overflow.) 7045 */ 7046 if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max || 7047 cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) { 7048 *flags |= CPU_DTRACE_ILLOP; 7049 return; 7050 } 7051 7052 while (dtrace_gethrtime() - now < val) 7053 continue; 7054 7055 /* 7056 * Normally, we assure that the value of the variable "timestamp" does 7057 * not change within an ECB. The presence of chill() represents an 7058 * exception to this rule, however. 7059 */ 7060 mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP; 7061 cpu->cpu_dtrace_chilled += val; 7062 } 7063 7064 static void 7065 dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state, 7066 uint64_t *buf, uint64_t arg) 7067 { 7068 int nframes = DTRACE_USTACK_NFRAMES(arg); 7069 int strsize = DTRACE_USTACK_STRSIZE(arg); 7070 uint64_t *pcs = &buf[1], *fps; 7071 char *str = (char *)&pcs[nframes]; 7072 int size, offs = 0, i, j; 7073 size_t rem; 7074 uintptr_t old = mstate->dtms_scratch_ptr, saved; 7075 uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags; 7076 char *sym; 7077 7078 /* 7079 * Should be taking a faster path if string space has not been 7080 * allocated. 7081 */ 7082 ASSERT(strsize != 0); 7083 7084 /* 7085 * We will first allocate some temporary space for the frame pointers. 7086 */ 7087 fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8); 7088 size = (uintptr_t)fps - mstate->dtms_scratch_ptr + 7089 (nframes * sizeof (uint64_t)); 7090 7091 if (!DTRACE_INSCRATCH(mstate, size)) { 7092 /* 7093 * Not enough room for our frame pointers -- need to indicate 7094 * that we ran out of scratch space. 
7095 */ 7096 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 7097 return; 7098 } 7099 7100 mstate->dtms_scratch_ptr += size; 7101 saved = mstate->dtms_scratch_ptr; 7102 7103 /* 7104 * Now get a stack with both program counters and frame pointers. 7105 */ 7106 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 7107 dtrace_getufpstack(buf, fps, nframes + 1); 7108 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 7109 7110 /* 7111 * If that faulted, we're cooked. 7112 */ 7113 if (*flags & CPU_DTRACE_FAULT) 7114 goto out; 7115 7116 /* 7117 * Now we want to walk up the stack, calling the USTACK helper. For 7118 * each iteration, we restore the scratch pointer. 7119 */ 7120 for (i = 0; i < nframes; i++) { 7121 mstate->dtms_scratch_ptr = saved; 7122 7123 if (offs >= strsize) 7124 break; 7125 7126 sym = (char *)(uintptr_t)dtrace_helper( 7127 DTRACE_HELPER_ACTION_USTACK, 7128 mstate, state, pcs[i], fps[i]); 7129 7130 /* 7131 * If we faulted while running the helper, we're going to 7132 * clear the fault and null out the corresponding string. 7133 */ 7134 if (*flags & CPU_DTRACE_FAULT) { 7135 *flags &= ~CPU_DTRACE_FAULT; 7136 str[offs++] = '\0'; 7137 continue; 7138 } 7139 7140 if (sym == NULL) { 7141 str[offs++] = '\0'; 7142 continue; 7143 } 7144 7145 if (!dtrace_strcanload((uintptr_t)sym, strsize, &rem, mstate, 7146 &(state->dts_vstate))) { 7147 str[offs++] = '\0'; 7148 continue; 7149 } 7150 7151 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 7152 7153 /* 7154 * Now copy in the string that the helper returned to us. 7155 */ 7156 for (j = 0; offs + j < strsize && j < rem; j++) { 7157 if ((str[offs + j] = sym[j]) == '\0') 7158 break; 7159 } 7160 7161 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 7162 7163 offs += j + 1; 7164 } 7165 7166 if (offs >= strsize) { 7167 /* 7168 * If we didn't have room for all of the strings, we don't 7169 * abort processing -- this needn't be a fatal error -- but we 7170 * still want to increment a counter (dts_stkstroverflows) to 7171 * allow this condition to be warned about. 
(If this is from 7172 * a jstack() action, it is easily tuned via jstackstrsize.) 7173 */ 7174 dtrace_error(&state->dts_stkstroverflows); 7175 } 7176 7177 while (offs < strsize) 7178 str[offs++] = '\0'; 7179 7180 out: 7181 mstate->dtms_scratch_ptr = old; 7182 } 7183 7184 static void 7185 dtrace_store_by_ref(dtrace_difo_t *dp, caddr_t tomax, size_t size, 7186 size_t *valoffsp, uint64_t *valp, uint64_t end, int intuple, int dtkind) 7187 { 7188 volatile uint16_t *flags; 7189 uint64_t val = *valp; 7190 size_t valoffs = *valoffsp; 7191 7192 flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; 7193 ASSERT(dtkind == DIF_TF_BYREF || dtkind == DIF_TF_BYUREF); 7194 7195 /* 7196 * If this is a string, we're going to only load until we find the zero 7197 * byte -- after which we'll store zero bytes. 7198 */ 7199 if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) { 7200 char c = '\0' + 1; 7201 size_t s; 7202 7203 for (s = 0; s < size; s++) { 7204 if (c != '\0' && dtkind == DIF_TF_BYREF) { 7205 c = dtrace_load8(val++); 7206 } else if (c != '\0' && dtkind == DIF_TF_BYUREF) { 7207 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 7208 c = dtrace_fuword8((void *)(uintptr_t)val++); 7209 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 7210 if (*flags & CPU_DTRACE_FAULT) 7211 break; 7212 } 7213 7214 DTRACE_STORE(uint8_t, tomax, valoffs++, c); 7215 7216 if (c == '\0' && intuple) 7217 break; 7218 } 7219 } else { 7220 uint8_t c; 7221 while (valoffs < end) { 7222 if (dtkind == DIF_TF_BYREF) { 7223 c = dtrace_load8(val++); 7224 } else if (dtkind == DIF_TF_BYUREF) { 7225 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 7226 c = dtrace_fuword8((void *)(uintptr_t)val++); 7227 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 7228 if (*flags & CPU_DTRACE_FAULT) 7229 break; 7230 } 7231 7232 DTRACE_STORE(uint8_t, tomax, 7233 valoffs++, c); 7234 } 7235 } 7236 7237 *valp = val; 7238 *valoffsp = valoffs; 7239 } 7240 7241 /* 7242 * If you're looking for the epicenter of DTrace, you just found it. 
This 7243 * is the function called by the provider to fire a probe -- from which all 7244 * subsequent probe-context DTrace activity emanates. 7245 */ 7246 void 7247 dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, 7248 uintptr_t arg2, uintptr_t arg3, uintptr_t arg4) 7249 { 7250 processorid_t cpuid; 7251 dtrace_icookie_t cookie; 7252 dtrace_probe_t *probe; 7253 dtrace_mstate_t mstate; 7254 dtrace_ecb_t *ecb; 7255 dtrace_action_t *act; 7256 intptr_t offs; 7257 size_t size; 7258 int vtime, onintr; 7259 volatile uint16_t *flags; 7260 hrtime_t now; 7261 7262 if (panicstr != NULL) 7263 return; 7264 7265 #ifdef illumos 7266 /* 7267 * Kick out immediately if this CPU is still being born (in which case 7268 * curthread will be set to -1) or the current thread can't allow 7269 * probes in its current context. 7270 */ 7271 if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE)) 7272 return; 7273 #endif 7274 7275 cookie = dtrace_interrupt_disable(); 7276 probe = dtrace_probes[id - 1]; 7277 cpuid = curcpu; 7278 onintr = CPU_ON_INTR(CPU); 7279 7280 if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE && 7281 probe->dtpr_predcache == curthread->t_predcache) { 7282 /* 7283 * We have hit in the predicate cache; we know that 7284 * this predicate would evaluate to be false. 7285 */ 7286 dtrace_interrupt_enable(cookie); 7287 return; 7288 } 7289 7290 #ifdef illumos 7291 if (panic_quiesce) { 7292 #else 7293 if (panicstr != NULL) { 7294 #endif 7295 /* 7296 * We don't trace anything if we're panicking. 
7297 */ 7298 dtrace_interrupt_enable(cookie); 7299 return; 7300 } 7301 7302 now = mstate.dtms_timestamp = dtrace_gethrtime(); 7303 mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP; 7304 vtime = dtrace_vtime_references != 0; 7305 7306 if (vtime && curthread->t_dtrace_start) 7307 curthread->t_dtrace_vtime += now - curthread->t_dtrace_start; 7308 7309 mstate.dtms_difo = NULL; 7310 mstate.dtms_probe = probe; 7311 mstate.dtms_strtok = 0; 7312 mstate.dtms_arg[0] = arg0; 7313 mstate.dtms_arg[1] = arg1; 7314 mstate.dtms_arg[2] = arg2; 7315 mstate.dtms_arg[3] = arg3; 7316 mstate.dtms_arg[4] = arg4; 7317 7318 flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags; 7319 7320 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) { 7321 dtrace_predicate_t *pred = ecb->dte_predicate; 7322 dtrace_state_t *state = ecb->dte_state; 7323 dtrace_buffer_t *buf = &state->dts_buffer[cpuid]; 7324 dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid]; 7325 dtrace_vstate_t *vstate = &state->dts_vstate; 7326 dtrace_provider_t *prov = probe->dtpr_provider; 7327 uint64_t tracememsize = 0; 7328 int committed = 0; 7329 caddr_t tomax; 7330 7331 /* 7332 * A little subtlety with the following (seemingly innocuous) 7333 * declaration of the automatic 'val': by looking at the 7334 * code, you might think that it could be declared in the 7335 * action processing loop, below. (That is, it's only used in 7336 * the action processing loop.) However, it must be declared 7337 * out of that scope because in the case of DIF expression 7338 * arguments to aggregating actions, one iteration of the 7339 * action loop will use the last iteration's value. 
7340 */ 7341 uint64_t val = 0; 7342 7343 mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE; 7344 mstate.dtms_getf = NULL; 7345 7346 *flags &= ~CPU_DTRACE_ERROR; 7347 7348 if (prov == dtrace_provider) { 7349 /* 7350 * If dtrace itself is the provider of this probe, 7351 * we're only going to continue processing the ECB if 7352 * arg0 (the dtrace_state_t) is equal to the ECB's 7353 * creating state. (This prevents disjoint consumers 7354 * from seeing one another's metaprobes.) 7355 */ 7356 if (arg0 != (uint64_t)(uintptr_t)state) 7357 continue; 7358 } 7359 7360 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) { 7361 /* 7362 * We're not currently active. If our provider isn't 7363 * the dtrace pseudo provider, we're not interested. 7364 */ 7365 if (prov != dtrace_provider) 7366 continue; 7367 7368 /* 7369 * Now we must further check if we are in the BEGIN 7370 * probe. If we are, we will only continue processing 7371 * if we're still in WARMUP -- if one BEGIN enabling 7372 * has invoked the exit() action, we don't want to 7373 * evaluate subsequent BEGIN enablings. 7374 */ 7375 if (probe->dtpr_id == dtrace_probeid_begin && 7376 state->dts_activity != DTRACE_ACTIVITY_WARMUP) { 7377 ASSERT(state->dts_activity == 7378 DTRACE_ACTIVITY_DRAINING); 7379 continue; 7380 } 7381 } 7382 7383 if (ecb->dte_cond) { 7384 /* 7385 * If the dte_cond bits indicate that this 7386 * consumer is only allowed to see user-mode firings 7387 * of this probe, call the provider's dtps_usermode() 7388 * entry point to check that the probe was fired 7389 * while in a user context. Skip this ECB if that's 7390 * not the case. 7391 */ 7392 if ((ecb->dte_cond & DTRACE_COND_USERMODE) && 7393 prov->dtpv_pops.dtps_usermode(prov->dtpv_arg, 7394 probe->dtpr_id, probe->dtpr_arg) == 0) 7395 continue; 7396 7397 #ifdef illumos 7398 /* 7399 * This is more subtle than it looks. 
We have to be 7400 * absolutely certain that CRED() isn't going to 7401 * change out from under us so it's only legit to 7402 * examine that structure if we're in constrained 7403 * situations. Currently, the only times we'll this 7404 * check is if a non-super-user has enabled the 7405 * profile or syscall providers -- providers that 7406 * allow visibility of all processes. For the 7407 * profile case, the check above will ensure that 7408 * we're examining a user context. 7409 */ 7410 if (ecb->dte_cond & DTRACE_COND_OWNER) { 7411 cred_t *cr; 7412 cred_t *s_cr = 7413 ecb->dte_state->dts_cred.dcr_cred; 7414 proc_t *proc; 7415 7416 ASSERT(s_cr != NULL); 7417 7418 if ((cr = CRED()) == NULL || 7419 s_cr->cr_uid != cr->cr_uid || 7420 s_cr->cr_uid != cr->cr_ruid || 7421 s_cr->cr_uid != cr->cr_suid || 7422 s_cr->cr_gid != cr->cr_gid || 7423 s_cr->cr_gid != cr->cr_rgid || 7424 s_cr->cr_gid != cr->cr_sgid || 7425 (proc = ttoproc(curthread)) == NULL || 7426 (proc->p_flag & SNOCD)) 7427 continue; 7428 } 7429 7430 if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { 7431 cred_t *cr; 7432 cred_t *s_cr = 7433 ecb->dte_state->dts_cred.dcr_cred; 7434 7435 ASSERT(s_cr != NULL); 7436 7437 if ((cr = CRED()) == NULL || 7438 s_cr->cr_zone->zone_id != 7439 cr->cr_zone->zone_id) 7440 continue; 7441 } 7442 #endif 7443 } 7444 7445 if (now - state->dts_alive > dtrace_deadman_timeout) { 7446 /* 7447 * We seem to be dead. Unless we (a) have kernel 7448 * destructive permissions (b) have explicitly enabled 7449 * destructive actions and (c) destructive actions have 7450 * not been disabled, we're going to transition into 7451 * the KILLED state, from which no further processing 7452 * on this state will be performed. 
7453 */ 7454 if (!dtrace_priv_kernel_destructive(state) || 7455 !state->dts_cred.dcr_destructive || 7456 dtrace_destructive_disallow) { 7457 void *activity = &state->dts_activity; 7458 dtrace_activity_t current; 7459 7460 do { 7461 current = state->dts_activity; 7462 } while (dtrace_cas32(activity, current, 7463 DTRACE_ACTIVITY_KILLED) != current); 7464 7465 continue; 7466 } 7467 } 7468 7469 if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed, 7470 ecb->dte_alignment, state, &mstate)) < 0) 7471 continue; 7472 7473 tomax = buf->dtb_tomax; 7474 ASSERT(tomax != NULL); 7475 7476 if (ecb->dte_size != 0) { 7477 dtrace_rechdr_t dtrh; 7478 if (!(mstate.dtms_present & DTRACE_MSTATE_TIMESTAMP)) { 7479 mstate.dtms_timestamp = dtrace_gethrtime(); 7480 mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP; 7481 } 7482 ASSERT3U(ecb->dte_size, >=, sizeof (dtrace_rechdr_t)); 7483 dtrh.dtrh_epid = ecb->dte_epid; 7484 DTRACE_RECORD_STORE_TIMESTAMP(&dtrh, 7485 mstate.dtms_timestamp); 7486 *((dtrace_rechdr_t *)(tomax + offs)) = dtrh; 7487 } 7488 7489 mstate.dtms_epid = ecb->dte_epid; 7490 mstate.dtms_present |= DTRACE_MSTATE_EPID; 7491 7492 if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) 7493 mstate.dtms_access = DTRACE_ACCESS_KERNEL; 7494 else 7495 mstate.dtms_access = 0; 7496 7497 if (pred != NULL) { 7498 dtrace_difo_t *dp = pred->dtp_difo; 7499 uint64_t rval; 7500 7501 rval = dtrace_dif_emulate(dp, &mstate, vstate, state); 7502 7503 if (!(*flags & CPU_DTRACE_ERROR) && !rval) { 7504 dtrace_cacheid_t cid = probe->dtpr_predcache; 7505 7506 if (cid != DTRACE_CACHEIDNONE && !onintr) { 7507 /* 7508 * Update the predicate cache... 
7509 */ 7510 ASSERT(cid == pred->dtp_cacheid); 7511 curthread->t_predcache = cid; 7512 } 7513 7514 continue; 7515 } 7516 } 7517 7518 for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) && 7519 act != NULL; act = act->dta_next) { 7520 size_t valoffs; 7521 dtrace_difo_t *dp; 7522 dtrace_recdesc_t *rec = &act->dta_rec; 7523 7524 size = rec->dtrd_size; 7525 valoffs = offs + rec->dtrd_offset; 7526 7527 if (DTRACEACT_ISAGG(act->dta_kind)) { 7528 uint64_t v = 0xbad; 7529 dtrace_aggregation_t *agg; 7530 7531 agg = (dtrace_aggregation_t *)act; 7532 7533 if ((dp = act->dta_difo) != NULL) 7534 v = dtrace_dif_emulate(dp, 7535 &mstate, vstate, state); 7536 7537 if (*flags & CPU_DTRACE_ERROR) 7538 continue; 7539 7540 /* 7541 * Note that we always pass the expression 7542 * value from the previous iteration of the 7543 * action loop. This value will only be used 7544 * if there is an expression argument to the 7545 * aggregating action, denoted by the 7546 * dtag_hasarg field. 7547 */ 7548 dtrace_aggregate(agg, buf, 7549 offs, aggbuf, v, val); 7550 continue; 7551 } 7552 7553 switch (act->dta_kind) { 7554 case DTRACEACT_STOP: 7555 if (dtrace_priv_proc_destructive(state)) 7556 dtrace_action_stop(); 7557 continue; 7558 7559 case DTRACEACT_BREAKPOINT: 7560 if (dtrace_priv_kernel_destructive(state)) 7561 dtrace_action_breakpoint(ecb); 7562 continue; 7563 7564 case DTRACEACT_PANIC: 7565 if (dtrace_priv_kernel_destructive(state)) 7566 dtrace_action_panic(ecb); 7567 continue; 7568 7569 case DTRACEACT_STACK: 7570 if (!dtrace_priv_kernel(state)) 7571 continue; 7572 7573 dtrace_getpcstack((pc_t *)(tomax + valoffs), 7574 size / sizeof (pc_t), probe->dtpr_aframes, 7575 DTRACE_ANCHORED(probe) ? NULL : 7576 (uint32_t *)arg0); 7577 continue; 7578 7579 case DTRACEACT_JSTACK: 7580 case DTRACEACT_USTACK: 7581 if (!dtrace_priv_proc(state)) 7582 continue; 7583 7584 /* 7585 * See comment in DIF_VAR_PID. 
7586 */ 7587 if (DTRACE_ANCHORED(mstate.dtms_probe) && 7588 CPU_ON_INTR(CPU)) { 7589 int depth = DTRACE_USTACK_NFRAMES( 7590 rec->dtrd_arg) + 1; 7591 7592 dtrace_bzero((void *)(tomax + valoffs), 7593 DTRACE_USTACK_STRSIZE(rec->dtrd_arg) 7594 + depth * sizeof (uint64_t)); 7595 7596 continue; 7597 } 7598 7599 if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 && 7600 curproc->p_dtrace_helpers != NULL) { 7601 /* 7602 * This is the slow path -- we have 7603 * allocated string space, and we're 7604 * getting the stack of a process that 7605 * has helpers. Call into a separate 7606 * routine to perform this processing. 7607 */ 7608 dtrace_action_ustack(&mstate, state, 7609 (uint64_t *)(tomax + valoffs), 7610 rec->dtrd_arg); 7611 continue; 7612 } 7613 7614 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 7615 dtrace_getupcstack((uint64_t *) 7616 (tomax + valoffs), 7617 DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1); 7618 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 7619 continue; 7620 7621 default: 7622 break; 7623 } 7624 7625 dp = act->dta_difo; 7626 ASSERT(dp != NULL); 7627 7628 val = dtrace_dif_emulate(dp, &mstate, vstate, state); 7629 7630 if (*flags & CPU_DTRACE_ERROR) 7631 continue; 7632 7633 switch (act->dta_kind) { 7634 case DTRACEACT_SPECULATE: { 7635 dtrace_rechdr_t *dtrh; 7636 7637 ASSERT(buf == &state->dts_buffer[cpuid]); 7638 buf = dtrace_speculation_buffer(state, 7639 cpuid, val); 7640 7641 if (buf == NULL) { 7642 *flags |= CPU_DTRACE_DROP; 7643 continue; 7644 } 7645 7646 offs = dtrace_buffer_reserve(buf, 7647 ecb->dte_needed, ecb->dte_alignment, 7648 state, NULL); 7649 7650 if (offs < 0) { 7651 *flags |= CPU_DTRACE_DROP; 7652 continue; 7653 } 7654 7655 tomax = buf->dtb_tomax; 7656 ASSERT(tomax != NULL); 7657 7658 if (ecb->dte_size == 0) 7659 continue; 7660 7661 ASSERT3U(ecb->dte_size, >=, 7662 sizeof (dtrace_rechdr_t)); 7663 dtrh = ((void *)(tomax + offs)); 7664 dtrh->dtrh_epid = ecb->dte_epid; 7665 /* 7666 * When the speculation is committed, all of 7667 * the records in the 
speculative buffer will 7668 * have their timestamps set to the commit 7669 * time. Until then, it is set to a sentinel 7670 * value, for debugability. 7671 */ 7672 DTRACE_RECORD_STORE_TIMESTAMP(dtrh, UINT64_MAX); 7673 continue; 7674 } 7675 7676 case DTRACEACT_PRINTM: { 7677 /* The DIF returns a 'memref'. */ 7678 uintptr_t *memref = (uintptr_t *)(uintptr_t) val; 7679 7680 /* Get the size from the memref. */ 7681 size = memref[1]; 7682 7683 /* 7684 * Check if the size exceeds the allocated 7685 * buffer size. 7686 */ 7687 if (size + sizeof(uintptr_t) > dp->dtdo_rtype.dtdt_size) { 7688 /* Flag a drop! */ 7689 *flags |= CPU_DTRACE_DROP; 7690 continue; 7691 } 7692 7693 /* Store the size in the buffer first. */ 7694 DTRACE_STORE(uintptr_t, tomax, 7695 valoffs, size); 7696 7697 /* 7698 * Offset the buffer address to the start 7699 * of the data. 7700 */ 7701 valoffs += sizeof(uintptr_t); 7702 7703 /* 7704 * Reset to the memory address rather than 7705 * the memref array, then let the BYREF 7706 * code below do the work to store the 7707 * memory data in the buffer. 7708 */ 7709 val = memref[0]; 7710 break; 7711 } 7712 7713 case DTRACEACT_CHILL: 7714 if (dtrace_priv_kernel_destructive(state)) 7715 dtrace_action_chill(&mstate, val); 7716 continue; 7717 7718 case DTRACEACT_RAISE: 7719 if (dtrace_priv_proc_destructive(state)) 7720 dtrace_action_raise(val); 7721 continue; 7722 7723 case DTRACEACT_COMMIT: 7724 ASSERT(!committed); 7725 7726 /* 7727 * We need to commit our buffer state. 
7728 */ 7729 if (ecb->dte_size) 7730 buf->dtb_offset = offs + ecb->dte_size; 7731 buf = &state->dts_buffer[cpuid]; 7732 dtrace_speculation_commit(state, cpuid, val); 7733 committed = 1; 7734 continue; 7735 7736 case DTRACEACT_DISCARD: 7737 dtrace_speculation_discard(state, cpuid, val); 7738 continue; 7739 7740 case DTRACEACT_DIFEXPR: 7741 case DTRACEACT_LIBACT: 7742 case DTRACEACT_PRINTF: 7743 case DTRACEACT_PRINTA: 7744 case DTRACEACT_SYSTEM: 7745 case DTRACEACT_FREOPEN: 7746 case DTRACEACT_TRACEMEM: 7747 break; 7748 7749 case DTRACEACT_TRACEMEM_DYNSIZE: 7750 tracememsize = val; 7751 break; 7752 7753 case DTRACEACT_SYM: 7754 case DTRACEACT_MOD: 7755 if (!dtrace_priv_kernel(state)) 7756 continue; 7757 break; 7758 7759 case DTRACEACT_USYM: 7760 case DTRACEACT_UMOD: 7761 case DTRACEACT_UADDR: { 7762 #ifdef illumos 7763 struct pid *pid = curthread->t_procp->p_pidp; 7764 #endif 7765 7766 if (!dtrace_priv_proc(state)) 7767 continue; 7768 7769 DTRACE_STORE(uint64_t, tomax, 7770 #ifdef illumos 7771 valoffs, (uint64_t)pid->pid_id); 7772 #else 7773 valoffs, (uint64_t) curproc->p_pid); 7774 #endif 7775 DTRACE_STORE(uint64_t, tomax, 7776 valoffs + sizeof (uint64_t), val); 7777 7778 continue; 7779 } 7780 7781 case DTRACEACT_EXIT: { 7782 /* 7783 * For the exit action, we are going to attempt 7784 * to atomically set our activity to be 7785 * draining. If this fails (either because 7786 * another CPU has beat us to the exit action, 7787 * or because our current activity is something 7788 * other than ACTIVE or WARMUP), we will 7789 * continue. This assures that the exit action 7790 * can be successfully recorded at most once 7791 * when we're in the ACTIVE state. If we're 7792 * encountering the exit() action while in 7793 * COOLDOWN, however, we want to honor the new 7794 * status code. (We know that we're the only 7795 * thread in COOLDOWN, so there is no race.) 
7796 */ 7797 void *activity = &state->dts_activity; 7798 dtrace_activity_t current = state->dts_activity; 7799 7800 if (current == DTRACE_ACTIVITY_COOLDOWN) 7801 break; 7802 7803 if (current != DTRACE_ACTIVITY_WARMUP) 7804 current = DTRACE_ACTIVITY_ACTIVE; 7805 7806 if (dtrace_cas32(activity, current, 7807 DTRACE_ACTIVITY_DRAINING) != current) { 7808 *flags |= CPU_DTRACE_DROP; 7809 continue; 7810 } 7811 7812 break; 7813 } 7814 7815 default: 7816 ASSERT(0); 7817 } 7818 7819 if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF || 7820 dp->dtdo_rtype.dtdt_flags & DIF_TF_BYUREF) { 7821 uintptr_t end = valoffs + size; 7822 7823 if (tracememsize != 0 && 7824 valoffs + tracememsize < end) { 7825 end = valoffs + tracememsize; 7826 tracememsize = 0; 7827 } 7828 7829 if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF && 7830 !dtrace_vcanload((void *)(uintptr_t)val, 7831 &dp->dtdo_rtype, NULL, &mstate, vstate)) 7832 continue; 7833 7834 dtrace_store_by_ref(dp, tomax, size, &valoffs, 7835 &val, end, act->dta_intuple, 7836 dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ? 7837 DIF_TF_BYREF: DIF_TF_BYUREF); 7838 continue; 7839 } 7840 7841 switch (size) { 7842 case 0: 7843 break; 7844 7845 case sizeof (uint8_t): 7846 DTRACE_STORE(uint8_t, tomax, valoffs, val); 7847 break; 7848 case sizeof (uint16_t): 7849 DTRACE_STORE(uint16_t, tomax, valoffs, val); 7850 break; 7851 case sizeof (uint32_t): 7852 DTRACE_STORE(uint32_t, tomax, valoffs, val); 7853 break; 7854 case sizeof (uint64_t): 7855 DTRACE_STORE(uint64_t, tomax, valoffs, val); 7856 break; 7857 default: 7858 /* 7859 * Any other size should have been returned by 7860 * reference, not by value. 7861 */ 7862 ASSERT(0); 7863 break; 7864 } 7865 } 7866 7867 if (*flags & CPU_DTRACE_DROP) 7868 continue; 7869 7870 if (*flags & CPU_DTRACE_FAULT) { 7871 int ndx; 7872 dtrace_action_t *err; 7873 7874 buf->dtb_errors++; 7875 7876 if (probe->dtpr_id == dtrace_probeid_error) { 7877 /* 7878 * There's nothing we can do -- we had an 7879 * error on the error probe. 
We bump an 7880 * error counter to at least indicate that 7881 * this condition happened. 7882 */ 7883 dtrace_error(&state->dts_dblerrors); 7884 continue; 7885 } 7886 7887 if (vtime) { 7888 /* 7889 * Before recursing on dtrace_probe(), we 7890 * need to explicitly clear out our start 7891 * time to prevent it from being accumulated 7892 * into t_dtrace_vtime. 7893 */ 7894 curthread->t_dtrace_start = 0; 7895 } 7896 7897 /* 7898 * Iterate over the actions to figure out which action 7899 * we were processing when we experienced the error. 7900 * Note that act points _past_ the faulting action; if 7901 * act is ecb->dte_action, the fault was in the 7902 * predicate, if it's ecb->dte_action->dta_next it's 7903 * in action #1, and so on. 7904 */ 7905 for (err = ecb->dte_action, ndx = 0; 7906 err != act; err = err->dta_next, ndx++) 7907 continue; 7908 7909 dtrace_probe_error(state, ecb->dte_epid, ndx, 7910 (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ? 7911 mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags), 7912 cpu_core[cpuid].cpuc_dtrace_illval); 7913 7914 continue; 7915 } 7916 7917 if (!committed) 7918 buf->dtb_offset = offs + ecb->dte_size; 7919 } 7920 7921 if (vtime) 7922 curthread->t_dtrace_start = dtrace_gethrtime(); 7923 7924 dtrace_interrupt_enable(cookie); 7925 } 7926 7927 /* 7928 * DTrace Probe Hashing Functions 7929 * 7930 * The functions in this section (and indeed, the functions in remaining 7931 * sections) are not _called_ from probe context. (Any exceptions to this are 7932 * marked with a "Note:".) Rather, they are called from elsewhere in the 7933 * DTrace framework to look-up probes in, add probes to and remove probes from 7934 * the DTrace probe hashes. (Each probe is hashed by each element of the 7935 * probe tuple -- allowing for fast lookups, regardless of what was 7936 * specified.) 
7937 */ 7938 static uint_t 7939 dtrace_hash_str(const char *p) 7940 { 7941 unsigned int g; 7942 uint_t hval = 0; 7943 7944 while (*p) { 7945 hval = (hval << 4) + *p++; 7946 if ((g = (hval & 0xf0000000)) != 0) 7947 hval ^= g >> 24; 7948 hval &= ~g; 7949 } 7950 return (hval); 7951 } 7952 7953 static dtrace_hash_t * 7954 dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs) 7955 { 7956 dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP); 7957 7958 hash->dth_stroffs = stroffs; 7959 hash->dth_nextoffs = nextoffs; 7960 hash->dth_prevoffs = prevoffs; 7961 7962 hash->dth_size = 1; 7963 hash->dth_mask = hash->dth_size - 1; 7964 7965 hash->dth_tab = kmem_zalloc(hash->dth_size * 7966 sizeof (dtrace_hashbucket_t *), KM_SLEEP); 7967 7968 return (hash); 7969 } 7970 7971 static void 7972 dtrace_hash_destroy(dtrace_hash_t *hash) 7973 { 7974 #ifdef DEBUG 7975 int i; 7976 7977 for (i = 0; i < hash->dth_size; i++) 7978 ASSERT(hash->dth_tab[i] == NULL); 7979 #endif 7980 7981 kmem_free(hash->dth_tab, 7982 hash->dth_size * sizeof (dtrace_hashbucket_t *)); 7983 kmem_free(hash, sizeof (dtrace_hash_t)); 7984 } 7985 7986 static void 7987 dtrace_hash_resize(dtrace_hash_t *hash) 7988 { 7989 int size = hash->dth_size, i, ndx; 7990 int new_size = hash->dth_size << 1; 7991 int new_mask = new_size - 1; 7992 dtrace_hashbucket_t **new_tab, *bucket, *next; 7993 7994 ASSERT((new_size & new_mask) == 0); 7995 7996 new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP); 7997 7998 for (i = 0; i < size; i++) { 7999 for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) { 8000 dtrace_probe_t *probe = bucket->dthb_chain; 8001 8002 ASSERT(probe != NULL); 8003 ndx = DTRACE_HASHSTR(hash, probe) & new_mask; 8004 8005 next = bucket->dthb_next; 8006 bucket->dthb_next = new_tab[ndx]; 8007 new_tab[ndx] = bucket; 8008 } 8009 } 8010 8011 kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *)); 8012 hash->dth_tab = new_tab; 8013 hash->dth_size = new_size; 8014 
hash->dth_mask = new_mask; 8015 } 8016 8017 static void 8018 dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new) 8019 { 8020 int hashval = DTRACE_HASHSTR(hash, new); 8021 int ndx = hashval & hash->dth_mask; 8022 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; 8023 dtrace_probe_t **nextp, **prevp; 8024 8025 for (; bucket != NULL; bucket = bucket->dthb_next) { 8026 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new)) 8027 goto add; 8028 } 8029 8030 if ((hash->dth_nbuckets >> 1) > hash->dth_size) { 8031 dtrace_hash_resize(hash); 8032 dtrace_hash_add(hash, new); 8033 return; 8034 } 8035 8036 bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP); 8037 bucket->dthb_next = hash->dth_tab[ndx]; 8038 hash->dth_tab[ndx] = bucket; 8039 hash->dth_nbuckets++; 8040 8041 add: 8042 nextp = DTRACE_HASHNEXT(hash, new); 8043 ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL); 8044 *nextp = bucket->dthb_chain; 8045 8046 if (bucket->dthb_chain != NULL) { 8047 prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain); 8048 ASSERT(*prevp == NULL); 8049 *prevp = new; 8050 } 8051 8052 bucket->dthb_chain = new; 8053 bucket->dthb_len++; 8054 } 8055 8056 static dtrace_probe_t * 8057 dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template) 8058 { 8059 int hashval = DTRACE_HASHSTR(hash, template); 8060 int ndx = hashval & hash->dth_mask; 8061 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; 8062 8063 for (; bucket != NULL; bucket = bucket->dthb_next) { 8064 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template)) 8065 return (bucket->dthb_chain); 8066 } 8067 8068 return (NULL); 8069 } 8070 8071 static int 8072 dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template) 8073 { 8074 int hashval = DTRACE_HASHSTR(hash, template); 8075 int ndx = hashval & hash->dth_mask; 8076 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; 8077 8078 for (; bucket != NULL; bucket = bucket->dthb_next) { 8079 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template)) 8080 return 
(bucket->dthb_len); 8081 } 8082 8083 return (0); 8084 } 8085 8086 static void 8087 dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe) 8088 { 8089 int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask; 8090 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; 8091 8092 dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe); 8093 dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe); 8094 8095 /* 8096 * Find the bucket that we're removing this probe from. 8097 */ 8098 for (; bucket != NULL; bucket = bucket->dthb_next) { 8099 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe)) 8100 break; 8101 } 8102 8103 ASSERT(bucket != NULL); 8104 8105 if (*prevp == NULL) { 8106 if (*nextp == NULL) { 8107 /* 8108 * The removed probe was the only probe on this 8109 * bucket; we need to remove the bucket. 8110 */ 8111 dtrace_hashbucket_t *b = hash->dth_tab[ndx]; 8112 8113 ASSERT(bucket->dthb_chain == probe); 8114 ASSERT(b != NULL); 8115 8116 if (b == bucket) { 8117 hash->dth_tab[ndx] = bucket->dthb_next; 8118 } else { 8119 while (b->dthb_next != bucket) 8120 b = b->dthb_next; 8121 b->dthb_next = bucket->dthb_next; 8122 } 8123 8124 ASSERT(hash->dth_nbuckets > 0); 8125 hash->dth_nbuckets--; 8126 kmem_free(bucket, sizeof (dtrace_hashbucket_t)); 8127 return; 8128 } 8129 8130 bucket->dthb_chain = *nextp; 8131 } else { 8132 *(DTRACE_HASHNEXT(hash, *prevp)) = *nextp; 8133 } 8134 8135 if (*nextp != NULL) 8136 *(DTRACE_HASHPREV(hash, *nextp)) = *prevp; 8137 } 8138 8139 /* 8140 * DTrace Utility Functions 8141 * 8142 * These are random utility functions that are _not_ called from probe context. 8143 */ 8144 static int 8145 dtrace_badattr(const dtrace_attribute_t *a) 8146 { 8147 return (a->dtat_name > DTRACE_STABILITY_MAX || 8148 a->dtat_data > DTRACE_STABILITY_MAX || 8149 a->dtat_class > DTRACE_CLASS_MAX); 8150 } 8151 8152 /* 8153 * Return a duplicate copy of a string. If the specified string is NULL, 8154 * this function returns a zero-length string. 
8155 */ 8156 static char * 8157 dtrace_strdup(const char *str) 8158 { 8159 char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP); 8160 8161 if (str != NULL) 8162 (void) strcpy(new, str); 8163 8164 return (new); 8165 } 8166 8167 #define DTRACE_ISALPHA(c) \ 8168 (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z')) 8169 8170 static int 8171 dtrace_badname(const char *s) 8172 { 8173 char c; 8174 8175 if (s == NULL || (c = *s++) == '\0') 8176 return (0); 8177 8178 if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.') 8179 return (1); 8180 8181 while ((c = *s++) != '\0') { 8182 if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') && 8183 c != '-' && c != '_' && c != '.' && c != '`') 8184 return (1); 8185 } 8186 8187 return (0); 8188 } 8189 8190 static void 8191 dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp) 8192 { 8193 uint32_t priv; 8194 8195 #ifdef illumos 8196 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { 8197 /* 8198 * For DTRACE_PRIV_ALL, the uid and zoneid don't matter. 
8199 */ 8200 priv = DTRACE_PRIV_ALL; 8201 } else { 8202 *uidp = crgetuid(cr); 8203 *zoneidp = crgetzoneid(cr); 8204 8205 priv = 0; 8206 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) 8207 priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER; 8208 else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) 8209 priv |= DTRACE_PRIV_USER; 8210 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) 8211 priv |= DTRACE_PRIV_PROC; 8212 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) 8213 priv |= DTRACE_PRIV_OWNER; 8214 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) 8215 priv |= DTRACE_PRIV_ZONEOWNER; 8216 } 8217 #else 8218 priv = DTRACE_PRIV_ALL; 8219 #endif 8220 8221 *privp = priv; 8222 } 8223 8224 #ifdef DTRACE_ERRDEBUG 8225 static void 8226 dtrace_errdebug(const char *str) 8227 { 8228 int hval = dtrace_hash_str(str) % DTRACE_ERRHASHSZ; 8229 int occupied = 0; 8230 8231 mutex_enter(&dtrace_errlock); 8232 dtrace_errlast = str; 8233 dtrace_errthread = curthread; 8234 8235 while (occupied++ < DTRACE_ERRHASHSZ) { 8236 if (dtrace_errhash[hval].dter_msg == str) { 8237 dtrace_errhash[hval].dter_count++; 8238 goto out; 8239 } 8240 8241 if (dtrace_errhash[hval].dter_msg != NULL) { 8242 hval = (hval + 1) % DTRACE_ERRHASHSZ; 8243 continue; 8244 } 8245 8246 dtrace_errhash[hval].dter_msg = str; 8247 dtrace_errhash[hval].dter_count = 1; 8248 goto out; 8249 } 8250 8251 panic("dtrace: undersized error hash"); 8252 out: 8253 mutex_exit(&dtrace_errlock); 8254 } 8255 #endif 8256 8257 /* 8258 * DTrace Matching Functions 8259 * 8260 * These functions are used to match groups of probes, given some elements of 8261 * a probe tuple, or some globbed expressions for elements of a probe tuple. 
8262 */ 8263 static int 8264 dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid, 8265 zoneid_t zoneid) 8266 { 8267 if (priv != DTRACE_PRIV_ALL) { 8268 uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags; 8269 uint32_t match = priv & ppriv; 8270 8271 /* 8272 * No PRIV_DTRACE_* privileges... 8273 */ 8274 if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER | 8275 DTRACE_PRIV_KERNEL)) == 0) 8276 return (0); 8277 8278 /* 8279 * No matching bits, but there were bits to match... 8280 */ 8281 if (match == 0 && ppriv != 0) 8282 return (0); 8283 8284 /* 8285 * Need to have permissions to the process, but don't... 8286 */ 8287 if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 && 8288 uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) { 8289 return (0); 8290 } 8291 8292 /* 8293 * Need to be in the same zone unless we possess the 8294 * privilege to examine all zones. 8295 */ 8296 if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 && 8297 zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) { 8298 return (0); 8299 } 8300 } 8301 8302 return (1); 8303 } 8304 8305 /* 8306 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which 8307 * consists of input pattern strings and an ops-vector to evaluate them. 8308 * This function returns >0 for match, 0 for no match, and <0 for error. 
8309 */ 8310 static int 8311 dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp, 8312 uint32_t priv, uid_t uid, zoneid_t zoneid) 8313 { 8314 dtrace_provider_t *pvp = prp->dtpr_provider; 8315 int rv; 8316 8317 if (pvp->dtpv_defunct) 8318 return (0); 8319 8320 if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0) 8321 return (rv); 8322 8323 if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0) 8324 return (rv); 8325 8326 if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0) 8327 return (rv); 8328 8329 if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0) 8330 return (rv); 8331 8332 if (dtrace_match_priv(prp, priv, uid, zoneid) == 0) 8333 return (0); 8334 8335 return (rv); 8336 } 8337 8338 /* 8339 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN) 8340 * interface for matching a glob pattern 'p' to an input string 's'. Unlike 8341 * libc's version, the kernel version only applies to 8-bit ASCII strings. 8342 * In addition, all of the recursion cases except for '*' matching have been 8343 * unwound. For '*', we still implement recursive evaluation, but a depth 8344 * counter is maintained and matching is aborted if we recurse too deep. 8345 * The function returns 0 if no match, >0 if match, and <0 if recursion error. 
8346 */ 8347 static int 8348 dtrace_match_glob(const char *s, const char *p, int depth) 8349 { 8350 const char *olds; 8351 char s1, c; 8352 int gs; 8353 8354 if (depth > DTRACE_PROBEKEY_MAXDEPTH) 8355 return (-1); 8356 8357 if (s == NULL) 8358 s = ""; /* treat NULL as empty string */ 8359 8360 top: 8361 olds = s; 8362 s1 = *s++; 8363 8364 if (p == NULL) 8365 return (0); 8366 8367 if ((c = *p++) == '\0') 8368 return (s1 == '\0'); 8369 8370 switch (c) { 8371 case '[': { 8372 int ok = 0, notflag = 0; 8373 char lc = '\0'; 8374 8375 if (s1 == '\0') 8376 return (0); 8377 8378 if (*p == '!') { 8379 notflag = 1; 8380 p++; 8381 } 8382 8383 if ((c = *p++) == '\0') 8384 return (0); 8385 8386 do { 8387 if (c == '-' && lc != '\0' && *p != ']') { 8388 if ((c = *p++) == '\0') 8389 return (0); 8390 if (c == '\\' && (c = *p++) == '\0') 8391 return (0); 8392 8393 if (notflag) { 8394 if (s1 < lc || s1 > c) 8395 ok++; 8396 else 8397 return (0); 8398 } else if (lc <= s1 && s1 <= c) 8399 ok++; 8400 8401 } else if (c == '\\' && (c = *p++) == '\0') 8402 return (0); 8403 8404 lc = c; /* save left-hand 'c' for next iteration */ 8405 8406 if (notflag) { 8407 if (s1 != c) 8408 ok++; 8409 else 8410 return (0); 8411 } else if (s1 == c) 8412 ok++; 8413 8414 if ((c = *p++) == '\0') 8415 return (0); 8416 8417 } while (c != ']'); 8418 8419 if (ok) 8420 goto top; 8421 8422 return (0); 8423 } 8424 8425 case '\\': 8426 if ((c = *p++) == '\0') 8427 return (0); 8428 /*FALLTHRU*/ 8429 8430 default: 8431 if (c != s1) 8432 return (0); 8433 /*FALLTHRU*/ 8434 8435 case '?': 8436 if (s1 != '\0') 8437 goto top; 8438 return (0); 8439 8440 case '*': 8441 while (*p == '*') 8442 p++; /* consecutive *'s are identical to a single one */ 8443 8444 if (*p == '\0') 8445 return (1); 8446 8447 for (s = olds; *s != '\0'; s++) { 8448 if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0) 8449 return (gs); 8450 } 8451 8452 return (0); 8453 } 8454 } 8455 8456 /*ARGSUSED*/ 8457 static int 8458 dtrace_match_string(const char *s, 
const char *p, int depth) 8459 { 8460 return (s != NULL && strcmp(s, p) == 0); 8461 } 8462 8463 /*ARGSUSED*/ 8464 static int 8465 dtrace_match_nul(const char *s, const char *p, int depth) 8466 { 8467 return (1); /* always match the empty pattern */ 8468 } 8469 8470 /*ARGSUSED*/ 8471 static int 8472 dtrace_match_nonzero(const char *s, const char *p, int depth) 8473 { 8474 return (s != NULL && s[0] != '\0'); 8475 } 8476 8477 static int 8478 dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, 8479 zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg) 8480 { 8481 dtrace_probe_t template, *probe; 8482 dtrace_hash_t *hash = NULL; 8483 int len, best = INT_MAX, nmatched = 0; 8484 dtrace_id_t i; 8485 8486 ASSERT(MUTEX_HELD(&dtrace_lock)); 8487 8488 /* 8489 * If the probe ID is specified in the key, just lookup by ID and 8490 * invoke the match callback once if a matching probe is found. 8491 */ 8492 if (pkp->dtpk_id != DTRACE_IDNONE) { 8493 if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL && 8494 dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) { 8495 (void) (*matched)(probe, arg); 8496 nmatched++; 8497 } 8498 return (nmatched); 8499 } 8500 8501 template.dtpr_mod = (char *)pkp->dtpk_mod; 8502 template.dtpr_func = (char *)pkp->dtpk_func; 8503 template.dtpr_name = (char *)pkp->dtpk_name; 8504 8505 /* 8506 * We want to find the most distinct of the module name, function 8507 * name, and name. So for each one that is not a glob pattern or 8508 * empty string, we perform a lookup in the corresponding hash and 8509 * use the hash table with the fewest collisions to do our search. 
8510 */ 8511 if (pkp->dtpk_mmatch == &dtrace_match_string && 8512 (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) { 8513 best = len; 8514 hash = dtrace_bymod; 8515 } 8516 8517 if (pkp->dtpk_fmatch == &dtrace_match_string && 8518 (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) { 8519 best = len; 8520 hash = dtrace_byfunc; 8521 } 8522 8523 if (pkp->dtpk_nmatch == &dtrace_match_string && 8524 (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) { 8525 best = len; 8526 hash = dtrace_byname; 8527 } 8528 8529 /* 8530 * If we did not select a hash table, iterate over every probe and 8531 * invoke our callback for each one that matches our input probe key. 8532 */ 8533 if (hash == NULL) { 8534 for (i = 0; i < dtrace_nprobes; i++) { 8535 if ((probe = dtrace_probes[i]) == NULL || 8536 dtrace_match_probe(probe, pkp, priv, uid, 8537 zoneid) <= 0) 8538 continue; 8539 8540 nmatched++; 8541 8542 if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) 8543 break; 8544 } 8545 8546 return (nmatched); 8547 } 8548 8549 /* 8550 * If we selected a hash table, iterate over each probe of the same key 8551 * name and invoke the callback for every probe that matches the other 8552 * attributes of our input probe key. 8553 */ 8554 for (probe = dtrace_hash_lookup(hash, &template); probe != NULL; 8555 probe = *(DTRACE_HASHNEXT(hash, probe))) { 8556 8557 if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0) 8558 continue; 8559 8560 nmatched++; 8561 8562 if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) 8563 break; 8564 } 8565 8566 return (nmatched); 8567 } 8568 8569 /* 8570 * Return the function pointer dtrace_probecmp() should use to compare the 8571 * specified pattern with a string. For NULL or empty patterns, we select 8572 * dtrace_match_nul(). For glob pattern strings, we use dtrace_match_glob(). 8573 * For non-empty non-glob strings, we use dtrace_match_string(). 
8574 */ 8575 static dtrace_probekey_f * 8576 dtrace_probekey_func(const char *p) 8577 { 8578 char c; 8579 8580 if (p == NULL || *p == '\0') 8581 return (&dtrace_match_nul); 8582 8583 while ((c = *p++) != '\0') { 8584 if (c == '[' || c == '?' || c == '*' || c == '\\') 8585 return (&dtrace_match_glob); 8586 } 8587 8588 return (&dtrace_match_string); 8589 } 8590 8591 /* 8592 * Build a probe comparison key for use with dtrace_match_probe() from the 8593 * given probe description. By convention, a null key only matches anchored 8594 * probes: if each field is the empty string, reset dtpk_fmatch to 8595 * dtrace_match_nonzero(). 8596 */ 8597 static void 8598 dtrace_probekey(dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp) 8599 { 8600 pkp->dtpk_prov = pdp->dtpd_provider; 8601 pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider); 8602 8603 pkp->dtpk_mod = pdp->dtpd_mod; 8604 pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod); 8605 8606 pkp->dtpk_func = pdp->dtpd_func; 8607 pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func); 8608 8609 pkp->dtpk_name = pdp->dtpd_name; 8610 pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name); 8611 8612 pkp->dtpk_id = pdp->dtpd_id; 8613 8614 if (pkp->dtpk_id == DTRACE_IDNONE && 8615 pkp->dtpk_pmatch == &dtrace_match_nul && 8616 pkp->dtpk_mmatch == &dtrace_match_nul && 8617 pkp->dtpk_fmatch == &dtrace_match_nul && 8618 pkp->dtpk_nmatch == &dtrace_match_nul) 8619 pkp->dtpk_fmatch = &dtrace_match_nonzero; 8620 } 8621 8622 /* 8623 * DTrace Provider-to-Framework API Functions 8624 * 8625 * These functions implement much of the Provider-to-Framework API, as 8626 * described in <sys/dtrace.h>. The parts of the API not in this section are 8627 * the functions in the API for probe management (found below), and 8628 * dtrace_probe() itself (found above). 8629 */ 8630 8631 /* 8632 * Register the calling provider with the DTrace framework. This should 8633 * generally be called by DTrace providers in their attach(9E) entry point. 
8634 */ 8635 int 8636 dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, 8637 cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp) 8638 { 8639 dtrace_provider_t *provider; 8640 8641 if (name == NULL || pap == NULL || pops == NULL || idp == NULL) { 8642 cmn_err(CE_WARN, "failed to register provider '%s': invalid " 8643 "arguments", name ? name : "<NULL>"); 8644 return (EINVAL); 8645 } 8646 8647 if (name[0] == '\0' || dtrace_badname(name)) { 8648 cmn_err(CE_WARN, "failed to register provider '%s': invalid " 8649 "provider name", name); 8650 return (EINVAL); 8651 } 8652 8653 if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) || 8654 pops->dtps_enable == NULL || pops->dtps_disable == NULL || 8655 pops->dtps_destroy == NULL || 8656 ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) { 8657 cmn_err(CE_WARN, "failed to register provider '%s': invalid " 8658 "provider ops", name); 8659 return (EINVAL); 8660 } 8661 8662 if (dtrace_badattr(&pap->dtpa_provider) || 8663 dtrace_badattr(&pap->dtpa_mod) || 8664 dtrace_badattr(&pap->dtpa_func) || 8665 dtrace_badattr(&pap->dtpa_name) || 8666 dtrace_badattr(&pap->dtpa_args)) { 8667 cmn_err(CE_WARN, "failed to register provider '%s': invalid " 8668 "provider attributes", name); 8669 return (EINVAL); 8670 } 8671 8672 if (priv & ~DTRACE_PRIV_ALL) { 8673 cmn_err(CE_WARN, "failed to register provider '%s': invalid " 8674 "privilege attributes", name); 8675 return (EINVAL); 8676 } 8677 8678 if ((priv & DTRACE_PRIV_KERNEL) && 8679 (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) && 8680 pops->dtps_usermode == NULL) { 8681 cmn_err(CE_WARN, "failed to register provider '%s': need " 8682 "dtps_usermode() op for given privilege attributes", name); 8683 return (EINVAL); 8684 } 8685 8686 provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP); 8687 provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); 8688 (void) strcpy(provider->dtpv_name, name); 8689 
8690 provider->dtpv_attr = *pap; 8691 provider->dtpv_priv.dtpp_flags = priv; 8692 if (cr != NULL) { 8693 provider->dtpv_priv.dtpp_uid = crgetuid(cr); 8694 provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr); 8695 } 8696 provider->dtpv_pops = *pops; 8697 8698 if (pops->dtps_provide == NULL) { 8699 ASSERT(pops->dtps_provide_module != NULL); 8700 provider->dtpv_pops.dtps_provide = 8701 (void (*)(void *, dtrace_probedesc_t *))dtrace_nullop; 8702 } 8703 8704 if (pops->dtps_provide_module == NULL) { 8705 ASSERT(pops->dtps_provide != NULL); 8706 provider->dtpv_pops.dtps_provide_module = 8707 (void (*)(void *, modctl_t *))dtrace_nullop; 8708 } 8709 8710 if (pops->dtps_suspend == NULL) { 8711 ASSERT(pops->dtps_resume == NULL); 8712 provider->dtpv_pops.dtps_suspend = 8713 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop; 8714 provider->dtpv_pops.dtps_resume = 8715 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop; 8716 } 8717 8718 provider->dtpv_arg = arg; 8719 *idp = (dtrace_provider_id_t)provider; 8720 8721 if (pops == &dtrace_provider_ops) { 8722 ASSERT(MUTEX_HELD(&dtrace_provider_lock)); 8723 ASSERT(MUTEX_HELD(&dtrace_lock)); 8724 ASSERT(dtrace_anon.dta_enabling == NULL); 8725 8726 /* 8727 * We make sure that the DTrace provider is at the head of 8728 * the provider chain. 8729 */ 8730 provider->dtpv_next = dtrace_provider; 8731 dtrace_provider = provider; 8732 return (0); 8733 } 8734 8735 mutex_enter(&dtrace_provider_lock); 8736 mutex_enter(&dtrace_lock); 8737 8738 /* 8739 * If there is at least one provider registered, we'll add this 8740 * provider after the first provider. 
8741 */ 8742 if (dtrace_provider != NULL) { 8743 provider->dtpv_next = dtrace_provider->dtpv_next; 8744 dtrace_provider->dtpv_next = provider; 8745 } else { 8746 dtrace_provider = provider; 8747 } 8748 8749 if (dtrace_retained != NULL) { 8750 dtrace_enabling_provide(provider); 8751 8752 /* 8753 * Now we need to call dtrace_enabling_matchall() -- which 8754 * will acquire cpu_lock and dtrace_lock. We therefore need 8755 * to drop all of our locks before calling into it... 8756 */ 8757 mutex_exit(&dtrace_lock); 8758 mutex_exit(&dtrace_provider_lock); 8759 dtrace_enabling_matchall(); 8760 8761 return (0); 8762 } 8763 8764 mutex_exit(&dtrace_lock); 8765 mutex_exit(&dtrace_provider_lock); 8766 8767 return (0); 8768 } 8769 8770 /* 8771 * Unregister the specified provider from the DTrace framework. This should 8772 * generally be called by DTrace providers in their detach(9E) entry point. 8773 */ 8774 int 8775 dtrace_unregister(dtrace_provider_id_t id) 8776 { 8777 dtrace_provider_t *old = (dtrace_provider_t *)id; 8778 dtrace_provider_t *prev = NULL; 8779 int i, self = 0, noreap = 0; 8780 dtrace_probe_t *probe, *first = NULL; 8781 8782 if (old->dtpv_pops.dtps_enable == 8783 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) { 8784 /* 8785 * If DTrace itself is the provider, we're called with locks 8786 * already held. 8787 */ 8788 ASSERT(old == dtrace_provider); 8789 #ifdef illumos 8790 ASSERT(dtrace_devi != NULL); 8791 #endif 8792 ASSERT(MUTEX_HELD(&dtrace_provider_lock)); 8793 ASSERT(MUTEX_HELD(&dtrace_lock)); 8794 self = 1; 8795 8796 if (dtrace_provider->dtpv_next != NULL) { 8797 /* 8798 * There's another provider here; return failure. 
8799 */ 8800 return (EBUSY); 8801 } 8802 } else { 8803 mutex_enter(&dtrace_provider_lock); 8804 #ifdef illumos 8805 mutex_enter(&mod_lock); 8806 #endif 8807 mutex_enter(&dtrace_lock); 8808 } 8809 8810 /* 8811 * If anyone has /dev/dtrace open, or if there are anonymous enabled 8812 * probes, we refuse to let providers slither away, unless this 8813 * provider has already been explicitly invalidated. 8814 */ 8815 if (!old->dtpv_defunct && 8816 (dtrace_opens || (dtrace_anon.dta_state != NULL && 8817 dtrace_anon.dta_state->dts_necbs > 0))) { 8818 if (!self) { 8819 mutex_exit(&dtrace_lock); 8820 #ifdef illumos 8821 mutex_exit(&mod_lock); 8822 #endif 8823 mutex_exit(&dtrace_provider_lock); 8824 } 8825 return (EBUSY); 8826 } 8827 8828 /* 8829 * Attempt to destroy the probes associated with this provider. 8830 */ 8831 for (i = 0; i < dtrace_nprobes; i++) { 8832 if ((probe = dtrace_probes[i]) == NULL) 8833 continue; 8834 8835 if (probe->dtpr_provider != old) 8836 continue; 8837 8838 if (probe->dtpr_ecb == NULL) 8839 continue; 8840 8841 /* 8842 * If we are trying to unregister a defunct provider, and the 8843 * provider was made defunct within the interval dictated by 8844 * dtrace_unregister_defunct_reap, we'll (asynchronously) 8845 * attempt to reap our enablings. To denote that the provider 8846 * should reattempt to unregister itself at some point in the 8847 * future, we will return a differentiable error code (EAGAIN 8848 * instead of EBUSY) in this case. 
8849 */ 8850 if (dtrace_gethrtime() - old->dtpv_defunct > 8851 dtrace_unregister_defunct_reap) 8852 noreap = 1; 8853 8854 if (!self) { 8855 mutex_exit(&dtrace_lock); 8856 #ifdef illumos 8857 mutex_exit(&mod_lock); 8858 #endif 8859 mutex_exit(&dtrace_provider_lock); 8860 } 8861 8862 if (noreap) 8863 return (EBUSY); 8864 8865 (void) taskq_dispatch(dtrace_taskq, 8866 (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP); 8867 8868 return (EAGAIN); 8869 } 8870 8871 /* 8872 * All of the probes for this provider are disabled; we can safely 8873 * remove all of them from their hash chains and from the probe array. 8874 */ 8875 for (i = 0; i < dtrace_nprobes; i++) { 8876 if ((probe = dtrace_probes[i]) == NULL) 8877 continue; 8878 8879 if (probe->dtpr_provider != old) 8880 continue; 8881 8882 dtrace_probes[i] = NULL; 8883 8884 dtrace_hash_remove(dtrace_bymod, probe); 8885 dtrace_hash_remove(dtrace_byfunc, probe); 8886 dtrace_hash_remove(dtrace_byname, probe); 8887 8888 if (first == NULL) { 8889 first = probe; 8890 probe->dtpr_nextmod = NULL; 8891 } else { 8892 probe->dtpr_nextmod = first; 8893 first = probe; 8894 } 8895 } 8896 8897 /* 8898 * The provider's probes have been removed from the hash chains and 8899 * from the probe array. Now issue a dtrace_sync() to be sure that 8900 * everyone has cleared out from any probe array processing. 
8901 */ 8902 dtrace_sync(); 8903 8904 for (probe = first; probe != NULL; probe = first) { 8905 first = probe->dtpr_nextmod; 8906 8907 old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id, 8908 probe->dtpr_arg); 8909 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); 8910 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); 8911 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); 8912 #ifdef illumos 8913 vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1); 8914 #else 8915 free_unr(dtrace_arena, probe->dtpr_id); 8916 #endif 8917 kmem_free(probe, sizeof (dtrace_probe_t)); 8918 } 8919 8920 if ((prev = dtrace_provider) == old) { 8921 #ifdef illumos 8922 ASSERT(self || dtrace_devi == NULL); 8923 ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL); 8924 #endif 8925 dtrace_provider = old->dtpv_next; 8926 } else { 8927 while (prev != NULL && prev->dtpv_next != old) 8928 prev = prev->dtpv_next; 8929 8930 if (prev == NULL) { 8931 panic("attempt to unregister non-existent " 8932 "dtrace provider %p\n", (void *)id); 8933 } 8934 8935 prev->dtpv_next = old->dtpv_next; 8936 } 8937 8938 if (!self) { 8939 mutex_exit(&dtrace_lock); 8940 #ifdef illumos 8941 mutex_exit(&mod_lock); 8942 #endif 8943 mutex_exit(&dtrace_provider_lock); 8944 } 8945 8946 kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1); 8947 kmem_free(old, sizeof (dtrace_provider_t)); 8948 8949 return (0); 8950 } 8951 8952 /* 8953 * Invalidate the specified provider. All subsequent probe lookups for the 8954 * specified provider will fail, but its probes will not be removed. 
8955 */ 8956 void 8957 dtrace_invalidate(dtrace_provider_id_t id) 8958 { 8959 dtrace_provider_t *pvp = (dtrace_provider_t *)id; 8960 8961 ASSERT(pvp->dtpv_pops.dtps_enable != 8962 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); 8963 8964 mutex_enter(&dtrace_provider_lock); 8965 mutex_enter(&dtrace_lock); 8966 8967 pvp->dtpv_defunct = dtrace_gethrtime(); 8968 8969 mutex_exit(&dtrace_lock); 8970 mutex_exit(&dtrace_provider_lock); 8971 } 8972 8973 /* 8974 * Indicate whether or not DTrace has attached. 8975 */ 8976 int 8977 dtrace_attached(void) 8978 { 8979 /* 8980 * dtrace_provider will be non-NULL iff the DTrace driver has 8981 * attached. (It's non-NULL because DTrace is always itself a 8982 * provider.) 8983 */ 8984 return (dtrace_provider != NULL); 8985 } 8986 8987 /* 8988 * Remove all the unenabled probes for the given provider. This function is 8989 * not unlike dtrace_unregister(), except that it doesn't remove the provider 8990 * -- just as many of its associated probes as it can. 8991 */ 8992 int 8993 dtrace_condense(dtrace_provider_id_t id) 8994 { 8995 dtrace_provider_t *prov = (dtrace_provider_t *)id; 8996 int i; 8997 dtrace_probe_t *probe; 8998 8999 /* 9000 * Make sure this isn't the dtrace provider itself. 9001 */ 9002 ASSERT(prov->dtpv_pops.dtps_enable != 9003 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); 9004 9005 mutex_enter(&dtrace_provider_lock); 9006 mutex_enter(&dtrace_lock); 9007 9008 /* 9009 * Attempt to destroy the probes associated with this provider. 
9010 */ 9011 for (i = 0; i < dtrace_nprobes; i++) { 9012 if ((probe = dtrace_probes[i]) == NULL) 9013 continue; 9014 9015 if (probe->dtpr_provider != prov) 9016 continue; 9017 9018 if (probe->dtpr_ecb != NULL) 9019 continue; 9020 9021 dtrace_probes[i] = NULL; 9022 9023 dtrace_hash_remove(dtrace_bymod, probe); 9024 dtrace_hash_remove(dtrace_byfunc, probe); 9025 dtrace_hash_remove(dtrace_byname, probe); 9026 9027 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1, 9028 probe->dtpr_arg); 9029 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); 9030 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); 9031 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); 9032 kmem_free(probe, sizeof (dtrace_probe_t)); 9033 #ifdef illumos 9034 vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1); 9035 #else 9036 free_unr(dtrace_arena, i + 1); 9037 #endif 9038 } 9039 9040 mutex_exit(&dtrace_lock); 9041 mutex_exit(&dtrace_provider_lock); 9042 9043 return (0); 9044 } 9045 9046 /* 9047 * DTrace Probe Management Functions 9048 * 9049 * The functions in this section perform the DTrace probe management, 9050 * including functions to create probes, look-up probes, and call into the 9051 * providers to request that probes be provided. Some of these functions are 9052 * in the Provider-to-Framework API; these functions can be identified by the 9053 * fact that they are not declared "static". 9054 */ 9055 9056 /* 9057 * Create a probe with the specified module name, function name, and name. 
9058 */ 9059 dtrace_id_t 9060 dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, 9061 const char *func, const char *name, int aframes, void *arg) 9062 { 9063 dtrace_probe_t *probe, **probes; 9064 dtrace_provider_t *provider = (dtrace_provider_t *)prov; 9065 dtrace_id_t id; 9066 9067 if (provider == dtrace_provider) { 9068 ASSERT(MUTEX_HELD(&dtrace_lock)); 9069 } else { 9070 mutex_enter(&dtrace_lock); 9071 } 9072 9073 #ifdef illumos 9074 id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1, 9075 VM_BESTFIT | VM_SLEEP); 9076 #else 9077 id = alloc_unr(dtrace_arena); 9078 #endif 9079 probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP); 9080 9081 probe->dtpr_id = id; 9082 probe->dtpr_gen = dtrace_probegen++; 9083 probe->dtpr_mod = dtrace_strdup(mod); 9084 probe->dtpr_func = dtrace_strdup(func); 9085 probe->dtpr_name = dtrace_strdup(name); 9086 probe->dtpr_arg = arg; 9087 probe->dtpr_aframes = aframes; 9088 probe->dtpr_provider = provider; 9089 9090 dtrace_hash_add(dtrace_bymod, probe); 9091 dtrace_hash_add(dtrace_byfunc, probe); 9092 dtrace_hash_add(dtrace_byname, probe); 9093 9094 if (id - 1 >= dtrace_nprobes) { 9095 size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *); 9096 size_t nsize = osize << 1; 9097 9098 if (nsize == 0) { 9099 ASSERT(osize == 0); 9100 ASSERT(dtrace_probes == NULL); 9101 nsize = sizeof (dtrace_probe_t *); 9102 } 9103 9104 probes = kmem_zalloc(nsize, KM_SLEEP); 9105 9106 if (dtrace_probes == NULL) { 9107 ASSERT(osize == 0); 9108 dtrace_probes = probes; 9109 dtrace_nprobes = 1; 9110 } else { 9111 dtrace_probe_t **oprobes = dtrace_probes; 9112 9113 bcopy(oprobes, probes, osize); 9114 dtrace_membar_producer(); 9115 dtrace_probes = probes; 9116 9117 dtrace_sync(); 9118 9119 /* 9120 * All CPUs are now seeing the new probes array; we can 9121 * safely free the old array. 
9122 */ 9123 kmem_free(oprobes, osize); 9124 dtrace_nprobes <<= 1; 9125 } 9126 9127 ASSERT(id - 1 < dtrace_nprobes); 9128 } 9129 9130 ASSERT(dtrace_probes[id - 1] == NULL); 9131 dtrace_probes[id - 1] = probe; 9132 9133 if (provider != dtrace_provider) 9134 mutex_exit(&dtrace_lock); 9135 9136 return (id); 9137 } 9138 9139 static dtrace_probe_t * 9140 dtrace_probe_lookup_id(dtrace_id_t id) 9141 { 9142 ASSERT(MUTEX_HELD(&dtrace_lock)); 9143 9144 if (id == 0 || id > dtrace_nprobes) 9145 return (NULL); 9146 9147 return (dtrace_probes[id - 1]); 9148 } 9149 9150 static int 9151 dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg) 9152 { 9153 *((dtrace_id_t *)arg) = probe->dtpr_id; 9154 9155 return (DTRACE_MATCH_DONE); 9156 } 9157 9158 /* 9159 * Look up a probe based on provider and one or more of module name, function 9160 * name and probe name. 9161 */ 9162 dtrace_id_t 9163 dtrace_probe_lookup(dtrace_provider_id_t prid, char *mod, 9164 char *func, char *name) 9165 { 9166 dtrace_probekey_t pkey; 9167 dtrace_id_t id; 9168 int match; 9169 9170 pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name; 9171 pkey.dtpk_pmatch = &dtrace_match_string; 9172 pkey.dtpk_mod = mod; 9173 pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul; 9174 pkey.dtpk_func = func; 9175 pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul; 9176 pkey.dtpk_name = name; 9177 pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul; 9178 pkey.dtpk_id = DTRACE_IDNONE; 9179 9180 mutex_enter(&dtrace_lock); 9181 match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0, 9182 dtrace_probe_lookup_match, &id); 9183 mutex_exit(&dtrace_lock); 9184 9185 ASSERT(match == 1 || match == 0); 9186 return (match ? id : 0); 9187 } 9188 9189 /* 9190 * Returns the probe argument associated with the specified probe. 
9191 */ 9192 void * 9193 dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid) 9194 { 9195 dtrace_probe_t *probe; 9196 void *rval = NULL; 9197 9198 mutex_enter(&dtrace_lock); 9199 9200 if ((probe = dtrace_probe_lookup_id(pid)) != NULL && 9201 probe->dtpr_provider == (dtrace_provider_t *)id) 9202 rval = probe->dtpr_arg; 9203 9204 mutex_exit(&dtrace_lock); 9205 9206 return (rval); 9207 } 9208 9209 /* 9210 * Copy a probe into a probe description. 9211 */ 9212 static void 9213 dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp) 9214 { 9215 bzero(pdp, sizeof (dtrace_probedesc_t)); 9216 pdp->dtpd_id = prp->dtpr_id; 9217 9218 (void) strncpy(pdp->dtpd_provider, 9219 prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN - 1); 9220 9221 (void) strncpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN - 1); 9222 (void) strncpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN - 1); 9223 (void) strncpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN - 1); 9224 } 9225 9226 /* 9227 * Called to indicate that a probe -- or probes -- should be provided by a 9228 * specfied provider. If the specified description is NULL, the provider will 9229 * be told to provide all of its probes. (This is done whenever a new 9230 * consumer comes along, or whenever a retained enabling is to be matched.) If 9231 * the specified description is non-NULL, the provider is given the 9232 * opportunity to dynamically provide the specified probe, allowing providers 9233 * to support the creation of probes on-the-fly. (So-called _autocreated_ 9234 * probes.) If the provider is NULL, the operations will be applied to all 9235 * providers; if the provider is non-NULL the operations will only be applied 9236 * to the specified provider. 
The dtrace_provider_lock must be held, and the 9237 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation 9238 * will need to grab the dtrace_lock when it reenters the framework through 9239 * dtrace_probe_lookup(), dtrace_probe_create(), etc. 9240 */ 9241 static void 9242 dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv) 9243 { 9244 #ifdef illumos 9245 modctl_t *ctl; 9246 #endif 9247 int all = 0; 9248 9249 ASSERT(MUTEX_HELD(&dtrace_provider_lock)); 9250 9251 if (prv == NULL) { 9252 all = 1; 9253 prv = dtrace_provider; 9254 } 9255 9256 do { 9257 /* 9258 * First, call the blanket provide operation. 9259 */ 9260 prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc); 9261 9262 #ifdef illumos 9263 /* 9264 * Now call the per-module provide operation. We will grab 9265 * mod_lock to prevent the list from being modified. Note 9266 * that this also prevents the mod_busy bits from changing. 9267 * (mod_busy can only be changed with mod_lock held.) 9268 */ 9269 mutex_enter(&mod_lock); 9270 9271 ctl = &modules; 9272 do { 9273 if (ctl->mod_busy || ctl->mod_mp == NULL) 9274 continue; 9275 9276 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); 9277 9278 } while ((ctl = ctl->mod_next) != &modules); 9279 9280 mutex_exit(&mod_lock); 9281 #endif 9282 } while (all && (prv = prv->dtpv_next) != NULL); 9283 } 9284 9285 #ifdef illumos 9286 /* 9287 * Iterate over each probe, and call the Framework-to-Provider API function 9288 * denoted by offs. 9289 */ 9290 static void 9291 dtrace_probe_foreach(uintptr_t offs) 9292 { 9293 dtrace_provider_t *prov; 9294 void (*func)(void *, dtrace_id_t, void *); 9295 dtrace_probe_t *probe; 9296 dtrace_icookie_t cookie; 9297 int i; 9298 9299 /* 9300 * We disable interrupts to walk through the probe array. This is 9301 * safe -- the dtrace_sync() in dtrace_unregister() assures that we 9302 * won't see stale data. 
9303 */ 9304 cookie = dtrace_interrupt_disable(); 9305 9306 for (i = 0; i < dtrace_nprobes; i++) { 9307 if ((probe = dtrace_probes[i]) == NULL) 9308 continue; 9309 9310 if (probe->dtpr_ecb == NULL) { 9311 /* 9312 * This probe isn't enabled -- don't call the function. 9313 */ 9314 continue; 9315 } 9316 9317 prov = probe->dtpr_provider; 9318 func = *((void(**)(void *, dtrace_id_t, void *)) 9319 ((uintptr_t)&prov->dtpv_pops + offs)); 9320 9321 func(prov->dtpv_arg, i + 1, probe->dtpr_arg); 9322 } 9323 9324 dtrace_interrupt_enable(cookie); 9325 } 9326 #endif 9327 9328 static int 9329 dtrace_probe_enable(dtrace_probedesc_t *desc, dtrace_enabling_t *enab) 9330 { 9331 dtrace_probekey_t pkey; 9332 uint32_t priv; 9333 uid_t uid; 9334 zoneid_t zoneid; 9335 9336 ASSERT(MUTEX_HELD(&dtrace_lock)); 9337 dtrace_ecb_create_cache = NULL; 9338 9339 if (desc == NULL) { 9340 /* 9341 * If we're passed a NULL description, we're being asked to 9342 * create an ECB with a NULL probe. 9343 */ 9344 (void) dtrace_ecb_create_enable(NULL, enab); 9345 return (0); 9346 } 9347 9348 dtrace_probekey(desc, &pkey); 9349 dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred, 9350 &priv, &uid, &zoneid); 9351 9352 return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable, 9353 enab)); 9354 } 9355 9356 /* 9357 * DTrace Helper Provider Functions 9358 */ 9359 static void 9360 dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr) 9361 { 9362 attr->dtat_name = DOF_ATTR_NAME(dofattr); 9363 attr->dtat_data = DOF_ATTR_DATA(dofattr); 9364 attr->dtat_class = DOF_ATTR_CLASS(dofattr); 9365 } 9366 9367 static void 9368 dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov, 9369 const dof_provider_t *dofprov, char *strtab) 9370 { 9371 hprov->dthpv_provname = strtab + dofprov->dofpv_name; 9372 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider, 9373 dofprov->dofpv_provattr); 9374 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod, 9375 dofprov->dofpv_modattr); 9376 
dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func, 9377 dofprov->dofpv_funcattr); 9378 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name, 9379 dofprov->dofpv_nameattr); 9380 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args, 9381 dofprov->dofpv_argsattr); 9382 } 9383 9384 static void 9385 dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) 9386 { 9387 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; 9388 dof_hdr_t *dof = (dof_hdr_t *)daddr; 9389 dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec; 9390 dof_provider_t *provider; 9391 dof_probe_t *probe; 9392 uint32_t *off, *enoff; 9393 uint8_t *arg; 9394 char *strtab; 9395 uint_t i, nprobes; 9396 dtrace_helper_provdesc_t dhpv; 9397 dtrace_helper_probedesc_t dhpb; 9398 dtrace_meta_t *meta = dtrace_meta_pid; 9399 dtrace_mops_t *mops = &meta->dtm_mops; 9400 void *parg; 9401 9402 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); 9403 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 9404 provider->dofpv_strtab * dof->dofh_secsize); 9405 prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 9406 provider->dofpv_probes * dof->dofh_secsize); 9407 arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 9408 provider->dofpv_prargs * dof->dofh_secsize); 9409 off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 9410 provider->dofpv_proffs * dof->dofh_secsize); 9411 9412 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); 9413 off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset); 9414 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset); 9415 enoff = NULL; 9416 9417 /* 9418 * See dtrace_helper_provider_validate(). 
9419 */ 9420 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && 9421 provider->dofpv_prenoffs != DOF_SECT_NONE) { 9422 enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 9423 provider->dofpv_prenoffs * dof->dofh_secsize); 9424 enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset); 9425 } 9426 9427 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize; 9428 9429 /* 9430 * Create the provider. 9431 */ 9432 dtrace_dofprov2hprov(&dhpv, provider, strtab); 9433 9434 if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL) 9435 return; 9436 9437 meta->dtm_count++; 9438 9439 /* 9440 * Create the probes. 9441 */ 9442 for (i = 0; i < nprobes; i++) { 9443 probe = (dof_probe_t *)(uintptr_t)(daddr + 9444 prb_sec->dofs_offset + i * prb_sec->dofs_entsize); 9445 9446 /* See the check in dtrace_helper_provider_validate(). */ 9447 if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) 9448 continue; 9449 9450 dhpb.dthpb_mod = dhp->dofhp_mod; 9451 dhpb.dthpb_func = strtab + probe->dofpr_func; 9452 dhpb.dthpb_name = strtab + probe->dofpr_name; 9453 dhpb.dthpb_base = probe->dofpr_addr; 9454 dhpb.dthpb_offs = off + probe->dofpr_offidx; 9455 dhpb.dthpb_noffs = probe->dofpr_noffs; 9456 if (enoff != NULL) { 9457 dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx; 9458 dhpb.dthpb_nenoffs = probe->dofpr_nenoffs; 9459 } else { 9460 dhpb.dthpb_enoffs = NULL; 9461 dhpb.dthpb_nenoffs = 0; 9462 } 9463 dhpb.dthpb_args = arg + probe->dofpr_argidx; 9464 dhpb.dthpb_nargc = probe->dofpr_nargc; 9465 dhpb.dthpb_xargc = probe->dofpr_xargc; 9466 dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv; 9467 dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv; 9468 9469 mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb); 9470 } 9471 } 9472 9473 static void 9474 dtrace_helper_provide(dof_helper_t *dhp, pid_t pid) 9475 { 9476 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; 9477 dof_hdr_t *dof = (dof_hdr_t *)daddr; 9478 int i; 9479 9480 ASSERT(MUTEX_HELD(&dtrace_meta_lock)); 
9481 9482 for (i = 0; i < dof->dofh_secnum; i++) { 9483 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + 9484 dof->dofh_secoff + i * dof->dofh_secsize); 9485 9486 if (sec->dofs_type != DOF_SECT_PROVIDER) 9487 continue; 9488 9489 dtrace_helper_provide_one(dhp, sec, pid); 9490 } 9491 9492 /* 9493 * We may have just created probes, so we must now rematch against 9494 * any retained enablings. Note that this call will acquire both 9495 * cpu_lock and dtrace_lock; the fact that we are holding 9496 * dtrace_meta_lock now is what defines the ordering with respect to 9497 * these three locks. 9498 */ 9499 dtrace_enabling_matchall(); 9500 } 9501 9502 static void 9503 dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) 9504 { 9505 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; 9506 dof_hdr_t *dof = (dof_hdr_t *)daddr; 9507 dof_sec_t *str_sec; 9508 dof_provider_t *provider; 9509 char *strtab; 9510 dtrace_helper_provdesc_t dhpv; 9511 dtrace_meta_t *meta = dtrace_meta_pid; 9512 dtrace_mops_t *mops = &meta->dtm_mops; 9513 9514 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); 9515 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 9516 provider->dofpv_strtab * dof->dofh_secsize); 9517 9518 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); 9519 9520 /* 9521 * Create the provider. 
9522 */ 9523 dtrace_dofprov2hprov(&dhpv, provider, strtab); 9524 9525 mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid); 9526 9527 meta->dtm_count--; 9528 } 9529 9530 static void 9531 dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid) 9532 { 9533 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; 9534 dof_hdr_t *dof = (dof_hdr_t *)daddr; 9535 int i; 9536 9537 ASSERT(MUTEX_HELD(&dtrace_meta_lock)); 9538 9539 for (i = 0; i < dof->dofh_secnum; i++) { 9540 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + 9541 dof->dofh_secoff + i * dof->dofh_secsize); 9542 9543 if (sec->dofs_type != DOF_SECT_PROVIDER) 9544 continue; 9545 9546 dtrace_helper_provider_remove_one(dhp, sec, pid); 9547 } 9548 } 9549 9550 /* 9551 * DTrace Meta Provider-to-Framework API Functions 9552 * 9553 * These functions implement the Meta Provider-to-Framework API, as described 9554 * in <sys/dtrace.h>. 9555 */ 9556 int 9557 dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, 9558 dtrace_meta_provider_id_t *idp) 9559 { 9560 dtrace_meta_t *meta; 9561 dtrace_helpers_t *help, *next; 9562 int i; 9563 9564 *idp = DTRACE_METAPROVNONE; 9565 9566 /* 9567 * We strictly don't need the name, but we hold onto it for 9568 * debuggability. All hail error queues! 
9569 */ 9570 if (name == NULL) { 9571 cmn_err(CE_WARN, "failed to register meta-provider: " 9572 "invalid name"); 9573 return (EINVAL); 9574 } 9575 9576 if (mops == NULL || 9577 mops->dtms_create_probe == NULL || 9578 mops->dtms_provide_pid == NULL || 9579 mops->dtms_remove_pid == NULL) { 9580 cmn_err(CE_WARN, "failed to register meta-register %s: " 9581 "invalid ops", name); 9582 return (EINVAL); 9583 } 9584 9585 meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP); 9586 meta->dtm_mops = *mops; 9587 meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); 9588 (void) strcpy(meta->dtm_name, name); 9589 meta->dtm_arg = arg; 9590 9591 mutex_enter(&dtrace_meta_lock); 9592 mutex_enter(&dtrace_lock); 9593 9594 if (dtrace_meta_pid != NULL) { 9595 mutex_exit(&dtrace_lock); 9596 mutex_exit(&dtrace_meta_lock); 9597 cmn_err(CE_WARN, "failed to register meta-register %s: " 9598 "user-land meta-provider exists", name); 9599 kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1); 9600 kmem_free(meta, sizeof (dtrace_meta_t)); 9601 return (EINVAL); 9602 } 9603 9604 dtrace_meta_pid = meta; 9605 *idp = (dtrace_meta_provider_id_t)meta; 9606 9607 /* 9608 * If there are providers and probes ready to go, pass them 9609 * off to the new meta provider now. 
9610 */ 9611 9612 help = dtrace_deferred_pid; 9613 dtrace_deferred_pid = NULL; 9614 9615 mutex_exit(&dtrace_lock); 9616 9617 while (help != NULL) { 9618 for (i = 0; i < help->dthps_nprovs; i++) { 9619 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, 9620 help->dthps_pid); 9621 } 9622 9623 next = help->dthps_next; 9624 help->dthps_next = NULL; 9625 help->dthps_prev = NULL; 9626 help->dthps_deferred = 0; 9627 help = next; 9628 } 9629 9630 mutex_exit(&dtrace_meta_lock); 9631 9632 return (0); 9633 } 9634 9635 int 9636 dtrace_meta_unregister(dtrace_meta_provider_id_t id) 9637 { 9638 dtrace_meta_t **pp, *old = (dtrace_meta_t *)id; 9639 9640 mutex_enter(&dtrace_meta_lock); 9641 mutex_enter(&dtrace_lock); 9642 9643 if (old == dtrace_meta_pid) { 9644 pp = &dtrace_meta_pid; 9645 } else { 9646 panic("attempt to unregister non-existent " 9647 "dtrace meta-provider %p\n", (void *)old); 9648 } 9649 9650 if (old->dtm_count != 0) { 9651 mutex_exit(&dtrace_lock); 9652 mutex_exit(&dtrace_meta_lock); 9653 return (EBUSY); 9654 } 9655 9656 *pp = NULL; 9657 9658 mutex_exit(&dtrace_lock); 9659 mutex_exit(&dtrace_meta_lock); 9660 9661 kmem_free(old->dtm_name, strlen(old->dtm_name) + 1); 9662 kmem_free(old, sizeof (dtrace_meta_t)); 9663 9664 return (0); 9665 } 9666 9667 9668 /* 9669 * DTrace DIF Object Functions 9670 */ 9671 static int 9672 dtrace_difo_err(uint_t pc, const char *format, ...) 9673 { 9674 if (dtrace_err_verbose) { 9675 va_list alist; 9676 9677 (void) uprintf("dtrace DIF object error: [%u]: ", pc); 9678 va_start(alist, format); 9679 (void) vuprintf(format, alist); 9680 va_end(alist); 9681 } 9682 9683 #ifdef DTRACE_ERRDEBUG 9684 dtrace_errdebug(format); 9685 #endif 9686 return (1); 9687 } 9688 9689 /* 9690 * Validate a DTrace DIF object by checking the IR instructions. The following 9691 * rules are currently enforced by dtrace_difo_validate(): 9692 * 9693 * 1. Each instruction must have a valid opcode 9694 * 2. 
Each register, string, variable, or subroutine reference must be valid 9695 * 3. No instruction can modify register %r0 (must be zero) 9696 * 4. All instruction reserved bits must be set to zero 9697 * 5. The last instruction must be a "ret" instruction 9698 * 6. All branch targets must reference a valid instruction _after_ the branch 9699 */ 9700 static int 9701 dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, 9702 cred_t *cr) 9703 { 9704 int err = 0, i; 9705 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err; 9706 int kcheckload; 9707 uint_t pc; 9708 int maxglobal = -1, maxlocal = -1, maxtlocal = -1; 9709 9710 kcheckload = cr == NULL || 9711 (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0; 9712 9713 dp->dtdo_destructive = 0; 9714 9715 for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) { 9716 dif_instr_t instr = dp->dtdo_buf[pc]; 9717 9718 uint_t r1 = DIF_INSTR_R1(instr); 9719 uint_t r2 = DIF_INSTR_R2(instr); 9720 uint_t rd = DIF_INSTR_RD(instr); 9721 uint_t rs = DIF_INSTR_RS(instr); 9722 uint_t label = DIF_INSTR_LABEL(instr); 9723 uint_t v = DIF_INSTR_VAR(instr); 9724 uint_t subr = DIF_INSTR_SUBR(instr); 9725 uint_t type = DIF_INSTR_TYPE(instr); 9726 uint_t op = DIF_INSTR_OP(instr); 9727 9728 switch (op) { 9729 case DIF_OP_OR: 9730 case DIF_OP_XOR: 9731 case DIF_OP_AND: 9732 case DIF_OP_SLL: 9733 case DIF_OP_SRL: 9734 case DIF_OP_SRA: 9735 case DIF_OP_SUB: 9736 case DIF_OP_ADD: 9737 case DIF_OP_MUL: 9738 case DIF_OP_SDIV: 9739 case DIF_OP_UDIV: 9740 case DIF_OP_SREM: 9741 case DIF_OP_UREM: 9742 case DIF_OP_COPYS: 9743 if (r1 >= nregs) 9744 err += efunc(pc, "invalid register %u\n", r1); 9745 if (r2 >= nregs) 9746 err += efunc(pc, "invalid register %u\n", r2); 9747 if (rd >= nregs) 9748 err += efunc(pc, "invalid register %u\n", rd); 9749 if (rd == 0) 9750 err += efunc(pc, "cannot write to %r0\n"); 9751 break; 9752 case DIF_OP_NOT: 9753 case DIF_OP_MOV: 9754 case DIF_OP_ALLOCS: 9755 if (r1 >= nregs) 9756 err 
+= efunc(pc, "invalid register %u\n", r1); 9757 if (r2 != 0) 9758 err += efunc(pc, "non-zero reserved bits\n"); 9759 if (rd >= nregs) 9760 err += efunc(pc, "invalid register %u\n", rd); 9761 if (rd == 0) 9762 err += efunc(pc, "cannot write to %r0\n"); 9763 break; 9764 case DIF_OP_LDSB: 9765 case DIF_OP_LDSH: 9766 case DIF_OP_LDSW: 9767 case DIF_OP_LDUB: 9768 case DIF_OP_LDUH: 9769 case DIF_OP_LDUW: 9770 case DIF_OP_LDX: 9771 if (r1 >= nregs) 9772 err += efunc(pc, "invalid register %u\n", r1); 9773 if (r2 != 0) 9774 err += efunc(pc, "non-zero reserved bits\n"); 9775 if (rd >= nregs) 9776 err += efunc(pc, "invalid register %u\n", rd); 9777 if (rd == 0) 9778 err += efunc(pc, "cannot write to %r0\n"); 9779 if (kcheckload) 9780 dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op + 9781 DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd); 9782 break; 9783 case DIF_OP_RLDSB: 9784 case DIF_OP_RLDSH: 9785 case DIF_OP_RLDSW: 9786 case DIF_OP_RLDUB: 9787 case DIF_OP_RLDUH: 9788 case DIF_OP_RLDUW: 9789 case DIF_OP_RLDX: 9790 if (r1 >= nregs) 9791 err += efunc(pc, "invalid register %u\n", r1); 9792 if (r2 != 0) 9793 err += efunc(pc, "non-zero reserved bits\n"); 9794 if (rd >= nregs) 9795 err += efunc(pc, "invalid register %u\n", rd); 9796 if (rd == 0) 9797 err += efunc(pc, "cannot write to %r0\n"); 9798 break; 9799 case DIF_OP_ULDSB: 9800 case DIF_OP_ULDSH: 9801 case DIF_OP_ULDSW: 9802 case DIF_OP_ULDUB: 9803 case DIF_OP_ULDUH: 9804 case DIF_OP_ULDUW: 9805 case DIF_OP_ULDX: 9806 if (r1 >= nregs) 9807 err += efunc(pc, "invalid register %u\n", r1); 9808 if (r2 != 0) 9809 err += efunc(pc, "non-zero reserved bits\n"); 9810 if (rd >= nregs) 9811 err += efunc(pc, "invalid register %u\n", rd); 9812 if (rd == 0) 9813 err += efunc(pc, "cannot write to %r0\n"); 9814 break; 9815 case DIF_OP_STB: 9816 case DIF_OP_STH: 9817 case DIF_OP_STW: 9818 case DIF_OP_STX: 9819 if (r1 >= nregs) 9820 err += efunc(pc, "invalid register %u\n", r1); 9821 if (r2 != 0) 9822 err += efunc(pc, "non-zero reserved bits\n"); 9823 if (rd >= 
nregs) 9824 err += efunc(pc, "invalid register %u\n", rd); 9825 if (rd == 0) 9826 err += efunc(pc, "cannot write to 0 address\n"); 9827 break; 9828 case DIF_OP_CMP: 9829 case DIF_OP_SCMP: 9830 if (r1 >= nregs) 9831 err += efunc(pc, "invalid register %u\n", r1); 9832 if (r2 >= nregs) 9833 err += efunc(pc, "invalid register %u\n", r2); 9834 if (rd != 0) 9835 err += efunc(pc, "non-zero reserved bits\n"); 9836 break; 9837 case DIF_OP_TST: 9838 if (r1 >= nregs) 9839 err += efunc(pc, "invalid register %u\n", r1); 9840 if (r2 != 0 || rd != 0) 9841 err += efunc(pc, "non-zero reserved bits\n"); 9842 break; 9843 case DIF_OP_BA: 9844 case DIF_OP_BE: 9845 case DIF_OP_BNE: 9846 case DIF_OP_BG: 9847 case DIF_OP_BGU: 9848 case DIF_OP_BGE: 9849 case DIF_OP_BGEU: 9850 case DIF_OP_BL: 9851 case DIF_OP_BLU: 9852 case DIF_OP_BLE: 9853 case DIF_OP_BLEU: 9854 if (label >= dp->dtdo_len) { 9855 err += efunc(pc, "invalid branch target %u\n", 9856 label); 9857 } 9858 if (label <= pc) { 9859 err += efunc(pc, "backward branch to %u\n", 9860 label); 9861 } 9862 break; 9863 case DIF_OP_RET: 9864 if (r1 != 0 || r2 != 0) 9865 err += efunc(pc, "non-zero reserved bits\n"); 9866 if (rd >= nregs) 9867 err += efunc(pc, "invalid register %u\n", rd); 9868 break; 9869 case DIF_OP_NOP: 9870 case DIF_OP_POPTS: 9871 case DIF_OP_FLUSHTS: 9872 if (r1 != 0 || r2 != 0 || rd != 0) 9873 err += efunc(pc, "non-zero reserved bits\n"); 9874 break; 9875 case DIF_OP_SETX: 9876 if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) { 9877 err += efunc(pc, "invalid integer ref %u\n", 9878 DIF_INSTR_INTEGER(instr)); 9879 } 9880 if (rd >= nregs) 9881 err += efunc(pc, "invalid register %u\n", rd); 9882 if (rd == 0) 9883 err += efunc(pc, "cannot write to %r0\n"); 9884 break; 9885 case DIF_OP_SETS: 9886 if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) { 9887 err += efunc(pc, "invalid string ref %u\n", 9888 DIF_INSTR_STRING(instr)); 9889 } 9890 if (rd >= nregs) 9891 err += efunc(pc, "invalid register %u\n", rd); 9892 if (rd == 0) 
9893 err += efunc(pc, "cannot write to %r0\n"); 9894 break; 9895 case DIF_OP_LDGA: 9896 case DIF_OP_LDTA: 9897 if (r1 > DIF_VAR_ARRAY_MAX) 9898 err += efunc(pc, "invalid array %u\n", r1); 9899 if (r2 >= nregs) 9900 err += efunc(pc, "invalid register %u\n", r2); 9901 if (rd >= nregs) 9902 err += efunc(pc, "invalid register %u\n", rd); 9903 if (rd == 0) 9904 err += efunc(pc, "cannot write to %r0\n"); 9905 break; 9906 case DIF_OP_LDGS: 9907 case DIF_OP_LDTS: 9908 case DIF_OP_LDLS: 9909 case DIF_OP_LDGAA: 9910 case DIF_OP_LDTAA: 9911 if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX) 9912 err += efunc(pc, "invalid variable %u\n", v); 9913 if (rd >= nregs) 9914 err += efunc(pc, "invalid register %u\n", rd); 9915 if (rd == 0) 9916 err += efunc(pc, "cannot write to %r0\n"); 9917 break; 9918 case DIF_OP_STGS: 9919 case DIF_OP_STTS: 9920 case DIF_OP_STLS: 9921 case DIF_OP_STGAA: 9922 case DIF_OP_STTAA: 9923 if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX) 9924 err += efunc(pc, "invalid variable %u\n", v); 9925 if (rs >= nregs) 9926 err += efunc(pc, "invalid register %u\n", rd); 9927 break; 9928 case DIF_OP_CALL: 9929 if (subr > DIF_SUBR_MAX) 9930 err += efunc(pc, "invalid subr %u\n", subr); 9931 if (rd >= nregs) 9932 err += efunc(pc, "invalid register %u\n", rd); 9933 if (rd == 0) 9934 err += efunc(pc, "cannot write to %r0\n"); 9935 9936 if (subr == DIF_SUBR_COPYOUT || 9937 subr == DIF_SUBR_COPYOUTSTR) { 9938 dp->dtdo_destructive = 1; 9939 } 9940 9941 if (subr == DIF_SUBR_GETF) { 9942 /* 9943 * If we have a getf() we need to record that 9944 * in our state. Note that our state can be 9945 * NULL if this is a helper -- but in that 9946 * case, the call to getf() is itself illegal, 9947 * and will be caught (slightly later) when 9948 * the helper is validated. 
9949 */ 9950 if (vstate->dtvs_state != NULL) 9951 vstate->dtvs_state->dts_getf++; 9952 } 9953 9954 break; 9955 case DIF_OP_PUSHTR: 9956 if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF) 9957 err += efunc(pc, "invalid ref type %u\n", type); 9958 if (r2 >= nregs) 9959 err += efunc(pc, "invalid register %u\n", r2); 9960 if (rs >= nregs) 9961 err += efunc(pc, "invalid register %u\n", rs); 9962 break; 9963 case DIF_OP_PUSHTV: 9964 if (type != DIF_TYPE_CTF) 9965 err += efunc(pc, "invalid val type %u\n", type); 9966 if (r2 >= nregs) 9967 err += efunc(pc, "invalid register %u\n", r2); 9968 if (rs >= nregs) 9969 err += efunc(pc, "invalid register %u\n", rs); 9970 break; 9971 default: 9972 err += efunc(pc, "invalid opcode %u\n", 9973 DIF_INSTR_OP(instr)); 9974 } 9975 } 9976 9977 if (dp->dtdo_len != 0 && 9978 DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) { 9979 err += efunc(dp->dtdo_len - 1, 9980 "expected 'ret' as last DIF instruction\n"); 9981 } 9982 9983 if (!(dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF))) { 9984 /* 9985 * If we're not returning by reference, the size must be either 9986 * 0 or the size of one of the base types. 
9987 */ 9988 switch (dp->dtdo_rtype.dtdt_size) { 9989 case 0: 9990 case sizeof (uint8_t): 9991 case sizeof (uint16_t): 9992 case sizeof (uint32_t): 9993 case sizeof (uint64_t): 9994 break; 9995 9996 default: 9997 err += efunc(dp->dtdo_len - 1, "bad return size\n"); 9998 } 9999 } 10000 10001 for (i = 0; i < dp->dtdo_varlen && err == 0; i++) { 10002 dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL; 10003 dtrace_diftype_t *vt, *et; 10004 uint_t id, ndx; 10005 10006 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL && 10007 v->dtdv_scope != DIFV_SCOPE_THREAD && 10008 v->dtdv_scope != DIFV_SCOPE_LOCAL) { 10009 err += efunc(i, "unrecognized variable scope %d\n", 10010 v->dtdv_scope); 10011 break; 10012 } 10013 10014 if (v->dtdv_kind != DIFV_KIND_ARRAY && 10015 v->dtdv_kind != DIFV_KIND_SCALAR) { 10016 err += efunc(i, "unrecognized variable type %d\n", 10017 v->dtdv_kind); 10018 break; 10019 } 10020 10021 if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) { 10022 err += efunc(i, "%d exceeds variable id limit\n", id); 10023 break; 10024 } 10025 10026 if (id < DIF_VAR_OTHER_UBASE) 10027 continue; 10028 10029 /* 10030 * For user-defined variables, we need to check that this 10031 * definition is identical to any previous definition that we 10032 * encountered. 
10033 */ 10034 ndx = id - DIF_VAR_OTHER_UBASE; 10035 10036 switch (v->dtdv_scope) { 10037 case DIFV_SCOPE_GLOBAL: 10038 if (maxglobal == -1 || ndx > maxglobal) 10039 maxglobal = ndx; 10040 10041 if (ndx < vstate->dtvs_nglobals) { 10042 dtrace_statvar_t *svar; 10043 10044 if ((svar = vstate->dtvs_globals[ndx]) != NULL) 10045 existing = &svar->dtsv_var; 10046 } 10047 10048 break; 10049 10050 case DIFV_SCOPE_THREAD: 10051 if (maxtlocal == -1 || ndx > maxtlocal) 10052 maxtlocal = ndx; 10053 10054 if (ndx < vstate->dtvs_ntlocals) 10055 existing = &vstate->dtvs_tlocals[ndx]; 10056 break; 10057 10058 case DIFV_SCOPE_LOCAL: 10059 if (maxlocal == -1 || ndx > maxlocal) 10060 maxlocal = ndx; 10061 10062 if (ndx < vstate->dtvs_nlocals) { 10063 dtrace_statvar_t *svar; 10064 10065 if ((svar = vstate->dtvs_locals[ndx]) != NULL) 10066 existing = &svar->dtsv_var; 10067 } 10068 10069 break; 10070 } 10071 10072 vt = &v->dtdv_type; 10073 10074 if (vt->dtdt_flags & DIF_TF_BYREF) { 10075 if (vt->dtdt_size == 0) { 10076 err += efunc(i, "zero-sized variable\n"); 10077 break; 10078 } 10079 10080 if ((v->dtdv_scope == DIFV_SCOPE_GLOBAL || 10081 v->dtdv_scope == DIFV_SCOPE_LOCAL) && 10082 vt->dtdt_size > dtrace_statvar_maxsize) { 10083 err += efunc(i, "oversized by-ref static\n"); 10084 break; 10085 } 10086 } 10087 10088 if (existing == NULL || existing->dtdv_id == 0) 10089 continue; 10090 10091 ASSERT(existing->dtdv_id == v->dtdv_id); 10092 ASSERT(existing->dtdv_scope == v->dtdv_scope); 10093 10094 if (existing->dtdv_kind != v->dtdv_kind) 10095 err += efunc(i, "%d changed variable kind\n", id); 10096 10097 et = &existing->dtdv_type; 10098 10099 if (vt->dtdt_flags != et->dtdt_flags) { 10100 err += efunc(i, "%d changed variable type flags\n", id); 10101 break; 10102 } 10103 10104 if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) { 10105 err += efunc(i, "%d changed variable type size\n", id); 10106 break; 10107 } 10108 } 10109 10110 for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) { 
10111 dif_instr_t instr = dp->dtdo_buf[pc]; 10112 10113 uint_t v = DIF_INSTR_VAR(instr); 10114 uint_t op = DIF_INSTR_OP(instr); 10115 10116 switch (op) { 10117 case DIF_OP_LDGS: 10118 case DIF_OP_LDGAA: 10119 case DIF_OP_STGS: 10120 case DIF_OP_STGAA: 10121 if (v > DIF_VAR_OTHER_UBASE + maxglobal) 10122 err += efunc(pc, "invalid variable %u\n", v); 10123 break; 10124 case DIF_OP_LDTS: 10125 case DIF_OP_LDTAA: 10126 case DIF_OP_STTS: 10127 case DIF_OP_STTAA: 10128 if (v > DIF_VAR_OTHER_UBASE + maxtlocal) 10129 err += efunc(pc, "invalid variable %u\n", v); 10130 break; 10131 case DIF_OP_LDLS: 10132 case DIF_OP_STLS: 10133 if (v > DIF_VAR_OTHER_UBASE + maxlocal) 10134 err += efunc(pc, "invalid variable %u\n", v); 10135 break; 10136 default: 10137 break; 10138 } 10139 } 10140 10141 return (err); 10142 } 10143 10144 /* 10145 * Validate a DTrace DIF object that it is to be used as a helper. Helpers 10146 * are much more constrained than normal DIFOs. Specifically, they may 10147 * not: 10148 * 10149 * 1. Make calls to subroutines other than copyin(), copyinstr() or 10150 * miscellaneous string routines 10151 * 2. Access DTrace variables other than the args[] array, and the 10152 * curthread, pid, ppid, tid, execname, zonename, uid and gid variables. 10153 * 3. Have thread-local variables. 10154 * 4. Have dynamic variables. 10155 */ 10156 static int 10157 dtrace_difo_validate_helper(dtrace_difo_t *dp) 10158 { 10159 int (*efunc)(uint_t pc, const char *, ...) 
= dtrace_difo_err; 10160 int err = 0; 10161 uint_t pc; 10162 10163 for (pc = 0; pc < dp->dtdo_len; pc++) { 10164 dif_instr_t instr = dp->dtdo_buf[pc]; 10165 10166 uint_t v = DIF_INSTR_VAR(instr); 10167 uint_t subr = DIF_INSTR_SUBR(instr); 10168 uint_t op = DIF_INSTR_OP(instr); 10169 10170 switch (op) { 10171 case DIF_OP_OR: 10172 case DIF_OP_XOR: 10173 case DIF_OP_AND: 10174 case DIF_OP_SLL: 10175 case DIF_OP_SRL: 10176 case DIF_OP_SRA: 10177 case DIF_OP_SUB: 10178 case DIF_OP_ADD: 10179 case DIF_OP_MUL: 10180 case DIF_OP_SDIV: 10181 case DIF_OP_UDIV: 10182 case DIF_OP_SREM: 10183 case DIF_OP_UREM: 10184 case DIF_OP_COPYS: 10185 case DIF_OP_NOT: 10186 case DIF_OP_MOV: 10187 case DIF_OP_RLDSB: 10188 case DIF_OP_RLDSH: 10189 case DIF_OP_RLDSW: 10190 case DIF_OP_RLDUB: 10191 case DIF_OP_RLDUH: 10192 case DIF_OP_RLDUW: 10193 case DIF_OP_RLDX: 10194 case DIF_OP_ULDSB: 10195 case DIF_OP_ULDSH: 10196 case DIF_OP_ULDSW: 10197 case DIF_OP_ULDUB: 10198 case DIF_OP_ULDUH: 10199 case DIF_OP_ULDUW: 10200 case DIF_OP_ULDX: 10201 case DIF_OP_STB: 10202 case DIF_OP_STH: 10203 case DIF_OP_STW: 10204 case DIF_OP_STX: 10205 case DIF_OP_ALLOCS: 10206 case DIF_OP_CMP: 10207 case DIF_OP_SCMP: 10208 case DIF_OP_TST: 10209 case DIF_OP_BA: 10210 case DIF_OP_BE: 10211 case DIF_OP_BNE: 10212 case DIF_OP_BG: 10213 case DIF_OP_BGU: 10214 case DIF_OP_BGE: 10215 case DIF_OP_BGEU: 10216 case DIF_OP_BL: 10217 case DIF_OP_BLU: 10218 case DIF_OP_BLE: 10219 case DIF_OP_BLEU: 10220 case DIF_OP_RET: 10221 case DIF_OP_NOP: 10222 case DIF_OP_POPTS: 10223 case DIF_OP_FLUSHTS: 10224 case DIF_OP_SETX: 10225 case DIF_OP_SETS: 10226 case DIF_OP_LDGA: 10227 case DIF_OP_LDLS: 10228 case DIF_OP_STGS: 10229 case DIF_OP_STLS: 10230 case DIF_OP_PUSHTR: 10231 case DIF_OP_PUSHTV: 10232 break; 10233 10234 case DIF_OP_LDGS: 10235 if (v >= DIF_VAR_OTHER_UBASE) 10236 break; 10237 10238 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) 10239 break; 10240 10241 if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID || 10242 v == 
DIF_VAR_PPID || v == DIF_VAR_TID || 10243 v == DIF_VAR_EXECARGS || 10244 v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME || 10245 v == DIF_VAR_UID || v == DIF_VAR_GID) 10246 break; 10247 10248 err += efunc(pc, "illegal variable %u\n", v); 10249 break; 10250 10251 case DIF_OP_LDTA: 10252 case DIF_OP_LDTS: 10253 case DIF_OP_LDGAA: 10254 case DIF_OP_LDTAA: 10255 err += efunc(pc, "illegal dynamic variable load\n"); 10256 break; 10257 10258 case DIF_OP_STTS: 10259 case DIF_OP_STGAA: 10260 case DIF_OP_STTAA: 10261 err += efunc(pc, "illegal dynamic variable store\n"); 10262 break; 10263 10264 case DIF_OP_CALL: 10265 if (subr == DIF_SUBR_ALLOCA || 10266 subr == DIF_SUBR_BCOPY || 10267 subr == DIF_SUBR_COPYIN || 10268 subr == DIF_SUBR_COPYINTO || 10269 subr == DIF_SUBR_COPYINSTR || 10270 subr == DIF_SUBR_INDEX || 10271 subr == DIF_SUBR_INET_NTOA || 10272 subr == DIF_SUBR_INET_NTOA6 || 10273 subr == DIF_SUBR_INET_NTOP || 10274 subr == DIF_SUBR_JSON || 10275 subr == DIF_SUBR_LLTOSTR || 10276 subr == DIF_SUBR_STRTOLL || 10277 subr == DIF_SUBR_RINDEX || 10278 subr == DIF_SUBR_STRCHR || 10279 subr == DIF_SUBR_STRJOIN || 10280 subr == DIF_SUBR_STRRCHR || 10281 subr == DIF_SUBR_STRSTR || 10282 subr == DIF_SUBR_HTONS || 10283 subr == DIF_SUBR_HTONL || 10284 subr == DIF_SUBR_HTONLL || 10285 subr == DIF_SUBR_NTOHS || 10286 subr == DIF_SUBR_NTOHL || 10287 subr == DIF_SUBR_NTOHLL || 10288 subr == DIF_SUBR_MEMREF) 10289 break; 10290 #ifdef __FreeBSD__ 10291 if (subr == DIF_SUBR_MEMSTR) 10292 break; 10293 #endif 10294 10295 err += efunc(pc, "invalid subr %u\n", subr); 10296 break; 10297 10298 default: 10299 err += efunc(pc, "invalid opcode %u\n", 10300 DIF_INSTR_OP(instr)); 10301 } 10302 } 10303 10304 return (err); 10305 } 10306 10307 /* 10308 * Returns 1 if the expression in the DIF object can be cached on a per-thread 10309 * basis; 0 if not. 
10310 */ 10311 static int 10312 dtrace_difo_cacheable(dtrace_difo_t *dp) 10313 { 10314 int i; 10315 10316 if (dp == NULL) 10317 return (0); 10318 10319 for (i = 0; i < dp->dtdo_varlen; i++) { 10320 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 10321 10322 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL) 10323 continue; 10324 10325 switch (v->dtdv_id) { 10326 case DIF_VAR_CURTHREAD: 10327 case DIF_VAR_PID: 10328 case DIF_VAR_TID: 10329 case DIF_VAR_EXECARGS: 10330 case DIF_VAR_EXECNAME: 10331 case DIF_VAR_ZONENAME: 10332 break; 10333 10334 default: 10335 return (0); 10336 } 10337 } 10338 10339 /* 10340 * This DIF object may be cacheable. Now we need to look for any 10341 * array loading instructions, any memory loading instructions, or 10342 * any stores to thread-local variables. 10343 */ 10344 for (i = 0; i < dp->dtdo_len; i++) { 10345 uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]); 10346 10347 if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) || 10348 (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) || 10349 (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) || 10350 op == DIF_OP_LDGA || op == DIF_OP_STTS) 10351 return (0); 10352 } 10353 10354 return (1); 10355 } 10356 10357 static void 10358 dtrace_difo_hold(dtrace_difo_t *dp) 10359 { 10360 int i; 10361 10362 ASSERT(MUTEX_HELD(&dtrace_lock)); 10363 10364 dp->dtdo_refcnt++; 10365 ASSERT(dp->dtdo_refcnt != 0); 10366 10367 /* 10368 * We need to check this DIF object for references to the variable 10369 * DIF_VAR_VTIMESTAMP. 10370 */ 10371 for (i = 0; i < dp->dtdo_varlen; i++) { 10372 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 10373 10374 if (v->dtdv_id != DIF_VAR_VTIMESTAMP) 10375 continue; 10376 10377 if (dtrace_vtime_references++ == 0) 10378 dtrace_vtime_enable(); 10379 } 10380 } 10381 10382 /* 10383 * This routine calculates the dynamic variable chunksize for a given DIF 10384 * object. The calculation is not fool-proof, and can probably be tricked by 10385 * malicious DIF -- but it works for all compiler-generated DIF. 
Because this 10386 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail 10387 * if a dynamic variable size exceeds the chunksize. 10388 */ 10389 static void 10390 dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 10391 { 10392 uint64_t sval = 0; 10393 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ 10394 const dif_instr_t *text = dp->dtdo_buf; 10395 uint_t pc, srd = 0; 10396 uint_t ttop = 0; 10397 size_t size, ksize; 10398 uint_t id, i; 10399 10400 for (pc = 0; pc < dp->dtdo_len; pc++) { 10401 dif_instr_t instr = text[pc]; 10402 uint_t op = DIF_INSTR_OP(instr); 10403 uint_t rd = DIF_INSTR_RD(instr); 10404 uint_t r1 = DIF_INSTR_R1(instr); 10405 uint_t nkeys = 0; 10406 uchar_t scope = 0; 10407 10408 dtrace_key_t *key = tupregs; 10409 10410 switch (op) { 10411 case DIF_OP_SETX: 10412 sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)]; 10413 srd = rd; 10414 continue; 10415 10416 case DIF_OP_STTS: 10417 key = &tupregs[DIF_DTR_NREGS]; 10418 key[0].dttk_size = 0; 10419 key[1].dttk_size = 0; 10420 nkeys = 2; 10421 scope = DIFV_SCOPE_THREAD; 10422 break; 10423 10424 case DIF_OP_STGAA: 10425 case DIF_OP_STTAA: 10426 nkeys = ttop; 10427 10428 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) 10429 key[nkeys++].dttk_size = 0; 10430 10431 key[nkeys++].dttk_size = 0; 10432 10433 if (op == DIF_OP_STTAA) { 10434 scope = DIFV_SCOPE_THREAD; 10435 } else { 10436 scope = DIFV_SCOPE_GLOBAL; 10437 } 10438 10439 break; 10440 10441 case DIF_OP_PUSHTR: 10442 if (ttop == DIF_DTR_NREGS) 10443 return; 10444 10445 if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) { 10446 /* 10447 * If the register for the size of the "pushtr" 10448 * is %r0 (or the value is 0) and the type is 10449 * a string, we'll use the system-wide default 10450 * string size. 
10451 */ 10452 tupregs[ttop++].dttk_size = 10453 dtrace_strsize_default; 10454 } else { 10455 if (srd == 0) 10456 return; 10457 10458 if (sval > LONG_MAX) 10459 return; 10460 10461 tupregs[ttop++].dttk_size = sval; 10462 } 10463 10464 break; 10465 10466 case DIF_OP_PUSHTV: 10467 if (ttop == DIF_DTR_NREGS) 10468 return; 10469 10470 tupregs[ttop++].dttk_size = 0; 10471 break; 10472 10473 case DIF_OP_FLUSHTS: 10474 ttop = 0; 10475 break; 10476 10477 case DIF_OP_POPTS: 10478 if (ttop != 0) 10479 ttop--; 10480 break; 10481 } 10482 10483 sval = 0; 10484 srd = 0; 10485 10486 if (nkeys == 0) 10487 continue; 10488 10489 /* 10490 * We have a dynamic variable allocation; calculate its size. 10491 */ 10492 for (ksize = 0, i = 0; i < nkeys; i++) 10493 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t)); 10494 10495 size = sizeof (dtrace_dynvar_t); 10496 size += sizeof (dtrace_key_t) * (nkeys - 1); 10497 size += ksize; 10498 10499 /* 10500 * Now we need to determine the size of the stored data. 10501 */ 10502 id = DIF_INSTR_VAR(instr); 10503 10504 for (i = 0; i < dp->dtdo_varlen; i++) { 10505 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 10506 10507 if (v->dtdv_id == id && v->dtdv_scope == scope) { 10508 size += v->dtdv_type.dtdt_size; 10509 break; 10510 } 10511 } 10512 10513 if (i == dp->dtdo_varlen) 10514 return; 10515 10516 /* 10517 * We have the size. If this is larger than the chunk size 10518 * for our dynamic variable state, reset the chunk size. 10519 */ 10520 size = P2ROUNDUP(size, sizeof (uint64_t)); 10521 10522 /* 10523 * Before setting the chunk size, check that we're not going 10524 * to set it to a negative value... 10525 */ 10526 if (size > LONG_MAX) 10527 return; 10528 10529 /* 10530 * ...and make certain that we didn't badly overflow. 
10531 */ 10532 if (size < ksize || size < sizeof (dtrace_dynvar_t)) 10533 return; 10534 10535 if (size > vstate->dtvs_dynvars.dtds_chunksize) 10536 vstate->dtvs_dynvars.dtds_chunksize = size; 10537 } 10538 } 10539 10540 static void 10541 dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 10542 { 10543 int i, oldsvars, osz, nsz, otlocals, ntlocals; 10544 uint_t id; 10545 10546 ASSERT(MUTEX_HELD(&dtrace_lock)); 10547 ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0); 10548 10549 for (i = 0; i < dp->dtdo_varlen; i++) { 10550 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 10551 dtrace_statvar_t *svar, ***svarp = NULL; 10552 size_t dsize = 0; 10553 uint8_t scope = v->dtdv_scope; 10554 int *np = NULL; 10555 10556 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) 10557 continue; 10558 10559 id -= DIF_VAR_OTHER_UBASE; 10560 10561 switch (scope) { 10562 case DIFV_SCOPE_THREAD: 10563 while (id >= (otlocals = vstate->dtvs_ntlocals)) { 10564 dtrace_difv_t *tlocals; 10565 10566 if ((ntlocals = (otlocals << 1)) == 0) 10567 ntlocals = 1; 10568 10569 osz = otlocals * sizeof (dtrace_difv_t); 10570 nsz = ntlocals * sizeof (dtrace_difv_t); 10571 10572 tlocals = kmem_zalloc(nsz, KM_SLEEP); 10573 10574 if (osz != 0) { 10575 bcopy(vstate->dtvs_tlocals, 10576 tlocals, osz); 10577 kmem_free(vstate->dtvs_tlocals, osz); 10578 } 10579 10580 vstate->dtvs_tlocals = tlocals; 10581 vstate->dtvs_ntlocals = ntlocals; 10582 } 10583 10584 vstate->dtvs_tlocals[id] = *v; 10585 continue; 10586 10587 case DIFV_SCOPE_LOCAL: 10588 np = &vstate->dtvs_nlocals; 10589 svarp = &vstate->dtvs_locals; 10590 10591 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) 10592 dsize = NCPU * (v->dtdv_type.dtdt_size + 10593 sizeof (uint64_t)); 10594 else 10595 dsize = NCPU * sizeof (uint64_t); 10596 10597 break; 10598 10599 case DIFV_SCOPE_GLOBAL: 10600 np = &vstate->dtvs_nglobals; 10601 svarp = &vstate->dtvs_globals; 10602 10603 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) 10604 dsize = v->dtdv_type.dtdt_size + 10605 sizeof 
(uint64_t); 10606 10607 break; 10608 10609 default: 10610 ASSERT(0); 10611 } 10612 10613 while (id >= (oldsvars = *np)) { 10614 dtrace_statvar_t **statics; 10615 int newsvars, oldsize, newsize; 10616 10617 if ((newsvars = (oldsvars << 1)) == 0) 10618 newsvars = 1; 10619 10620 oldsize = oldsvars * sizeof (dtrace_statvar_t *); 10621 newsize = newsvars * sizeof (dtrace_statvar_t *); 10622 10623 statics = kmem_zalloc(newsize, KM_SLEEP); 10624 10625 if (oldsize != 0) { 10626 bcopy(*svarp, statics, oldsize); 10627 kmem_free(*svarp, oldsize); 10628 } 10629 10630 *svarp = statics; 10631 *np = newsvars; 10632 } 10633 10634 if ((svar = (*svarp)[id]) == NULL) { 10635 svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP); 10636 svar->dtsv_var = *v; 10637 10638 if ((svar->dtsv_size = dsize) != 0) { 10639 svar->dtsv_data = (uint64_t)(uintptr_t) 10640 kmem_zalloc(dsize, KM_SLEEP); 10641 } 10642 10643 (*svarp)[id] = svar; 10644 } 10645 10646 svar->dtsv_refcnt++; 10647 } 10648 10649 dtrace_difo_chunksize(dp, vstate); 10650 dtrace_difo_hold(dp); 10651 } 10652 10653 static dtrace_difo_t * 10654 dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 10655 { 10656 dtrace_difo_t *new; 10657 size_t sz; 10658 10659 ASSERT(dp->dtdo_buf != NULL); 10660 ASSERT(dp->dtdo_refcnt != 0); 10661 10662 new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP); 10663 10664 ASSERT(dp->dtdo_buf != NULL); 10665 sz = dp->dtdo_len * sizeof (dif_instr_t); 10666 new->dtdo_buf = kmem_alloc(sz, KM_SLEEP); 10667 bcopy(dp->dtdo_buf, new->dtdo_buf, sz); 10668 new->dtdo_len = dp->dtdo_len; 10669 10670 if (dp->dtdo_strtab != NULL) { 10671 ASSERT(dp->dtdo_strlen != 0); 10672 new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP); 10673 bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen); 10674 new->dtdo_strlen = dp->dtdo_strlen; 10675 } 10676 10677 if (dp->dtdo_inttab != NULL) { 10678 ASSERT(dp->dtdo_intlen != 0); 10679 sz = dp->dtdo_intlen * sizeof (uint64_t); 10680 new->dtdo_inttab = kmem_alloc(sz, 
KM_SLEEP); 10681 bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz); 10682 new->dtdo_intlen = dp->dtdo_intlen; 10683 } 10684 10685 if (dp->dtdo_vartab != NULL) { 10686 ASSERT(dp->dtdo_varlen != 0); 10687 sz = dp->dtdo_varlen * sizeof (dtrace_difv_t); 10688 new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP); 10689 bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz); 10690 new->dtdo_varlen = dp->dtdo_varlen; 10691 } 10692 10693 dtrace_difo_init(new, vstate); 10694 return (new); 10695 } 10696 10697 static void 10698 dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 10699 { 10700 int i; 10701 10702 ASSERT(dp->dtdo_refcnt == 0); 10703 10704 for (i = 0; i < dp->dtdo_varlen; i++) { 10705 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 10706 dtrace_statvar_t *svar, **svarp = NULL; 10707 uint_t id; 10708 uint8_t scope = v->dtdv_scope; 10709 int *np = NULL; 10710 10711 switch (scope) { 10712 case DIFV_SCOPE_THREAD: 10713 continue; 10714 10715 case DIFV_SCOPE_LOCAL: 10716 np = &vstate->dtvs_nlocals; 10717 svarp = vstate->dtvs_locals; 10718 break; 10719 10720 case DIFV_SCOPE_GLOBAL: 10721 np = &vstate->dtvs_nglobals; 10722 svarp = vstate->dtvs_globals; 10723 break; 10724 10725 default: 10726 ASSERT(0); 10727 } 10728 10729 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) 10730 continue; 10731 10732 id -= DIF_VAR_OTHER_UBASE; 10733 ASSERT(id < *np); 10734 10735 svar = svarp[id]; 10736 ASSERT(svar != NULL); 10737 ASSERT(svar->dtsv_refcnt > 0); 10738 10739 if (--svar->dtsv_refcnt > 0) 10740 continue; 10741 10742 if (svar->dtsv_size != 0) { 10743 ASSERT(svar->dtsv_data != 0); 10744 kmem_free((void *)(uintptr_t)svar->dtsv_data, 10745 svar->dtsv_size); 10746 } 10747 10748 kmem_free(svar, sizeof (dtrace_statvar_t)); 10749 svarp[id] = NULL; 10750 } 10751 10752 if (dp->dtdo_buf != NULL) 10753 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t)); 10754 if (dp->dtdo_inttab != NULL) 10755 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t)); 10756 if (dp->dtdo_strtab != NULL) 10757 
kmem_free(dp->dtdo_strtab, dp->dtdo_strlen); 10758 if (dp->dtdo_vartab != NULL) 10759 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t)); 10760 10761 kmem_free(dp, sizeof (dtrace_difo_t)); 10762 } 10763 10764 static void 10765 dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 10766 { 10767 int i; 10768 10769 ASSERT(MUTEX_HELD(&dtrace_lock)); 10770 ASSERT(dp->dtdo_refcnt != 0); 10771 10772 for (i = 0; i < dp->dtdo_varlen; i++) { 10773 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 10774 10775 if (v->dtdv_id != DIF_VAR_VTIMESTAMP) 10776 continue; 10777 10778 ASSERT(dtrace_vtime_references > 0); 10779 if (--dtrace_vtime_references == 0) 10780 dtrace_vtime_disable(); 10781 } 10782 10783 if (--dp->dtdo_refcnt == 0) 10784 dtrace_difo_destroy(dp, vstate); 10785 } 10786 10787 /* 10788 * DTrace Format Functions 10789 */ 10790 static uint16_t 10791 dtrace_format_add(dtrace_state_t *state, char *str) 10792 { 10793 char *fmt, **new; 10794 uint16_t ndx, len = strlen(str) + 1; 10795 10796 fmt = kmem_zalloc(len, KM_SLEEP); 10797 bcopy(str, fmt, len); 10798 10799 for (ndx = 0; ndx < state->dts_nformats; ndx++) { 10800 if (state->dts_formats[ndx] == NULL) { 10801 state->dts_formats[ndx] = fmt; 10802 return (ndx + 1); 10803 } 10804 } 10805 10806 if (state->dts_nformats == USHRT_MAX) { 10807 /* 10808 * This is only likely if a denial-of-service attack is being 10809 * attempted. As such, it's okay to fail silently here. 10810 */ 10811 kmem_free(fmt, len); 10812 return (0); 10813 } 10814 10815 /* 10816 * For simplicity, we always resize the formats array to be exactly the 10817 * number of formats. 
10818 */ 10819 ndx = state->dts_nformats++; 10820 new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP); 10821 10822 if (state->dts_formats != NULL) { 10823 ASSERT(ndx != 0); 10824 bcopy(state->dts_formats, new, ndx * sizeof (char *)); 10825 kmem_free(state->dts_formats, ndx * sizeof (char *)); 10826 } 10827 10828 state->dts_formats = new; 10829 state->dts_formats[ndx] = fmt; 10830 10831 return (ndx + 1); 10832 } 10833 10834 static void 10835 dtrace_format_remove(dtrace_state_t *state, uint16_t format) 10836 { 10837 char *fmt; 10838 10839 ASSERT(state->dts_formats != NULL); 10840 ASSERT(format <= state->dts_nformats); 10841 ASSERT(state->dts_formats[format - 1] != NULL); 10842 10843 fmt = state->dts_formats[format - 1]; 10844 kmem_free(fmt, strlen(fmt) + 1); 10845 state->dts_formats[format - 1] = NULL; 10846 } 10847 10848 static void 10849 dtrace_format_destroy(dtrace_state_t *state) 10850 { 10851 int i; 10852 10853 if (state->dts_nformats == 0) { 10854 ASSERT(state->dts_formats == NULL); 10855 return; 10856 } 10857 10858 ASSERT(state->dts_formats != NULL); 10859 10860 for (i = 0; i < state->dts_nformats; i++) { 10861 char *fmt = state->dts_formats[i]; 10862 10863 if (fmt == NULL) 10864 continue; 10865 10866 kmem_free(fmt, strlen(fmt) + 1); 10867 } 10868 10869 kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *)); 10870 state->dts_nformats = 0; 10871 state->dts_formats = NULL; 10872 } 10873 10874 /* 10875 * DTrace Predicate Functions 10876 */ 10877 static dtrace_predicate_t * 10878 dtrace_predicate_create(dtrace_difo_t *dp) 10879 { 10880 dtrace_predicate_t *pred; 10881 10882 ASSERT(MUTEX_HELD(&dtrace_lock)); 10883 ASSERT(dp->dtdo_refcnt != 0); 10884 10885 pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP); 10886 pred->dtp_difo = dp; 10887 pred->dtp_refcnt = 1; 10888 10889 if (!dtrace_difo_cacheable(dp)) 10890 return (pred); 10891 10892 if (dtrace_predcache_id == DTRACE_CACHEIDNONE) { 10893 /* 10894 * This is only theoretically possible -- we 
have had 2^32 10895 * cacheable predicates on this machine. We cannot allow any 10896 * more predicates to become cacheable: as unlikely as it is, 10897 * there may be a thread caching a (now stale) predicate cache 10898 * ID. (N.B.: the temptation is being successfully resisted to 10899 * have this cmn_err() "Holy shit -- we executed this code!") 10900 */ 10901 return (pred); 10902 } 10903 10904 pred->dtp_cacheid = dtrace_predcache_id++; 10905 10906 return (pred); 10907 } 10908 10909 static void 10910 dtrace_predicate_hold(dtrace_predicate_t *pred) 10911 { 10912 ASSERT(MUTEX_HELD(&dtrace_lock)); 10913 ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0); 10914 ASSERT(pred->dtp_refcnt > 0); 10915 10916 pred->dtp_refcnt++; 10917 } 10918 10919 static void 10920 dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate) 10921 { 10922 dtrace_difo_t *dp = pred->dtp_difo; 10923 10924 ASSERT(MUTEX_HELD(&dtrace_lock)); 10925 ASSERT(dp != NULL && dp->dtdo_refcnt != 0); 10926 ASSERT(pred->dtp_refcnt > 0); 10927 10928 if (--pred->dtp_refcnt == 0) { 10929 dtrace_difo_release(pred->dtp_difo, vstate); 10930 kmem_free(pred, sizeof (dtrace_predicate_t)); 10931 } 10932 } 10933 10934 /* 10935 * DTrace Action Description Functions 10936 */ 10937 static dtrace_actdesc_t * 10938 dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple, 10939 uint64_t uarg, uint64_t arg) 10940 { 10941 dtrace_actdesc_t *act; 10942 10943 #ifdef illumos 10944 ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL && 10945 arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA)); 10946 #endif 10947 10948 act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP); 10949 act->dtad_kind = kind; 10950 act->dtad_ntuple = ntuple; 10951 act->dtad_uarg = uarg; 10952 act->dtad_arg = arg; 10953 act->dtad_refcnt = 1; 10954 10955 return (act); 10956 } 10957 10958 static void 10959 dtrace_actdesc_hold(dtrace_actdesc_t *act) 10960 { 10961 ASSERT(act->dtad_refcnt >= 1); 10962 
act->dtad_refcnt++; 10963 } 10964 10965 static void 10966 dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate) 10967 { 10968 dtrace_actkind_t kind = act->dtad_kind; 10969 dtrace_difo_t *dp; 10970 10971 ASSERT(act->dtad_refcnt >= 1); 10972 10973 if (--act->dtad_refcnt != 0) 10974 return; 10975 10976 if ((dp = act->dtad_difo) != NULL) 10977 dtrace_difo_release(dp, vstate); 10978 10979 if (DTRACEACT_ISPRINTFLIKE(kind)) { 10980 char *str = (char *)(uintptr_t)act->dtad_arg; 10981 10982 #ifdef illumos 10983 ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) || 10984 (str == NULL && act->dtad_kind == DTRACEACT_PRINTA)); 10985 #endif 10986 10987 if (str != NULL) 10988 kmem_free(str, strlen(str) + 1); 10989 } 10990 10991 kmem_free(act, sizeof (dtrace_actdesc_t)); 10992 } 10993 10994 /* 10995 * DTrace ECB Functions 10996 */ 10997 static dtrace_ecb_t * 10998 dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) 10999 { 11000 dtrace_ecb_t *ecb; 11001 dtrace_epid_t epid; 11002 11003 ASSERT(MUTEX_HELD(&dtrace_lock)); 11004 11005 ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP); 11006 ecb->dte_predicate = NULL; 11007 ecb->dte_probe = probe; 11008 11009 /* 11010 * The default size is the size of the default action: recording 11011 * the header. 
11012 */ 11013 ecb->dte_size = ecb->dte_needed = sizeof (dtrace_rechdr_t); 11014 ecb->dte_alignment = sizeof (dtrace_epid_t); 11015 11016 epid = state->dts_epid++; 11017 11018 if (epid - 1 >= state->dts_necbs) { 11019 dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs; 11020 int necbs = state->dts_necbs << 1; 11021 11022 ASSERT(epid == state->dts_necbs + 1); 11023 11024 if (necbs == 0) { 11025 ASSERT(oecbs == NULL); 11026 necbs = 1; 11027 } 11028 11029 ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP); 11030 11031 if (oecbs != NULL) 11032 bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs)); 11033 11034 dtrace_membar_producer(); 11035 state->dts_ecbs = ecbs; 11036 11037 if (oecbs != NULL) { 11038 /* 11039 * If this state is active, we must dtrace_sync() 11040 * before we can free the old dts_ecbs array: we're 11041 * coming in hot, and there may be active ring 11042 * buffer processing (which indexes into the dts_ecbs 11043 * array) on another CPU. 11044 */ 11045 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) 11046 dtrace_sync(); 11047 11048 kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs)); 11049 } 11050 11051 dtrace_membar_producer(); 11052 state->dts_necbs = necbs; 11053 } 11054 11055 ecb->dte_state = state; 11056 11057 ASSERT(state->dts_ecbs[epid - 1] == NULL); 11058 dtrace_membar_producer(); 11059 state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb; 11060 11061 return (ecb); 11062 } 11063 11064 static void 11065 dtrace_ecb_enable(dtrace_ecb_t *ecb) 11066 { 11067 dtrace_probe_t *probe = ecb->dte_probe; 11068 11069 ASSERT(MUTEX_HELD(&cpu_lock)); 11070 ASSERT(MUTEX_HELD(&dtrace_lock)); 11071 ASSERT(ecb->dte_next == NULL); 11072 11073 if (probe == NULL) { 11074 /* 11075 * This is the NULL probe -- there's nothing to do. 11076 */ 11077 return; 11078 } 11079 11080 if (probe->dtpr_ecb == NULL) { 11081 dtrace_provider_t *prov = probe->dtpr_provider; 11082 11083 /* 11084 * We're the first ECB on this probe. 
11085 */ 11086 probe->dtpr_ecb = probe->dtpr_ecb_last = ecb; 11087 11088 if (ecb->dte_predicate != NULL) 11089 probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid; 11090 11091 prov->dtpv_pops.dtps_enable(prov->dtpv_arg, 11092 probe->dtpr_id, probe->dtpr_arg); 11093 } else { 11094 /* 11095 * This probe is already active. Swing the last pointer to 11096 * point to the new ECB, and issue a dtrace_sync() to assure 11097 * that all CPUs have seen the change. 11098 */ 11099 ASSERT(probe->dtpr_ecb_last != NULL); 11100 probe->dtpr_ecb_last->dte_next = ecb; 11101 probe->dtpr_ecb_last = ecb; 11102 probe->dtpr_predcache = 0; 11103 11104 dtrace_sync(); 11105 } 11106 } 11107 11108 static int 11109 dtrace_ecb_resize(dtrace_ecb_t *ecb) 11110 { 11111 dtrace_action_t *act; 11112 uint32_t curneeded = UINT32_MAX; 11113 uint32_t aggbase = UINT32_MAX; 11114 11115 /* 11116 * If we record anything, we always record the dtrace_rechdr_t. (And 11117 * we always record it first.) 11118 */ 11119 ecb->dte_size = sizeof (dtrace_rechdr_t); 11120 ecb->dte_alignment = sizeof (dtrace_epid_t); 11121 11122 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 11123 dtrace_recdesc_t *rec = &act->dta_rec; 11124 ASSERT(rec->dtrd_size > 0 || rec->dtrd_alignment == 1); 11125 11126 ecb->dte_alignment = MAX(ecb->dte_alignment, 11127 rec->dtrd_alignment); 11128 11129 if (DTRACEACT_ISAGG(act->dta_kind)) { 11130 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act; 11131 11132 ASSERT(rec->dtrd_size != 0); 11133 ASSERT(agg->dtag_first != NULL); 11134 ASSERT(act->dta_prev->dta_intuple); 11135 ASSERT(aggbase != UINT32_MAX); 11136 ASSERT(curneeded != UINT32_MAX); 11137 11138 agg->dtag_base = aggbase; 11139 11140 curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); 11141 rec->dtrd_offset = curneeded; 11142 if (curneeded + rec->dtrd_size < curneeded) 11143 return (EINVAL); 11144 curneeded += rec->dtrd_size; 11145 ecb->dte_needed = MAX(ecb->dte_needed, curneeded); 11146 11147 aggbase = UINT32_MAX; 
11148 curneeded = UINT32_MAX; 11149 } else if (act->dta_intuple) { 11150 if (curneeded == UINT32_MAX) { 11151 /* 11152 * This is the first record in a tuple. Align 11153 * curneeded to be at offset 4 in an 8-byte 11154 * aligned block. 11155 */ 11156 ASSERT(act->dta_prev == NULL || 11157 !act->dta_prev->dta_intuple); 11158 ASSERT3U(aggbase, ==, UINT32_MAX); 11159 curneeded = P2PHASEUP(ecb->dte_size, 11160 sizeof (uint64_t), sizeof (dtrace_aggid_t)); 11161 11162 aggbase = curneeded - sizeof (dtrace_aggid_t); 11163 ASSERT(IS_P2ALIGNED(aggbase, 11164 sizeof (uint64_t))); 11165 } 11166 curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); 11167 rec->dtrd_offset = curneeded; 11168 if (curneeded + rec->dtrd_size < curneeded) 11169 return (EINVAL); 11170 curneeded += rec->dtrd_size; 11171 } else { 11172 /* tuples must be followed by an aggregation */ 11173 ASSERT(act->dta_prev == NULL || 11174 !act->dta_prev->dta_intuple); 11175 11176 ecb->dte_size = P2ROUNDUP(ecb->dte_size, 11177 rec->dtrd_alignment); 11178 rec->dtrd_offset = ecb->dte_size; 11179 if (ecb->dte_size + rec->dtrd_size < ecb->dte_size) 11180 return (EINVAL); 11181 ecb->dte_size += rec->dtrd_size; 11182 ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size); 11183 } 11184 } 11185 11186 if ((act = ecb->dte_action) != NULL && 11187 !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) && 11188 ecb->dte_size == sizeof (dtrace_rechdr_t)) { 11189 /* 11190 * If the size is still sizeof (dtrace_rechdr_t), then all 11191 * actions store no data; set the size to 0. 
11192 */ 11193 ecb->dte_size = 0; 11194 } 11195 11196 ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t)); 11197 ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t))); 11198 ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed, 11199 ecb->dte_needed); 11200 return (0); 11201 } 11202 11203 static dtrace_action_t * 11204 dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) 11205 { 11206 dtrace_aggregation_t *agg; 11207 size_t size = sizeof (uint64_t); 11208 int ntuple = desc->dtad_ntuple; 11209 dtrace_action_t *act; 11210 dtrace_recdesc_t *frec; 11211 dtrace_aggid_t aggid; 11212 dtrace_state_t *state = ecb->dte_state; 11213 11214 agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP); 11215 agg->dtag_ecb = ecb; 11216 11217 ASSERT(DTRACEACT_ISAGG(desc->dtad_kind)); 11218 11219 switch (desc->dtad_kind) { 11220 case DTRACEAGG_MIN: 11221 agg->dtag_initial = INT64_MAX; 11222 agg->dtag_aggregate = dtrace_aggregate_min; 11223 break; 11224 11225 case DTRACEAGG_MAX: 11226 agg->dtag_initial = INT64_MIN; 11227 agg->dtag_aggregate = dtrace_aggregate_max; 11228 break; 11229 11230 case DTRACEAGG_COUNT: 11231 agg->dtag_aggregate = dtrace_aggregate_count; 11232 break; 11233 11234 case DTRACEAGG_QUANTIZE: 11235 agg->dtag_aggregate = dtrace_aggregate_quantize; 11236 size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) * 11237 sizeof (uint64_t); 11238 break; 11239 11240 case DTRACEAGG_LQUANTIZE: { 11241 uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg); 11242 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg); 11243 11244 agg->dtag_initial = desc->dtad_arg; 11245 agg->dtag_aggregate = dtrace_aggregate_lquantize; 11246 11247 if (step == 0 || levels == 0) 11248 goto err; 11249 11250 size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t); 11251 break; 11252 } 11253 11254 case DTRACEAGG_LLQUANTIZE: { 11255 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg); 11256 uint16_t low = 
DTRACE_LLQUANTIZE_LOW(desc->dtad_arg); 11257 uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg); 11258 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg); 11259 int64_t v; 11260 11261 agg->dtag_initial = desc->dtad_arg; 11262 agg->dtag_aggregate = dtrace_aggregate_llquantize; 11263 11264 if (factor < 2 || low >= high || nsteps < factor) 11265 goto err; 11266 11267 /* 11268 * Now check that the number of steps evenly divides a power 11269 * of the factor. (This assures both integer bucket size and 11270 * linearity within each magnitude.) 11271 */ 11272 for (v = factor; v < nsteps; v *= factor) 11273 continue; 11274 11275 if ((v % nsteps) || (nsteps % factor)) 11276 goto err; 11277 11278 size = (dtrace_aggregate_llquantize_bucket(factor, 11279 low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t); 11280 break; 11281 } 11282 11283 case DTRACEAGG_AVG: 11284 agg->dtag_aggregate = dtrace_aggregate_avg; 11285 size = sizeof (uint64_t) * 2; 11286 break; 11287 11288 case DTRACEAGG_STDDEV: 11289 agg->dtag_aggregate = dtrace_aggregate_stddev; 11290 size = sizeof (uint64_t) * 4; 11291 break; 11292 11293 case DTRACEAGG_SUM: 11294 agg->dtag_aggregate = dtrace_aggregate_sum; 11295 break; 11296 11297 default: 11298 goto err; 11299 } 11300 11301 agg->dtag_action.dta_rec.dtrd_size = size; 11302 11303 if (ntuple == 0) 11304 goto err; 11305 11306 /* 11307 * We must make sure that we have enough actions for the n-tuple. 11308 */ 11309 for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) { 11310 if (DTRACEACT_ISAGG(act->dta_kind)) 11311 break; 11312 11313 if (--ntuple == 0) { 11314 /* 11315 * This is the action with which our n-tuple begins. 11316 */ 11317 agg->dtag_first = act; 11318 goto success; 11319 } 11320 } 11321 11322 /* 11323 * This n-tuple is short by ntuple elements. Return failure. 
11324 */ 11325 ASSERT(ntuple != 0); 11326 err: 11327 kmem_free(agg, sizeof (dtrace_aggregation_t)); 11328 return (NULL); 11329 11330 success: 11331 /* 11332 * If the last action in the tuple has a size of zero, it's actually 11333 * an expression argument for the aggregating action. 11334 */ 11335 ASSERT(ecb->dte_action_last != NULL); 11336 act = ecb->dte_action_last; 11337 11338 if (act->dta_kind == DTRACEACT_DIFEXPR) { 11339 ASSERT(act->dta_difo != NULL); 11340 11341 if (act->dta_difo->dtdo_rtype.dtdt_size == 0) 11342 agg->dtag_hasarg = 1; 11343 } 11344 11345 /* 11346 * We need to allocate an id for this aggregation. 11347 */ 11348 #ifdef illumos 11349 aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1, 11350 VM_BESTFIT | VM_SLEEP); 11351 #else 11352 aggid = alloc_unr(state->dts_aggid_arena); 11353 #endif 11354 11355 if (aggid - 1 >= state->dts_naggregations) { 11356 dtrace_aggregation_t **oaggs = state->dts_aggregations; 11357 dtrace_aggregation_t **aggs; 11358 int naggs = state->dts_naggregations << 1; 11359 int onaggs = state->dts_naggregations; 11360 11361 ASSERT(aggid == state->dts_naggregations + 1); 11362 11363 if (naggs == 0) { 11364 ASSERT(oaggs == NULL); 11365 naggs = 1; 11366 } 11367 11368 aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP); 11369 11370 if (oaggs != NULL) { 11371 bcopy(oaggs, aggs, onaggs * sizeof (*aggs)); 11372 kmem_free(oaggs, onaggs * sizeof (*aggs)); 11373 } 11374 11375 state->dts_aggregations = aggs; 11376 state->dts_naggregations = naggs; 11377 } 11378 11379 ASSERT(state->dts_aggregations[aggid - 1] == NULL); 11380 state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg; 11381 11382 frec = &agg->dtag_first->dta_rec; 11383 if (frec->dtrd_alignment < sizeof (dtrace_aggid_t)) 11384 frec->dtrd_alignment = sizeof (dtrace_aggid_t); 11385 11386 for (act = agg->dtag_first; act != NULL; act = act->dta_next) { 11387 ASSERT(!act->dta_intuple); 11388 act->dta_intuple = 1; 11389 } 11390 11391 return 
(&agg->dtag_action); 11392 } 11393 11394 static void 11395 dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act) 11396 { 11397 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act; 11398 dtrace_state_t *state = ecb->dte_state; 11399 dtrace_aggid_t aggid = agg->dtag_id; 11400 11401 ASSERT(DTRACEACT_ISAGG(act->dta_kind)); 11402 #ifdef illumos 11403 vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1); 11404 #else 11405 free_unr(state->dts_aggid_arena, aggid); 11406 #endif 11407 11408 ASSERT(state->dts_aggregations[aggid - 1] == agg); 11409 state->dts_aggregations[aggid - 1] = NULL; 11410 11411 kmem_free(agg, sizeof (dtrace_aggregation_t)); 11412 } 11413 11414 static int 11415 dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) 11416 { 11417 dtrace_action_t *action, *last; 11418 dtrace_difo_t *dp = desc->dtad_difo; 11419 uint32_t size = 0, align = sizeof (uint8_t), mask; 11420 uint16_t format = 0; 11421 dtrace_recdesc_t *rec; 11422 dtrace_state_t *state = ecb->dte_state; 11423 dtrace_optval_t *opt = state->dts_options, nframes = 0, strsize; 11424 uint64_t arg = desc->dtad_arg; 11425 11426 ASSERT(MUTEX_HELD(&dtrace_lock)); 11427 ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1); 11428 11429 if (DTRACEACT_ISAGG(desc->dtad_kind)) { 11430 /* 11431 * If this is an aggregating action, there must be neither 11432 * a speculate nor a commit on the action chain. 
11433 */ 11434 dtrace_action_t *act; 11435 11436 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 11437 if (act->dta_kind == DTRACEACT_COMMIT) 11438 return (EINVAL); 11439 11440 if (act->dta_kind == DTRACEACT_SPECULATE) 11441 return (EINVAL); 11442 } 11443 11444 action = dtrace_ecb_aggregation_create(ecb, desc); 11445 11446 if (action == NULL) 11447 return (EINVAL); 11448 } else { 11449 if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) || 11450 (desc->dtad_kind == DTRACEACT_DIFEXPR && 11451 dp != NULL && dp->dtdo_destructive)) { 11452 state->dts_destructive = 1; 11453 } 11454 11455 switch (desc->dtad_kind) { 11456 case DTRACEACT_PRINTF: 11457 case DTRACEACT_PRINTA: 11458 case DTRACEACT_SYSTEM: 11459 case DTRACEACT_FREOPEN: 11460 case DTRACEACT_DIFEXPR: 11461 /* 11462 * We know that our arg is a string -- turn it into a 11463 * format. 11464 */ 11465 if (arg == 0) { 11466 ASSERT(desc->dtad_kind == DTRACEACT_PRINTA || 11467 desc->dtad_kind == DTRACEACT_DIFEXPR); 11468 format = 0; 11469 } else { 11470 ASSERT(arg != 0); 11471 #ifdef illumos 11472 ASSERT(arg > KERNELBASE); 11473 #endif 11474 format = dtrace_format_add(state, 11475 (char *)(uintptr_t)arg); 11476 } 11477 11478 /*FALLTHROUGH*/ 11479 case DTRACEACT_LIBACT: 11480 case DTRACEACT_TRACEMEM: 11481 case DTRACEACT_TRACEMEM_DYNSIZE: 11482 if (dp == NULL) 11483 return (EINVAL); 11484 11485 if ((size = dp->dtdo_rtype.dtdt_size) != 0) 11486 break; 11487 11488 if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) { 11489 if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) 11490 return (EINVAL); 11491 11492 size = opt[DTRACEOPT_STRSIZE]; 11493 } 11494 11495 break; 11496 11497 case DTRACEACT_STACK: 11498 if ((nframes = arg) == 0) { 11499 nframes = opt[DTRACEOPT_STACKFRAMES]; 11500 ASSERT(nframes > 0); 11501 arg = nframes; 11502 } 11503 11504 size = nframes * sizeof (pc_t); 11505 break; 11506 11507 case DTRACEACT_JSTACK: 11508 if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0) 11509 strsize = opt[DTRACEOPT_JSTACKSTRSIZE]; 
11510 11511 if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) 11512 nframes = opt[DTRACEOPT_JSTACKFRAMES]; 11513 11514 arg = DTRACE_USTACK_ARG(nframes, strsize); 11515 11516 /*FALLTHROUGH*/ 11517 case DTRACEACT_USTACK: 11518 if (desc->dtad_kind != DTRACEACT_JSTACK && 11519 (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) { 11520 strsize = DTRACE_USTACK_STRSIZE(arg); 11521 nframes = opt[DTRACEOPT_USTACKFRAMES]; 11522 ASSERT(nframes > 0); 11523 arg = DTRACE_USTACK_ARG(nframes, strsize); 11524 } 11525 11526 /* 11527 * Save a slot for the pid. 11528 */ 11529 size = (nframes + 1) * sizeof (uint64_t); 11530 size += DTRACE_USTACK_STRSIZE(arg); 11531 size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t))); 11532 11533 break; 11534 11535 case DTRACEACT_SYM: 11536 case DTRACEACT_MOD: 11537 if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) != 11538 sizeof (uint64_t)) || 11539 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) 11540 return (EINVAL); 11541 break; 11542 11543 case DTRACEACT_USYM: 11544 case DTRACEACT_UMOD: 11545 case DTRACEACT_UADDR: 11546 if (dp == NULL || 11547 (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) || 11548 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) 11549 return (EINVAL); 11550 11551 /* 11552 * We have a slot for the pid, plus a slot for the 11553 * argument. To keep things simple (aligned with 11554 * bitness-neutral sizing), we store each as a 64-bit 11555 * quantity. 
11556 */ 11557 size = 2 * sizeof (uint64_t); 11558 break; 11559 11560 case DTRACEACT_STOP: 11561 case DTRACEACT_BREAKPOINT: 11562 case DTRACEACT_PANIC: 11563 break; 11564 11565 case DTRACEACT_CHILL: 11566 case DTRACEACT_DISCARD: 11567 case DTRACEACT_RAISE: 11568 if (dp == NULL) 11569 return (EINVAL); 11570 break; 11571 11572 case DTRACEACT_EXIT: 11573 if (dp == NULL || 11574 (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) || 11575 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) 11576 return (EINVAL); 11577 break; 11578 11579 case DTRACEACT_SPECULATE: 11580 if (ecb->dte_size > sizeof (dtrace_rechdr_t)) 11581 return (EINVAL); 11582 11583 if (dp == NULL) 11584 return (EINVAL); 11585 11586 state->dts_speculates = 1; 11587 break; 11588 11589 case DTRACEACT_PRINTM: 11590 size = dp->dtdo_rtype.dtdt_size; 11591 break; 11592 11593 case DTRACEACT_COMMIT: { 11594 dtrace_action_t *act = ecb->dte_action; 11595 11596 for (; act != NULL; act = act->dta_next) { 11597 if (act->dta_kind == DTRACEACT_COMMIT) 11598 return (EINVAL); 11599 } 11600 11601 if (dp == NULL) 11602 return (EINVAL); 11603 break; 11604 } 11605 11606 default: 11607 return (EINVAL); 11608 } 11609 11610 if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) { 11611 /* 11612 * If this is a data-storing action or a speculate, 11613 * we must be sure that there isn't a commit on the 11614 * action chain. 
11615 */ 11616 dtrace_action_t *act = ecb->dte_action; 11617 11618 for (; act != NULL; act = act->dta_next) { 11619 if (act->dta_kind == DTRACEACT_COMMIT) 11620 return (EINVAL); 11621 } 11622 } 11623 11624 action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP); 11625 action->dta_rec.dtrd_size = size; 11626 } 11627 11628 action->dta_refcnt = 1; 11629 rec = &action->dta_rec; 11630 size = rec->dtrd_size; 11631 11632 for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) { 11633 if (!(size & mask)) { 11634 align = mask + 1; 11635 break; 11636 } 11637 } 11638 11639 action->dta_kind = desc->dtad_kind; 11640 11641 if ((action->dta_difo = dp) != NULL) 11642 dtrace_difo_hold(dp); 11643 11644 rec->dtrd_action = action->dta_kind; 11645 rec->dtrd_arg = arg; 11646 rec->dtrd_uarg = desc->dtad_uarg; 11647 rec->dtrd_alignment = (uint16_t)align; 11648 rec->dtrd_format = format; 11649 11650 if ((last = ecb->dte_action_last) != NULL) { 11651 ASSERT(ecb->dte_action != NULL); 11652 action->dta_prev = last; 11653 last->dta_next = action; 11654 } else { 11655 ASSERT(ecb->dte_action == NULL); 11656 ecb->dte_action = action; 11657 } 11658 11659 ecb->dte_action_last = action; 11660 11661 return (0); 11662 } 11663 11664 static void 11665 dtrace_ecb_action_remove(dtrace_ecb_t *ecb) 11666 { 11667 dtrace_action_t *act = ecb->dte_action, *next; 11668 dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate; 11669 dtrace_difo_t *dp; 11670 uint16_t format; 11671 11672 if (act != NULL && act->dta_refcnt > 1) { 11673 ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1); 11674 act->dta_refcnt--; 11675 } else { 11676 for (; act != NULL; act = next) { 11677 next = act->dta_next; 11678 ASSERT(next != NULL || act == ecb->dte_action_last); 11679 ASSERT(act->dta_refcnt == 1); 11680 11681 if ((format = act->dta_rec.dtrd_format) != 0) 11682 dtrace_format_remove(ecb->dte_state, format); 11683 11684 if ((dp = act->dta_difo) != NULL) 11685 dtrace_difo_release(dp, vstate); 11686 11687 if 
(DTRACEACT_ISAGG(act->dta_kind)) { 11688 dtrace_ecb_aggregation_destroy(ecb, act); 11689 } else { 11690 kmem_free(act, sizeof (dtrace_action_t)); 11691 } 11692 } 11693 } 11694 11695 ecb->dte_action = NULL; 11696 ecb->dte_action_last = NULL; 11697 ecb->dte_size = 0; 11698 } 11699 11700 static void 11701 dtrace_ecb_disable(dtrace_ecb_t *ecb) 11702 { 11703 /* 11704 * We disable the ECB by removing it from its probe. 11705 */ 11706 dtrace_ecb_t *pecb, *prev = NULL; 11707 dtrace_probe_t *probe = ecb->dte_probe; 11708 11709 ASSERT(MUTEX_HELD(&dtrace_lock)); 11710 11711 if (probe == NULL) { 11712 /* 11713 * This is the NULL probe; there is nothing to disable. 11714 */ 11715 return; 11716 } 11717 11718 for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) { 11719 if (pecb == ecb) 11720 break; 11721 prev = pecb; 11722 } 11723 11724 ASSERT(pecb != NULL); 11725 11726 if (prev == NULL) { 11727 probe->dtpr_ecb = ecb->dte_next; 11728 } else { 11729 prev->dte_next = ecb->dte_next; 11730 } 11731 11732 if (ecb == probe->dtpr_ecb_last) { 11733 ASSERT(ecb->dte_next == NULL); 11734 probe->dtpr_ecb_last = prev; 11735 } 11736 11737 /* 11738 * The ECB has been disconnected from the probe; now sync to assure 11739 * that all CPUs have seen the change before returning. 11740 */ 11741 dtrace_sync(); 11742 11743 if (probe->dtpr_ecb == NULL) { 11744 /* 11745 * That was the last ECB on the probe; clear the predicate 11746 * cache ID for the probe, disable it and sync one more time 11747 * to assure that we'll never hit it again. 11748 */ 11749 dtrace_provider_t *prov = probe->dtpr_provider; 11750 11751 ASSERT(ecb->dte_next == NULL); 11752 ASSERT(probe->dtpr_ecb_last == NULL); 11753 probe->dtpr_predcache = DTRACE_CACHEIDNONE; 11754 prov->dtpv_pops.dtps_disable(prov->dtpv_arg, 11755 probe->dtpr_id, probe->dtpr_arg); 11756 dtrace_sync(); 11757 } else { 11758 /* 11759 * There is at least one ECB remaining on the probe. 
If there 11760 * is _exactly_ one, set the probe's predicate cache ID to be 11761 * the predicate cache ID of the remaining ECB. 11762 */ 11763 ASSERT(probe->dtpr_ecb_last != NULL); 11764 ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE); 11765 11766 if (probe->dtpr_ecb == probe->dtpr_ecb_last) { 11767 dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate; 11768 11769 ASSERT(probe->dtpr_ecb->dte_next == NULL); 11770 11771 if (p != NULL) 11772 probe->dtpr_predcache = p->dtp_cacheid; 11773 } 11774 11775 ecb->dte_next = NULL; 11776 } 11777 } 11778 11779 static void 11780 dtrace_ecb_destroy(dtrace_ecb_t *ecb) 11781 { 11782 dtrace_state_t *state = ecb->dte_state; 11783 dtrace_vstate_t *vstate = &state->dts_vstate; 11784 dtrace_predicate_t *pred; 11785 dtrace_epid_t epid = ecb->dte_epid; 11786 11787 ASSERT(MUTEX_HELD(&dtrace_lock)); 11788 ASSERT(ecb->dte_next == NULL); 11789 ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb); 11790 11791 if ((pred = ecb->dte_predicate) != NULL) 11792 dtrace_predicate_release(pred, vstate); 11793 11794 dtrace_ecb_action_remove(ecb); 11795 11796 ASSERT(state->dts_ecbs[epid - 1] == ecb); 11797 state->dts_ecbs[epid - 1] = NULL; 11798 11799 kmem_free(ecb, sizeof (dtrace_ecb_t)); 11800 } 11801 11802 static dtrace_ecb_t * 11803 dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe, 11804 dtrace_enabling_t *enab) 11805 { 11806 dtrace_ecb_t *ecb; 11807 dtrace_predicate_t *pred; 11808 dtrace_actdesc_t *act; 11809 dtrace_provider_t *prov; 11810 dtrace_ecbdesc_t *desc = enab->dten_current; 11811 11812 ASSERT(MUTEX_HELD(&dtrace_lock)); 11813 ASSERT(state != NULL); 11814 11815 ecb = dtrace_ecb_add(state, probe); 11816 ecb->dte_uarg = desc->dted_uarg; 11817 11818 if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) { 11819 dtrace_predicate_hold(pred); 11820 ecb->dte_predicate = pred; 11821 } 11822 11823 if (probe != NULL) { 11824 /* 11825 * If the provider shows more leg than the consumer is old 11826 * enough to see, we 
need to enable the appropriate implicit 11827 * predicate bits to prevent the ecb from activating at 11828 * revealing times. 11829 * 11830 * Providers specifying DTRACE_PRIV_USER at register time 11831 * are stating that they need the /proc-style privilege 11832 * model to be enforced, and this is what DTRACE_COND_OWNER 11833 * and DTRACE_COND_ZONEOWNER will then do at probe time. 11834 */ 11835 prov = probe->dtpr_provider; 11836 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) && 11837 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER)) 11838 ecb->dte_cond |= DTRACE_COND_OWNER; 11839 11840 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) && 11841 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER)) 11842 ecb->dte_cond |= DTRACE_COND_ZONEOWNER; 11843 11844 /* 11845 * If the provider shows us kernel innards and the user 11846 * is lacking sufficient privilege, enable the 11847 * DTRACE_COND_USERMODE implicit predicate. 11848 */ 11849 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) && 11850 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL)) 11851 ecb->dte_cond |= DTRACE_COND_USERMODE; 11852 } 11853 11854 if (dtrace_ecb_create_cache != NULL) { 11855 /* 11856 * If we have a cached ecb, we'll use its action list instead 11857 * of creating our own (saving both time and space). 
11858 */ 11859 dtrace_ecb_t *cached = dtrace_ecb_create_cache; 11860 dtrace_action_t *act = cached->dte_action; 11861 11862 if (act != NULL) { 11863 ASSERT(act->dta_refcnt > 0); 11864 act->dta_refcnt++; 11865 ecb->dte_action = act; 11866 ecb->dte_action_last = cached->dte_action_last; 11867 ecb->dte_needed = cached->dte_needed; 11868 ecb->dte_size = cached->dte_size; 11869 ecb->dte_alignment = cached->dte_alignment; 11870 } 11871 11872 return (ecb); 11873 } 11874 11875 for (act = desc->dted_action; act != NULL; act = act->dtad_next) { 11876 if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) { 11877 dtrace_ecb_destroy(ecb); 11878 return (NULL); 11879 } 11880 } 11881 11882 if ((enab->dten_error = dtrace_ecb_resize(ecb)) != 0) { 11883 dtrace_ecb_destroy(ecb); 11884 return (NULL); 11885 } 11886 11887 return (dtrace_ecb_create_cache = ecb); 11888 } 11889 11890 static int 11891 dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg) 11892 { 11893 dtrace_ecb_t *ecb; 11894 dtrace_enabling_t *enab = arg; 11895 dtrace_state_t *state = enab->dten_vstate->dtvs_state; 11896 11897 ASSERT(state != NULL); 11898 11899 if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) { 11900 /* 11901 * This probe was created in a generation for which this 11902 * enabling has previously created ECBs; we don't want to 11903 * enable it again, so just kick out. 
11904 */ 11905 return (DTRACE_MATCH_NEXT); 11906 } 11907 11908 if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL) 11909 return (DTRACE_MATCH_DONE); 11910 11911 dtrace_ecb_enable(ecb); 11912 return (DTRACE_MATCH_NEXT); 11913 } 11914 11915 static dtrace_ecb_t * 11916 dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id) 11917 { 11918 dtrace_ecb_t *ecb; 11919 11920 ASSERT(MUTEX_HELD(&dtrace_lock)); 11921 11922 if (id == 0 || id > state->dts_necbs) 11923 return (NULL); 11924 11925 ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL); 11926 ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id); 11927 11928 return (state->dts_ecbs[id - 1]); 11929 } 11930 11931 static dtrace_aggregation_t * 11932 dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id) 11933 { 11934 dtrace_aggregation_t *agg; 11935 11936 ASSERT(MUTEX_HELD(&dtrace_lock)); 11937 11938 if (id == 0 || id > state->dts_naggregations) 11939 return (NULL); 11940 11941 ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL); 11942 ASSERT((agg = state->dts_aggregations[id - 1]) == NULL || 11943 agg->dtag_id == id); 11944 11945 return (state->dts_aggregations[id - 1]); 11946 } 11947 11948 /* 11949 * DTrace Buffer Functions 11950 * 11951 * The following functions manipulate DTrace buffers. Most of these functions 11952 * are called in the context of establishing or processing consumer state; 11953 * exceptions are explicitly noted. 11954 */ 11955 11956 /* 11957 * Note: called from cross call context. This function switches the two 11958 * buffers on a given CPU. The atomicity of this operation is assured by 11959 * disabling interrupts while the actual switch takes place; the disabling of 11960 * interrupts serializes the execution with any execution of dtrace_probe() on 11961 * the same CPU. 
11962 */ 11963 static void 11964 dtrace_buffer_switch(dtrace_buffer_t *buf) 11965 { 11966 caddr_t tomax = buf->dtb_tomax; 11967 caddr_t xamot = buf->dtb_xamot; 11968 dtrace_icookie_t cookie; 11969 hrtime_t now; 11970 11971 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); 11972 ASSERT(!(buf->dtb_flags & DTRACEBUF_RING)); 11973 11974 cookie = dtrace_interrupt_disable(); 11975 now = dtrace_gethrtime(); 11976 buf->dtb_tomax = xamot; 11977 buf->dtb_xamot = tomax; 11978 buf->dtb_xamot_drops = buf->dtb_drops; 11979 buf->dtb_xamot_offset = buf->dtb_offset; 11980 buf->dtb_xamot_errors = buf->dtb_errors; 11981 buf->dtb_xamot_flags = buf->dtb_flags; 11982 buf->dtb_offset = 0; 11983 buf->dtb_drops = 0; 11984 buf->dtb_errors = 0; 11985 buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED); 11986 buf->dtb_interval = now - buf->dtb_switched; 11987 buf->dtb_switched = now; 11988 dtrace_interrupt_enable(cookie); 11989 } 11990 11991 /* 11992 * Note: called from cross call context. This function activates a buffer 11993 * on a CPU. As with dtrace_buffer_switch(), the atomicity of the operation 11994 * is guaranteed by the disabling of interrupts. 11995 */ 11996 static void 11997 dtrace_buffer_activate(dtrace_state_t *state) 11998 { 11999 dtrace_buffer_t *buf; 12000 dtrace_icookie_t cookie = dtrace_interrupt_disable(); 12001 12002 buf = &state->dts_buffer[curcpu]; 12003 12004 if (buf->dtb_tomax != NULL) { 12005 /* 12006 * We might like to assert that the buffer is marked inactive, 12007 * but this isn't necessarily true: the buffer for the CPU 12008 * that processes the BEGIN probe has its buffer activated 12009 * manually. In this case, we take the (harmless) action 12010 * re-clearing the bit INACTIVE bit. 12011 */ 12012 buf->dtb_flags &= ~DTRACEBUF_INACTIVE; 12013 } 12014 12015 dtrace_interrupt_enable(cookie); 12016 } 12017 12018 #ifdef __FreeBSD__ 12019 /* 12020 * Activate the specified per-CPU buffer. 
This is used instead of 12021 * dtrace_buffer_activate() when APs have not yet started, i.e. when 12022 * activating anonymous state. 12023 */ 12024 static void 12025 dtrace_buffer_activate_cpu(dtrace_state_t *state, int cpu) 12026 { 12027 12028 if (state->dts_buffer[cpu].dtb_tomax != NULL) 12029 state->dts_buffer[cpu].dtb_flags &= ~DTRACEBUF_INACTIVE; 12030 } 12031 #endif 12032 12033 static int 12034 dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, 12035 processorid_t cpu, int *factor) 12036 { 12037 #ifdef illumos 12038 cpu_t *cp; 12039 #endif 12040 dtrace_buffer_t *buf; 12041 int allocated = 0, desired = 0; 12042 12043 #ifdef illumos 12044 ASSERT(MUTEX_HELD(&cpu_lock)); 12045 ASSERT(MUTEX_HELD(&dtrace_lock)); 12046 12047 *factor = 1; 12048 12049 if (size > dtrace_nonroot_maxsize && 12050 !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE)) 12051 return (EFBIG); 12052 12053 cp = cpu_list; 12054 12055 do { 12056 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id) 12057 continue; 12058 12059 buf = &bufs[cp->cpu_id]; 12060 12061 /* 12062 * If there is already a buffer allocated for this CPU, it 12063 * is only possible that this is a DR event. 
In this case, 12064 */ 12065 if (buf->dtb_tomax != NULL) { 12066 ASSERT(buf->dtb_size == size); 12067 continue; 12068 } 12069 12070 ASSERT(buf->dtb_xamot == NULL); 12071 12072 if ((buf->dtb_tomax = kmem_zalloc(size, 12073 KM_NOSLEEP | KM_NORMALPRI)) == NULL) 12074 goto err; 12075 12076 buf->dtb_size = size; 12077 buf->dtb_flags = flags; 12078 buf->dtb_offset = 0; 12079 buf->dtb_drops = 0; 12080 12081 if (flags & DTRACEBUF_NOSWITCH) 12082 continue; 12083 12084 if ((buf->dtb_xamot = kmem_zalloc(size, 12085 KM_NOSLEEP | KM_NORMALPRI)) == NULL) 12086 goto err; 12087 } while ((cp = cp->cpu_next) != cpu_list); 12088 12089 return (0); 12090 12091 err: 12092 cp = cpu_list; 12093 12094 do { 12095 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id) 12096 continue; 12097 12098 buf = &bufs[cp->cpu_id]; 12099 desired += 2; 12100 12101 if (buf->dtb_xamot != NULL) { 12102 ASSERT(buf->dtb_tomax != NULL); 12103 ASSERT(buf->dtb_size == size); 12104 kmem_free(buf->dtb_xamot, size); 12105 allocated++; 12106 } 12107 12108 if (buf->dtb_tomax != NULL) { 12109 ASSERT(buf->dtb_size == size); 12110 kmem_free(buf->dtb_tomax, size); 12111 allocated++; 12112 } 12113 12114 buf->dtb_tomax = NULL; 12115 buf->dtb_xamot = NULL; 12116 buf->dtb_size = 0; 12117 } while ((cp = cp->cpu_next) != cpu_list); 12118 #else 12119 int i; 12120 12121 *factor = 1; 12122 #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ 12123 defined(__mips__) || defined(__powerpc__) || defined(__riscv__) 12124 /* 12125 * FreeBSD isn't good at limiting the amount of memory we 12126 * ask to malloc, so let's place a limit here before trying 12127 * to do something that might well end in tears at bedtime. 
12128 */ 12129 if (size > physmem * PAGE_SIZE / (128 * (mp_maxid + 1))) 12130 return (ENOMEM); 12131 #endif 12132 12133 ASSERT(MUTEX_HELD(&dtrace_lock)); 12134 CPU_FOREACH(i) { 12135 if (cpu != DTRACE_CPUALL && cpu != i) 12136 continue; 12137 12138 buf = &bufs[i]; 12139 12140 /* 12141 * If there is already a buffer allocated for this CPU, it 12142 * is only possible that this is a DR event. In this case, 12143 * the buffer size must match our specified size. 12144 */ 12145 if (buf->dtb_tomax != NULL) { 12146 ASSERT(buf->dtb_size == size); 12147 continue; 12148 } 12149 12150 ASSERT(buf->dtb_xamot == NULL); 12151 12152 if ((buf->dtb_tomax = kmem_zalloc(size, 12153 KM_NOSLEEP | KM_NORMALPRI)) == NULL) 12154 goto err; 12155 12156 buf->dtb_size = size; 12157 buf->dtb_flags = flags; 12158 buf->dtb_offset = 0; 12159 buf->dtb_drops = 0; 12160 12161 if (flags & DTRACEBUF_NOSWITCH) 12162 continue; 12163 12164 if ((buf->dtb_xamot = kmem_zalloc(size, 12165 KM_NOSLEEP | KM_NORMALPRI)) == NULL) 12166 goto err; 12167 } 12168 12169 return (0); 12170 12171 err: 12172 /* 12173 * Error allocating memory, so free the buffers that were 12174 * allocated before the failed allocation. 12175 */ 12176 CPU_FOREACH(i) { 12177 if (cpu != DTRACE_CPUALL && cpu != i) 12178 continue; 12179 12180 buf = &bufs[i]; 12181 desired += 2; 12182 12183 if (buf->dtb_xamot != NULL) { 12184 ASSERT(buf->dtb_tomax != NULL); 12185 ASSERT(buf->dtb_size == size); 12186 kmem_free(buf->dtb_xamot, size); 12187 allocated++; 12188 } 12189 12190 if (buf->dtb_tomax != NULL) { 12191 ASSERT(buf->dtb_size == size); 12192 kmem_free(buf->dtb_tomax, size); 12193 allocated++; 12194 } 12195 12196 buf->dtb_tomax = NULL; 12197 buf->dtb_xamot = NULL; 12198 buf->dtb_size = 0; 12199 12200 } 12201 #endif 12202 *factor = desired / (allocated > 0 ? allocated : 1); 12203 12204 return (ENOMEM); 12205 } 12206 12207 /* 12208 * Note: called from probe context. This function just increments the drop 12209 * count on a buffer. 
It has been made a function to allow for the 12210 * possibility of understanding the source of mysterious drop counts. (A 12211 * problem for which one may be particularly disappointed that DTrace cannot 12212 * be used to understand DTrace.) 12213 */ 12214 static void 12215 dtrace_buffer_drop(dtrace_buffer_t *buf) 12216 { 12217 buf->dtb_drops++; 12218 } 12219 12220 /* 12221 * Note: called from probe context. This function is called to reserve space 12222 * in a buffer. If mstate is non-NULL, sets the scratch base and size in the 12223 * mstate. Returns the new offset in the buffer, or a negative value if an 12224 * error has occurred. 12225 */ 12226 static intptr_t 12227 dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, 12228 dtrace_state_t *state, dtrace_mstate_t *mstate) 12229 { 12230 intptr_t offs = buf->dtb_offset, soffs; 12231 intptr_t woffs; 12232 caddr_t tomax; 12233 size_t total; 12234 12235 if (buf->dtb_flags & DTRACEBUF_INACTIVE) 12236 return (-1); 12237 12238 if ((tomax = buf->dtb_tomax) == NULL) { 12239 dtrace_buffer_drop(buf); 12240 return (-1); 12241 } 12242 12243 if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) { 12244 while (offs & (align - 1)) { 12245 /* 12246 * Assert that our alignment is off by a number which 12247 * is itself sizeof (uint32_t) aligned. 
12248 */ 12249 ASSERT(!((align - (offs & (align - 1))) & 12250 (sizeof (uint32_t) - 1))); 12251 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE); 12252 offs += sizeof (uint32_t); 12253 } 12254 12255 if ((soffs = offs + needed) > buf->dtb_size) { 12256 dtrace_buffer_drop(buf); 12257 return (-1); 12258 } 12259 12260 if (mstate == NULL) 12261 return (offs); 12262 12263 mstate->dtms_scratch_base = (uintptr_t)tomax + soffs; 12264 mstate->dtms_scratch_size = buf->dtb_size - soffs; 12265 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base; 12266 12267 return (offs); 12268 } 12269 12270 if (buf->dtb_flags & DTRACEBUF_FILL) { 12271 if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN && 12272 (buf->dtb_flags & DTRACEBUF_FULL)) 12273 return (-1); 12274 goto out; 12275 } 12276 12277 total = needed + (offs & (align - 1)); 12278 12279 /* 12280 * For a ring buffer, life is quite a bit more complicated. Before 12281 * we can store any padding, we need to adjust our wrapping offset. 12282 * (If we've never before wrapped or we're not about to, no adjustment 12283 * is required.) 12284 */ 12285 if ((buf->dtb_flags & DTRACEBUF_WRAPPED) || 12286 offs + total > buf->dtb_size) { 12287 woffs = buf->dtb_xamot_offset; 12288 12289 if (offs + total > buf->dtb_size) { 12290 /* 12291 * We can't fit in the end of the buffer. First, a 12292 * sanity check that we can fit in the buffer at all. 12293 */ 12294 if (total > buf->dtb_size) { 12295 dtrace_buffer_drop(buf); 12296 return (-1); 12297 } 12298 12299 /* 12300 * We're going to be storing at the top of the buffer, 12301 * so now we need to deal with the wrapped offset. We 12302 * only reset our wrapped offset to 0 if it is 12303 * currently greater than the current offset. If it 12304 * is less than the current offset, it is because a 12305 * previous allocation induced a wrap -- but the 12306 * allocation didn't subsequently take the space due 12307 * to an error or false predicate evaluation. 
In this 12308 * case, we'll just leave the wrapped offset alone: if 12309 * the wrapped offset hasn't been advanced far enough 12310 * for this allocation, it will be adjusted in the 12311 * lower loop. 12312 */ 12313 if (buf->dtb_flags & DTRACEBUF_WRAPPED) { 12314 if (woffs >= offs) 12315 woffs = 0; 12316 } else { 12317 woffs = 0; 12318 } 12319 12320 /* 12321 * Now we know that we're going to be storing to the 12322 * top of the buffer and that there is room for us 12323 * there. We need to clear the buffer from the current 12324 * offset to the end (there may be old gunk there). 12325 */ 12326 while (offs < buf->dtb_size) 12327 tomax[offs++] = 0; 12328 12329 /* 12330 * We need to set our offset to zero. And because we 12331 * are wrapping, we need to set the bit indicating as 12332 * much. We can also adjust our needed space back 12333 * down to the space required by the ECB -- we know 12334 * that the top of the buffer is aligned. 12335 */ 12336 offs = 0; 12337 total = needed; 12338 buf->dtb_flags |= DTRACEBUF_WRAPPED; 12339 } else { 12340 /* 12341 * There is room for us in the buffer, so we simply 12342 * need to check the wrapped offset. 12343 */ 12344 if (woffs < offs) { 12345 /* 12346 * The wrapped offset is less than the offset. 12347 * This can happen if we allocated buffer space 12348 * that induced a wrap, but then we didn't 12349 * subsequently take the space due to an error 12350 * or false predicate evaluation. This is 12351 * okay; we know that _this_ allocation isn't 12352 * going to induce a wrap. We still can't 12353 * reset the wrapped offset to be zero, 12354 * however: the space may have been trashed in 12355 * the previous failed probe attempt. But at 12356 * least the wrapped offset doesn't need to 12357 * be adjusted at all... 
12358 */ 12359 goto out; 12360 } 12361 } 12362 12363 while (offs + total > woffs) { 12364 dtrace_epid_t epid = *(uint32_t *)(tomax + woffs); 12365 size_t size; 12366 12367 if (epid == DTRACE_EPIDNONE) { 12368 size = sizeof (uint32_t); 12369 } else { 12370 ASSERT3U(epid, <=, state->dts_necbs); 12371 ASSERT(state->dts_ecbs[epid - 1] != NULL); 12372 12373 size = state->dts_ecbs[epid - 1]->dte_size; 12374 } 12375 12376 ASSERT(woffs + size <= buf->dtb_size); 12377 ASSERT(size != 0); 12378 12379 if (woffs + size == buf->dtb_size) { 12380 /* 12381 * We've reached the end of the buffer; we want 12382 * to set the wrapped offset to 0 and break 12383 * out. However, if the offs is 0, then we're 12384 * in a strange edge-condition: the amount of 12385 * space that we want to reserve plus the size 12386 * of the record that we're overwriting is 12387 * greater than the size of the buffer. This 12388 * is problematic because if we reserve the 12389 * space but subsequently don't consume it (due 12390 * to a failed predicate or error) the wrapped 12391 * offset will be 0 -- yet the EPID at offset 0 12392 * will not be committed. This situation is 12393 * relatively easy to deal with: if we're in 12394 * this case, the buffer is indistinguishable 12395 * from one that hasn't wrapped; we need only 12396 * finish the job by clearing the wrapped bit, 12397 * explicitly setting the offset to be 0, and 12398 * zero'ing out the old data in the buffer. 12399 */ 12400 if (offs == 0) { 12401 buf->dtb_flags &= ~DTRACEBUF_WRAPPED; 12402 buf->dtb_offset = 0; 12403 woffs = total; 12404 12405 while (woffs < buf->dtb_size) 12406 tomax[woffs++] = 0; 12407 } 12408 12409 woffs = 0; 12410 break; 12411 } 12412 12413 woffs += size; 12414 } 12415 12416 /* 12417 * We have a wrapped offset. It may be that the wrapped offset 12418 * has become zero -- that's okay. 12419 */ 12420 buf->dtb_xamot_offset = woffs; 12421 } 12422 12423 out: 12424 /* 12425 * Now we can plow the buffer with any necessary padding. 
12426 */ 12427 while (offs & (align - 1)) { 12428 /* 12429 * Assert that our alignment is off by a number which 12430 * is itself sizeof (uint32_t) aligned. 12431 */ 12432 ASSERT(!((align - (offs & (align - 1))) & 12433 (sizeof (uint32_t) - 1))); 12434 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE); 12435 offs += sizeof (uint32_t); 12436 } 12437 12438 if (buf->dtb_flags & DTRACEBUF_FILL) { 12439 if (offs + needed > buf->dtb_size - state->dts_reserve) { 12440 buf->dtb_flags |= DTRACEBUF_FULL; 12441 return (-1); 12442 } 12443 } 12444 12445 if (mstate == NULL) 12446 return (offs); 12447 12448 /* 12449 * For ring buffers and fill buffers, the scratch space is always 12450 * the inactive buffer. 12451 */ 12452 mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot; 12453 mstate->dtms_scratch_size = buf->dtb_size; 12454 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base; 12455 12456 return (offs); 12457 } 12458 12459 static void 12460 dtrace_buffer_polish(dtrace_buffer_t *buf) 12461 { 12462 ASSERT(buf->dtb_flags & DTRACEBUF_RING); 12463 ASSERT(MUTEX_HELD(&dtrace_lock)); 12464 12465 if (!(buf->dtb_flags & DTRACEBUF_WRAPPED)) 12466 return; 12467 12468 /* 12469 * We need to polish the ring buffer. There are three cases: 12470 * 12471 * - The first (and presumably most common) is that there is no gap 12472 * between the buffer offset and the wrapped offset. In this case, 12473 * there is nothing in the buffer that isn't valid data; we can 12474 * mark the buffer as polished and return. 12475 * 12476 * - The second (less common than the first but still more common 12477 * than the third) is that there is a gap between the buffer offset 12478 * and the wrapped offset, and the wrapped offset is larger than the 12479 * buffer offset. This can happen because of an alignment issue, or 12480 * can happen because of a call to dtrace_buffer_reserve() that 12481 * didn't subsequently consume the buffer space. 
In this case, 12482 * we need to zero the data from the buffer offset to the wrapped 12483 * offset. 12484 * 12485 * - The third (and least common) is that there is a gap between the 12486 * buffer offset and the wrapped offset, but the wrapped offset is 12487 * _less_ than the buffer offset. This can only happen because a 12488 * call to dtrace_buffer_reserve() induced a wrap, but the space 12489 * was not subsequently consumed. In this case, we need to zero the 12490 * space from the offset to the end of the buffer _and_ from the 12491 * top of the buffer to the wrapped offset. 12492 */ 12493 if (buf->dtb_offset < buf->dtb_xamot_offset) { 12494 bzero(buf->dtb_tomax + buf->dtb_offset, 12495 buf->dtb_xamot_offset - buf->dtb_offset); 12496 } 12497 12498 if (buf->dtb_offset > buf->dtb_xamot_offset) { 12499 bzero(buf->dtb_tomax + buf->dtb_offset, 12500 buf->dtb_size - buf->dtb_offset); 12501 bzero(buf->dtb_tomax, buf->dtb_xamot_offset); 12502 } 12503 } 12504 12505 /* 12506 * This routine determines if data generated at the specified time has likely 12507 * been entirely consumed at user-level. This routine is called to determine 12508 * if an ECB on a defunct probe (but for an active enabling) can be safely 12509 * disabled and destroyed. 
12510 */ 12511 static int 12512 dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when) 12513 { 12514 int i; 12515 12516 for (i = 0; i < NCPU; i++) { 12517 dtrace_buffer_t *buf = &bufs[i]; 12518 12519 if (buf->dtb_size == 0) 12520 continue; 12521 12522 if (buf->dtb_flags & DTRACEBUF_RING) 12523 return (0); 12524 12525 if (!buf->dtb_switched && buf->dtb_offset != 0) 12526 return (0); 12527 12528 if (buf->dtb_switched - buf->dtb_interval < when) 12529 return (0); 12530 } 12531 12532 return (1); 12533 } 12534 12535 static void 12536 dtrace_buffer_free(dtrace_buffer_t *bufs) 12537 { 12538 int i; 12539 12540 for (i = 0; i < NCPU; i++) { 12541 dtrace_buffer_t *buf = &bufs[i]; 12542 12543 if (buf->dtb_tomax == NULL) { 12544 ASSERT(buf->dtb_xamot == NULL); 12545 ASSERT(buf->dtb_size == 0); 12546 continue; 12547 } 12548 12549 if (buf->dtb_xamot != NULL) { 12550 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); 12551 kmem_free(buf->dtb_xamot, buf->dtb_size); 12552 } 12553 12554 kmem_free(buf->dtb_tomax, buf->dtb_size); 12555 buf->dtb_size = 0; 12556 buf->dtb_tomax = NULL; 12557 buf->dtb_xamot = NULL; 12558 } 12559 } 12560 12561 /* 12562 * DTrace Enabling Functions 12563 */ 12564 static dtrace_enabling_t * 12565 dtrace_enabling_create(dtrace_vstate_t *vstate) 12566 { 12567 dtrace_enabling_t *enab; 12568 12569 enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP); 12570 enab->dten_vstate = vstate; 12571 12572 return (enab); 12573 } 12574 12575 static void 12576 dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb) 12577 { 12578 dtrace_ecbdesc_t **ndesc; 12579 size_t osize, nsize; 12580 12581 /* 12582 * We can't add to enablings after we've enabled them, or after we've 12583 * retained them. 
12584 */ 12585 ASSERT(enab->dten_probegen == 0); 12586 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); 12587 12588 if (enab->dten_ndesc < enab->dten_maxdesc) { 12589 enab->dten_desc[enab->dten_ndesc++] = ecb; 12590 return; 12591 } 12592 12593 osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *); 12594 12595 if (enab->dten_maxdesc == 0) { 12596 enab->dten_maxdesc = 1; 12597 } else { 12598 enab->dten_maxdesc <<= 1; 12599 } 12600 12601 ASSERT(enab->dten_ndesc < enab->dten_maxdesc); 12602 12603 nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *); 12604 ndesc = kmem_zalloc(nsize, KM_SLEEP); 12605 bcopy(enab->dten_desc, ndesc, osize); 12606 if (enab->dten_desc != NULL) 12607 kmem_free(enab->dten_desc, osize); 12608 12609 enab->dten_desc = ndesc; 12610 enab->dten_desc[enab->dten_ndesc++] = ecb; 12611 } 12612 12613 static void 12614 dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb, 12615 dtrace_probedesc_t *pd) 12616 { 12617 dtrace_ecbdesc_t *new; 12618 dtrace_predicate_t *pred; 12619 dtrace_actdesc_t *act; 12620 12621 /* 12622 * We're going to create a new ECB description that matches the 12623 * specified ECB in every way, but has the specified probe description. 
12624 */ 12625 new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP); 12626 12627 if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL) 12628 dtrace_predicate_hold(pred); 12629 12630 for (act = ecb->dted_action; act != NULL; act = act->dtad_next) 12631 dtrace_actdesc_hold(act); 12632 12633 new->dted_action = ecb->dted_action; 12634 new->dted_pred = ecb->dted_pred; 12635 new->dted_probe = *pd; 12636 new->dted_uarg = ecb->dted_uarg; 12637 12638 dtrace_enabling_add(enab, new); 12639 } 12640 12641 static void 12642 dtrace_enabling_dump(dtrace_enabling_t *enab) 12643 { 12644 int i; 12645 12646 for (i = 0; i < enab->dten_ndesc; i++) { 12647 dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe; 12648 12649 #ifdef __FreeBSD__ 12650 printf("dtrace: enabling probe %d (%s:%s:%s:%s)\n", i, 12651 desc->dtpd_provider, desc->dtpd_mod, 12652 desc->dtpd_func, desc->dtpd_name); 12653 #else 12654 cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i, 12655 desc->dtpd_provider, desc->dtpd_mod, 12656 desc->dtpd_func, desc->dtpd_name); 12657 #endif 12658 } 12659 } 12660 12661 static void 12662 dtrace_enabling_destroy(dtrace_enabling_t *enab) 12663 { 12664 int i; 12665 dtrace_ecbdesc_t *ep; 12666 dtrace_vstate_t *vstate = enab->dten_vstate; 12667 12668 ASSERT(MUTEX_HELD(&dtrace_lock)); 12669 12670 for (i = 0; i < enab->dten_ndesc; i++) { 12671 dtrace_actdesc_t *act, *next; 12672 dtrace_predicate_t *pred; 12673 12674 ep = enab->dten_desc[i]; 12675 12676 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) 12677 dtrace_predicate_release(pred, vstate); 12678 12679 for (act = ep->dted_action; act != NULL; act = next) { 12680 next = act->dtad_next; 12681 dtrace_actdesc_release(act, vstate); 12682 } 12683 12684 kmem_free(ep, sizeof (dtrace_ecbdesc_t)); 12685 } 12686 12687 if (enab->dten_desc != NULL) 12688 kmem_free(enab->dten_desc, 12689 enab->dten_maxdesc * sizeof (dtrace_enabling_t *)); 12690 12691 /* 12692 * If this was a retained enabling, decrement the dts_nretained count 12693 * 
and take it off of the dtrace_retained list. 12694 */ 12695 if (enab->dten_prev != NULL || enab->dten_next != NULL || 12696 dtrace_retained == enab) { 12697 ASSERT(enab->dten_vstate->dtvs_state != NULL); 12698 ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0); 12699 enab->dten_vstate->dtvs_state->dts_nretained--; 12700 dtrace_retained_gen++; 12701 } 12702 12703 if (enab->dten_prev == NULL) { 12704 if (dtrace_retained == enab) { 12705 dtrace_retained = enab->dten_next; 12706 12707 if (dtrace_retained != NULL) 12708 dtrace_retained->dten_prev = NULL; 12709 } 12710 } else { 12711 ASSERT(enab != dtrace_retained); 12712 ASSERT(dtrace_retained != NULL); 12713 enab->dten_prev->dten_next = enab->dten_next; 12714 } 12715 12716 if (enab->dten_next != NULL) { 12717 ASSERT(dtrace_retained != NULL); 12718 enab->dten_next->dten_prev = enab->dten_prev; 12719 } 12720 12721 kmem_free(enab, sizeof (dtrace_enabling_t)); 12722 } 12723 12724 static int 12725 dtrace_enabling_retain(dtrace_enabling_t *enab) 12726 { 12727 dtrace_state_t *state; 12728 12729 ASSERT(MUTEX_HELD(&dtrace_lock)); 12730 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); 12731 ASSERT(enab->dten_vstate != NULL); 12732 12733 state = enab->dten_vstate->dtvs_state; 12734 ASSERT(state != NULL); 12735 12736 /* 12737 * We only allow each state to retain dtrace_retain_max enablings. 
12738 */ 12739 if (state->dts_nretained >= dtrace_retain_max) 12740 return (ENOSPC); 12741 12742 state->dts_nretained++; 12743 dtrace_retained_gen++; 12744 12745 if (dtrace_retained == NULL) { 12746 dtrace_retained = enab; 12747 return (0); 12748 } 12749 12750 enab->dten_next = dtrace_retained; 12751 dtrace_retained->dten_prev = enab; 12752 dtrace_retained = enab; 12753 12754 return (0); 12755 } 12756 12757 static int 12758 dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match, 12759 dtrace_probedesc_t *create) 12760 { 12761 dtrace_enabling_t *new, *enab; 12762 int found = 0, err = ENOENT; 12763 12764 ASSERT(MUTEX_HELD(&dtrace_lock)); 12765 ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN); 12766 ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN); 12767 ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN); 12768 ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN); 12769 12770 new = dtrace_enabling_create(&state->dts_vstate); 12771 12772 /* 12773 * Iterate over all retained enablings, looking for enablings that 12774 * match the specified state. 12775 */ 12776 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { 12777 int i; 12778 12779 /* 12780 * dtvs_state can only be NULL for helper enablings -- and 12781 * helper enablings can't be retained. 12782 */ 12783 ASSERT(enab->dten_vstate->dtvs_state != NULL); 12784 12785 if (enab->dten_vstate->dtvs_state != state) 12786 continue; 12787 12788 /* 12789 * Now iterate over each probe description; we're looking for 12790 * an exact match to the specified probe description. 
12791 */ 12792 for (i = 0; i < enab->dten_ndesc; i++) { 12793 dtrace_ecbdesc_t *ep = enab->dten_desc[i]; 12794 dtrace_probedesc_t *pd = &ep->dted_probe; 12795 12796 if (strcmp(pd->dtpd_provider, match->dtpd_provider)) 12797 continue; 12798 12799 if (strcmp(pd->dtpd_mod, match->dtpd_mod)) 12800 continue; 12801 12802 if (strcmp(pd->dtpd_func, match->dtpd_func)) 12803 continue; 12804 12805 if (strcmp(pd->dtpd_name, match->dtpd_name)) 12806 continue; 12807 12808 /* 12809 * We have a winning probe! Add it to our growing 12810 * enabling. 12811 */ 12812 found = 1; 12813 dtrace_enabling_addlike(new, ep, create); 12814 } 12815 } 12816 12817 if (!found || (err = dtrace_enabling_retain(new)) != 0) { 12818 dtrace_enabling_destroy(new); 12819 return (err); 12820 } 12821 12822 return (0); 12823 } 12824 12825 static void 12826 dtrace_enabling_retract(dtrace_state_t *state) 12827 { 12828 dtrace_enabling_t *enab, *next; 12829 12830 ASSERT(MUTEX_HELD(&dtrace_lock)); 12831 12832 /* 12833 * Iterate over all retained enablings, destroy the enablings retained 12834 * for the specified state. 12835 */ 12836 for (enab = dtrace_retained; enab != NULL; enab = next) { 12837 next = enab->dten_next; 12838 12839 /* 12840 * dtvs_state can only be NULL for helper enablings -- and 12841 * helper enablings can't be retained. 
12842 */ 12843 ASSERT(enab->dten_vstate->dtvs_state != NULL); 12844 12845 if (enab->dten_vstate->dtvs_state == state) { 12846 ASSERT(state->dts_nretained > 0); 12847 dtrace_enabling_destroy(enab); 12848 } 12849 } 12850 12851 ASSERT(state->dts_nretained == 0); 12852 } 12853 12854 static int 12855 dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) 12856 { 12857 int i = 0; 12858 int matched = 0; 12859 12860 ASSERT(MUTEX_HELD(&cpu_lock)); 12861 ASSERT(MUTEX_HELD(&dtrace_lock)); 12862 12863 for (i = 0; i < enab->dten_ndesc; i++) { 12864 dtrace_ecbdesc_t *ep = enab->dten_desc[i]; 12865 12866 enab->dten_current = ep; 12867 enab->dten_error = 0; 12868 12869 matched += dtrace_probe_enable(&ep->dted_probe, enab); 12870 12871 if (enab->dten_error != 0) { 12872 /* 12873 * If we get an error half-way through enabling the 12874 * probes, we kick out -- perhaps with some number of 12875 * them enabled. Leaving enabled probes enabled may 12876 * be slightly confusing for user-level, but we expect 12877 * that no one will attempt to actually drive on in 12878 * the face of such errors. If this is an anonymous 12879 * enabling (indicated with a NULL nmatched pointer), 12880 * we cmn_err() a message. We aren't expecting to 12881 * get such an error -- such as it can exist at all, 12882 * it would be a result of corrupted DOF in the driver 12883 * properties. 
12884 */ 12885 if (nmatched == NULL) { 12886 cmn_err(CE_WARN, "dtrace_enabling_match() " 12887 "error on %p: %d", (void *)ep, 12888 enab->dten_error); 12889 } 12890 12891 return (enab->dten_error); 12892 } 12893 } 12894 12895 enab->dten_probegen = dtrace_probegen; 12896 if (nmatched != NULL) 12897 *nmatched = matched; 12898 12899 return (0); 12900 } 12901 12902 static void 12903 dtrace_enabling_matchall(void) 12904 { 12905 dtrace_enabling_t *enab; 12906 12907 mutex_enter(&cpu_lock); 12908 mutex_enter(&dtrace_lock); 12909 12910 /* 12911 * Iterate over all retained enablings to see if any probes match 12912 * against them. We only perform this operation on enablings for which 12913 * we have sufficient permissions by virtue of being in the global zone 12914 * or in the same zone as the DTrace client. Because we can be called 12915 * after dtrace_detach() has been called, we cannot assert that there 12916 * are retained enablings. We can safely load from dtrace_retained, 12917 * however: the taskq_destroy() at the end of dtrace_detach() will 12918 * block pending our completion. 12919 */ 12920 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { 12921 #ifdef illumos 12922 cred_t *cr = enab->dten_vstate->dtvs_state->dts_cred.dcr_cred; 12923 12924 if (INGLOBALZONE(curproc) || 12925 cr != NULL && getzoneid() == crgetzoneid(cr)) 12926 #endif 12927 (void) dtrace_enabling_match(enab, NULL); 12928 } 12929 12930 mutex_exit(&dtrace_lock); 12931 mutex_exit(&cpu_lock); 12932 } 12933 12934 /* 12935 * If an enabling is to be enabled without having matched probes (that is, if 12936 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the 12937 * enabling must be _primed_ by creating an ECB for every ECB description. 12938 * This must be done to assure that we know the number of speculations, the 12939 * number of aggregations, the minimum buffer size needed, etc. before we 12940 * transition out of DTRACE_ACTIVITY_INACTIVE. 
To do this without actually
 * enabling any probes, we create ECBs for every ECB description, but with a
 * NULL probe -- which is exactly what this function does.
 */
static void
dtrace_enabling_prime(dtrace_state_t *state)
{
	dtrace_enabling_t *enab;
	int n;

	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
		ASSERT(enab->dten_vstate->dtvs_state != NULL);

		/*
		 * Skip enablings that belong to another state, and enablings
		 * that have been primed before.  Priming an enabling more
		 * than once would allow a malicious user to induce resource
		 * exhaustion.  (The ECBs that result from priming aren't
		 * leaked -- but they also aren't deallocated until the
		 * consumer state is destroyed.)
		 */
		if (enab->dten_vstate->dtvs_state != state ||
		    enab->dten_primed)
			continue;

		/* Create an ECB with a NULL probe for each description. */
		n = 0;
		while (n < enab->dten_ndesc) {
			enab->dten_current = enab->dten_desc[n];
			(void) dtrace_probe_enable(NULL, enab);
			n++;
		}

		enab->dten_primed = 1;
	}
}

/*
 * Called to indicate that probes should be provided due to retained
 * enablings.  This is implemented in terms of dtrace_probe_provide(), but it
 * must take an initial lap through the enabling calling the dtps_provide()
 * entry point explicitly to allow for autocreated probes.
12980 */ 12981 static void 12982 dtrace_enabling_provide(dtrace_provider_t *prv) 12983 { 12984 int i, all = 0; 12985 dtrace_probedesc_t desc; 12986 dtrace_genid_t gen; 12987 12988 ASSERT(MUTEX_HELD(&dtrace_lock)); 12989 ASSERT(MUTEX_HELD(&dtrace_provider_lock)); 12990 12991 if (prv == NULL) { 12992 all = 1; 12993 prv = dtrace_provider; 12994 } 12995 12996 do { 12997 dtrace_enabling_t *enab; 12998 void *parg = prv->dtpv_arg; 12999 13000 retry: 13001 gen = dtrace_retained_gen; 13002 for (enab = dtrace_retained; enab != NULL; 13003 enab = enab->dten_next) { 13004 for (i = 0; i < enab->dten_ndesc; i++) { 13005 desc = enab->dten_desc[i]->dted_probe; 13006 mutex_exit(&dtrace_lock); 13007 prv->dtpv_pops.dtps_provide(parg, &desc); 13008 mutex_enter(&dtrace_lock); 13009 /* 13010 * Process the retained enablings again if 13011 * they have changed while we weren't holding 13012 * dtrace_lock. 13013 */ 13014 if (gen != dtrace_retained_gen) 13015 goto retry; 13016 } 13017 } 13018 } while (all && (prv = prv->dtpv_next) != NULL); 13019 13020 mutex_exit(&dtrace_lock); 13021 dtrace_probe_provide(NULL, all ? NULL : prv); 13022 mutex_enter(&dtrace_lock); 13023 } 13024 13025 /* 13026 * Called to reap ECBs that are attached to probes from defunct providers. 13027 */ 13028 static void 13029 dtrace_enabling_reap(void) 13030 { 13031 dtrace_provider_t *prov; 13032 dtrace_probe_t *probe; 13033 dtrace_ecb_t *ecb; 13034 hrtime_t when; 13035 int i; 13036 13037 mutex_enter(&cpu_lock); 13038 mutex_enter(&dtrace_lock); 13039 13040 for (i = 0; i < dtrace_nprobes; i++) { 13041 if ((probe = dtrace_probes[i]) == NULL) 13042 continue; 13043 13044 if (probe->dtpr_ecb == NULL) 13045 continue; 13046 13047 prov = probe->dtpr_provider; 13048 13049 if ((when = prov->dtpv_defunct) == 0) 13050 continue; 13051 13052 /* 13053 * We have ECBs on a defunct provider: we want to reap these 13054 * ECBs to allow the provider to unregister. 
The destruction 13055 * of these ECBs must be done carefully: if we destroy the ECB 13056 * and the consumer later wishes to consume an EPID that 13057 * corresponds to the destroyed ECB (and if the EPID metadata 13058 * has not been previously consumed), the consumer will abort 13059 * processing on the unknown EPID. To reduce (but not, sadly, 13060 * eliminate) the possibility of this, we will only destroy an 13061 * ECB for a defunct provider if, for the state that 13062 * corresponds to the ECB: 13063 * 13064 * (a) There is no speculative tracing (which can effectively 13065 * cache an EPID for an arbitrary amount of time). 13066 * 13067 * (b) The principal buffers have been switched twice since the 13068 * provider became defunct. 13069 * 13070 * (c) The aggregation buffers are of zero size or have been 13071 * switched twice since the provider became defunct. 13072 * 13073 * We use dts_speculates to determine (a) and call a function 13074 * (dtrace_buffer_consumed()) to determine (b) and (c). Note 13075 * that as soon as we've been unable to destroy one of the ECBs 13076 * associated with the probe, we quit trying -- reaping is only 13077 * fruitful in as much as we can destroy all ECBs associated 13078 * with the defunct provider's probes. 
13079 */ 13080 while ((ecb = probe->dtpr_ecb) != NULL) { 13081 dtrace_state_t *state = ecb->dte_state; 13082 dtrace_buffer_t *buf = state->dts_buffer; 13083 dtrace_buffer_t *aggbuf = state->dts_aggbuffer; 13084 13085 if (state->dts_speculates) 13086 break; 13087 13088 if (!dtrace_buffer_consumed(buf, when)) 13089 break; 13090 13091 if (!dtrace_buffer_consumed(aggbuf, when)) 13092 break; 13093 13094 dtrace_ecb_disable(ecb); 13095 ASSERT(probe->dtpr_ecb != ecb); 13096 dtrace_ecb_destroy(ecb); 13097 } 13098 } 13099 13100 mutex_exit(&dtrace_lock); 13101 mutex_exit(&cpu_lock); 13102 } 13103 13104 /* 13105 * DTrace DOF Functions 13106 */ 13107 /*ARGSUSED*/ 13108 static void 13109 dtrace_dof_error(dof_hdr_t *dof, const char *str) 13110 { 13111 if (dtrace_err_verbose) 13112 cmn_err(CE_WARN, "failed to process DOF: %s", str); 13113 13114 #ifdef DTRACE_ERRDEBUG 13115 dtrace_errdebug(str); 13116 #endif 13117 } 13118 13119 /* 13120 * Create DOF out of a currently enabled state. Right now, we only create 13121 * DOF containing the run-time options -- but this could be expanded to create 13122 * complete DOF representing the enabled state. 
13123 */ 13124 static dof_hdr_t * 13125 dtrace_dof_create(dtrace_state_t *state) 13126 { 13127 dof_hdr_t *dof; 13128 dof_sec_t *sec; 13129 dof_optdesc_t *opt; 13130 int i, len = sizeof (dof_hdr_t) + 13131 roundup(sizeof (dof_sec_t), sizeof (uint64_t)) + 13132 sizeof (dof_optdesc_t) * DTRACEOPT_MAX; 13133 13134 ASSERT(MUTEX_HELD(&dtrace_lock)); 13135 13136 dof = kmem_zalloc(len, KM_SLEEP); 13137 dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0; 13138 dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1; 13139 dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2; 13140 dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3; 13141 13142 dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE; 13143 dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE; 13144 dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION; 13145 dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION; 13146 dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS; 13147 dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS; 13148 13149 dof->dofh_flags = 0; 13150 dof->dofh_hdrsize = sizeof (dof_hdr_t); 13151 dof->dofh_secsize = sizeof (dof_sec_t); 13152 dof->dofh_secnum = 1; /* only DOF_SECT_OPTDESC */ 13153 dof->dofh_secoff = sizeof (dof_hdr_t); 13154 dof->dofh_loadsz = len; 13155 dof->dofh_filesz = len; 13156 dof->dofh_pad = 0; 13157 13158 /* 13159 * Fill in the option section header... 
13160 */ 13161 sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t)); 13162 sec->dofs_type = DOF_SECT_OPTDESC; 13163 sec->dofs_align = sizeof (uint64_t); 13164 sec->dofs_flags = DOF_SECF_LOAD; 13165 sec->dofs_entsize = sizeof (dof_optdesc_t); 13166 13167 opt = (dof_optdesc_t *)((uintptr_t)sec + 13168 roundup(sizeof (dof_sec_t), sizeof (uint64_t))); 13169 13170 sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof; 13171 sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX; 13172 13173 for (i = 0; i < DTRACEOPT_MAX; i++) { 13174 opt[i].dofo_option = i; 13175 opt[i].dofo_strtab = DOF_SECIDX_NONE; 13176 opt[i].dofo_value = state->dts_options[i]; 13177 } 13178 13179 return (dof); 13180 } 13181 13182 static dof_hdr_t * 13183 dtrace_dof_copyin(uintptr_t uarg, int *errp) 13184 { 13185 dof_hdr_t hdr, *dof; 13186 13187 ASSERT(!MUTEX_HELD(&dtrace_lock)); 13188 13189 /* 13190 * First, we're going to copyin() the sizeof (dof_hdr_t). 13191 */ 13192 if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) { 13193 dtrace_dof_error(NULL, "failed to copyin DOF header"); 13194 *errp = EFAULT; 13195 return (NULL); 13196 } 13197 13198 /* 13199 * Now we'll allocate the entire DOF and copy it in -- provided 13200 * that the length isn't outrageous. 
13201 */ 13202 if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { 13203 dtrace_dof_error(&hdr, "load size exceeds maximum"); 13204 *errp = E2BIG; 13205 return (NULL); 13206 } 13207 13208 if (hdr.dofh_loadsz < sizeof (hdr)) { 13209 dtrace_dof_error(&hdr, "invalid load size"); 13210 *errp = EINVAL; 13211 return (NULL); 13212 } 13213 13214 dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP); 13215 13216 if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 || 13217 dof->dofh_loadsz != hdr.dofh_loadsz) { 13218 kmem_free(dof, hdr.dofh_loadsz); 13219 *errp = EFAULT; 13220 return (NULL); 13221 } 13222 13223 return (dof); 13224 } 13225 13226 #ifdef __FreeBSD__ 13227 static dof_hdr_t * 13228 dtrace_dof_copyin_proc(struct proc *p, uintptr_t uarg, int *errp) 13229 { 13230 dof_hdr_t hdr, *dof; 13231 struct thread *td; 13232 size_t loadsz; 13233 13234 ASSERT(!MUTEX_HELD(&dtrace_lock)); 13235 13236 td = curthread; 13237 13238 /* 13239 * First, we're going to copyin() the sizeof (dof_hdr_t). 13240 */ 13241 if (proc_readmem(td, p, uarg, &hdr, sizeof(hdr)) != sizeof(hdr)) { 13242 dtrace_dof_error(NULL, "failed to copyin DOF header"); 13243 *errp = EFAULT; 13244 return (NULL); 13245 } 13246 13247 /* 13248 * Now we'll allocate the entire DOF and copy it in -- provided 13249 * that the length isn't outrageous. 
13250 */ 13251 if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { 13252 dtrace_dof_error(&hdr, "load size exceeds maximum"); 13253 *errp = E2BIG; 13254 return (NULL); 13255 } 13256 loadsz = (size_t)hdr.dofh_loadsz; 13257 13258 if (loadsz < sizeof (hdr)) { 13259 dtrace_dof_error(&hdr, "invalid load size"); 13260 *errp = EINVAL; 13261 return (NULL); 13262 } 13263 13264 dof = kmem_alloc(loadsz, KM_SLEEP); 13265 13266 if (proc_readmem(td, p, uarg, dof, loadsz) != loadsz || 13267 dof->dofh_loadsz != loadsz) { 13268 kmem_free(dof, hdr.dofh_loadsz); 13269 *errp = EFAULT; 13270 return (NULL); 13271 } 13272 13273 return (dof); 13274 } 13275 13276 static __inline uchar_t 13277 dtrace_dof_char(char c) 13278 { 13279 13280 switch (c) { 13281 case '0': 13282 case '1': 13283 case '2': 13284 case '3': 13285 case '4': 13286 case '5': 13287 case '6': 13288 case '7': 13289 case '8': 13290 case '9': 13291 return (c - '0'); 13292 case 'A': 13293 case 'B': 13294 case 'C': 13295 case 'D': 13296 case 'E': 13297 case 'F': 13298 return (c - 'A' + 10); 13299 case 'a': 13300 case 'b': 13301 case 'c': 13302 case 'd': 13303 case 'e': 13304 case 'f': 13305 return (c - 'a' + 10); 13306 } 13307 /* Should not reach here. */ 13308 return (UCHAR_MAX); 13309 } 13310 #endif /* __FreeBSD__ */ 13311 13312 static dof_hdr_t * 13313 dtrace_dof_property(const char *name) 13314 { 13315 #ifdef __FreeBSD__ 13316 uint8_t *dofbuf; 13317 u_char *data, *eol; 13318 caddr_t doffile; 13319 size_t bytes, len, i; 13320 dof_hdr_t *dof; 13321 u_char c1, c2; 13322 13323 dof = NULL; 13324 13325 doffile = preload_search_by_type("dtrace_dof"); 13326 if (doffile == NULL) 13327 return (NULL); 13328 13329 data = preload_fetch_addr(doffile); 13330 len = preload_fetch_size(doffile); 13331 for (;;) { 13332 /* Look for the end of the line. All lines end in a newline. 
*/ 13333 eol = memchr(data, '\n', len); 13334 if (eol == NULL) 13335 return (NULL); 13336 13337 if (strncmp(name, data, strlen(name)) == 0) 13338 break; 13339 13340 eol++; /* skip past the newline */ 13341 len -= eol - data; 13342 data = eol; 13343 } 13344 13345 /* We've found the data corresponding to the specified key. */ 13346 13347 data += strlen(name) + 1; /* skip past the '=' */ 13348 len = eol - data; 13349 if (len % 2 != 0) { 13350 dtrace_dof_error(NULL, "invalid DOF encoding length"); 13351 goto doferr; 13352 } 13353 bytes = len / 2; 13354 if (bytes < sizeof(dof_hdr_t)) { 13355 dtrace_dof_error(NULL, "truncated header"); 13356 goto doferr; 13357 } 13358 13359 /* 13360 * Each byte is represented by the two ASCII characters in its hex 13361 * representation. 13362 */ 13363 dofbuf = malloc(bytes, M_SOLARIS, M_WAITOK); 13364 for (i = 0; i < bytes; i++) { 13365 c1 = dtrace_dof_char(data[i * 2]); 13366 c2 = dtrace_dof_char(data[i * 2 + 1]); 13367 if (c1 == UCHAR_MAX || c2 == UCHAR_MAX) { 13368 dtrace_dof_error(NULL, "invalid hex char in DOF"); 13369 goto doferr; 13370 } 13371 dofbuf[i] = c1 * 16 + c2; 13372 } 13373 13374 dof = (dof_hdr_t *)dofbuf; 13375 if (bytes < dof->dofh_loadsz) { 13376 dtrace_dof_error(NULL, "truncated DOF"); 13377 goto doferr; 13378 } 13379 13380 if (dof->dofh_loadsz >= dtrace_dof_maxsize) { 13381 dtrace_dof_error(NULL, "oversized DOF"); 13382 goto doferr; 13383 } 13384 13385 return (dof); 13386 13387 doferr: 13388 free(dof, M_SOLARIS); 13389 return (NULL); 13390 #else /* __FreeBSD__ */ 13391 uchar_t *buf; 13392 uint64_t loadsz; 13393 unsigned int len, i; 13394 dof_hdr_t *dof; 13395 13396 /* 13397 * Unfortunately, array of values in .conf files are always (and 13398 * only) interpreted to be integer arrays. We must read our DOF 13399 * as an integer array, and then squeeze it into a byte array. 
13400 */ 13401 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0, 13402 (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS) 13403 return (NULL); 13404 13405 for (i = 0; i < len; i++) 13406 buf[i] = (uchar_t)(((int *)buf)[i]); 13407 13408 if (len < sizeof (dof_hdr_t)) { 13409 ddi_prop_free(buf); 13410 dtrace_dof_error(NULL, "truncated header"); 13411 return (NULL); 13412 } 13413 13414 if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) { 13415 ddi_prop_free(buf); 13416 dtrace_dof_error(NULL, "truncated DOF"); 13417 return (NULL); 13418 } 13419 13420 if (loadsz >= dtrace_dof_maxsize) { 13421 ddi_prop_free(buf); 13422 dtrace_dof_error(NULL, "oversized DOF"); 13423 return (NULL); 13424 } 13425 13426 dof = kmem_alloc(loadsz, KM_SLEEP); 13427 bcopy(buf, dof, loadsz); 13428 ddi_prop_free(buf); 13429 13430 return (dof); 13431 #endif /* !__FreeBSD__ */ 13432 } 13433 13434 static void 13435 dtrace_dof_destroy(dof_hdr_t *dof) 13436 { 13437 kmem_free(dof, dof->dofh_loadsz); 13438 } 13439 13440 /* 13441 * Return the dof_sec_t pointer corresponding to a given section index. If the 13442 * index is not valid, dtrace_dof_error() is called and NULL is returned. If 13443 * a type other than DOF_SECT_NONE is specified, the header is checked against 13444 * this type and NULL is returned if the types do not match. 
13445 */ 13446 static dof_sec_t * 13447 dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i) 13448 { 13449 dof_sec_t *sec = (dof_sec_t *)(uintptr_t) 13450 ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize); 13451 13452 if (i >= dof->dofh_secnum) { 13453 dtrace_dof_error(dof, "referenced section index is invalid"); 13454 return (NULL); 13455 } 13456 13457 if (!(sec->dofs_flags & DOF_SECF_LOAD)) { 13458 dtrace_dof_error(dof, "referenced section is not loadable"); 13459 return (NULL); 13460 } 13461 13462 if (type != DOF_SECT_NONE && type != sec->dofs_type) { 13463 dtrace_dof_error(dof, "referenced section is the wrong type"); 13464 return (NULL); 13465 } 13466 13467 return (sec); 13468 } 13469 13470 static dtrace_probedesc_t * 13471 dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) 13472 { 13473 dof_probedesc_t *probe; 13474 dof_sec_t *strtab; 13475 uintptr_t daddr = (uintptr_t)dof; 13476 uintptr_t str; 13477 size_t size; 13478 13479 if (sec->dofs_type != DOF_SECT_PROBEDESC) { 13480 dtrace_dof_error(dof, "invalid probe section"); 13481 return (NULL); 13482 } 13483 13484 if (sec->dofs_align != sizeof (dof_secidx_t)) { 13485 dtrace_dof_error(dof, "bad alignment in probe description"); 13486 return (NULL); 13487 } 13488 13489 if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) { 13490 dtrace_dof_error(dof, "truncated probe description"); 13491 return (NULL); 13492 } 13493 13494 probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset); 13495 strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab); 13496 13497 if (strtab == NULL) 13498 return (NULL); 13499 13500 str = daddr + strtab->dofs_offset; 13501 size = strtab->dofs_size; 13502 13503 if (probe->dofp_provider >= strtab->dofs_size) { 13504 dtrace_dof_error(dof, "corrupt probe provider"); 13505 return (NULL); 13506 } 13507 13508 (void) strncpy(desc->dtpd_provider, 13509 (char *)(str + probe->dofp_provider), 13510 MIN(DTRACE_PROVNAMELEN - 
1, size - probe->dofp_provider)); 13511 13512 if (probe->dofp_mod >= strtab->dofs_size) { 13513 dtrace_dof_error(dof, "corrupt probe module"); 13514 return (NULL); 13515 } 13516 13517 (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod), 13518 MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod)); 13519 13520 if (probe->dofp_func >= strtab->dofs_size) { 13521 dtrace_dof_error(dof, "corrupt probe function"); 13522 return (NULL); 13523 } 13524 13525 (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func), 13526 MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func)); 13527 13528 if (probe->dofp_name >= strtab->dofs_size) { 13529 dtrace_dof_error(dof, "corrupt probe name"); 13530 return (NULL); 13531 } 13532 13533 (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name), 13534 MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name)); 13535 13536 return (desc); 13537 } 13538 13539 static dtrace_difo_t * 13540 dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, 13541 cred_t *cr) 13542 { 13543 dtrace_difo_t *dp; 13544 size_t ttl = 0; 13545 dof_difohdr_t *dofd; 13546 uintptr_t daddr = (uintptr_t)dof; 13547 size_t max = dtrace_difo_maxsize; 13548 int i, l, n; 13549 13550 static const struct { 13551 int section; 13552 int bufoffs; 13553 int lenoffs; 13554 int entsize; 13555 int align; 13556 const char *msg; 13557 } difo[] = { 13558 { DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf), 13559 offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t), 13560 sizeof (dif_instr_t), "multiple DIF sections" }, 13561 13562 { DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab), 13563 offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t), 13564 sizeof (uint64_t), "multiple integer tables" }, 13565 13566 { DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab), 13567 offsetof(dtrace_difo_t, dtdo_strlen), 0, 13568 sizeof (char), "multiple string tables" }, 13569 13570 { DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab), 13571 
offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t), 13572 sizeof (uint_t), "multiple variable tables" }, 13573 13574 { DOF_SECT_NONE, 0, 0, 0, 0, NULL } 13575 }; 13576 13577 if (sec->dofs_type != DOF_SECT_DIFOHDR) { 13578 dtrace_dof_error(dof, "invalid DIFO header section"); 13579 return (NULL); 13580 } 13581 13582 if (sec->dofs_align != sizeof (dof_secidx_t)) { 13583 dtrace_dof_error(dof, "bad alignment in DIFO header"); 13584 return (NULL); 13585 } 13586 13587 if (sec->dofs_size < sizeof (dof_difohdr_t) || 13588 sec->dofs_size % sizeof (dof_secidx_t)) { 13589 dtrace_dof_error(dof, "bad size in DIFO header"); 13590 return (NULL); 13591 } 13592 13593 dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset); 13594 n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1; 13595 13596 dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP); 13597 dp->dtdo_rtype = dofd->dofd_rtype; 13598 13599 for (l = 0; l < n; l++) { 13600 dof_sec_t *subsec; 13601 void **bufp; 13602 uint32_t *lenp; 13603 13604 if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE, 13605 dofd->dofd_links[l])) == NULL) 13606 goto err; /* invalid section link */ 13607 13608 if (ttl + subsec->dofs_size > max) { 13609 dtrace_dof_error(dof, "exceeds maximum size"); 13610 goto err; 13611 } 13612 13613 ttl += subsec->dofs_size; 13614 13615 for (i = 0; difo[i].section != DOF_SECT_NONE; i++) { 13616 if (subsec->dofs_type != difo[i].section) 13617 continue; 13618 13619 if (!(subsec->dofs_flags & DOF_SECF_LOAD)) { 13620 dtrace_dof_error(dof, "section not loaded"); 13621 goto err; 13622 } 13623 13624 if (subsec->dofs_align != difo[i].align) { 13625 dtrace_dof_error(dof, "bad alignment"); 13626 goto err; 13627 } 13628 13629 bufp = (void **)((uintptr_t)dp + difo[i].bufoffs); 13630 lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs); 13631 13632 if (*bufp != NULL) { 13633 dtrace_dof_error(dof, difo[i].msg); 13634 goto err; 13635 } 13636 13637 if (difo[i].entsize != subsec->dofs_entsize) { 13638 
dtrace_dof_error(dof, "entry size mismatch"); 13639 goto err; 13640 } 13641 13642 if (subsec->dofs_entsize != 0 && 13643 (subsec->dofs_size % subsec->dofs_entsize) != 0) { 13644 dtrace_dof_error(dof, "corrupt entry size"); 13645 goto err; 13646 } 13647 13648 *lenp = subsec->dofs_size; 13649 *bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP); 13650 bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset), 13651 *bufp, subsec->dofs_size); 13652 13653 if (subsec->dofs_entsize != 0) 13654 *lenp /= subsec->dofs_entsize; 13655 13656 break; 13657 } 13658 13659 /* 13660 * If we encounter a loadable DIFO sub-section that is not 13661 * known to us, assume this is a broken program and fail. 13662 */ 13663 if (difo[i].section == DOF_SECT_NONE && 13664 (subsec->dofs_flags & DOF_SECF_LOAD)) { 13665 dtrace_dof_error(dof, "unrecognized DIFO subsection"); 13666 goto err; 13667 } 13668 } 13669 13670 if (dp->dtdo_buf == NULL) { 13671 /* 13672 * We can't have a DIF object without DIF text. 13673 */ 13674 dtrace_dof_error(dof, "missing DIF text"); 13675 goto err; 13676 } 13677 13678 /* 13679 * Before we validate the DIF object, run through the variable table 13680 * looking for the strings -- if any of their size are under, we'll set 13681 * their size to be the system-wide default string size. Note that 13682 * this should _not_ happen if the "strsize" option has been set -- 13683 * in this case, the compiler should have set the size to reflect the 13684 * setting of the option. 
13685 */ 13686 for (i = 0; i < dp->dtdo_varlen; i++) { 13687 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 13688 dtrace_diftype_t *t = &v->dtdv_type; 13689 13690 if (v->dtdv_id < DIF_VAR_OTHER_UBASE) 13691 continue; 13692 13693 if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0) 13694 t->dtdt_size = dtrace_strsize_default; 13695 } 13696 13697 if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0) 13698 goto err; 13699 13700 dtrace_difo_init(dp, vstate); 13701 return (dp); 13702 13703 err: 13704 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t)); 13705 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t)); 13706 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen); 13707 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t)); 13708 13709 kmem_free(dp, sizeof (dtrace_difo_t)); 13710 return (NULL); 13711 } 13712 13713 static dtrace_predicate_t * 13714 dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, 13715 cred_t *cr) 13716 { 13717 dtrace_difo_t *dp; 13718 13719 if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL) 13720 return (NULL); 13721 13722 return (dtrace_predicate_create(dp)); 13723 } 13724 13725 static dtrace_actdesc_t * 13726 dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, 13727 cred_t *cr) 13728 { 13729 dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next; 13730 dof_actdesc_t *desc; 13731 dof_sec_t *difosec; 13732 size_t offs; 13733 uintptr_t daddr = (uintptr_t)dof; 13734 uint64_t arg; 13735 dtrace_actkind_t kind; 13736 13737 if (sec->dofs_type != DOF_SECT_ACTDESC) { 13738 dtrace_dof_error(dof, "invalid action section"); 13739 return (NULL); 13740 } 13741 13742 if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) { 13743 dtrace_dof_error(dof, "truncated action description"); 13744 return (NULL); 13745 } 13746 13747 if (sec->dofs_align != sizeof (uint64_t)) { 13748 dtrace_dof_error(dof, "bad alignment in action description"); 13749 return (NULL); 
13750 } 13751 13752 if (sec->dofs_size < sec->dofs_entsize) { 13753 dtrace_dof_error(dof, "section entry size exceeds total size"); 13754 return (NULL); 13755 } 13756 13757 if (sec->dofs_entsize != sizeof (dof_actdesc_t)) { 13758 dtrace_dof_error(dof, "bad entry size in action description"); 13759 return (NULL); 13760 } 13761 13762 if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) { 13763 dtrace_dof_error(dof, "actions exceed dtrace_actions_max"); 13764 return (NULL); 13765 } 13766 13767 for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) { 13768 desc = (dof_actdesc_t *)(daddr + 13769 (uintptr_t)sec->dofs_offset + offs); 13770 kind = (dtrace_actkind_t)desc->dofa_kind; 13771 13772 if ((DTRACEACT_ISPRINTFLIKE(kind) && 13773 (kind != DTRACEACT_PRINTA || 13774 desc->dofa_strtab != DOF_SECIDX_NONE)) || 13775 (kind == DTRACEACT_DIFEXPR && 13776 desc->dofa_strtab != DOF_SECIDX_NONE)) { 13777 dof_sec_t *strtab; 13778 char *str, *fmt; 13779 uint64_t i; 13780 13781 /* 13782 * The argument to these actions is an index into the 13783 * DOF string table. For printf()-like actions, this 13784 * is the format string. For print(), this is the 13785 * CTF type of the expression result. 
13786 */ 13787 if ((strtab = dtrace_dof_sect(dof, 13788 DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL) 13789 goto err; 13790 13791 str = (char *)((uintptr_t)dof + 13792 (uintptr_t)strtab->dofs_offset); 13793 13794 for (i = desc->dofa_arg; i < strtab->dofs_size; i++) { 13795 if (str[i] == '\0') 13796 break; 13797 } 13798 13799 if (i >= strtab->dofs_size) { 13800 dtrace_dof_error(dof, "bogus format string"); 13801 goto err; 13802 } 13803 13804 if (i == desc->dofa_arg) { 13805 dtrace_dof_error(dof, "empty format string"); 13806 goto err; 13807 } 13808 13809 i -= desc->dofa_arg; 13810 fmt = kmem_alloc(i + 1, KM_SLEEP); 13811 bcopy(&str[desc->dofa_arg], fmt, i + 1); 13812 arg = (uint64_t)(uintptr_t)fmt; 13813 } else { 13814 if (kind == DTRACEACT_PRINTA) { 13815 ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE); 13816 arg = 0; 13817 } else { 13818 arg = desc->dofa_arg; 13819 } 13820 } 13821 13822 act = dtrace_actdesc_create(kind, desc->dofa_ntuple, 13823 desc->dofa_uarg, arg); 13824 13825 if (last != NULL) { 13826 last->dtad_next = act; 13827 } else { 13828 first = act; 13829 } 13830 13831 last = act; 13832 13833 if (desc->dofa_difo == DOF_SECIDX_NONE) 13834 continue; 13835 13836 if ((difosec = dtrace_dof_sect(dof, 13837 DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL) 13838 goto err; 13839 13840 act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr); 13841 13842 if (act->dtad_difo == NULL) 13843 goto err; 13844 } 13845 13846 ASSERT(first != NULL); 13847 return (first); 13848 13849 err: 13850 for (act = first; act != NULL; act = next) { 13851 next = act->dtad_next; 13852 dtrace_actdesc_release(act, vstate); 13853 } 13854 13855 return (NULL); 13856 } 13857 13858 static dtrace_ecbdesc_t * 13859 dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, 13860 cred_t *cr) 13861 { 13862 dtrace_ecbdesc_t *ep; 13863 dof_ecbdesc_t *ecb; 13864 dtrace_probedesc_t *desc; 13865 dtrace_predicate_t *pred = NULL; 13866 13867 if (sec->dofs_size < sizeof (dof_ecbdesc_t)) { 
13868 dtrace_dof_error(dof, "truncated ECB description"); 13869 return (NULL); 13870 } 13871 13872 if (sec->dofs_align != sizeof (uint64_t)) { 13873 dtrace_dof_error(dof, "bad alignment in ECB description"); 13874 return (NULL); 13875 } 13876 13877 ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset); 13878 sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes); 13879 13880 if (sec == NULL) 13881 return (NULL); 13882 13883 ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP); 13884 ep->dted_uarg = ecb->dofe_uarg; 13885 desc = &ep->dted_probe; 13886 13887 if (dtrace_dof_probedesc(dof, sec, desc) == NULL) 13888 goto err; 13889 13890 if (ecb->dofe_pred != DOF_SECIDX_NONE) { 13891 if ((sec = dtrace_dof_sect(dof, 13892 DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL) 13893 goto err; 13894 13895 if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL) 13896 goto err; 13897 13898 ep->dted_pred.dtpdd_predicate = pred; 13899 } 13900 13901 if (ecb->dofe_actions != DOF_SECIDX_NONE) { 13902 if ((sec = dtrace_dof_sect(dof, 13903 DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL) 13904 goto err; 13905 13906 ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr); 13907 13908 if (ep->dted_action == NULL) 13909 goto err; 13910 } 13911 13912 return (ep); 13913 13914 err: 13915 if (pred != NULL) 13916 dtrace_predicate_release(pred, vstate); 13917 kmem_free(ep, sizeof (dtrace_ecbdesc_t)); 13918 return (NULL); 13919 } 13920 13921 /* 13922 * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the 13923 * specified DOF. SETX relocations are computed using 'ubase', the base load 13924 * address of the object containing the DOF, and DOFREL relocations are relative 13925 * to the relocation offset within the DOF. 
13926 */ 13927 static int 13928 dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase, 13929 uint64_t udaddr) 13930 { 13931 uintptr_t daddr = (uintptr_t)dof; 13932 dof_relohdr_t *dofr = 13933 (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset); 13934 dof_sec_t *ss, *rs, *ts; 13935 dof_relodesc_t *r; 13936 uint_t i, n; 13937 13938 if (sec->dofs_size < sizeof (dof_relohdr_t) || 13939 sec->dofs_align != sizeof (dof_secidx_t)) { 13940 dtrace_dof_error(dof, "invalid relocation header"); 13941 return (-1); 13942 } 13943 13944 ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab); 13945 rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec); 13946 ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec); 13947 13948 if (ss == NULL || rs == NULL || ts == NULL) 13949 return (-1); /* dtrace_dof_error() has been called already */ 13950 13951 if (rs->dofs_entsize < sizeof (dof_relodesc_t) || 13952 rs->dofs_align != sizeof (uint64_t)) { 13953 dtrace_dof_error(dof, "invalid relocation section"); 13954 return (-1); 13955 } 13956 13957 r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset); 13958 n = rs->dofs_size / rs->dofs_entsize; 13959 13960 for (i = 0; i < n; i++) { 13961 uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset; 13962 13963 switch (r->dofr_type) { 13964 case DOF_RELO_NONE: 13965 break; 13966 case DOF_RELO_SETX: 13967 case DOF_RELO_DOFREL: 13968 if (r->dofr_offset >= ts->dofs_size || r->dofr_offset + 13969 sizeof (uint64_t) > ts->dofs_size) { 13970 dtrace_dof_error(dof, "bad relocation offset"); 13971 return (-1); 13972 } 13973 13974 if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) { 13975 dtrace_dof_error(dof, "misaligned setx relo"); 13976 return (-1); 13977 } 13978 13979 if (r->dofr_type == DOF_RELO_SETX) 13980 *(uint64_t *)taddr += ubase; 13981 else 13982 *(uint64_t *)taddr += 13983 udaddr + ts->dofs_offset + r->dofr_offset; 13984 break; 13985 default: 13986 dtrace_dof_error(dof, "invalid relocation type"); 13987 return 
(-1); 13988 } 13989 13990 r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize); 13991 } 13992 13993 return (0); 13994 } 13995 13996 /* 13997 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated 13998 * header: it should be at the front of a memory region that is at least 13999 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in 14000 * size. It need not be validated in any other way. 14001 */ 14002 static int 14003 dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, 14004 dtrace_enabling_t **enabp, uint64_t ubase, uint64_t udaddr, int noprobes) 14005 { 14006 uint64_t len = dof->dofh_loadsz, seclen; 14007 uintptr_t daddr = (uintptr_t)dof; 14008 dtrace_ecbdesc_t *ep; 14009 dtrace_enabling_t *enab; 14010 uint_t i; 14011 14012 ASSERT(MUTEX_HELD(&dtrace_lock)); 14013 ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t)); 14014 14015 /* 14016 * Check the DOF header identification bytes. In addition to checking 14017 * valid settings, we also verify that unused bits/bytes are zeroed so 14018 * we can use them later without fear of regressing existing binaries. 
14019 */ 14020 if (bcmp(&dof->dofh_ident[DOF_ID_MAG0], 14021 DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) { 14022 dtrace_dof_error(dof, "DOF magic string mismatch"); 14023 return (-1); 14024 } 14025 14026 if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 && 14027 dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) { 14028 dtrace_dof_error(dof, "DOF has invalid data model"); 14029 return (-1); 14030 } 14031 14032 if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) { 14033 dtrace_dof_error(dof, "DOF encoding mismatch"); 14034 return (-1); 14035 } 14036 14037 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && 14038 dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) { 14039 dtrace_dof_error(dof, "DOF version mismatch"); 14040 return (-1); 14041 } 14042 14043 if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) { 14044 dtrace_dof_error(dof, "DOF uses unsupported instruction set"); 14045 return (-1); 14046 } 14047 14048 if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) { 14049 dtrace_dof_error(dof, "DOF uses too many integer registers"); 14050 return (-1); 14051 } 14052 14053 if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) { 14054 dtrace_dof_error(dof, "DOF uses too many tuple registers"); 14055 return (-1); 14056 } 14057 14058 for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) { 14059 if (dof->dofh_ident[i] != 0) { 14060 dtrace_dof_error(dof, "DOF has invalid ident byte set"); 14061 return (-1); 14062 } 14063 } 14064 14065 if (dof->dofh_flags & ~DOF_FL_VALID) { 14066 dtrace_dof_error(dof, "DOF has invalid flag bits set"); 14067 return (-1); 14068 } 14069 14070 if (dof->dofh_secsize == 0) { 14071 dtrace_dof_error(dof, "zero section header size"); 14072 return (-1); 14073 } 14074 14075 /* 14076 * Check that the section headers don't exceed the amount of DOF 14077 * data. Note that we cast the section size and number of sections 14078 * to uint64_t's to prevent possible overflow in the multiplication. 
14079 */ 14080 seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize; 14081 14082 if (dof->dofh_secoff > len || seclen > len || 14083 dof->dofh_secoff + seclen > len) { 14084 dtrace_dof_error(dof, "truncated section headers"); 14085 return (-1); 14086 } 14087 14088 if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) { 14089 dtrace_dof_error(dof, "misaligned section headers"); 14090 return (-1); 14091 } 14092 14093 if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) { 14094 dtrace_dof_error(dof, "misaligned section size"); 14095 return (-1); 14096 } 14097 14098 /* 14099 * Take an initial pass through the section headers to be sure that 14100 * the headers don't have stray offsets. If the 'noprobes' flag is 14101 * set, do not permit sections relating to providers, probes, or args. 14102 */ 14103 for (i = 0; i < dof->dofh_secnum; i++) { 14104 dof_sec_t *sec = (dof_sec_t *)(daddr + 14105 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); 14106 14107 if (noprobes) { 14108 switch (sec->dofs_type) { 14109 case DOF_SECT_PROVIDER: 14110 case DOF_SECT_PROBES: 14111 case DOF_SECT_PRARGS: 14112 case DOF_SECT_PROFFS: 14113 dtrace_dof_error(dof, "illegal sections " 14114 "for enabling"); 14115 return (-1); 14116 } 14117 } 14118 14119 if (DOF_SEC_ISLOADABLE(sec->dofs_type) && 14120 !(sec->dofs_flags & DOF_SECF_LOAD)) { 14121 dtrace_dof_error(dof, "loadable section with load " 14122 "flag unset"); 14123 return (-1); 14124 } 14125 14126 if (!(sec->dofs_flags & DOF_SECF_LOAD)) 14127 continue; /* just ignore non-loadable sections */ 14128 14129 if (!ISP2(sec->dofs_align)) { 14130 dtrace_dof_error(dof, "bad section alignment"); 14131 return (-1); 14132 } 14133 14134 if (sec->dofs_offset & (sec->dofs_align - 1)) { 14135 dtrace_dof_error(dof, "misaligned section"); 14136 return (-1); 14137 } 14138 14139 if (sec->dofs_offset > len || sec->dofs_size > len || 14140 sec->dofs_offset + sec->dofs_size > len) { 14141 dtrace_dof_error(dof, "corrupt section header"); 
14142 return (-1); 14143 } 14144 14145 if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr + 14146 sec->dofs_offset + sec->dofs_size - 1) != '\0') { 14147 dtrace_dof_error(dof, "non-terminating string table"); 14148 return (-1); 14149 } 14150 } 14151 14152 /* 14153 * Take a second pass through the sections and locate and perform any 14154 * relocations that are present. We do this after the first pass to 14155 * be sure that all sections have had their headers validated. 14156 */ 14157 for (i = 0; i < dof->dofh_secnum; i++) { 14158 dof_sec_t *sec = (dof_sec_t *)(daddr + 14159 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); 14160 14161 if (!(sec->dofs_flags & DOF_SECF_LOAD)) 14162 continue; /* skip sections that are not loadable */ 14163 14164 switch (sec->dofs_type) { 14165 case DOF_SECT_URELHDR: 14166 if (dtrace_dof_relocate(dof, sec, ubase, udaddr) != 0) 14167 return (-1); 14168 break; 14169 } 14170 } 14171 14172 if ((enab = *enabp) == NULL) 14173 enab = *enabp = dtrace_enabling_create(vstate); 14174 14175 for (i = 0; i < dof->dofh_secnum; i++) { 14176 dof_sec_t *sec = (dof_sec_t *)(daddr + 14177 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); 14178 14179 if (sec->dofs_type != DOF_SECT_ECBDESC) 14180 continue; 14181 14182 if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) { 14183 dtrace_enabling_destroy(enab); 14184 *enabp = NULL; 14185 return (-1); 14186 } 14187 14188 dtrace_enabling_add(enab, ep); 14189 } 14190 14191 return (0); 14192 } 14193 14194 /* 14195 * Process DOF for any options. This routine assumes that the DOF has been 14196 * at least processed by dtrace_dof_slurp(). 
14197 */ 14198 static int 14199 dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) 14200 { 14201 int i, rval; 14202 uint32_t entsize; 14203 size_t offs; 14204 dof_optdesc_t *desc; 14205 14206 for (i = 0; i < dof->dofh_secnum; i++) { 14207 dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof + 14208 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); 14209 14210 if (sec->dofs_type != DOF_SECT_OPTDESC) 14211 continue; 14212 14213 if (sec->dofs_align != sizeof (uint64_t)) { 14214 dtrace_dof_error(dof, "bad alignment in " 14215 "option description"); 14216 return (EINVAL); 14217 } 14218 14219 if ((entsize = sec->dofs_entsize) == 0) { 14220 dtrace_dof_error(dof, "zeroed option entry size"); 14221 return (EINVAL); 14222 } 14223 14224 if (entsize < sizeof (dof_optdesc_t)) { 14225 dtrace_dof_error(dof, "bad option entry size"); 14226 return (EINVAL); 14227 } 14228 14229 for (offs = 0; offs < sec->dofs_size; offs += entsize) { 14230 desc = (dof_optdesc_t *)((uintptr_t)dof + 14231 (uintptr_t)sec->dofs_offset + offs); 14232 14233 if (desc->dofo_strtab != DOF_SECIDX_NONE) { 14234 dtrace_dof_error(dof, "non-zero option string"); 14235 return (EINVAL); 14236 } 14237 14238 if (desc->dofo_value == DTRACEOPT_UNSET) { 14239 dtrace_dof_error(dof, "unset option"); 14240 return (EINVAL); 14241 } 14242 14243 if ((rval = dtrace_state_option(state, 14244 desc->dofo_option, desc->dofo_value)) != 0) { 14245 dtrace_dof_error(dof, "rejected option"); 14246 return (rval); 14247 } 14248 } 14249 } 14250 14251 return (0); 14252 } 14253 14254 /* 14255 * DTrace Consumer State Functions 14256 */ 14257 static int 14258 dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) 14259 { 14260 size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize; 14261 void *base; 14262 uintptr_t limit; 14263 dtrace_dynvar_t *dvar, *next, *start; 14264 int i; 14265 14266 ASSERT(MUTEX_HELD(&dtrace_lock)); 14267 ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL); 14268 14269 bzero(dstate, sizeof 
(dtrace_dstate_t)); 14270 14271 if ((dstate->dtds_chunksize = chunksize) == 0) 14272 dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE; 14273 14274 VERIFY(dstate->dtds_chunksize < LONG_MAX); 14275 14276 if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t))) 14277 size = min; 14278 14279 if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) 14280 return (ENOMEM); 14281 14282 dstate->dtds_size = size; 14283 dstate->dtds_base = base; 14284 dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP); 14285 bzero(dstate->dtds_percpu, NCPU * sizeof (dtrace_dstate_percpu_t)); 14286 14287 hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)); 14288 14289 if (hashsize != 1 && (hashsize & 1)) 14290 hashsize--; 14291 14292 dstate->dtds_hashsize = hashsize; 14293 dstate->dtds_hash = dstate->dtds_base; 14294 14295 /* 14296 * Set all of our hash buckets to point to the single sink, and (if 14297 * it hasn't already been set), set the sink's hash value to be the 14298 * sink sentinel value. The sink is needed for dynamic variable 14299 * lookups to know that they have iterated over an entire, valid hash 14300 * chain. 14301 */ 14302 for (i = 0; i < hashsize; i++) 14303 dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink; 14304 14305 if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK) 14306 dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK; 14307 14308 /* 14309 * Determine number of active CPUs. Divide free list evenly among 14310 * active CPUs. 
14311 */ 14312 start = (dtrace_dynvar_t *) 14313 ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t)); 14314 limit = (uintptr_t)base + size; 14315 14316 VERIFY((uintptr_t)start < limit); 14317 VERIFY((uintptr_t)start >= (uintptr_t)base); 14318 14319 maxper = (limit - (uintptr_t)start) / NCPU; 14320 maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize; 14321 14322 #ifndef illumos 14323 CPU_FOREACH(i) { 14324 #else 14325 for (i = 0; i < NCPU; i++) { 14326 #endif 14327 dstate->dtds_percpu[i].dtdsc_free = dvar = start; 14328 14329 /* 14330 * If we don't even have enough chunks to make it once through 14331 * NCPUs, we're just going to allocate everything to the first 14332 * CPU. And if we're on the last CPU, we're going to allocate 14333 * whatever is left over. In either case, we set the limit to 14334 * be the limit of the dynamic variable space. 14335 */ 14336 if (maxper == 0 || i == NCPU - 1) { 14337 limit = (uintptr_t)base + size; 14338 start = NULL; 14339 } else { 14340 limit = (uintptr_t)start + maxper; 14341 start = (dtrace_dynvar_t *)limit; 14342 } 14343 14344 VERIFY(limit <= (uintptr_t)base + size); 14345 14346 for (;;) { 14347 next = (dtrace_dynvar_t *)((uintptr_t)dvar + 14348 dstate->dtds_chunksize); 14349 14350 if ((uintptr_t)next + dstate->dtds_chunksize >= limit) 14351 break; 14352 14353 VERIFY((uintptr_t)dvar >= (uintptr_t)base && 14354 (uintptr_t)dvar <= (uintptr_t)base + size); 14355 dvar->dtdv_next = next; 14356 dvar = next; 14357 } 14358 14359 if (maxper == 0) 14360 break; 14361 } 14362 14363 return (0); 14364 } 14365 14366 static void 14367 dtrace_dstate_fini(dtrace_dstate_t *dstate) 14368 { 14369 ASSERT(MUTEX_HELD(&cpu_lock)); 14370 14371 if (dstate->dtds_base == NULL) 14372 return; 14373 14374 kmem_free(dstate->dtds_base, dstate->dtds_size); 14375 kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu); 14376 } 14377 14378 static void 14379 dtrace_vstate_fini(dtrace_vstate_t *vstate) 14380 { 14381 /* 14382 * Logical XOR, 
/*
 * Free the global, thread-local and clause-local variable arrays held by
 * 'vstate'.  Each array is freed only if its element count is non-zero.
 */
static void
dtrace_vstate_fini(dtrace_vstate_t *vstate)
{
	/*
	 * Logical XOR, where are you?
	 */
	ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL));

	if (vstate->dtvs_nglobals > 0) {
		kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals *
		    sizeof (dtrace_statvar_t *));
	}

	if (vstate->dtvs_ntlocals > 0) {
		kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals *
		    sizeof (dtrace_difv_t));
	}

	ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL));

	if (vstate->dtvs_nlocals > 0) {
		kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals *
		    sizeof (dtrace_statvar_t *));
	}
}

#ifdef illumos
/*
 * Clean the state's dynamic variables and speculations.  Nothing is done
 * while the state is inactive.
 */
static void
dtrace_state_clean(dtrace_state_t *state)
{
	if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
		return;

	dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
	dtrace_speculation_clean(state);
}

/*
 * Deadman handler:  refresh dts_alive with the current time, unless this is
 * a non-anonymous state whose last status check is at least
 * dtrace_deadman_user in the past.
 */
static void
dtrace_state_deadman(dtrace_state_t *state)
{
	hrtime_t now;

	dtrace_sync();

	now = dtrace_gethrtime();

	if (state != dtrace_anon.dta_state &&
	    now - state->dts_laststatus >= dtrace_deadman_user)
		return;

	/*
	 * We must be sure that dts_alive never appears to be less than the
	 * value upon entry to dtrace_state_deadman(), and because we lack a
	 * dtrace_cas64(), we cannot store to it atomically.  We thus instead
	 * store INT64_MAX to it, followed by a memory barrier, followed by
	 * the new value.  This assures that dts_alive never appears to be
	 * less than its true value, regardless of the order in which the
	 * stores to the underlying storage are issued.
	 */
	state->dts_alive = INT64_MAX;
	dtrace_membar_producer();
	state->dts_alive = now;
}
#else	/* !illumos */
/*
 * Callout variant of the cleaner:  'arg' is the dtrace_state_t.  Performs
 * the same cleaning as the illumos variant and then re-arms the dts_cleaner
 * callout using the state's cleanrate option.  If the state is inactive the
 * function returns before re-arming.
 */
static void
dtrace_state_clean(void *arg)
{
	dtrace_state_t *state = arg;
	dtrace_optval_t *opt = state->dts_options;

	if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
		return;

	dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
	dtrace_speculation_clean(state);

	/* Convert the option value to ticks (hz per NANOSEC units). */
	callout_reset(&state->dts_cleaner, hz * opt[DTRACEOPT_CLEANRATE] / NANOSEC,
	    dtrace_state_clean, state);
}

/*
 * Callout variant of the deadman:  'arg' is the dtrace_state_t.  In addition
 * to refreshing dts_alive, it flushes DTrace debug output and re-arms the
 * dts_deadman callout.  When the user-level consumer is considered
 * unresponsive the function returns early, without re-arming the callout.
 */
static void
dtrace_state_deadman(void *arg)
{
	dtrace_state_t *state = arg;
	hrtime_t now;

	dtrace_sync();

	dtrace_debug_output();

	now = dtrace_gethrtime();

	if (state != dtrace_anon.dta_state &&
	    now - state->dts_laststatus >= dtrace_deadman_user)
		return;

	/*
	 * We must be sure that dts_alive never appears to be less than the
	 * value upon entry to dtrace_state_deadman(), and because we lack a
	 * dtrace_cas64(), we cannot store to it atomically.  We thus instead
	 * store INT64_MAX to it, followed by a memory barrier, followed by
	 * the new value.  This assures that dts_alive never appears to be
	 * less than its true value, regardless of the order in which the
	 * stores to the underlying storage are issued.
	 */
	state->dts_alive = INT64_MAX;
	dtrace_membar_producer();
	state->dts_alive = now;

	callout_reset(&state->dts_deadman, hz * dtrace_deadman_interval / NANOSEC,
	    dtrace_state_deadman, state);
}
#endif	/* illumos */
/*
 * Allocate and initialize a new consumer state:  the aggregation ID arena,
 * the per-CPU principal and aggregation buffer arrays, the per-CPU random
 * state, the cleaner/deadman timers, the default option values and the
 * credential-derived visibility and action flags.
 *
 * On FreeBSD the credential is taken from 'dev' (dev->si_cred); a NULL
 * 'dev' leaves the credential NULL, which grants full visibility and all
 * actions.  The 'cred' parameter is unused.  The illumos variant returns
 * NULL if the soft state cannot be allocated.
 *
 * Both dtrace_lock and cpu_lock must be held by the caller.
 */
static dtrace_state_t *
#ifdef illumos
dtrace_state_create(dev_t *devp, cred_t *cr)
#else
dtrace_state_create(struct cdev *dev, struct ucred *cred __unused)
#endif
{
#ifdef illumos
	minor_t minor;
	major_t major;
#else
	cred_t *cr = NULL;
	int m = 0;
#endif
	char c[30];
	dtrace_state_t *state;
	dtrace_optval_t *opt;
	int bufsize = NCPU * sizeof (dtrace_buffer_t), i;
	int cpu_it;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(MUTEX_HELD(&cpu_lock));

#ifdef illumos
	minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1,
	    VM_BESTFIT | VM_SLEEP);

	if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) {
		vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
		return (NULL);
	}

	state = ddi_get_soft_state(dtrace_softstate, minor);
#else
	if (dev != NULL) {
		cr = dev->si_cred;
		m = dev2unit(dev);
	}

	/* Allocate memory for the state. */
	state = kmem_zalloc(sizeof(dtrace_state_t), KM_SLEEP);
#endif

	state->dts_epid = DTRACE_EPIDNONE + 1;

	/*
	 * NOTE(review): 'm' is declared only in the !illumos branch above,
	 * yet it is used here unconditionally; and 'c' is consumed only by
	 * the illumos vmem_create() call below.  Confirm whether the illumos
	 * branch is still expected to build.
	 */
	(void) snprintf(c, sizeof (c), "dtrace_aggid_%d", m);
#ifdef illumos
	state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1,
	    NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);

	if (devp != NULL) {
		major = getemajor(*devp);
	} else {
		major = ddi_driver_major(dtrace_devi);
	}

	state->dts_dev = makedevice(major, minor);

	if (devp != NULL)
		*devp = state->dts_dev;
#else
	state->dts_aggid_arena = new_unrhdr(1, INT_MAX, &dtrace_unr_mtx);
	state->dts_dev = dev;
#endif

	/*
	 * We allocate NCPU buffers.  On the one hand, this can be quite
	 * a bit of memory per instance (nearly 36K on a Starcat).  On the
	 * other hand, it saves an additional memory reference in the probe
	 * path.
	 */
	state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP);
	state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP);

	/*
	 * Allocate and initialise the per-process per-CPU random state.
	 * SI_SUB_RANDOM < SI_SUB_DTRACE_ANON therefore entropy device is
	 * assumed to be seeded at this point (if from Fortuna seed file).
	 */
	(void) read_random(&state->dts_rstate[0], 2 * sizeof(uint64_t));
	for (cpu_it = 1; cpu_it < NCPU; cpu_it++) {
		/*
		 * Each CPU is assigned a 2^64 period, non-overlapping
		 * subsequence.
		 */
		dtrace_xoroshiro128_plus_jump(state->dts_rstate[cpu_it-1],
		    state->dts_rstate[cpu_it]);
	}

#ifdef illumos
	state->dts_cleaner = CYCLIC_NONE;
	state->dts_deadman = CYCLIC_NONE;
#else
	callout_init(&state->dts_cleaner, 1);
	callout_init(&state->dts_deadman, 1);
#endif
	state->dts_vstate.dtvs_state = state;

	/* Start with every option unset, then apply the defaults below. */
	for (i = 0; i < DTRACEOPT_MAX; i++)
		state->dts_options[i] = DTRACEOPT_UNSET;

	/*
	 * Set the default options.
	 */
	opt = state->dts_options;
	opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH;
	opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO;
	opt[DTRACEOPT_NSPEC] = dtrace_nspec_default;
	opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default;
	opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL;
	opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default;
	opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default;
	opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default;
	opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default;
	opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default;
	opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default;
	opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default;
	opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default;
	opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default;

	state->dts_activity = DTRACE_ACTIVITY_INACTIVE;

	/*
	 * Depending on the user credentials, we set flag bits which alter probe
	 * visibility or the amount of destructiveness allowed.  In the case of
	 * actual anonymous tracing, or the possession of all privileges, all of
	 * the normal checks are bypassed.
	 */
	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
		state->dts_cred.dcr_action = DTRACE_CRA_ALL;
	} else {
		/*
		 * Set up the credentials for this instantiation.  We take a
		 * hold on the credential to prevent it from disappearing on
		 * us; this in turn prevents the zone_t referenced by this
		 * credential from disappearing.  This means that we can
		 * examine the credential and the zone from probe context.
		 */
		crhold(cr);
		state->dts_cred.dcr_cred = cr;

		/*
		 * CRA_PROC means "we have *some* privilege for dtrace" and
		 * unlocks the use of variables like pid, zonename, etc.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) ||
		    PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
			state->dts_cred.dcr_action |= DTRACE_CRA_PROC;
		}

		/*
		 * dtrace_user allows use of syscall and profile providers.
		 * If the user also has proc_owner and/or proc_zone, we
		 * extend the scope to include additional visibility and
		 * destructive power.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) {
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLPROC;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
			}

			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLZONE;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
			}

			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 */
#ifdef illumos
			if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
			    cr->cr_zone->zone_privset)) {
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
			}
#endif
		}

		/*
		 * Holding the dtrace_kernel privilege also implies that
		 * the user has the dtrace_user privilege from a visibility
		 * perspective.  But without further privileges, some
		 * destructive actions are not available.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) {
			/*
			 * Make all probes in all zones visible.  However,
			 * this doesn't mean that all actions become available
			 * to all zones.
			 */
			state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL |
			    DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE;

			state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL |
			    DTRACE_CRA_PROC;
			/*
			 * Holding proc_owner means that destructive actions
			 * for *this* zone are allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			/*
			 * Holding proc_zone means that destructive actions
			 * for this user/group ID in all zones is allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;

#ifdef illumos
			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 */
			if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
			    cr->cr_zone->zone_privset)) {
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
			}
#endif
		}

		/*
		 * Holding the dtrace_proc privilege gives control over fasttrap
		 * and pid providers.  We need to grant wider destructive
		 * privileges in the event that the user has proc_owner and/or
		 * proc_zone.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
		}
	}

	return (state);
}
14786 */ 14787 if (size & (sizeof (uint64_t) - 1)) 14788 size -= size & (sizeof (uint64_t) - 1); 14789 14790 if (size < state->dts_reserve) { 14791 /* 14792 * Buffers always must be large enough to accommodate 14793 * their prereserved space. We return E2BIG instead 14794 * of ENOMEM in this case to allow for user-level 14795 * software to differentiate the cases. 14796 */ 14797 return (E2BIG); 14798 } 14799 14800 rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor); 14801 14802 if (rval != ENOMEM) { 14803 opt[which] = size; 14804 return (rval); 14805 } 14806 14807 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) 14808 return (rval); 14809 14810 for (divisor = 2; divisor < factor; divisor <<= 1) 14811 continue; 14812 } 14813 14814 return (ENOMEM); 14815 } 14816 14817 static int 14818 dtrace_state_buffers(dtrace_state_t *state) 14819 { 14820 dtrace_speculation_t *spec = state->dts_speculations; 14821 int rval, i; 14822 14823 if ((rval = dtrace_state_buffer(state, state->dts_buffer, 14824 DTRACEOPT_BUFSIZE)) != 0) 14825 return (rval); 14826 14827 if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer, 14828 DTRACEOPT_AGGSIZE)) != 0) 14829 return (rval); 14830 14831 for (i = 0; i < state->dts_nspeculations; i++) { 14832 if ((rval = dtrace_state_buffer(state, 14833 spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0) 14834 return (rval); 14835 } 14836 14837 return (0); 14838 } 14839 14840 static void 14841 dtrace_state_prereserve(dtrace_state_t *state) 14842 { 14843 dtrace_ecb_t *ecb; 14844 dtrace_probe_t *probe; 14845 14846 state->dts_reserve = 0; 14847 14848 if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL) 14849 return; 14850 14851 /* 14852 * If our buffer policy is a "fill" buffer policy, we need to set the 14853 * prereserved space to be the space required by the END probes. 
/*
 * Transition a consumer state from INACTIVE to running: allocate its
 * speculations and buffers, initialize dynamic variable state, start the
 * cleaner and deadman timers, fire the BEGIN probe and activate the per-CPU
 * buffers.
 *
 *	state	the consumer state to start
 *	cpu	out: the CPU on which BEGIN fired (or dtrace_anon.dta_beganon
 *		when an already-active anonymous state was grabbed)
 *
 * Returns 0 on success, or EBUSY, EACCES, ENOMEM, ENOENT, EALREADY, ENOSPC,
 * or an error from dtrace_state_buffers()/dtrace_dstate_init().  Takes and
 * releases cpu_lock and dtrace_lock itself.
 */
static int
dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
{
	dtrace_optval_t *opt = state->dts_options, sz, nspec;
	dtrace_speculation_t *spec;
	dtrace_buffer_t *buf;
#ifdef illumos
	cyc_handler_t hdlr;
	cyc_time_t when;
#endif
	int rval = 0, i, bufsize = NCPU * sizeof (dtrace_buffer_t);
	dtrace_icookie_t cookie;

	mutex_enter(&cpu_lock);
	mutex_enter(&dtrace_lock);

	if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
		rval = EBUSY;
		goto out;
	}

	/*
	 * Before we can perform any checks, we must prime all of the
	 * retained enablings that correspond to this state.
	 */
	dtrace_enabling_prime(state);

	if (state->dts_destructive && !state->dts_cred.dcr_destructive) {
		rval = EACCES;
		goto out;
	}

	dtrace_state_prereserve(state);

	/*
	 * What we want to do now is try to allocate our speculations.
	 * We do not automatically resize the number of speculations; if
	 * this fails, we will fail the operation.
	 */
	nspec = opt[DTRACEOPT_NSPEC];
	ASSERT(nspec != DTRACEOPT_UNSET);

	if (nspec > INT_MAX) {
		rval = ENOMEM;
		goto out;
	}

	spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t),
	    KM_NOSLEEP | KM_NORMALPRI);

	if (spec == NULL) {
		rval = ENOMEM;
		goto out;
	}

	state->dts_speculations = spec;
	state->dts_nspeculations = (int)nspec;

	for (i = 0; i < nspec; i++) {
		if ((buf = kmem_zalloc(bufsize,
		    KM_NOSLEEP | KM_NORMALPRI)) == NULL) {
			rval = ENOMEM;
			goto err;
		}

		spec[i].dtsp_buffer = buf;
	}

	/*
	 * NOTE(review): the "goto out" exits from here down to the ENOSPC
	 * check leave state->dts_speculations allocated while the state
	 * stays INACTIVE.  It looks like a second call on the same state
	 * would overwrite that pointer -- confirm whether callers can retry.
	 */
	if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) {
		if (dtrace_anon.dta_state == NULL) {
			rval = ENOENT;
			goto out;
		}

		if (state->dts_necbs != 0) {
			rval = EALREADY;
			goto out;
		}

		/*
		 * From here on, "state" refers to the grabbed anonymous
		 * state rather than the state we were called with; "opt"
		 * still points at the caller's option array.
		 */
		state->dts_anon = dtrace_anon_grab();
		ASSERT(state->dts_anon != NULL);
		state = state->dts_anon;

		/*
		 * We want "grabanon" to be set in the grabbed state, so we'll
		 * copy that option value from the grabbing state into the
		 * grabbed state.
		 */
		state->dts_options[DTRACEOPT_GRABANON] =
		    opt[DTRACEOPT_GRABANON];

		*cpu = dtrace_anon.dta_beganon;

		/*
		 * If the anonymous state is active (as it almost certainly
		 * is if the anonymous enabling ultimately matched anything),
		 * we don't allow any further option processing -- but we
		 * don't return failure.
		 */
		if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
			goto out;
	}

	if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET &&
	    opt[DTRACEOPT_AGGSIZE] != 0) {
		if (state->dts_aggregations == NULL) {
			/*
			 * We're not going to create an aggregation buffer
			 * because we don't have any ECBs that contain
			 * aggregations -- set this option to 0.
			 */
			opt[DTRACEOPT_AGGSIZE] = 0;
		} else {
			/*
			 * If we have an aggregation buffer, we must also have
			 * a buffer to use as scratch.
			 */
			if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET ||
			    opt[DTRACEOPT_BUFSIZE] < state->dts_needed) {
				opt[DTRACEOPT_BUFSIZE] = state->dts_needed;
			}
		}
	}

	if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET &&
	    opt[DTRACEOPT_SPECSIZE] != 0) {
		if (!state->dts_speculates) {
			/*
			 * We're not going to create speculation buffers
			 * because we don't have any ECBs that actually
			 * speculate -- set the speculation size to 0.
			 */
			opt[DTRACEOPT_SPECSIZE] = 0;
		}
	}

	/*
	 * The bare minimum size for any buffer that we're actually going to
	 * do anything to is sizeof (uint64_t).
	 */
	sz = sizeof (uint64_t);

	if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) ||
	    (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) ||
	    (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) {
		/*
		 * A buffer size has been explicitly set to 0 (or to a size
		 * that will be adjusted to 0) and we need the space -- we
		 * need to return failure.  We return ENOSPC to differentiate
		 * it from failing to allocate a buffer due to failure to meet
		 * the reserve (for which we return E2BIG).
		 */
		rval = ENOSPC;
		goto out;
	}

	if ((rval = dtrace_state_buffers(state)) != 0)
		goto err;

	if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET)
		sz = dtrace_dstate_defsize;

	/*
	 * Try to initialize dynamic variable state, halving the size after
	 * each failure unless the resize policy is "manual".
	 */
	do {
		rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz);

		if (rval == 0)
			break;

		if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
			goto err;
	} while (sz >>= 1);

	opt[DTRACEOPT_DYNVARSIZE] = sz;

	if (rval != 0)
		goto err;

	/*
	 * Clamp the status and clean rates to their permitted ranges.
	 */
	if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max)
		opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max;

	if (opt[DTRACEOPT_CLEANRATE] == 0)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;

	if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min;

	if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;

	state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
#ifdef illumos
	hdlr.cyh_func = (cyc_func_t)dtrace_state_clean;
	hdlr.cyh_arg = state;
	hdlr.cyh_level = CY_LOW_LEVEL;

	when.cyt_when = 0;
	when.cyt_interval = opt[DTRACEOPT_CLEANRATE];

	state->dts_cleaner = cyclic_add(&hdlr, &when);

	hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
	hdlr.cyh_arg = state;
	hdlr.cyh_level = CY_LOW_LEVEL;

	when.cyt_when = 0;
	when.cyt_interval = dtrace_deadman_interval;

	state->dts_deadman = cyclic_add(&hdlr, &when);
#else
	callout_reset(&state->dts_cleaner, hz * opt[DTRACEOPT_CLEANRATE] / NANOSEC,
	    dtrace_state_clean, state);
	callout_reset(&state->dts_deadman, hz * dtrace_deadman_interval / NANOSEC,
	    dtrace_state_deadman, state);
#endif

	state->dts_activity = DTRACE_ACTIVITY_WARMUP;

#ifdef illumos
	if (state->dts_getf != 0 &&
	    !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
		/*
		 * We don't have kernel privs but we have at least one call
		 * to getf(); we need to bump our zone's count, and (if
		 * this is the first enabling to have an unprivileged call
		 * to getf()) we need to hook into closef().
		 */
		state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf++;

		if (dtrace_getf++ == 0) {
			ASSERT(dtrace_closef == NULL);
			dtrace_closef = dtrace_getf_barrier;
		}
	}
#endif

	/*
	 * Now it's time to actually fire the BEGIN probe.  We need to disable
	 * interrupts here both to record the CPU on which we fired the BEGIN
	 * probe (the data from this CPU will be processed first at user
	 * level) and to manually activate the buffer for this CPU.
	 */
	cookie = dtrace_interrupt_disable();
	*cpu = curcpu;
	ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
	state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;

	dtrace_probe(dtrace_probeid_begin,
	    (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
	dtrace_interrupt_enable(cookie);
	/*
	 * We may have had an exit action from a BEGIN probe; only change our
	 * state to ACTIVE if we're still in WARMUP.
	 */
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
	    state->dts_activity == DTRACE_ACTIVITY_DRAINING);

	if (state->dts_activity == DTRACE_ACTIVITY_WARMUP)
		state->dts_activity = DTRACE_ACTIVITY_ACTIVE;

#ifdef __FreeBSD__
	/*
	 * We enable anonymous tracing before APs are started, so we must
	 * activate buffers using the current CPU.
	 */
	if (state == dtrace_anon.dta_state)
		for (int i = 0; i < NCPU; i++)
			dtrace_buffer_activate_cpu(state, i);
	else
		dtrace_xcall(DTRACE_CPUALL,
		    (dtrace_xcall_t)dtrace_buffer_activate, state);
#else
	/*
	 * Regardless of whether or not now we're in ACTIVE or DRAINING, we
	 * want each CPU to transition its principal buffer out of the
	 * INACTIVE state.  Doing this assures that no CPU will suddenly begin
	 * processing an ECB halfway down a probe's ECB chain; all CPUs will
	 * atomically transition from processing none of a state's ECBs to
	 * processing all of them.
	 */
	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_buffer_activate, state);
#endif
	goto out;

err:
	/*
	 * Failure after allocation began: release the principal and
	 * aggregation buffers, then every speculation buffer allocated so
	 * far (the first NULL dtsp_buffer marks where allocation stopped),
	 * then the speculation array itself.
	 */
	dtrace_buffer_free(state->dts_buffer);
	dtrace_buffer_free(state->dts_aggbuffer);

	if ((nspec = state->dts_nspeculations) == 0) {
		ASSERT(state->dts_speculations == NULL);
		goto out;
	}

	spec = state->dts_speculations;
	ASSERT(spec != NULL);

	for (i = 0; i < state->dts_nspeculations; i++) {
		if ((buf = spec[i].dtsp_buffer) == NULL)
			break;

		dtrace_buffer_free(buf);
		kmem_free(buf, bufsize);
	}

	kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
	state->dts_nspeculations = 0;
	state->dts_speculations = NULL;

out:
	mutex_exit(&dtrace_lock);
	mutex_exit(&cpu_lock);

	return (rval);
}
/*
 * Stop a running consumer state: walk its activity through DRAINING and
 * COOLDOWN, fire the END probe, and leave the state STOPPED.
 *
 *	state	the consumer state to stop
 *	cpu	out: the CPU on which the END probe was fired
 *
 * Returns EINVAL if the state is neither ACTIVE nor DRAINING, 0 otherwise.
 * The caller must hold dtrace_lock.
 */
static int
dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
{
	dtrace_icookie_t cookie;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE &&
	    state->dts_activity != DTRACE_ACTIVITY_DRAINING)
		return (EINVAL);

	/*
	 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
	 * to be sure that every CPU has seen it.  See below for the details
	 * on why this is done.
	 */
	state->dts_activity = DTRACE_ACTIVITY_DRAINING;
	dtrace_sync();

	/*
	 * By this point, it is impossible for any CPU to be still processing
	 * with DTRACE_ACTIVITY_ACTIVE.  We can thus set our activity to
	 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
	 * other CPU in dtrace_buffer_reserve().  This allows dtrace_probe()
	 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
	 * iff we're in the END probe.
	 */
	state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
	dtrace_sync();
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);

	/*
	 * Finally, we can release the reserve and call the END probe.  We
	 * disable interrupts across calling the END probe to allow us to
	 * return the CPU on which we actually called the END probe.  This
	 * allows user-land to be sure that this CPU's principal buffer is
	 * processed last.
	 */
	state->dts_reserve = 0;

	cookie = dtrace_interrupt_disable();
	*cpu = curcpu;
	dtrace_probe(dtrace_probeid_end,
	    (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
	dtrace_interrupt_enable(cookie);

	state->dts_activity = DTRACE_ACTIVITY_STOPPED;
	dtrace_sync();

#ifdef illumos
	if (state->dts_getf != 0 &&
	    !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
		/*
		 * We don't have kernel privs but we have at least one call
		 * to getf(); we need to lower our zone's count, and (if
		 * this is the last enabling to have an unprivileged call
		 * to getf()) we need to clear the closef() hook.
		 */
		ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0);
		ASSERT(dtrace_closef == dtrace_getf_barrier);
		ASSERT(dtrace_getf > 0);

		state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--;

		if (--dtrace_getf == 0)
			dtrace_closef = NULL;
	}
#endif

	return (0);
}
15238 */ 15239 ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0); 15240 ASSERT(dtrace_closef == dtrace_getf_barrier); 15241 ASSERT(dtrace_getf > 0); 15242 15243 state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--; 15244 15245 if (--dtrace_getf == 0) 15246 dtrace_closef = NULL; 15247 } 15248 #endif 15249 15250 return (0); 15251 } 15252 15253 static int 15254 dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option, 15255 dtrace_optval_t val) 15256 { 15257 ASSERT(MUTEX_HELD(&dtrace_lock)); 15258 15259 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) 15260 return (EBUSY); 15261 15262 if (option >= DTRACEOPT_MAX) 15263 return (EINVAL); 15264 15265 if (option != DTRACEOPT_CPU && val < 0) 15266 return (EINVAL); 15267 15268 switch (option) { 15269 case DTRACEOPT_DESTRUCTIVE: 15270 if (dtrace_destructive_disallow) 15271 return (EACCES); 15272 15273 state->dts_cred.dcr_destructive = 1; 15274 break; 15275 15276 case DTRACEOPT_BUFSIZE: 15277 case DTRACEOPT_DYNVARSIZE: 15278 case DTRACEOPT_AGGSIZE: 15279 case DTRACEOPT_SPECSIZE: 15280 case DTRACEOPT_STRSIZE: 15281 if (val < 0) 15282 return (EINVAL); 15283 15284 if (val >= LONG_MAX) { 15285 /* 15286 * If this is an otherwise negative value, set it to 15287 * the highest multiple of 128m less than LONG_MAX. 15288 * Technically, we're adjusting the size without 15289 * regard to the buffer resizing policy, but in fact, 15290 * this has no effect -- if we set the buffer size to 15291 * ~LONG_MAX and the buffer policy is ultimately set to 15292 * be "manual", the buffer allocation is guaranteed to 15293 * fail, if only because the allocation requires two 15294 * buffers. (We set the the size to the highest 15295 * multiple of 128m because it ensures that the size 15296 * will remain a multiple of a megabyte when 15297 * repeatedly halved -- all the way down to 15m.) 
/*
 * Tear down a consumer state: retract its retained enablings, quiesce it if
 * it is still hot, disable and destroy its ECBs, stop its timers, and free
 * its buffers, dynamic variable state, ECB and aggregation tables,
 * speculations, formats and aggregation-ID arena.
 *
 * The caller must hold both dtrace_lock and cpu_lock.
 */
static void
dtrace_state_destroy(dtrace_state_t *state)
{
	dtrace_ecb_t *ecb;
	dtrace_vstate_t *vstate = &state->dts_vstate;
#ifdef illumos
	minor_t minor = getminor(state->dts_dev);
#endif
	int i, bufsize = NCPU * sizeof (dtrace_buffer_t);
	dtrace_speculation_t *spec = state->dts_speculations;
	int nspec = state->dts_nspeculations;
	uint32_t match;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * First, retract any retained enablings for this state.
	 */
	dtrace_enabling_retract(state);
	ASSERT(state->dts_nretained == 0);

	if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE ||
	    state->dts_activity == DTRACE_ACTIVITY_DRAINING) {
		/*
		 * We have managed to come into dtrace_state_destroy() on a
		 * hot enabling -- almost certainly because of a disorderly
		 * shutdown of a consumer.  (That is, a consumer that is
		 * exiting without having called dtrace_stop().)  In this case,
		 * we're going to set our activity to be KILLED, and then
		 * issue a sync to be sure that everyone is out of probe
		 * context before we start blowing away ECBs.
		 */
		state->dts_activity = DTRACE_ACTIVITY_KILLED;
		dtrace_sync();
	}

	/*
	 * Release the credential hold we took in dtrace_state_create().
	 */
	if (state->dts_cred.dcr_cred != NULL)
		crfree(state->dts_cred.dcr_cred);

	/*
	 * Now we can safely disable and destroy any enabled probes.  Because
	 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
	 * (especially if they're all enabled), we take two passes through the
	 * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and
	 * in the second we disable whatever is left over.
	 */
	for (match = DTRACE_PRIV_KERNEL; ; match = 0) {
		for (i = 0; i < state->dts_necbs; i++) {
			if ((ecb = state->dts_ecbs[i]) == NULL)
				continue;

			if (match && ecb->dte_probe != NULL) {
				dtrace_probe_t *probe = ecb->dte_probe;
				dtrace_provider_t *prov = probe->dtpr_provider;

				if (!(prov->dtpv_priv.dtpp_flags & match))
					continue;
			}

			dtrace_ecb_disable(ecb);
			dtrace_ecb_destroy(ecb);
		}

		/* The second pass (match == 0) is the last one. */
		if (!match)
			break;
	}

	/*
	 * Before we free the buffers, perform one more sync to assure that
	 * every CPU is out of probe context.
	 */
	dtrace_sync();

	dtrace_buffer_free(state->dts_buffer);
	dtrace_buffer_free(state->dts_aggbuffer);

	for (i = 0; i < nspec; i++)
		dtrace_buffer_free(spec[i].dtsp_buffer);

#ifdef illumos
	if (state->dts_cleaner != CYCLIC_NONE)
		cyclic_remove(state->dts_cleaner);

	if (state->dts_deadman != CYCLIC_NONE)
		cyclic_remove(state->dts_deadman);
#else
	callout_stop(&state->dts_cleaner);
	callout_drain(&state->dts_cleaner);
	callout_stop(&state->dts_deadman);
	callout_drain(&state->dts_deadman);
#endif

	dtrace_dstate_fini(&vstate->dtvs_dynvars);
	dtrace_vstate_fini(vstate);
	if (state->dts_ecbs != NULL)
		kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *));

	if (state->dts_aggregations != NULL) {
#ifdef DEBUG
		for (i = 0; i < state->dts_naggregations; i++)
			ASSERT(state->dts_aggregations[i] == NULL);
#endif
		ASSERT(state->dts_naggregations > 0);
		kmem_free(state->dts_aggregations,
		    state->dts_naggregations * sizeof (dtrace_aggregation_t *));
	}

	/*
	 * The buffer contents were released above; now free the per-CPU
	 * buffer descriptor arrays themselves.
	 */
	kmem_free(state->dts_buffer, bufsize);
	kmem_free(state->dts_aggbuffer, bufsize);

	for (i = 0; i < nspec; i++)
		kmem_free(spec[i].dtsp_buffer, bufsize);

	if (spec != NULL)
		kmem_free(spec, nspec * sizeof (dtrace_speculation_t));

	dtrace_format_destroy(state);

	if (state->dts_aggid_arena != NULL) {
#ifdef illumos
		vmem_destroy(state->dts_aggid_arena);
#else
		delete_unrhdr(state->dts_aggid_arena);
#endif
		state->dts_aggid_arena = NULL;
	}
#ifdef illumos
	ddi_soft_state_free(dtrace_softstate, minor);
	vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
#endif
}
/*
 * DTrace Anonymous Enabling Functions
 */

/*
 * Hand the anonymous state over to the caller.  Returns NULL if there is no
 * anonymous state; otherwise destroys the anonymous enabling, clears both
 * dtrace_anon fields and returns the state.  The caller must hold
 * dtrace_lock.
 */
static dtrace_state_t *
dtrace_anon_grab(void)
{
	dtrace_state_t *state;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	if ((state = dtrace_anon.dta_state) == NULL) {
		ASSERT(dtrace_anon.dta_enabling == NULL);
		return (NULL);
	}

	ASSERT(dtrace_anon.dta_enabling != NULL);
	ASSERT(dtrace_retained != NULL);

	dtrace_enabling_destroy(dtrace_anon.dta_enabling);
	dtrace_anon.dta_enabling = NULL;
	dtrace_anon.dta_state = NULL;

	return (state);
}

/*
 * Build the anonymous enabling from the "dof-data-0", "dof-data-1", ...
 * properties.  Each property found is slurped into the (lazily created)
 * anonymous state; processing stops at the first missing property or the
 * first error.  If an enabling results, it is retained and dumped.
 *
 * The caller must hold both dtrace_lock and cpu_lock.
 */
static void
dtrace_anon_property(void)
{
	int i, rv;
	dtrace_state_t *state;
	dof_hdr_t *dof;
	char c[32];		/* enough for "dof-data-" + digits */

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(MUTEX_HELD(&cpu_lock));

	for (i = 0; ; i++) {
		(void) snprintf(c, sizeof (c), "dof-data-%d", i);

		/* Report DOF errors verbosely while processing properties. */
		dtrace_err_verbose = 1;

		if ((dof = dtrace_dof_property(c)) == NULL) {
			dtrace_err_verbose = 0;
			break;
		}

#ifdef illumos
		/*
		 * We want to create anonymous state, so we need to transition
		 * the kernel debugger to indicate that DTrace is active.  If
		 * this fails (e.g. because the debugger has modified text in
		 * some way), we won't continue with the processing.
		 */
		if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
			cmn_err(CE_NOTE, "kernel debugger active; anonymous "
			    "enabling ignored.");
			dtrace_dof_destroy(dof);
			break;
		}
#endif

		/*
		 * If we haven't allocated an anonymous state, we'll do so now.
		 */
		if ((state = dtrace_anon.dta_state) == NULL) {
			state = dtrace_state_create(NULL, NULL);
			dtrace_anon.dta_state = state;

			if (state == NULL) {
				/*
				 * This basically shouldn't happen:  the only
				 * failure mode from dtrace_state_create() is a
				 * failure of ddi_soft_state_zalloc() that
				 * itself should never happen.  Still, the
				 * interface allows for a failure mode, and
				 * we want to fail as gracefully as possible:
				 * we'll emit an error message and cease
				 * processing anonymous state in this case.
				 */
				cmn_err(CE_WARN, "failed to create "
				    "anonymous state");
				dtrace_dof_destroy(dof);
				break;
			}
		}

		rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(),
		    &dtrace_anon.dta_enabling, 0, 0, B_TRUE);

		if (rv == 0)
			rv = dtrace_dof_options(dof, state);

		dtrace_err_verbose = 0;
		dtrace_dof_destroy(dof);

		if (rv != 0) {
			/*
			 * This is malformed DOF; chuck any anonymous state
			 * that we created.
			 */
			ASSERT(dtrace_anon.dta_enabling == NULL);
			dtrace_state_destroy(state);
			dtrace_anon.dta_state = NULL;
			break;
		}

		ASSERT(dtrace_anon.dta_enabling != NULL);
	}

	if (dtrace_anon.dta_enabling != NULL) {
		int rval;

		/*
		 * dtrace_enabling_retain() can only fail because we are
		 * trying to retain more enablings than are allowed -- but
		 * we only have one anonymous enabling, and we are guaranteed
		 * to be allowed at least one retained enabling; we assert
		 * that dtrace_enabling_retain() returns success.
		 */
		rval = dtrace_enabling_retain(dtrace_anon.dta_enabling);
		ASSERT(rval == 0);

		dtrace_enabling_dump(dtrace_anon.dta_enabling);
	}
}
/*
 * DTrace Helper Functions
 */

/*
 * Append one record to the helper trace buffer, if one exists.
 *
 *	helper	the helper action being traced (stored as-is in the record)
 *	mstate	machine state; supplies the fault offset when present
 *	vstate	variable state; its locals are copied for the current CPU
 *	where	caller-supplied position marker stored in the record
 *
 * Does nothing when dtrace_helptrace_buffer is NULL.
 */
static void
dtrace_helper_trace(dtrace_helper_action_t *helper,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where)
{
	uint32_t size, next, nnext, i;
	dtrace_helptrace_t *ent, *buffer;
	uint16_t flags = cpu_core[curcpu].cpuc_dtrace_flags;

	if ((buffer = dtrace_helptrace_buffer) == NULL)
		return;

	ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals);

	/*
	 * What would a tracing framework be without its own tracing
	 * framework?  (Well, a hell of a lot simpler, for starters...)
	 */
	size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals *
	    sizeof (uint64_t) - sizeof (uint64_t);

	/*
	 * Iterate until we can allocate a slot in the trace buffer.
	 */
	do {
		next = dtrace_helptrace_next;

		if (next + size < dtrace_helptrace_bufsize) {
			nnext = next + size;
		} else {
			/* No room left: restart at the front of the buffer. */
			nnext = size;
		}
	} while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next);

	/*
	 * We have our slot; fill it in.
	 */
	if (nnext == size) {
		dtrace_helptrace_wrapped++;
		next = 0;
	}

	ent = (dtrace_helptrace_t *)((uintptr_t)buffer + next);
	ent->dtht_helper = helper;
	ent->dtht_where = where;
	ent->dtht_nlocals = vstate->dtvs_nlocals;

	ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ?
	    mstate->dtms_fltoffs : -1;
	ent->dtht_fault = DTRACE_FLAGS2FLT(flags);
	ent->dtht_illval = cpu_core[curcpu].cpuc_dtrace_illval;

	/*
	 * Snapshot this CPU's value of every local variable that exists.
	 */
	for (i = 0; i < vstate->dtvs_nlocals; i++) {
		dtrace_statvar_t *svar;

		if ((svar = vstate->dtvs_locals[i]) == NULL)
			continue;

		ASSERT(svar->dtsv_size >= NCPU * sizeof (uint64_t));
		ent->dtht_locals[i] =
		    ((uint64_t *)(uintptr_t)svar->dtsv_data)[curcpu];
	}
}
/*
 * Run the current process's helper actions of the given kind.
 *
 *	which	helper action index (0 <= which < DTRACE_NHELPER_ACTIONS)
 *	mstate	machine state; dtms_arg[0] and dtms_arg[1] are temporarily
 *		replaced by arg0/arg1 and restored before returning
 *	state	consumer state passed through to dtrace_dif_emulate()
 *	arg0	first argument made visible to the helper
 *	arg1	second argument made visible to the helper
 *
 * Returns the value produced by the last action evaluated, or 0 if the
 * process has no helpers, has no helper of this kind, or a fault was flagged
 * while evaluating a predicate or action.
 */
static uint64_t
dtrace_helper(int which, dtrace_mstate_t *mstate,
    dtrace_state_t *state, uint64_t arg0, uint64_t arg1)
{
	uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags;
	uint64_t sarg0 = mstate->dtms_arg[0];
	uint64_t sarg1 = mstate->dtms_arg[1];
	uint64_t rval = 0;
	dtrace_helpers_t *helpers = curproc->p_dtrace_helpers;
	dtrace_helper_action_t *helper;
	dtrace_vstate_t *vstate;
	dtrace_difo_t *pred;
	int i, trace = dtrace_helptrace_buffer != NULL;

	ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS);

	if (helpers == NULL)
		return (0);

	if ((helper = helpers->dthps_actions[which]) == NULL)
		return (0);

	vstate = &helpers->dthps_vstate;
	mstate->dtms_arg[0] = arg0;
	mstate->dtms_arg[1] = arg1;

	/*
	 * Now iterate over each helper.  If its predicate evaluates to 'true',
	 * we'll call the corresponding actions.  Note that the below calls
	 * to dtrace_dif_emulate() may set faults in machine state.  This is
	 * okay:  our caller (the outer dtrace_dif_emulate()) will simply plow
	 * the stored DIF offset with its own (which is the desired behavior).
	 * Also, note the calls to dtrace_dif_emulate() may allocate scratch
	 * from machine state; this is okay, too.
	 */
	for (; helper != NULL; helper = helper->dtha_next) {
		if ((pred = helper->dtha_predicate) != NULL) {
			if (trace)
				dtrace_helper_trace(helper, mstate, vstate, 0);

			if (!dtrace_dif_emulate(pred, mstate, vstate, state))
				goto next;

			if (*flags & CPU_DTRACE_FAULT)
				goto err;
		}

		for (i = 0; i < helper->dtha_nactions; i++) {
			if (trace)
				dtrace_helper_trace(helper,
				    mstate, vstate, i + 1);

			rval = dtrace_dif_emulate(helper->dtha_actions[i],
			    mstate, vstate, state);

			if (*flags & CPU_DTRACE_FAULT)
				goto err;
		}

next:
		if (trace)
			dtrace_helper_trace(helper, mstate, vstate,
			    DTRACE_HELPTRACE_NEXT);
	}

	if (trace)
		dtrace_helper_trace(helper, mstate, vstate,
		    DTRACE_HELPTRACE_DONE);

	/*
	 * Restore the arg0 and arg1 that we saved upon entry.
	 */
	mstate->dtms_arg[0] = sarg0;
	mstate->dtms_arg[1] = sarg1;

	return (rval);

err:
	if (trace)
		dtrace_helper_trace(helper, mstate, vstate,
		    DTRACE_HELPTRACE_ERR);

	/*
	 * Restore the arg0 and arg1 that we saved upon entry.
	 */
	mstate->dtms_arg[0] = sarg0;
	mstate->dtms_arg[1] = sarg1;

	return (0);
}
/*
 * Release a helper action: drop the holds on its predicate and on each of
 * its action DIFOs, then free the action array and the helper itself.
 */
static void
dtrace_helper_action_destroy(dtrace_helper_action_t *helper,
    dtrace_vstate_t *vstate)
{
	int i;

	if (helper->dtha_predicate != NULL)
		dtrace_difo_release(helper->dtha_predicate, vstate);

	for (i = 0; i < helper->dtha_nactions; i++) {
		ASSERT(helper->dtha_actions[i] != NULL);
		dtrace_difo_release(helper->dtha_actions[i], vstate);
	}

	kmem_free(helper->dtha_actions,
	    helper->dtha_nactions * sizeof (dtrace_difo_t *));
	kmem_free(helper, sizeof (dtrace_helper_action_t));
}

/*
 * Destroy every helper action and helper provider that carries the given
 * generation number.
 *
 *	help	helpers to operate on; NULL means the current process's
 *	gen	generation number to remove
 *
 * Returns EINVAL if there are no helpers or gen is beyond the current
 * generation, 0 otherwise.  Called with dtrace_lock held; the lock is
 * dropped and retaken around each helper provider removal.
 */
static int
dtrace_helper_destroygen(dtrace_helpers_t *help, int gen)
{
	proc_t *p = curproc;
	dtrace_vstate_t *vstate;
	int i;

	if (help == NULL)
		help = p->p_dtrace_helpers;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (help == NULL || gen > help->dthps_generation)
		return (EINVAL);

	vstate = &help->dthps_vstate;

	/*
	 * Unlink and destroy matching actions from each per-kind list,
	 * tracking the last surviving node so the list stays connected.
	 */
	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		dtrace_helper_action_t *last = NULL, *h, *next;

		for (h = help->dthps_actions[i]; h != NULL; h = next) {
			next = h->dtha_next;

			if (h->dtha_generation == gen) {
				if (last != NULL) {
					last->dtha_next = next;
				} else {
					help->dthps_actions[i] = next;
				}

				dtrace_helper_action_destroy(h, vstate);
			} else {
				last = h;
			}
		}
	}

	/*
	 * Iterate until we've cleared out all helper providers with the
	 * given generation number.
	 */
	for (;;) {
		dtrace_helper_provider_t *prov;

		/*
		 * Look for a helper provider with the right generation.  We
		 * have to start back at the beginning of the list each time
		 * because we drop dtrace_lock.  It's unlikely that we'll make
		 * more than two passes.
		 */
		for (i = 0; i < help->dthps_nprovs; i++) {
			prov = help->dthps_provs[i];

			if (prov->dthp_generation == gen)
				break;
		}

		/*
		 * If there were no matches, we're done.
		 */
		if (i == help->dthps_nprovs)
			break;

		/*
		 * Move the last helper provider into this slot.
		 */
		help->dthps_nprovs--;
		help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs];
		help->dthps_provs[help->dthps_nprovs] = NULL;

		mutex_exit(&dtrace_lock);

		/*
		 * If we have a meta provider, remove this helper provider.
		 */
		mutex_enter(&dtrace_meta_lock);
		if (dtrace_meta_pid != NULL) {
			ASSERT(dtrace_deferred_pid == NULL);
			dtrace_helper_provider_remove(&prov->dthp_prov,
			    p->p_pid);
		}
		mutex_exit(&dtrace_meta_lock);

		dtrace_helper_provider_destroy(prov);

		mutex_enter(&dtrace_lock);
	}

	return (0);
}
It's unlikely that we'll make 15794 * more than two passes. 15795 */ 15796 for (i = 0; i < help->dthps_nprovs; i++) { 15797 prov = help->dthps_provs[i]; 15798 15799 if (prov->dthp_generation == gen) 15800 break; 15801 } 15802 15803 /* 15804 * If there were no matches, we're done. 15805 */ 15806 if (i == help->dthps_nprovs) 15807 break; 15808 15809 /* 15810 * Move the last helper provider into this slot. 15811 */ 15812 help->dthps_nprovs--; 15813 help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs]; 15814 help->dthps_provs[help->dthps_nprovs] = NULL; 15815 15816 mutex_exit(&dtrace_lock); 15817 15818 /* 15819 * If we have a meta provider, remove this helper provider. 15820 */ 15821 mutex_enter(&dtrace_meta_lock); 15822 if (dtrace_meta_pid != NULL) { 15823 ASSERT(dtrace_deferred_pid == NULL); 15824 dtrace_helper_provider_remove(&prov->dthp_prov, 15825 p->p_pid); 15826 } 15827 mutex_exit(&dtrace_meta_lock); 15828 15829 dtrace_helper_provider_destroy(prov); 15830 15831 mutex_enter(&dtrace_lock); 15832 } 15833 15834 return (0); 15835 } 15836 15837 static int 15838 dtrace_helper_validate(dtrace_helper_action_t *helper) 15839 { 15840 int err = 0, i; 15841 dtrace_difo_t *dp; 15842 15843 if ((dp = helper->dtha_predicate) != NULL) 15844 err += dtrace_difo_validate_helper(dp); 15845 15846 for (i = 0; i < helper->dtha_nactions; i++) 15847 err += dtrace_difo_validate_helper(helper->dtha_actions[i]); 15848 15849 return (err == 0); 15850 } 15851 15852 static int 15853 dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep, 15854 dtrace_helpers_t *help) 15855 { 15856 dtrace_helper_action_t *helper, *last; 15857 dtrace_actdesc_t *act; 15858 dtrace_vstate_t *vstate; 15859 dtrace_predicate_t *pred; 15860 int count = 0, nactions = 0, i; 15861 15862 if (which < 0 || which >= DTRACE_NHELPER_ACTIONS) 15863 return (EINVAL); 15864 15865 last = help->dthps_actions[which]; 15866 vstate = &help->dthps_vstate; 15867 15868 for (count = 0; last != NULL; last = last->dtha_next) { 15869 
/*
 * Make a process's helper providers known to the meta provider, or queue
 * the helpers on the deferred list if that is not yet possible.
 *
 *	p	process owning the helpers
 *	help	the process's helpers
 *	dofhp	a single helper provider description to hand off, or NULL to
 *		hand off every provider in help
 *
 * Must be called without dtrace_lock held; takes dtrace_meta_lock and then
 * dtrace_lock, dropping dtrace_lock before calling into the meta provider.
 */
static void
dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help,
    dof_helper_t *dofhp)
{
	ASSERT(MUTEX_NOT_HELD(&dtrace_lock));

	mutex_enter(&dtrace_meta_lock);
	mutex_enter(&dtrace_lock);

	if (!dtrace_attached() || dtrace_meta_pid == NULL) {
		/*
		 * If the dtrace module is loaded but not attached, or if
		 * there isn't a meta provider registered to deal with
		 * these provider descriptions, we need to postpone creating
		 * the actual providers until later.
		 */

		/* Only link the helpers in if not already on the list. */
		if (help->dthps_next == NULL && help->dthps_prev == NULL &&
		    dtrace_deferred_pid != help) {
			help->dthps_deferred = 1;
			help->dthps_pid = p->p_pid;
			help->dthps_next = dtrace_deferred_pid;
			help->dthps_prev = NULL;
			if (dtrace_deferred_pid != NULL)
				dtrace_deferred_pid->dthps_prev = help;
			dtrace_deferred_pid = help;
		}

		mutex_exit(&dtrace_lock);

	} else if (dofhp != NULL) {
		/*
		 * If the dtrace module is loaded and we have a particular
		 * helper provider description, pass that off to the
		 * meta provider.
		 */

		mutex_exit(&dtrace_lock);

		dtrace_helper_provide(dofhp, p->p_pid);

	} else {
		/*
		 * Otherwise, just pass all the helper provider descriptions
		 * off to the meta provider.
		 */

		int i;
		mutex_exit(&dtrace_lock);

		for (i = 0; i < help->dthps_nprovs; i++) {
			dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
			    p->p_pid);
		}
	}

	mutex_exit(&dtrace_meta_lock);
}
loaded but not attached, or if 15940 * there aren't isn't a meta provider registered to deal with 15941 * these provider descriptions, we need to postpone creating 15942 * the actual providers until later. 15943 */ 15944 15945 if (help->dthps_next == NULL && help->dthps_prev == NULL && 15946 dtrace_deferred_pid != help) { 15947 help->dthps_deferred = 1; 15948 help->dthps_pid = p->p_pid; 15949 help->dthps_next = dtrace_deferred_pid; 15950 help->dthps_prev = NULL; 15951 if (dtrace_deferred_pid != NULL) 15952 dtrace_deferred_pid->dthps_prev = help; 15953 dtrace_deferred_pid = help; 15954 } 15955 15956 mutex_exit(&dtrace_lock); 15957 15958 } else if (dofhp != NULL) { 15959 /* 15960 * If the dtrace module is loaded and we have a particular 15961 * helper provider description, pass that off to the 15962 * meta provider. 15963 */ 15964 15965 mutex_exit(&dtrace_lock); 15966 15967 dtrace_helper_provide(dofhp, p->p_pid); 15968 15969 } else { 15970 /* 15971 * Otherwise, just pass all the helper provider descriptions 15972 * off to the meta provider. 15973 */ 15974 15975 int i; 15976 mutex_exit(&dtrace_lock); 15977 15978 for (i = 0; i < help->dthps_nprovs; i++) { 15979 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, 15980 p->p_pid); 15981 } 15982 } 15983 15984 mutex_exit(&dtrace_meta_lock); 15985 } 15986 15987 static int 15988 dtrace_helper_provider_add(dof_helper_t *dofhp, dtrace_helpers_t *help, int gen) 15989 { 15990 dtrace_helper_provider_t *hprov, **tmp_provs; 15991 uint_t tmp_maxprovs, i; 15992 15993 ASSERT(MUTEX_HELD(&dtrace_lock)); 15994 ASSERT(help != NULL); 15995 15996 /* 15997 * If we already have dtrace_helper_providers_max helper providers, 15998 * we're refuse to add a new one. 15999 */ 16000 if (help->dthps_nprovs >= dtrace_helper_providers_max) 16001 return (ENOSPC); 16002 16003 /* 16004 * Check to make sure this isn't a duplicate. 
16005 */ 16006 for (i = 0; i < help->dthps_nprovs; i++) { 16007 if (dofhp->dofhp_addr == 16008 help->dthps_provs[i]->dthp_prov.dofhp_addr) 16009 return (EALREADY); 16010 } 16011 16012 hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP); 16013 hprov->dthp_prov = *dofhp; 16014 hprov->dthp_ref = 1; 16015 hprov->dthp_generation = gen; 16016 16017 /* 16018 * Allocate a bigger table for helper providers if it's already full. 16019 */ 16020 if (help->dthps_maxprovs == help->dthps_nprovs) { 16021 tmp_maxprovs = help->dthps_maxprovs; 16022 tmp_provs = help->dthps_provs; 16023 16024 if (help->dthps_maxprovs == 0) 16025 help->dthps_maxprovs = 2; 16026 else 16027 help->dthps_maxprovs *= 2; 16028 if (help->dthps_maxprovs > dtrace_helper_providers_max) 16029 help->dthps_maxprovs = dtrace_helper_providers_max; 16030 16031 ASSERT(tmp_maxprovs < help->dthps_maxprovs); 16032 16033 help->dthps_provs = kmem_zalloc(help->dthps_maxprovs * 16034 sizeof (dtrace_helper_provider_t *), KM_SLEEP); 16035 16036 if (tmp_provs != NULL) { 16037 bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs * 16038 sizeof (dtrace_helper_provider_t *)); 16039 kmem_free(tmp_provs, tmp_maxprovs * 16040 sizeof (dtrace_helper_provider_t *)); 16041 } 16042 } 16043 16044 help->dthps_provs[help->dthps_nprovs] = hprov; 16045 help->dthps_nprovs++; 16046 16047 return (0); 16048 } 16049 16050 static void 16051 dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov) 16052 { 16053 mutex_enter(&dtrace_lock); 16054 16055 if (--hprov->dthp_ref == 0) { 16056 dof_hdr_t *dof; 16057 mutex_exit(&dtrace_lock); 16058 dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof; 16059 dtrace_dof_destroy(dof); 16060 kmem_free(hprov, sizeof (dtrace_helper_provider_t)); 16061 } else { 16062 mutex_exit(&dtrace_lock); 16063 } 16064 } 16065 16066 static int 16067 dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec) 16068 { 16069 uintptr_t daddr = (uintptr_t)dof; 16070 dof_sec_t *str_sec, *prb_sec, *arg_sec, 
*off_sec, *enoff_sec; 16071 dof_provider_t *provider; 16072 dof_probe_t *probe; 16073 uint8_t *arg; 16074 char *strtab, *typestr; 16075 dof_stridx_t typeidx; 16076 size_t typesz; 16077 uint_t nprobes, j, k; 16078 16079 ASSERT(sec->dofs_type == DOF_SECT_PROVIDER); 16080 16081 if (sec->dofs_offset & (sizeof (uint_t) - 1)) { 16082 dtrace_dof_error(dof, "misaligned section offset"); 16083 return (-1); 16084 } 16085 16086 /* 16087 * The section needs to be large enough to contain the DOF provider 16088 * structure appropriate for the given version. 16089 */ 16090 if (sec->dofs_size < 16091 ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ? 16092 offsetof(dof_provider_t, dofpv_prenoffs) : 16093 sizeof (dof_provider_t))) { 16094 dtrace_dof_error(dof, "provider section too small"); 16095 return (-1); 16096 } 16097 16098 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); 16099 str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab); 16100 prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes); 16101 arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs); 16102 off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs); 16103 16104 if (str_sec == NULL || prb_sec == NULL || 16105 arg_sec == NULL || off_sec == NULL) 16106 return (-1); 16107 16108 enoff_sec = NULL; 16109 16110 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && 16111 provider->dofpv_prenoffs != DOF_SECT_NONE && 16112 (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS, 16113 provider->dofpv_prenoffs)) == NULL) 16114 return (-1); 16115 16116 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); 16117 16118 if (provider->dofpv_name >= str_sec->dofs_size || 16119 strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) { 16120 dtrace_dof_error(dof, "invalid provider name"); 16121 return (-1); 16122 } 16123 16124 if (prb_sec->dofs_entsize == 0 || 16125 prb_sec->dofs_entsize > prb_sec->dofs_size) { 16126 
dtrace_dof_error(dof, "invalid entry size"); 16127 return (-1); 16128 } 16129 16130 if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) { 16131 dtrace_dof_error(dof, "misaligned entry size"); 16132 return (-1); 16133 } 16134 16135 if (off_sec->dofs_entsize != sizeof (uint32_t)) { 16136 dtrace_dof_error(dof, "invalid entry size"); 16137 return (-1); 16138 } 16139 16140 if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) { 16141 dtrace_dof_error(dof, "misaligned section offset"); 16142 return (-1); 16143 } 16144 16145 if (arg_sec->dofs_entsize != sizeof (uint8_t)) { 16146 dtrace_dof_error(dof, "invalid entry size"); 16147 return (-1); 16148 } 16149 16150 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset); 16151 16152 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize; 16153 16154 /* 16155 * Take a pass through the probes to check for errors. 16156 */ 16157 for (j = 0; j < nprobes; j++) { 16158 probe = (dof_probe_t *)(uintptr_t)(daddr + 16159 prb_sec->dofs_offset + j * prb_sec->dofs_entsize); 16160 16161 if (probe->dofpr_func >= str_sec->dofs_size) { 16162 dtrace_dof_error(dof, "invalid function name"); 16163 return (-1); 16164 } 16165 16166 if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) { 16167 dtrace_dof_error(dof, "function name too long"); 16168 /* 16169 * Keep going if the function name is too long. 16170 * Unlike provider and probe names, we cannot reasonably 16171 * impose restrictions on function names, since they're 16172 * a property of the code being instrumented. We will 16173 * skip this probe in dtrace_helper_provide_one(). 16174 */ 16175 } 16176 16177 if (probe->dofpr_name >= str_sec->dofs_size || 16178 strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) { 16179 dtrace_dof_error(dof, "invalid probe name"); 16180 return (-1); 16181 } 16182 16183 /* 16184 * The offset count must not wrap the index, and the offsets 16185 * must also not overflow the section's data. 
16186 */ 16187 if (probe->dofpr_offidx + probe->dofpr_noffs < 16188 probe->dofpr_offidx || 16189 (probe->dofpr_offidx + probe->dofpr_noffs) * 16190 off_sec->dofs_entsize > off_sec->dofs_size) { 16191 dtrace_dof_error(dof, "invalid probe offset"); 16192 return (-1); 16193 } 16194 16195 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) { 16196 /* 16197 * If there's no is-enabled offset section, make sure 16198 * there aren't any is-enabled offsets. Otherwise 16199 * perform the same checks as for probe offsets 16200 * (immediately above). 16201 */ 16202 if (enoff_sec == NULL) { 16203 if (probe->dofpr_enoffidx != 0 || 16204 probe->dofpr_nenoffs != 0) { 16205 dtrace_dof_error(dof, "is-enabled " 16206 "offsets with null section"); 16207 return (-1); 16208 } 16209 } else if (probe->dofpr_enoffidx + 16210 probe->dofpr_nenoffs < probe->dofpr_enoffidx || 16211 (probe->dofpr_enoffidx + probe->dofpr_nenoffs) * 16212 enoff_sec->dofs_entsize > enoff_sec->dofs_size) { 16213 dtrace_dof_error(dof, "invalid is-enabled " 16214 "offset"); 16215 return (-1); 16216 } 16217 16218 if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) { 16219 dtrace_dof_error(dof, "zero probe and " 16220 "is-enabled offsets"); 16221 return (-1); 16222 } 16223 } else if (probe->dofpr_noffs == 0) { 16224 dtrace_dof_error(dof, "zero probe offsets"); 16225 return (-1); 16226 } 16227 16228 if (probe->dofpr_argidx + probe->dofpr_xargc < 16229 probe->dofpr_argidx || 16230 (probe->dofpr_argidx + probe->dofpr_xargc) * 16231 arg_sec->dofs_entsize > arg_sec->dofs_size) { 16232 dtrace_dof_error(dof, "invalid args"); 16233 return (-1); 16234 } 16235 16236 typeidx = probe->dofpr_nargv; 16237 typestr = strtab + probe->dofpr_nargv; 16238 for (k = 0; k < probe->dofpr_nargc; k++) { 16239 if (typeidx >= str_sec->dofs_size) { 16240 dtrace_dof_error(dof, "bad " 16241 "native argument type"); 16242 return (-1); 16243 } 16244 16245 typesz = strlen(typestr) + 1; 16246 if (typesz > DTRACE_ARGTYPELEN) { 16247 
dtrace_dof_error(dof, "native " 16248 "argument type too long"); 16249 return (-1); 16250 } 16251 typeidx += typesz; 16252 typestr += typesz; 16253 } 16254 16255 typeidx = probe->dofpr_xargv; 16256 typestr = strtab + probe->dofpr_xargv; 16257 for (k = 0; k < probe->dofpr_xargc; k++) { 16258 if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) { 16259 dtrace_dof_error(dof, "bad " 16260 "native argument index"); 16261 return (-1); 16262 } 16263 16264 if (typeidx >= str_sec->dofs_size) { 16265 dtrace_dof_error(dof, "bad " 16266 "translated argument type"); 16267 return (-1); 16268 } 16269 16270 typesz = strlen(typestr) + 1; 16271 if (typesz > DTRACE_ARGTYPELEN) { 16272 dtrace_dof_error(dof, "translated argument " 16273 "type too long"); 16274 return (-1); 16275 } 16276 16277 typeidx += typesz; 16278 typestr += typesz; 16279 } 16280 } 16281 16282 return (0); 16283 } 16284 16285 static int 16286 dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp, struct proc *p) 16287 { 16288 dtrace_helpers_t *help; 16289 dtrace_vstate_t *vstate; 16290 dtrace_enabling_t *enab = NULL; 16291 int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1; 16292 uintptr_t daddr = (uintptr_t)dof; 16293 16294 ASSERT(MUTEX_HELD(&dtrace_lock)); 16295 16296 if ((help = p->p_dtrace_helpers) == NULL) 16297 help = dtrace_helpers_create(p); 16298 16299 vstate = &help->dthps_vstate; 16300 16301 if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab, dhp->dofhp_addr, 16302 dhp->dofhp_dof, B_FALSE)) != 0) { 16303 dtrace_dof_destroy(dof); 16304 return (rv); 16305 } 16306 16307 /* 16308 * Look for helper providers and validate their descriptions. 
16309 */ 16310 for (i = 0; i < dof->dofh_secnum; i++) { 16311 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + 16312 dof->dofh_secoff + i * dof->dofh_secsize); 16313 16314 if (sec->dofs_type != DOF_SECT_PROVIDER) 16315 continue; 16316 16317 if (dtrace_helper_provider_validate(dof, sec) != 0) { 16318 dtrace_enabling_destroy(enab); 16319 dtrace_dof_destroy(dof); 16320 return (-1); 16321 } 16322 16323 nprovs++; 16324 } 16325 16326 /* 16327 * Now we need to walk through the ECB descriptions in the enabling. 16328 */ 16329 for (i = 0; i < enab->dten_ndesc; i++) { 16330 dtrace_ecbdesc_t *ep = enab->dten_desc[i]; 16331 dtrace_probedesc_t *desc = &ep->dted_probe; 16332 16333 if (strcmp(desc->dtpd_provider, "dtrace") != 0) 16334 continue; 16335 16336 if (strcmp(desc->dtpd_mod, "helper") != 0) 16337 continue; 16338 16339 if (strcmp(desc->dtpd_func, "ustack") != 0) 16340 continue; 16341 16342 if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK, 16343 ep, help)) != 0) { 16344 /* 16345 * Adding this helper action failed -- we are now going 16346 * to rip out the entire generation and return failure. 16347 */ 16348 (void) dtrace_helper_destroygen(help, 16349 help->dthps_generation); 16350 dtrace_enabling_destroy(enab); 16351 dtrace_dof_destroy(dof); 16352 return (-1); 16353 } 16354 16355 nhelpers++; 16356 } 16357 16358 if (nhelpers < enab->dten_ndesc) 16359 dtrace_dof_error(dof, "unmatched helpers"); 16360 16361 gen = help->dthps_generation++; 16362 dtrace_enabling_destroy(enab); 16363 16364 if (nprovs > 0) { 16365 /* 16366 * Now that this is in-kernel, we change the sense of the 16367 * members: dofhp_dof denotes the in-kernel copy of the DOF 16368 * and dofhp_addr denotes the address at user-level. 
16369 */ 16370 dhp->dofhp_addr = dhp->dofhp_dof; 16371 dhp->dofhp_dof = (uint64_t)(uintptr_t)dof; 16372 16373 if (dtrace_helper_provider_add(dhp, help, gen) == 0) { 16374 mutex_exit(&dtrace_lock); 16375 dtrace_helper_provider_register(p, help, dhp); 16376 mutex_enter(&dtrace_lock); 16377 16378 destroy = 0; 16379 } 16380 } 16381 16382 if (destroy) 16383 dtrace_dof_destroy(dof); 16384 16385 return (gen); 16386 } 16387 /* Allocate a zeroed dtrace_helpers_t together with its DTRACE_NHELPER_ACTIONS-entry action table, install it as p->p_dtrace_helpers, increment the global dtrace_helpers count and return the new structure. The caller must hold dtrace_lock, and p must not already have helpers. */ 16388 static dtrace_helpers_t * 16389 dtrace_helpers_create(proc_t *p) 16390 { 16391 dtrace_helpers_t *help; 16392 16393 ASSERT(MUTEX_HELD(&dtrace_lock)); 16394 ASSERT(p->p_dtrace_helpers == NULL); 16395 16396 help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP); 16397 help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) * 16398 DTRACE_NHELPER_ACTIONS, KM_SLEEP); 16399 16400 p->p_dtrace_helpers = help; 16401 dtrace_helpers++; 16402 16403 return (help); 16404 } 16405 /* Tear down the helper state of a process: detach it from the process, dtrace_sync(), then destroy the helper actions and helper providers and free the structure. Takes dtrace_lock itself. */ 16406 #ifdef illumos 16407 static 16408 #endif 16409 void 16410 dtrace_helpers_destroy(proc_t *p) 16411 { 16412 dtrace_helpers_t *help; 16413 dtrace_vstate_t *vstate; 16414 #ifdef illumos 16415 proc_t *p = curproc; 16416 #endif 16417 int i; 16418 16419 mutex_enter(&dtrace_lock); 16420 16421 ASSERT(p->p_dtrace_helpers != NULL); 16422 ASSERT(dtrace_helpers > 0); 16423 16424 help = p->p_dtrace_helpers; 16425 vstate = &help->dthps_vstate; 16426 16427 /* 16428 * We're now going to lose the help from this process. 16429 */ 16430 p->p_dtrace_helpers = NULL; 16431 dtrace_sync(); 16432 16433 /* 16434 * Destroy the helper actions. 16435 */ 16436 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { 16437 dtrace_helper_action_t *h, *next; 16438 16439 for (h = help->dthps_actions[i]; h != NULL; h = next) { 16440 next = h->dtha_next; 16441 dtrace_helper_action_destroy(h, vstate); 16442 h = next; 16443 } 16444 } 16445 16446 mutex_exit(&dtrace_lock); 16447 16448 /* 16449 * Destroy the helper providers.
16450 */ 16451 if (help->dthps_maxprovs > 0) { 16452 mutex_enter(&dtrace_meta_lock); 16453 if (dtrace_meta_pid != NULL) { 16454 ASSERT(dtrace_deferred_pid == NULL); 16455 16456 for (i = 0; i < help->dthps_nprovs; i++) { 16457 dtrace_helper_provider_remove( 16458 &help->dthps_provs[i]->dthp_prov, p->p_pid); 16459 } 16460 } else { 16461 mutex_enter(&dtrace_lock); 16462 ASSERT(help->dthps_deferred == 0 || 16463 help->dthps_next != NULL || 16464 help->dthps_prev != NULL || 16465 help == dtrace_deferred_pid); 16466 16467 /* 16468 * Remove the helper from the deferred list. 16469 */ 16470 if (help->dthps_next != NULL) 16471 help->dthps_next->dthps_prev = help->dthps_prev; 16472 if (help->dthps_prev != NULL) 16473 help->dthps_prev->dthps_next = help->dthps_next; 16474 if (dtrace_deferred_pid == help) { 16475 dtrace_deferred_pid = help->dthps_next; 16476 ASSERT(help->dthps_prev == NULL); 16477 } 16478 16479 mutex_exit(&dtrace_lock); 16480 } 16481 16482 mutex_exit(&dtrace_meta_lock); 16483 16484 for (i = 0; i < help->dthps_nprovs; i++) { 16485 dtrace_helper_provider_destroy(help->dthps_provs[i]); 16486 } 16487 16488 kmem_free(help->dthps_provs, help->dthps_maxprovs * 16489 sizeof (dtrace_helper_provider_t *)); 16490 } 16491 16492 mutex_enter(&dtrace_lock); 16493 16494 dtrace_vstate_fini(&help->dthps_vstate); 16495 kmem_free(help->dthps_actions, 16496 sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS); 16497 kmem_free(help, sizeof (dtrace_helpers_t)); 16498 16499 --dtrace_helpers; 16500 mutex_exit(&dtrace_lock); 16501 } 16502 16503 #ifdef illumos 16504 static 16505 #endif 16506 void 16507 dtrace_helpers_duplicate(proc_t *from, proc_t *to) 16508 { 16509 dtrace_helpers_t *help, *newhelp; 16510 dtrace_helper_action_t *helper, *new, *last; 16511 dtrace_difo_t *dp; 16512 dtrace_vstate_t *vstate; 16513 int i, j, sz, hasprovs = 0; 16514 16515 mutex_enter(&dtrace_lock); 16516 ASSERT(from->p_dtrace_helpers != NULL); 16517 ASSERT(dtrace_helpers > 0); 16518 16519 help = 
from->p_dtrace_helpers; 16520 newhelp = dtrace_helpers_create(to); 16521 ASSERT(to->p_dtrace_helpers != NULL); 16522 16523 newhelp->dthps_generation = help->dthps_generation; 16524 vstate = &newhelp->dthps_vstate; 16525 16526 /* 16527 * Duplicate the helper actions. 16528 */ 16529 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { 16530 if ((helper = help->dthps_actions[i]) == NULL) 16531 continue; 16532 16533 for (last = NULL; helper != NULL; helper = helper->dtha_next) { 16534 new = kmem_zalloc(sizeof (dtrace_helper_action_t), 16535 KM_SLEEP); 16536 new->dtha_generation = helper->dtha_generation; 16537 16538 if ((dp = helper->dtha_predicate) != NULL) { 16539 dp = dtrace_difo_duplicate(dp, vstate); 16540 new->dtha_predicate = dp; 16541 } 16542 16543 new->dtha_nactions = helper->dtha_nactions; 16544 sz = sizeof (dtrace_difo_t *) * new->dtha_nactions; 16545 new->dtha_actions = kmem_alloc(sz, KM_SLEEP); 16546 16547 for (j = 0; j < new->dtha_nactions; j++) { 16548 dtrace_difo_t *dp = helper->dtha_actions[j]; 16549 16550 ASSERT(dp != NULL); 16551 dp = dtrace_difo_duplicate(dp, vstate); 16552 new->dtha_actions[j] = dp; 16553 } 16554 16555 if (last != NULL) { 16556 last->dtha_next = new; 16557 } else { 16558 newhelp->dthps_actions[i] = new; 16559 } 16560 16561 last = new; 16562 } 16563 } 16564 16565 /* 16566 * Duplicate the helper providers and register them with the 16567 * DTrace framework. 
16568 */ 16569 if (help->dthps_nprovs > 0) { 16570 newhelp->dthps_nprovs = help->dthps_nprovs; 16571 newhelp->dthps_maxprovs = help->dthps_nprovs; 16572 newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs * 16573 sizeof (dtrace_helper_provider_t *), KM_SLEEP); 16574 for (i = 0; i < newhelp->dthps_nprovs; i++) { /* Only the pointer table is copied: both processes share each provider object, so take an extra reference on it. */ 16575 newhelp->dthps_provs[i] = help->dthps_provs[i]; 16576 newhelp->dthps_provs[i]->dthp_ref++; 16577 } 16578 16579 hasprovs = 1; 16580 } 16581 16582 mutex_exit(&dtrace_lock); 16583 16584 if (hasprovs) 16585 dtrace_helper_provider_register(to, newhelp, NULL); 16586 } 16587 16588 /* 16589 * DTrace Hook Functions 16590 */ /* Module-load hook: with dtrace_provider_lock held, ask every registered provider to provide probes for the module ctl; then, if there are retained enablings, dispatch dtrace_enabling_matchall() to the DTrace task queue. */ 16591 static void 16592 dtrace_module_loaded(modctl_t *ctl) 16593 { 16594 dtrace_provider_t *prv; 16595 16596 mutex_enter(&dtrace_provider_lock); 16597 #ifdef illumos 16598 mutex_enter(&mod_lock); 16599 #endif 16600 16601 #ifdef illumos 16602 ASSERT(ctl->mod_busy); 16603 #endif 16604 16605 /* 16606 * We're going to call each provider's per-module provide operation 16607 * specifying only this module. 16608 */ 16609 for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next) 16610 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); 16611 16612 #ifdef illumos 16613 mutex_exit(&mod_lock); 16614 #endif 16615 mutex_exit(&dtrace_provider_lock); 16616 16617 /* 16618 * If we have any retained enablings, we need to match against them. 16619 * Enabling probes requires that cpu_lock be held, and we cannot hold 16620 * cpu_lock here -- it is legal for cpu_lock to be held when loading a 16621 * module. (In particular, this happens when loading scheduling 16622 * classes.) So if we have any retained enablings, we need to dispatch 16623 * our task queue to do the match for us.
16624 */ 16625 mutex_enter(&dtrace_lock); 16626 16627 if (dtrace_retained == NULL) { 16628 mutex_exit(&dtrace_lock); 16629 return; 16630 } 16631 16632 (void) taskq_dispatch(dtrace_taskq, 16633 (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP); 16634 16635 mutex_exit(&dtrace_lock); 16636 16637 /* 16638 * And now, for a little heuristic sleaze: in general, we want to 16639 * match modules as soon as they load. However, we cannot guarantee 16640 * this, because it would lead us to the lock ordering violation 16641 * outlined above. The common case, of course, is that cpu_lock is 16642 * _not_ held -- so we delay here for a clock tick, hoping that that's 16643 * long enough for the task queue to do its work. If it's not, it's 16644 * not a serious problem -- it just means that the module that we 16645 * just loaded may not be immediately instrumentable. 16646 */ 16647 delay(1); 16648 } 16649 16650 static void 16651 #ifdef illumos 16652 dtrace_module_unloaded(modctl_t *ctl) 16653 #else 16654 dtrace_module_unloaded(modctl_t *ctl, int *error) 16655 #endif 16656 { 16657 dtrace_probe_t template, *probe, *first, *next; 16658 dtrace_provider_t *prov; 16659 #ifndef illumos 16660 char modname[DTRACE_MODNAMELEN]; 16661 size_t len; 16662 #endif 16663 16664 #ifdef illumos 16665 template.dtpr_mod = ctl->mod_modname; 16666 #else 16667 /* Handle the fact that ctl->filename may end in ".ko". */ 16668 strlcpy(modname, ctl->filename, sizeof(modname)); 16669 len = strlen(ctl->filename); 16670 if (len > 3 && strcmp(modname + len - 3, ".ko") == 0) 16671 modname[len - 3] = '\0'; 16672 template.dtpr_mod = modname; 16673 #endif 16674 16675 mutex_enter(&dtrace_provider_lock); 16676 #ifdef illumos 16677 mutex_enter(&mod_lock); 16678 #endif 16679 mutex_enter(&dtrace_lock); 16680 16681 #ifndef illumos 16682 if (ctl->nenabled > 0) { 16683 /* Don't allow unloads if a probe is enabled. 
*/ 16684 mutex_exit(&dtrace_provider_lock); 16685 mutex_exit(&dtrace_lock); 16686 *error = -1; 16687 printf( 16688 "kldunload: attempt to unload module that has DTrace probes enabled\n"); 16689 return; 16690 } 16691 #endif 16692 16693 if (dtrace_bymod == NULL) { 16694 /* 16695 * The DTrace module is loaded (obviously) but not attached; 16696 * we don't have any work to do. 16697 */ 16698 mutex_exit(&dtrace_provider_lock); 16699 #ifdef illumos 16700 mutex_exit(&mod_lock); 16701 #endif 16702 mutex_exit(&dtrace_lock); 16703 return; 16704 } 16705 16706 for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template); 16707 probe != NULL; probe = probe->dtpr_nextmod) { 16708 if (probe->dtpr_ecb != NULL) { 16709 mutex_exit(&dtrace_provider_lock); 16710 #ifdef illumos 16711 mutex_exit(&mod_lock); 16712 #endif 16713 mutex_exit(&dtrace_lock); 16714 16715 /* 16716 * This shouldn't _actually_ be possible -- we're 16717 * unloading a module that has an enabled probe in it. 16718 * (It's normally up to the provider to make sure that 16719 * this can't happen.) However, because dtps_enable() 16720 * doesn't have a failure mode, there can be an 16721 * enable/unload race. Upshot: we don't want to 16722 * assert, but we're not going to disable the 16723 * probe, either. 
16724 */ 16725 if (dtrace_err_verbose) { 16726 #ifdef illumos 16727 cmn_err(CE_WARN, "unloaded module '%s' had " 16728 "enabled probes", ctl->mod_modname); 16729 #else 16730 cmn_err(CE_WARN, "unloaded module '%s' had " 16731 "enabled probes", modname); 16732 #endif 16733 } 16734 16735 return; 16736 } 16737 } 16738 16739 probe = first; 16740 16741 for (first = NULL; probe != NULL; probe = next) { 16742 ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe); 16743 16744 dtrace_probes[probe->dtpr_id - 1] = NULL; 16745 16746 next = probe->dtpr_nextmod; 16747 dtrace_hash_remove(dtrace_bymod, probe); 16748 dtrace_hash_remove(dtrace_byfunc, probe); 16749 dtrace_hash_remove(dtrace_byname, probe); 16750 16751 if (first == NULL) { 16752 first = probe; 16753 probe->dtpr_nextmod = NULL; 16754 } else { 16755 probe->dtpr_nextmod = first; 16756 first = probe; 16757 } 16758 } 16759 16760 /* 16761 * We've removed all of the module's probes from the hash chains and 16762 * from the probe array. Now issue a dtrace_sync() to be sure that 16763 * everyone has cleared out from any probe array processing. 
16764 */ 16765 dtrace_sync(); 16766 16767 for (probe = first; probe != NULL; probe = first) { 16768 first = probe->dtpr_nextmod; 16769 prov = probe->dtpr_provider; 16770 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id, 16771 probe->dtpr_arg); 16772 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); 16773 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); 16774 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); 16775 #ifdef illumos 16776 vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1); 16777 #else 16778 free_unr(dtrace_arena, probe->dtpr_id); 16779 #endif 16780 kmem_free(probe, sizeof (dtrace_probe_t)); 16781 } 16782 16783 mutex_exit(&dtrace_lock); 16784 #ifdef illumos 16785 mutex_exit(&mod_lock); 16786 #endif 16787 mutex_exit(&dtrace_provider_lock); 16788 } 16789 16790 #ifndef illumos 16791 static void 16792 dtrace_kld_load(void *arg __unused, linker_file_t lf) 16793 { 16794 16795 dtrace_module_loaded(lf); 16796 } 16797 16798 static void 16799 dtrace_kld_unload_try(void *arg __unused, linker_file_t lf, int *error) 16800 { 16801 16802 if (*error != 0) 16803 /* We already have an error, so don't do anything. */ 16804 return; 16805 dtrace_module_unloaded(lf, error); 16806 } 16807 #endif 16808 16809 #ifdef illumos 16810 static void 16811 dtrace_suspend(void) 16812 { 16813 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend)); 16814 } 16815 16816 static void 16817 dtrace_resume(void) 16818 { 16819 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume)); 16820 } 16821 #endif 16822 16823 static int 16824 dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu) 16825 { 16826 ASSERT(MUTEX_HELD(&cpu_lock)); 16827 mutex_enter(&dtrace_lock); 16828 16829 switch (what) { 16830 case CPU_CONFIG: { 16831 dtrace_state_t *state; 16832 dtrace_optval_t *opt, rs, c; 16833 16834 /* 16835 * For now, we only allocate a new buffer for anonymous state. 
16836 */ 16837 if ((state = dtrace_anon.dta_state) == NULL) 16838 break; 16839 16840 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) 16841 break; 16842 16843 opt = state->dts_options; 16844 c = opt[DTRACEOPT_CPU]; 16845 16846 if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu) 16847 break; 16848 16849 /* 16850 * Regardless of what the actual policy is, we're going to 16851 * temporarily set our resize policy to be manual. We're 16852 * also going to temporarily set our CPU option to denote 16853 * the newly configured CPU. 16854 */ 16855 rs = opt[DTRACEOPT_BUFRESIZE]; 16856 opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL; 16857 opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu; 16858 16859 (void) dtrace_state_buffers(state); 16860 16861 opt[DTRACEOPT_BUFRESIZE] = rs; 16862 opt[DTRACEOPT_CPU] = c; 16863 16864 break; 16865 } 16866 16867 case CPU_UNCONFIG: 16868 /* 16869 * We don't free the buffer in the CPU_UNCONFIG case. (The 16870 * buffer will be freed when the consumer exits.) 
16871 */ 16872 break; 16873 16874 default: 16875 break; 16876 } 16877 16878 mutex_exit(&dtrace_lock); 16879 return (0); 16880 } 16881 16882 #ifdef illumos 16883 static void 16884 dtrace_cpu_setup_initial(processorid_t cpu) 16885 { 16886 (void) dtrace_cpu_setup(CPU_CONFIG, cpu); 16887 } 16888 #endif 16889 16890 static void 16891 dtrace_toxrange_add(uintptr_t base, uintptr_t limit) 16892 { 16893 if (dtrace_toxranges >= dtrace_toxranges_max) { 16894 int osize, nsize; 16895 dtrace_toxrange_t *range; 16896 16897 osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t); 16898 16899 if (osize == 0) { 16900 ASSERT(dtrace_toxrange == NULL); 16901 ASSERT(dtrace_toxranges_max == 0); 16902 dtrace_toxranges_max = 1; 16903 } else { 16904 dtrace_toxranges_max <<= 1; 16905 } 16906 16907 nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t); 16908 range = kmem_zalloc(nsize, KM_SLEEP); 16909 16910 if (dtrace_toxrange != NULL) { 16911 ASSERT(osize != 0); 16912 bcopy(dtrace_toxrange, range, osize); 16913 kmem_free(dtrace_toxrange, osize); 16914 } 16915 16916 dtrace_toxrange = range; 16917 } 16918 16919 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == 0); 16920 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == 0); 16921 16922 dtrace_toxrange[dtrace_toxranges].dtt_base = base; 16923 dtrace_toxrange[dtrace_toxranges].dtt_limit = limit; 16924 dtrace_toxranges++; 16925 } 16926 16927 static void 16928 dtrace_getf_barrier() 16929 { 16930 #ifdef illumos 16931 /* 16932 * When we have unprivileged (that is, non-DTRACE_CRV_KERNEL) enablings 16933 * that contain calls to getf(), this routine will be called on every 16934 * closef() before either the underlying vnode is released or the 16935 * file_t itself is freed. By the time we are here, it is essential 16936 * that the file_t can no longer be accessed from a call to getf() 16937 * in probe context -- that assures that a dtrace_sync() can be used 16938 * to clear out any enablings referring to the old structures. 
16939 */ 16940 if (curthread->t_procp->p_zone->zone_dtrace_getf != 0 || 16941 kcred->cr_zone->zone_dtrace_getf != 0) 16942 dtrace_sync(); 16943 #endif 16944 } 16945 16946 /* 16947 * DTrace Driver Cookbook Functions 16948 */ 16949 #ifdef illumos 16950 /*ARGSUSED*/ 16951 static int 16952 dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 16953 { 16954 dtrace_provider_id_t id; 16955 dtrace_state_t *state = NULL; 16956 dtrace_enabling_t *enab; 16957 16958 mutex_enter(&cpu_lock); 16959 mutex_enter(&dtrace_provider_lock); 16960 mutex_enter(&dtrace_lock); 16961 16962 if (ddi_soft_state_init(&dtrace_softstate, 16963 sizeof (dtrace_state_t), 0) != 0) { 16964 cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state"); 16965 mutex_exit(&cpu_lock); 16966 mutex_exit(&dtrace_provider_lock); 16967 mutex_exit(&dtrace_lock); 16968 return (DDI_FAILURE); 16969 } 16970 16971 if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR, 16972 DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE || 16973 ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR, 16974 DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) { 16975 cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes"); 16976 ddi_remove_minor_node(devi, NULL); 16977 ddi_soft_state_fini(&dtrace_softstate); 16978 mutex_exit(&cpu_lock); 16979 mutex_exit(&dtrace_provider_lock); 16980 mutex_exit(&dtrace_lock); 16981 return (DDI_FAILURE); 16982 } 16983 16984 ddi_report_dev(devi); 16985 dtrace_devi = devi; 16986 16987 dtrace_modload = dtrace_module_loaded; 16988 dtrace_modunload = dtrace_module_unloaded; 16989 dtrace_cpu_init = dtrace_cpu_setup_initial; 16990 dtrace_helpers_cleanup = dtrace_helpers_destroy; 16991 dtrace_helpers_fork = dtrace_helpers_duplicate; 16992 dtrace_cpustart_init = dtrace_suspend; 16993 dtrace_cpustart_fini = dtrace_resume; 16994 dtrace_debugger_init = dtrace_suspend; 16995 dtrace_debugger_fini = dtrace_resume; 16996 16997 register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); 16998 16999 
ASSERT(MUTEX_HELD(&cpu_lock)); 17000 17001 dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1, 17002 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); 17003 dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE, 17004 UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0, 17005 VM_SLEEP | VMC_IDENTIFIER); 17006 dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri, 17007 1, INT_MAX, 0); 17008 17009 dtrace_state_cache = kmem_cache_create("dtrace_state_cache", 17010 sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN, 17011 NULL, NULL, NULL, NULL, NULL, 0); 17012 17013 ASSERT(MUTEX_HELD(&cpu_lock)); 17014 dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod), 17015 offsetof(dtrace_probe_t, dtpr_nextmod), 17016 offsetof(dtrace_probe_t, dtpr_prevmod)); 17017 17018 dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func), 17019 offsetof(dtrace_probe_t, dtpr_nextfunc), 17020 offsetof(dtrace_probe_t, dtpr_prevfunc)); 17021 17022 dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name), 17023 offsetof(dtrace_probe_t, dtpr_nextname), 17024 offsetof(dtrace_probe_t, dtpr_prevname)); 17025 17026 if (dtrace_retain_max < 1) { 17027 cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; " 17028 "setting to 1", dtrace_retain_max); 17029 dtrace_retain_max = 1; 17030 } 17031 17032 /* 17033 * Now discover our toxic ranges. 17034 */ 17035 dtrace_toxic_ranges(dtrace_toxrange_add); 17036 17037 /* 17038 * Before we register ourselves as a provider to our own framework, 17039 * we would like to assert that dtrace_provider is NULL -- but that's 17040 * not true if we were loaded as a dependency of a DTrace provider. 17041 * Once we've registered, we can assert that dtrace_provider is our 17042 * pseudo provider. 
17043 */ 17044 (void) dtrace_register("dtrace", &dtrace_provider_attr, 17045 DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id); 17046 17047 ASSERT(dtrace_provider != NULL); 17048 ASSERT((dtrace_provider_id_t)dtrace_provider == id); 17049 17050 dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) 17051 dtrace_provider, NULL, NULL, "BEGIN", 0, NULL); 17052 dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) 17053 dtrace_provider, NULL, NULL, "END", 0, NULL); 17054 dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) 17055 dtrace_provider, NULL, NULL, "ERROR", 1, NULL); 17056 17057 dtrace_anon_property(); 17058 mutex_exit(&cpu_lock); 17059 17060 /* 17061 * If there are already providers, we must ask them to provide their 17062 * probes, and then match any anonymous enabling against them. Note 17063 * that there should be no other retained enablings at this time: 17064 * the only retained enablings at this time should be the anonymous 17065 * enabling. 17066 */ 17067 if (dtrace_anon.dta_enabling != NULL) { 17068 ASSERT(dtrace_retained == dtrace_anon.dta_enabling); 17069 17070 dtrace_enabling_provide(NULL); 17071 state = dtrace_anon.dta_state; 17072 17073 /* 17074 * We couldn't hold cpu_lock across the above call to 17075 * dtrace_enabling_provide(), but we must hold it to actually 17076 * enable the probes. We have to drop all of our locks, pick 17077 * up cpu_lock, and regain our locks before matching the 17078 * retained anonymous enabling. 
17079 */ 17080 mutex_exit(&dtrace_lock); 17081 mutex_exit(&dtrace_provider_lock); 17082 17083 mutex_enter(&cpu_lock); 17084 mutex_enter(&dtrace_provider_lock); 17085 mutex_enter(&dtrace_lock); 17086 17087 if ((enab = dtrace_anon.dta_enabling) != NULL) 17088 (void) dtrace_enabling_match(enab, NULL); 17089 17090 mutex_exit(&cpu_lock); 17091 } 17092 17093 mutex_exit(&dtrace_lock); 17094 mutex_exit(&dtrace_provider_lock); 17095 17096 if (state != NULL) { 17097 /* 17098 * If we created any anonymous state, set it going now. 17099 */ 17100 (void) dtrace_state_go(state, &dtrace_anon.dta_beganon); 17101 } 17102 17103 return (DDI_SUCCESS); 17104 } 17105 #endif /* illumos */ 17106 17107 #ifndef illumos 17108 static void dtrace_dtr(void *); 17109 #endif 17110 17111 /*ARGSUSED*/ 17112 static int 17113 #ifdef illumos 17114 dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) 17115 #else 17116 dtrace_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 17117 #endif 17118 { 17119 dtrace_state_t *state; 17120 uint32_t priv; 17121 uid_t uid; 17122 zoneid_t zoneid; 17123 17124 #ifdef illumos 17125 if (getminor(*devp) == DTRACEMNRN_HELPER) 17126 return (0); 17127 17128 /* 17129 * If this wasn't an open with the "helper" minor, then it must be 17130 * the "dtrace" minor. 17131 */ 17132 if (getminor(*devp) == DTRACEMNRN_DTRACE) 17133 return (ENXIO); 17134 #else 17135 cred_t *cred_p = NULL; 17136 cred_p = dev->si_cred; 17137 17138 /* 17139 * If no DTRACE_PRIV_* bits are set in the credential, then the 17140 * caller lacks sufficient permission to do anything with DTrace. 17141 */ 17142 dtrace_cred2priv(cred_p, &priv, &uid, &zoneid); 17143 if (priv == DTRACE_PRIV_NONE) { 17144 #endif 17145 17146 return (EACCES); 17147 } 17148 17149 /* 17150 * Ask all providers to provide all their probes. 
17151 */ 17152 mutex_enter(&dtrace_provider_lock); 17153 dtrace_probe_provide(NULL, NULL); 17154 mutex_exit(&dtrace_provider_lock); 17155 17156 mutex_enter(&cpu_lock); 17157 mutex_enter(&dtrace_lock); 17158 dtrace_opens++; 17159 dtrace_membar_producer(); 17160 17161 #ifdef illumos 17162 /* 17163 * If the kernel debugger is active (that is, if the kernel debugger 17164 * modified text in some way), we won't allow the open. 17165 */ 17166 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) { 17167 dtrace_opens--; 17168 mutex_exit(&cpu_lock); 17169 mutex_exit(&dtrace_lock); 17170 return (EBUSY); 17171 } 17172 17173 if (dtrace_helptrace_enable && dtrace_helptrace_buffer == NULL) { 17174 /* 17175 * If DTrace helper tracing is enabled, we need to allocate the 17176 * trace buffer and initialize the values. 17177 */ 17178 dtrace_helptrace_buffer = 17179 kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP); 17180 dtrace_helptrace_next = 0; 17181 dtrace_helptrace_wrapped = 0; 17182 dtrace_helptrace_enable = 0; 17183 } 17184 17185 state = dtrace_state_create(devp, cred_p); 17186 #else 17187 state = dtrace_state_create(dev, NULL); 17188 devfs_set_cdevpriv(state, dtrace_dtr); 17189 #endif 17190 17191 mutex_exit(&cpu_lock); 17192 17193 if (state == NULL) { 17194 #ifdef illumos 17195 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) 17196 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); 17197 #else 17198 --dtrace_opens; 17199 #endif 17200 mutex_exit(&dtrace_lock); 17201 return (EAGAIN); 17202 } 17203 17204 mutex_exit(&dtrace_lock); 17205 17206 return (0); 17207 } 17208 17209 /*ARGSUSED*/ 17210 #ifdef illumos 17211 static int 17212 dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p) 17213 #else 17214 static void 17215 dtrace_dtr(void *data) 17216 #endif 17217 { 17218 #ifdef illumos 17219 minor_t minor = getminor(dev); 17220 dtrace_state_t *state; 17221 #endif 17222 dtrace_helptrace_t *buf = NULL; 17223 17224 #ifdef illumos 17225 if (minor == DTRACEMNRN_HELPER) 
17226 return (0); 17227 17228 state = ddi_get_soft_state(dtrace_softstate, minor); 17229 #else 17230 dtrace_state_t *state = data; 17231 #endif 17232 17233 mutex_enter(&cpu_lock); 17234 mutex_enter(&dtrace_lock); 17235 17236 #ifdef illumos 17237 if (state->dts_anon) 17238 #else 17239 if (state != NULL && state->dts_anon) 17240 #endif 17241 { 17242 /* 17243 * There is anonymous state. Destroy that first. 17244 */ 17245 ASSERT(dtrace_anon.dta_state == NULL); 17246 dtrace_state_destroy(state->dts_anon); 17247 } 17248 17249 if (dtrace_helptrace_disable) { 17250 /* 17251 * If we have been told to disable helper tracing, set the 17252 * buffer to NULL before calling into dtrace_state_destroy(); 17253 * we take advantage of its dtrace_sync() to know that no 17254 * CPU is in probe context with enabled helper tracing 17255 * after it returns. 17256 */ 17257 buf = dtrace_helptrace_buffer; 17258 dtrace_helptrace_buffer = NULL; 17259 } 17260 17261 #ifdef illumos 17262 dtrace_state_destroy(state); 17263 #else 17264 if (state != NULL) { 17265 dtrace_state_destroy(state); 17266 kmem_free(state, 0); 17267 } 17268 #endif 17269 ASSERT(dtrace_opens > 0); 17270 17271 #ifdef illumos 17272 /* 17273 * Only relinquish control of the kernel debugger interface when there 17274 * are no consumers and no anonymous enablings. 
17275 */ 17276 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) 17277 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); 17278 #else 17279 --dtrace_opens; 17280 #endif 17281 17282 if (buf != NULL) { 17283 kmem_free(buf, dtrace_helptrace_bufsize); 17284 dtrace_helptrace_disable = 0; 17285 } 17286 17287 mutex_exit(&dtrace_lock); 17288 mutex_exit(&cpu_lock); 17289 17290 #ifdef illumos 17291 return (0); 17292 #endif 17293 } 17294 17295 #ifdef illumos 17296 /*ARGSUSED*/ 17297 static int 17298 dtrace_ioctl_helper(int cmd, intptr_t arg, int *rv) 17299 { 17300 int rval; 17301 dof_helper_t help, *dhp = NULL; 17302 17303 switch (cmd) { 17304 case DTRACEHIOC_ADDDOF: 17305 if (copyin((void *)arg, &help, sizeof (help)) != 0) { 17306 dtrace_dof_error(NULL, "failed to copyin DOF helper"); 17307 return (EFAULT); 17308 } 17309 17310 dhp = &help; 17311 arg = (intptr_t)help.dofhp_dof; 17312 /*FALLTHROUGH*/ 17313 17314 case DTRACEHIOC_ADD: { 17315 dof_hdr_t *dof = dtrace_dof_copyin(arg, &rval); 17316 17317 if (dof == NULL) 17318 return (rval); 17319 17320 mutex_enter(&dtrace_lock); 17321 17322 /* 17323 * dtrace_helper_slurp() takes responsibility for the dof -- 17324 * it may free it now or it may save it and free it later. 
17325 */ 17326 if ((rval = dtrace_helper_slurp(dof, dhp)) != -1) { 17327 *rv = rval; 17328 rval = 0; 17329 } else { 17330 rval = EINVAL; 17331 } 17332 17333 mutex_exit(&dtrace_lock); 17334 return (rval); 17335 } 17336 17337 case DTRACEHIOC_REMOVE: { 17338 mutex_enter(&dtrace_lock); 17339 rval = dtrace_helper_destroygen(NULL, arg); 17340 mutex_exit(&dtrace_lock); 17341 17342 return (rval); 17343 } 17344 17345 default: 17346 break; 17347 } 17348 17349 return (ENOTTY); 17350 } 17351 17352 /*ARGSUSED*/ 17353 static int 17354 dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) 17355 { 17356 minor_t minor = getminor(dev); 17357 dtrace_state_t *state; 17358 int rval; 17359 17360 if (minor == DTRACEMNRN_HELPER) 17361 return (dtrace_ioctl_helper(cmd, arg, rv)); 17362 17363 state = ddi_get_soft_state(dtrace_softstate, minor); 17364 17365 if (state->dts_anon) { 17366 ASSERT(dtrace_anon.dta_state == NULL); 17367 state = state->dts_anon; 17368 } 17369 17370 switch (cmd) { 17371 case DTRACEIOC_PROVIDER: { 17372 dtrace_providerdesc_t pvd; 17373 dtrace_provider_t *pvp; 17374 17375 if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0) 17376 return (EFAULT); 17377 17378 pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0'; 17379 mutex_enter(&dtrace_provider_lock); 17380 17381 for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) { 17382 if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0) 17383 break; 17384 } 17385 17386 mutex_exit(&dtrace_provider_lock); 17387 17388 if (pvp == NULL) 17389 return (ESRCH); 17390 17391 bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t)); 17392 bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t)); 17393 17394 if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0) 17395 return (EFAULT); 17396 17397 return (0); 17398 } 17399 17400 case DTRACEIOC_EPROBE: { 17401 dtrace_eprobedesc_t epdesc; 17402 dtrace_ecb_t *ecb; 17403 dtrace_action_t *act; 17404 void *buf; 17405 size_t size; 17406 uintptr_t dest; 17407 int nrecs; 17408 
17409 if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0) 17410 return (EFAULT); 17411 17412 mutex_enter(&dtrace_lock); 17413 17414 if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) { 17415 mutex_exit(&dtrace_lock); 17416 return (EINVAL); 17417 } 17418 17419 if (ecb->dte_probe == NULL) { 17420 mutex_exit(&dtrace_lock); 17421 return (EINVAL); 17422 } 17423 17424 epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id; 17425 epdesc.dtepd_uarg = ecb->dte_uarg; 17426 epdesc.dtepd_size = ecb->dte_size; 17427 17428 nrecs = epdesc.dtepd_nrecs; 17429 epdesc.dtepd_nrecs = 0; 17430 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 17431 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) 17432 continue; 17433 17434 epdesc.dtepd_nrecs++; 17435 } 17436 17437 /* 17438 * Now that we have the size, we need to allocate a temporary 17439 * buffer in which to store the complete description. We need 17440 * the temporary buffer to be able to drop dtrace_lock() 17441 * across the copyout(), below. 
17442 */ 17443 size = sizeof (dtrace_eprobedesc_t) + 17444 (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t)); 17445 17446 buf = kmem_alloc(size, KM_SLEEP); 17447 dest = (uintptr_t)buf; 17448 17449 bcopy(&epdesc, (void *)dest, sizeof (epdesc)); 17450 dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]); 17451 17452 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 17453 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) 17454 continue; 17455 17456 if (nrecs-- == 0) 17457 break; 17458 17459 bcopy(&act->dta_rec, (void *)dest, 17460 sizeof (dtrace_recdesc_t)); 17461 dest += sizeof (dtrace_recdesc_t); 17462 } 17463 17464 mutex_exit(&dtrace_lock); 17465 17466 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { 17467 kmem_free(buf, size); 17468 return (EFAULT); 17469 } 17470 17471 kmem_free(buf, size); 17472 return (0); 17473 } 17474 17475 case DTRACEIOC_AGGDESC: { 17476 dtrace_aggdesc_t aggdesc; 17477 dtrace_action_t *act; 17478 dtrace_aggregation_t *agg; 17479 int nrecs; 17480 uint32_t offs; 17481 dtrace_recdesc_t *lrec; 17482 void *buf; 17483 size_t size; 17484 uintptr_t dest; 17485 17486 if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0) 17487 return (EFAULT); 17488 17489 mutex_enter(&dtrace_lock); 17490 17491 if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) { 17492 mutex_exit(&dtrace_lock); 17493 return (EINVAL); 17494 } 17495 17496 aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid; 17497 17498 nrecs = aggdesc.dtagd_nrecs; 17499 aggdesc.dtagd_nrecs = 0; 17500 17501 offs = agg->dtag_base; 17502 lrec = &agg->dtag_action.dta_rec; 17503 aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs; 17504 17505 for (act = agg->dtag_first; ; act = act->dta_next) { 17506 ASSERT(act->dta_intuple || 17507 DTRACEACT_ISAGG(act->dta_kind)); 17508 17509 /* 17510 * If this action has a record size of zero, it 17511 * denotes an argument to the aggregating action. 
17512 * Because the presence of this record doesn't (or 17513 * shouldn't) affect the way the data is interpreted, 17514 * we don't copy it out to save user-level the 17515 * confusion of dealing with a zero-length record. 17516 */ 17517 if (act->dta_rec.dtrd_size == 0) { 17518 ASSERT(agg->dtag_hasarg); 17519 continue; 17520 } 17521 17522 aggdesc.dtagd_nrecs++; 17523 17524 if (act == &agg->dtag_action) 17525 break; 17526 } 17527 17528 /* 17529 * Now that we have the size, we need to allocate a temporary 17530 * buffer in which to store the complete description. We need 17531 * the temporary buffer to be able to drop dtrace_lock() 17532 * across the copyout(), below. 17533 */ 17534 size = sizeof (dtrace_aggdesc_t) + 17535 (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t)); 17536 17537 buf = kmem_alloc(size, KM_SLEEP); 17538 dest = (uintptr_t)buf; 17539 17540 bcopy(&aggdesc, (void *)dest, sizeof (aggdesc)); 17541 dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]); 17542 17543 for (act = agg->dtag_first; ; act = act->dta_next) { 17544 dtrace_recdesc_t rec = act->dta_rec; 17545 17546 /* 17547 * See the comment in the above loop for why we pass 17548 * over zero-length records. 
17549 */ 17550 if (rec.dtrd_size == 0) { 17551 ASSERT(agg->dtag_hasarg); 17552 continue; 17553 } 17554 17555 if (nrecs-- == 0) 17556 break; 17557 17558 rec.dtrd_offset -= offs; 17559 bcopy(&rec, (void *)dest, sizeof (rec)); 17560 dest += sizeof (dtrace_recdesc_t); 17561 17562 if (act == &agg->dtag_action) 17563 break; 17564 } 17565 17566 mutex_exit(&dtrace_lock); 17567 17568 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { 17569 kmem_free(buf, size); 17570 return (EFAULT); 17571 } 17572 17573 kmem_free(buf, size); 17574 return (0); 17575 } 17576 17577 case DTRACEIOC_ENABLE: { 17578 dof_hdr_t *dof; 17579 dtrace_enabling_t *enab = NULL; 17580 dtrace_vstate_t *vstate; 17581 int err = 0; 17582 17583 *rv = 0; 17584 17585 /* 17586 * If a NULL argument has been passed, we take this as our 17587 * cue to reevaluate our enablings. 17588 */ 17589 if (arg == NULL) { 17590 dtrace_enabling_matchall(); 17591 17592 return (0); 17593 } 17594 17595 if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL) 17596 return (rval); 17597 17598 mutex_enter(&cpu_lock); 17599 mutex_enter(&dtrace_lock); 17600 vstate = &state->dts_vstate; 17601 17602 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { 17603 mutex_exit(&dtrace_lock); 17604 mutex_exit(&cpu_lock); 17605 dtrace_dof_destroy(dof); 17606 return (EBUSY); 17607 } 17608 17609 if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) { 17610 mutex_exit(&dtrace_lock); 17611 mutex_exit(&cpu_lock); 17612 dtrace_dof_destroy(dof); 17613 return (EINVAL); 17614 } 17615 17616 if ((rval = dtrace_dof_options(dof, state)) != 0) { 17617 dtrace_enabling_destroy(enab); 17618 mutex_exit(&dtrace_lock); 17619 mutex_exit(&cpu_lock); 17620 dtrace_dof_destroy(dof); 17621 return (rval); 17622 } 17623 17624 if ((err = dtrace_enabling_match(enab, rv)) == 0) { 17625 err = dtrace_enabling_retain(enab); 17626 } else { 17627 dtrace_enabling_destroy(enab); 17628 } 17629 17630 mutex_exit(&cpu_lock); 17631 mutex_exit(&dtrace_lock); 17632 
dtrace_dof_destroy(dof); 17633 17634 return (err); 17635 } 17636 17637 case DTRACEIOC_REPLICATE: { 17638 dtrace_repldesc_t desc; 17639 dtrace_probedesc_t *match = &desc.dtrpd_match; 17640 dtrace_probedesc_t *create = &desc.dtrpd_create; 17641 int err; 17642 17643 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 17644 return (EFAULT); 17645 17646 match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; 17647 match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; 17648 match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; 17649 match->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; 17650 17651 create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; 17652 create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; 17653 create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; 17654 create->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; 17655 17656 mutex_enter(&dtrace_lock); 17657 err = dtrace_enabling_replicate(state, match, create); 17658 mutex_exit(&dtrace_lock); 17659 17660 return (err); 17661 } 17662 17663 case DTRACEIOC_PROBEMATCH: 17664 case DTRACEIOC_PROBES: { 17665 dtrace_probe_t *probe = NULL; 17666 dtrace_probedesc_t desc; 17667 dtrace_probekey_t pkey; 17668 dtrace_id_t i; 17669 int m = 0; 17670 uint32_t priv; 17671 uid_t uid; 17672 zoneid_t zoneid; 17673 17674 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 17675 return (EFAULT); 17676 17677 desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; 17678 desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; 17679 desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; 17680 desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0'; 17681 17682 /* 17683 * Before we attempt to match this probe, we want to give 17684 * all providers the opportunity to provide it. 
17685 */ 17686 if (desc.dtpd_id == DTRACE_IDNONE) { 17687 mutex_enter(&dtrace_provider_lock); 17688 dtrace_probe_provide(&desc, NULL); 17689 mutex_exit(&dtrace_provider_lock); 17690 desc.dtpd_id++; 17691 } 17692 17693 if (cmd == DTRACEIOC_PROBEMATCH) { 17694 dtrace_probekey(&desc, &pkey); 17695 pkey.dtpk_id = DTRACE_IDNONE; 17696 } 17697 17698 dtrace_cred2priv(cr, &priv, &uid, &zoneid); 17699 17700 mutex_enter(&dtrace_lock); 17701 17702 if (cmd == DTRACEIOC_PROBEMATCH) { 17703 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { 17704 if ((probe = dtrace_probes[i - 1]) != NULL && 17705 (m = dtrace_match_probe(probe, &pkey, 17706 priv, uid, zoneid)) != 0) 17707 break; 17708 } 17709 17710 if (m < 0) { 17711 mutex_exit(&dtrace_lock); 17712 return (EINVAL); 17713 } 17714 17715 } else { 17716 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { 17717 if ((probe = dtrace_probes[i - 1]) != NULL && 17718 dtrace_match_priv(probe, priv, uid, zoneid)) 17719 break; 17720 } 17721 } 17722 17723 if (probe == NULL) { 17724 mutex_exit(&dtrace_lock); 17725 return (ESRCH); 17726 } 17727 17728 dtrace_probe_description(probe, &desc); 17729 mutex_exit(&dtrace_lock); 17730 17731 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 17732 return (EFAULT); 17733 17734 return (0); 17735 } 17736 17737 case DTRACEIOC_PROBEARG: { 17738 dtrace_argdesc_t desc; 17739 dtrace_probe_t *probe; 17740 dtrace_provider_t *prov; 17741 17742 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 17743 return (EFAULT); 17744 17745 if (desc.dtargd_id == DTRACE_IDNONE) 17746 return (EINVAL); 17747 17748 if (desc.dtargd_ndx == DTRACE_ARGNONE) 17749 return (EINVAL); 17750 17751 mutex_enter(&dtrace_provider_lock); 17752 mutex_enter(&mod_lock); 17753 mutex_enter(&dtrace_lock); 17754 17755 if (desc.dtargd_id > dtrace_nprobes) { 17756 mutex_exit(&dtrace_lock); 17757 mutex_exit(&mod_lock); 17758 mutex_exit(&dtrace_provider_lock); 17759 return (EINVAL); 17760 } 17761 17762 if ((probe = dtrace_probes[desc.dtargd_id - 1]) == 
NULL) { 17763 mutex_exit(&dtrace_lock); 17764 mutex_exit(&mod_lock); 17765 mutex_exit(&dtrace_provider_lock); 17766 return (EINVAL); 17767 } 17768 17769 mutex_exit(&dtrace_lock); 17770 17771 prov = probe->dtpr_provider; 17772 17773 if (prov->dtpv_pops.dtps_getargdesc == NULL) { 17774 /* 17775 * There isn't any typed information for this probe. 17776 * Set the argument number to DTRACE_ARGNONE. 17777 */ 17778 desc.dtargd_ndx = DTRACE_ARGNONE; 17779 } else { 17780 desc.dtargd_native[0] = '\0'; 17781 desc.dtargd_xlate[0] = '\0'; 17782 desc.dtargd_mapping = desc.dtargd_ndx; 17783 17784 prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg, 17785 probe->dtpr_id, probe->dtpr_arg, &desc); 17786 } 17787 17788 mutex_exit(&mod_lock); 17789 mutex_exit(&dtrace_provider_lock); 17790 17791 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 17792 return (EFAULT); 17793 17794 return (0); 17795 } 17796 17797 case DTRACEIOC_GO: { 17798 processorid_t cpuid; 17799 rval = dtrace_state_go(state, &cpuid); 17800 17801 if (rval != 0) 17802 return (rval); 17803 17804 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) 17805 return (EFAULT); 17806 17807 return (0); 17808 } 17809 17810 case DTRACEIOC_STOP: { 17811 processorid_t cpuid; 17812 17813 mutex_enter(&dtrace_lock); 17814 rval = dtrace_state_stop(state, &cpuid); 17815 mutex_exit(&dtrace_lock); 17816 17817 if (rval != 0) 17818 return (rval); 17819 17820 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) 17821 return (EFAULT); 17822 17823 return (0); 17824 } 17825 17826 case DTRACEIOC_DOFGET: { 17827 dof_hdr_t hdr, *dof; 17828 uint64_t len; 17829 17830 if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0) 17831 return (EFAULT); 17832 17833 mutex_enter(&dtrace_lock); 17834 dof = dtrace_dof_create(state); 17835 mutex_exit(&dtrace_lock); 17836 17837 len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz); 17838 rval = copyout(dof, (void *)arg, len); 17839 dtrace_dof_destroy(dof); 17840 17841 return (rval == 0 ? 
0 : EFAULT); 17842 } 17843 17844 case DTRACEIOC_AGGSNAP: 17845 case DTRACEIOC_BUFSNAP: { 17846 dtrace_bufdesc_t desc; 17847 caddr_t cached; 17848 dtrace_buffer_t *buf; 17849 17850 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 17851 return (EFAULT); 17852 17853 if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU) 17854 return (EINVAL); 17855 17856 mutex_enter(&dtrace_lock); 17857 17858 if (cmd == DTRACEIOC_BUFSNAP) { 17859 buf = &state->dts_buffer[desc.dtbd_cpu]; 17860 } else { 17861 buf = &state->dts_aggbuffer[desc.dtbd_cpu]; 17862 } 17863 17864 if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) { 17865 size_t sz = buf->dtb_offset; 17866 17867 if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) { 17868 mutex_exit(&dtrace_lock); 17869 return (EBUSY); 17870 } 17871 17872 /* 17873 * If this buffer has already been consumed, we're 17874 * going to indicate that there's nothing left here 17875 * to consume. 17876 */ 17877 if (buf->dtb_flags & DTRACEBUF_CONSUMED) { 17878 mutex_exit(&dtrace_lock); 17879 17880 desc.dtbd_size = 0; 17881 desc.dtbd_drops = 0; 17882 desc.dtbd_errors = 0; 17883 desc.dtbd_oldest = 0; 17884 sz = sizeof (desc); 17885 17886 if (copyout(&desc, (void *)arg, sz) != 0) 17887 return (EFAULT); 17888 17889 return (0); 17890 } 17891 17892 /* 17893 * If this is a ring buffer that has wrapped, we want 17894 * to copy the whole thing out. 
17895 */ 17896 if (buf->dtb_flags & DTRACEBUF_WRAPPED) { 17897 dtrace_buffer_polish(buf); 17898 sz = buf->dtb_size; 17899 } 17900 17901 if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) { 17902 mutex_exit(&dtrace_lock); 17903 return (EFAULT); 17904 } 17905 17906 desc.dtbd_size = sz; 17907 desc.dtbd_drops = buf->dtb_drops; 17908 desc.dtbd_errors = buf->dtb_errors; 17909 desc.dtbd_oldest = buf->dtb_xamot_offset; 17910 desc.dtbd_timestamp = dtrace_gethrtime(); 17911 17912 mutex_exit(&dtrace_lock); 17913 17914 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 17915 return (EFAULT); 17916 17917 buf->dtb_flags |= DTRACEBUF_CONSUMED; 17918 17919 return (0); 17920 } 17921 17922 if (buf->dtb_tomax == NULL) { 17923 ASSERT(buf->dtb_xamot == NULL); 17924 mutex_exit(&dtrace_lock); 17925 return (ENOENT); 17926 } 17927 17928 cached = buf->dtb_tomax; 17929 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); 17930 17931 dtrace_xcall(desc.dtbd_cpu, 17932 (dtrace_xcall_t)dtrace_buffer_switch, buf); 17933 17934 state->dts_errors += buf->dtb_xamot_errors; 17935 17936 /* 17937 * If the buffers did not actually switch, then the cross call 17938 * did not take place -- presumably because the given CPU is 17939 * not in the ready set. If this is the case, we'll return 17940 * ENOENT. 17941 */ 17942 if (buf->dtb_tomax == cached) { 17943 ASSERT(buf->dtb_xamot != cached); 17944 mutex_exit(&dtrace_lock); 17945 return (ENOENT); 17946 } 17947 17948 ASSERT(cached == buf->dtb_xamot); 17949 17950 /* 17951 * We have our snapshot; now copy it out. 
17952 */ 17953 if (copyout(buf->dtb_xamot, desc.dtbd_data, 17954 buf->dtb_xamot_offset) != 0) { 17955 mutex_exit(&dtrace_lock); 17956 return (EFAULT); 17957 } 17958 17959 desc.dtbd_size = buf->dtb_xamot_offset; 17960 desc.dtbd_drops = buf->dtb_xamot_drops; 17961 desc.dtbd_errors = buf->dtb_xamot_errors; 17962 desc.dtbd_oldest = 0; 17963 desc.dtbd_timestamp = buf->dtb_switched; 17964 17965 mutex_exit(&dtrace_lock); 17966 17967 /* 17968 * Finally, copy out the buffer description. 17969 */ 17970 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 17971 return (EFAULT); 17972 17973 return (0); 17974 } 17975 17976 case DTRACEIOC_CONF: { 17977 dtrace_conf_t conf; 17978 17979 bzero(&conf, sizeof (conf)); 17980 conf.dtc_difversion = DIF_VERSION; 17981 conf.dtc_difintregs = DIF_DIR_NREGS; 17982 conf.dtc_diftupregs = DIF_DTR_NREGS; 17983 conf.dtc_ctfmodel = CTF_MODEL_NATIVE; 17984 17985 if (copyout(&conf, (void *)arg, sizeof (conf)) != 0) 17986 return (EFAULT); 17987 17988 return (0); 17989 } 17990 17991 case DTRACEIOC_STATUS: { 17992 dtrace_status_t stat; 17993 dtrace_dstate_t *dstate; 17994 int i, j; 17995 uint64_t nerrs; 17996 17997 /* 17998 * See the comment in dtrace_state_deadman() for the reason 17999 * for setting dts_laststatus to INT64_MAX before setting 18000 * it to the correct value. 
18001 */ 18002 state->dts_laststatus = INT64_MAX; 18003 dtrace_membar_producer(); 18004 state->dts_laststatus = dtrace_gethrtime(); 18005 18006 bzero(&stat, sizeof (stat)); 18007 18008 mutex_enter(&dtrace_lock); 18009 18010 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) { 18011 mutex_exit(&dtrace_lock); 18012 return (ENOENT); 18013 } 18014 18015 if (state->dts_activity == DTRACE_ACTIVITY_DRAINING) 18016 stat.dtst_exiting = 1; 18017 18018 nerrs = state->dts_errors; 18019 dstate = &state->dts_vstate.dtvs_dynvars; 18020 18021 for (i = 0; i < NCPU; i++) { 18022 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i]; 18023 18024 stat.dtst_dyndrops += dcpu->dtdsc_drops; 18025 stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops; 18026 stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops; 18027 18028 if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL) 18029 stat.dtst_filled++; 18030 18031 nerrs += state->dts_buffer[i].dtb_errors; 18032 18033 for (j = 0; j < state->dts_nspeculations; j++) { 18034 dtrace_speculation_t *spec; 18035 dtrace_buffer_t *buf; 18036 18037 spec = &state->dts_speculations[j]; 18038 buf = &spec->dtsp_buffer[i]; 18039 stat.dtst_specdrops += buf->dtb_xamot_drops; 18040 } 18041 } 18042 18043 stat.dtst_specdrops_busy = state->dts_speculations_busy; 18044 stat.dtst_specdrops_unavail = state->dts_speculations_unavail; 18045 stat.dtst_stkstroverflows = state->dts_stkstroverflows; 18046 stat.dtst_dblerrors = state->dts_dblerrors; 18047 stat.dtst_killed = 18048 (state->dts_activity == DTRACE_ACTIVITY_KILLED); 18049 stat.dtst_errors = nerrs; 18050 18051 mutex_exit(&dtrace_lock); 18052 18053 if (copyout(&stat, (void *)arg, sizeof (stat)) != 0) 18054 return (EFAULT); 18055 18056 return (0); 18057 } 18058 18059 case DTRACEIOC_FORMAT: { 18060 dtrace_fmtdesc_t fmt; 18061 char *str; 18062 int len; 18063 18064 if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0) 18065 return (EFAULT); 18066 18067 mutex_enter(&dtrace_lock); 18068 18069 if (fmt.dtfd_format == 0 || 
18070 fmt.dtfd_format > state->dts_nformats) { 18071 mutex_exit(&dtrace_lock); 18072 return (EINVAL); 18073 } 18074 18075 /* 18076 * Format strings are allocated contiguously and they are 18077 * never freed; if a format index is less than the number 18078 * of formats, we can assert that the format map is non-NULL 18079 * and that the format for the specified index is non-NULL. 18080 */ 18081 ASSERT(state->dts_formats != NULL); 18082 str = state->dts_formats[fmt.dtfd_format - 1]; 18083 ASSERT(str != NULL); 18084 18085 len = strlen(str) + 1; 18086 18087 if (len > fmt.dtfd_length) { 18088 fmt.dtfd_length = len; 18089 18090 if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) { 18091 mutex_exit(&dtrace_lock); 18092 return (EINVAL); 18093 } 18094 } else { 18095 if (copyout(str, fmt.dtfd_string, len) != 0) { 18096 mutex_exit(&dtrace_lock); 18097 return (EINVAL); 18098 } 18099 } 18100 18101 mutex_exit(&dtrace_lock); 18102 return (0); 18103 } 18104 18105 default: 18106 break; 18107 } 18108 18109 return (ENOTTY); 18110 } 18111 18112 /*ARGSUSED*/ 18113 static int 18114 dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 18115 { 18116 dtrace_state_t *state; 18117 18118 switch (cmd) { 18119 case DDI_DETACH: 18120 break; 18121 18122 case DDI_SUSPEND: 18123 return (DDI_SUCCESS); 18124 18125 default: 18126 return (DDI_FAILURE); 18127 } 18128 18129 mutex_enter(&cpu_lock); 18130 mutex_enter(&dtrace_provider_lock); 18131 mutex_enter(&dtrace_lock); 18132 18133 ASSERT(dtrace_opens == 0); 18134 18135 if (dtrace_helpers > 0) { 18136 mutex_exit(&dtrace_provider_lock); 18137 mutex_exit(&dtrace_lock); 18138 mutex_exit(&cpu_lock); 18139 return (DDI_FAILURE); 18140 } 18141 18142 if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) { 18143 mutex_exit(&dtrace_provider_lock); 18144 mutex_exit(&dtrace_lock); 18145 mutex_exit(&cpu_lock); 18146 return (DDI_FAILURE); 18147 } 18148 18149 dtrace_provider = NULL; 18150 18151 if ((state = dtrace_anon_grab()) != NULL) { 18152 /* 18153 * 
If there were ECBs on this state, the provider should 18154 * have not been allowed to detach; assert that there is 18155 * none. 18156 */ 18157 ASSERT(state->dts_necbs == 0); 18158 dtrace_state_destroy(state); 18159 18160 /* 18161 * If we're being detached with anonymous state, we need to 18162 * indicate to the kernel debugger that DTrace is now inactive. 18163 */ 18164 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); 18165 } 18166 18167 bzero(&dtrace_anon, sizeof (dtrace_anon_t)); 18168 unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); 18169 dtrace_cpu_init = NULL; 18170 dtrace_helpers_cleanup = NULL; 18171 dtrace_helpers_fork = NULL; 18172 dtrace_cpustart_init = NULL; 18173 dtrace_cpustart_fini = NULL; 18174 dtrace_debugger_init = NULL; 18175 dtrace_debugger_fini = NULL; 18176 dtrace_modload = NULL; 18177 dtrace_modunload = NULL; 18178 18179 ASSERT(dtrace_getf == 0); 18180 ASSERT(dtrace_closef == NULL); 18181 18182 mutex_exit(&cpu_lock); 18183 18184 kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *)); 18185 dtrace_probes = NULL; 18186 dtrace_nprobes = 0; 18187 18188 dtrace_hash_destroy(dtrace_bymod); 18189 dtrace_hash_destroy(dtrace_byfunc); 18190 dtrace_hash_destroy(dtrace_byname); 18191 dtrace_bymod = NULL; 18192 dtrace_byfunc = NULL; 18193 dtrace_byname = NULL; 18194 18195 kmem_cache_destroy(dtrace_state_cache); 18196 vmem_destroy(dtrace_minor); 18197 vmem_destroy(dtrace_arena); 18198 18199 if (dtrace_toxrange != NULL) { 18200 kmem_free(dtrace_toxrange, 18201 dtrace_toxranges_max * sizeof (dtrace_toxrange_t)); 18202 dtrace_toxrange = NULL; 18203 dtrace_toxranges = 0; 18204 dtrace_toxranges_max = 0; 18205 } 18206 18207 ddi_remove_minor_node(dtrace_devi, NULL); 18208 dtrace_devi = NULL; 18209 18210 ddi_soft_state_fini(&dtrace_softstate); 18211 18212 ASSERT(dtrace_vtime_references == 0); 18213 ASSERT(dtrace_opens == 0); 18214 ASSERT(dtrace_retained == NULL); 18215 18216 mutex_exit(&dtrace_lock); 18217 
mutex_exit(&dtrace_provider_lock); 18218 18219 /* 18220 * We don't destroy the task queue until after we have dropped our 18221 * locks (taskq_destroy() may block on running tasks). To prevent 18222 * attempting to do work after we have effectively detached but before 18223 * the task queue has been destroyed, all tasks dispatched via the 18224 * task queue must check that DTrace is still attached before 18225 * performing any operation. 18226 */ 18227 taskq_destroy(dtrace_taskq); 18228 dtrace_taskq = NULL; 18229 18230 return (DDI_SUCCESS); 18231 } 18232 #endif 18233 18234 #ifdef illumos 18235 /*ARGSUSED*/ 18236 static int 18237 dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 18238 { 18239 int error; 18240 18241 switch (infocmd) { 18242 case DDI_INFO_DEVT2DEVINFO: 18243 *result = (void *)dtrace_devi; 18244 error = DDI_SUCCESS; 18245 break; 18246 case DDI_INFO_DEVT2INSTANCE: 18247 *result = (void *)0; 18248 error = DDI_SUCCESS; 18249 break; 18250 default: 18251 error = DDI_FAILURE; 18252 } 18253 return (error); 18254 } 18255 #endif 18256 18257 #ifdef illumos 18258 static struct cb_ops dtrace_cb_ops = { 18259 dtrace_open, /* open */ 18260 dtrace_close, /* close */ 18261 nulldev, /* strategy */ 18262 nulldev, /* print */ 18263 nodev, /* dump */ 18264 nodev, /* read */ 18265 nodev, /* write */ 18266 dtrace_ioctl, /* ioctl */ 18267 nodev, /* devmap */ 18268 nodev, /* mmap */ 18269 nodev, /* segmap */ 18270 nochpoll, /* poll */ 18271 ddi_prop_op, /* cb_prop_op */ 18272 0, /* streamtab */ 18273 D_NEW | D_MP /* Driver compatibility flag */ 18274 }; 18275 18276 static struct dev_ops dtrace_ops = { 18277 DEVO_REV, /* devo_rev */ 18278 0, /* refcnt */ 18279 dtrace_info, /* get_dev_info */ 18280 nulldev, /* identify */ 18281 nulldev, /* probe */ 18282 dtrace_attach, /* attach */ 18283 dtrace_detach, /* detach */ 18284 nodev, /* reset */ 18285 &dtrace_cb_ops, /* driver operations */ 18286 NULL, /* bus operations */ 18287 nodev /* dev power */ 
18288 }; 18289 18290 static struct modldrv modldrv = { 18291 &mod_driverops, /* module type (this is a pseudo driver) */ 18292 "Dynamic Tracing", /* name of module */ 18293 &dtrace_ops, /* driver ops */ 18294 }; 18295 18296 static struct modlinkage modlinkage = { 18297 MODREV_1, 18298 (void *)&modldrv, 18299 NULL 18300 }; 18301 18302 int 18303 _init(void) 18304 { 18305 return (mod_install(&modlinkage)); 18306 } 18307 18308 int 18309 _info(struct modinfo *modinfop) 18310 { 18311 return (mod_info(&modlinkage, modinfop)); 18312 } 18313 18314 int 18315 _fini(void) 18316 { 18317 return (mod_remove(&modlinkage)); 18318 } 18319 #else 18320 18321 static d_ioctl_t dtrace_ioctl; 18322 static d_ioctl_t dtrace_ioctl_helper; 18323 static void dtrace_load(void *); 18324 static int dtrace_unload(void); 18325 static struct cdev *dtrace_dev; 18326 static struct cdev *helper_dev; 18327 18328 void dtrace_invop_init(void); 18329 void dtrace_invop_uninit(void); 18330 18331 static struct cdevsw dtrace_cdevsw = { 18332 .d_version = D_VERSION, 18333 .d_ioctl = dtrace_ioctl, 18334 .d_open = dtrace_open, 18335 .d_name = "dtrace", 18336 }; 18337 18338 static struct cdevsw helper_cdevsw = { 18339 .d_version = D_VERSION, 18340 .d_ioctl = dtrace_ioctl_helper, 18341 .d_name = "helper", 18342 }; 18343 18344 #include <dtrace_anon.c> 18345 #include <dtrace_ioctl.c> 18346 #include <dtrace_load.c> 18347 #include <dtrace_modevent.c> 18348 #include <dtrace_sysctl.c> 18349 #include <dtrace_unload.c> 18350 #include <dtrace_vtime.c> 18351 #include <dtrace_hacks.c> 18352 #include <dtrace_isa.c> 18353 18354 SYSINIT(dtrace_load, SI_SUB_DTRACE, SI_ORDER_FIRST, dtrace_load, NULL); 18355 SYSUNINIT(dtrace_unload, SI_SUB_DTRACE, SI_ORDER_FIRST, dtrace_unload, NULL); 18356 SYSINIT(dtrace_anon_init, SI_SUB_DTRACE_ANON, SI_ORDER_FIRST, dtrace_anon_init, NULL); 18357 18358 DEV_MODULE(dtrace, dtrace_modevent, NULL); 18359 MODULE_VERSION(dtrace, 1); 18360 MODULE_DEPEND(dtrace, opensolaris, 1, 1, 1); 18361 #endif 18362